Skip to content

Commit 13edca9

Browse files
committed
Merge: x86/hyperv: Fix kdump on Azure CVMs
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-10/-/merge_requests/1446 JIRA: https://issues.redhat.com/browse/RHEL-75576 Fix kdump on Azure CVMs. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Approved-by: Emanuele Giuseppe Esposito <eesposit@redhat.com> Approved-by: Maxim Levitsky <mlevitsk@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Scott Weaver <scweaver@redhat.com>
2 parents 7a43980 + fc3eb8a commit 13edca9

File tree

1 file changed

+210
-1
lines changed

1 file changed

+210
-1
lines changed

arch/x86/hyperv/ivm.c

Lines changed: 210 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -460,6 +460,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value)
460460
hv_ghcb_msr_read(msr, value);
461461
}
462462

463+
/*
464+
* Keep track of the PFN regions which were shared with the host. The access
465+
* must be revoked upon kexec/kdump (see hv_vtom_kexec_finish()).
466+
*/
467+
struct hv_enc_pfn_region {
468+
struct list_head list;
469+
u64 pfn;
470+
int count;
471+
};
472+
473+
static LIST_HEAD(hv_list_enc);
474+
static DEFINE_RAW_SPINLOCK(hv_list_enc_lock);
475+
476+
static int hv_list_enc_add(const u64 *pfn_list, int count)
477+
{
478+
struct hv_enc_pfn_region *ent;
479+
unsigned long flags;
480+
u64 pfn;
481+
int i;
482+
483+
for (i = 0; i < count; i++) {
484+
pfn = pfn_list[i];
485+
486+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
487+
/* Check if the PFN already exists in some region first */
488+
list_for_each_entry(ent, &hv_list_enc, list) {
489+
if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn))
490+
/* Nothing to do - pfn is already in the list */
491+
goto unlock_done;
492+
}
493+
494+
/*
495+
* Check if the PFN is adjacent to an existing region. Growing
496+
* a region can make it adjacent to another one but merging is
497+
* not (yet) implemented for simplicity. A PFN cannot be added
498+
* to two regions to keep the logic in hv_list_enc_remove()
499+
* correct.
500+
*/
501+
list_for_each_entry(ent, &hv_list_enc, list) {
502+
if (ent->pfn + ent->count == pfn) {
503+
/* Grow existing region up */
504+
ent->count++;
505+
goto unlock_done;
506+
} else if (pfn + 1 == ent->pfn) {
507+
/* Grow existing region down */
508+
ent->pfn--;
509+
ent->count++;
510+
goto unlock_done;
511+
}
512+
}
513+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
514+
515+
/* No adjacent region found -- create a new one */
516+
ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
517+
if (!ent)
518+
return -ENOMEM;
519+
520+
ent->pfn = pfn;
521+
ent->count = 1;
522+
523+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
524+
list_add(&ent->list, &hv_list_enc);
525+
526+
unlock_done:
527+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
528+
}
529+
530+
return 0;
531+
}
532+
533+
static int hv_list_enc_remove(const u64 *pfn_list, int count)
534+
{
535+
struct hv_enc_pfn_region *ent, *t;
536+
struct hv_enc_pfn_region new_region;
537+
unsigned long flags;
538+
u64 pfn;
539+
int i;
540+
541+
for (i = 0; i < count; i++) {
542+
pfn = pfn_list[i];
543+
544+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
545+
list_for_each_entry_safe(ent, t, &hv_list_enc, list) {
546+
if (pfn == ent->pfn + ent->count - 1) {
547+
/* Removing tail pfn */
548+
ent->count--;
549+
if (!ent->count) {
550+
list_del(&ent->list);
551+
kfree(ent);
552+
}
553+
goto unlock_done;
554+
} else if (pfn == ent->pfn) {
555+
/* Removing head pfn */
556+
ent->count--;
557+
ent->pfn++;
558+
if (!ent->count) {
559+
list_del(&ent->list);
560+
kfree(ent);
561+
}
562+
goto unlock_done;
563+
} else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) {
564+
/*
565+
* Removing a pfn in the middle. Cut off the tail
566+
* of the existing region and create a template for
567+
* the new one.
568+
*/
569+
new_region.pfn = pfn + 1;
570+
new_region.count = ent->count - (pfn - ent->pfn + 1);
571+
ent->count = pfn - ent->pfn;
572+
goto unlock_split;
573+
}
574+
575+
}
576+
unlock_done:
577+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
578+
continue;
579+
580+
unlock_split:
581+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
582+
583+
ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
584+
if (!ent)
585+
return -ENOMEM;
586+
587+
ent->pfn = new_region.pfn;
588+
ent->count = new_region.count;
589+
590+
raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
591+
list_add(&ent->list, &hv_list_enc);
592+
raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
593+
}
594+
595+
return 0;
596+
}
597+
598+
/* Stop new private<->shared conversions */
599+
static void hv_vtom_kexec_begin(void)
600+
{
601+
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
602+
return;
603+
604+
/*
605+
* Crash kernel reaches here with interrupts disabled: can't wait for
606+
* conversions to finish.
607+
*
608+
* If race happened, just report and proceed.
609+
*/
610+
if (!set_memory_enc_stop_conversion())
611+
pr_warn("Failed to stop shared<->private conversions\n");
612+
}
613+
614+
static void hv_vtom_kexec_finish(void)
615+
{
616+
struct hv_gpa_range_for_visibility *input;
617+
struct hv_enc_pfn_region *ent;
618+
unsigned long flags;
619+
u64 hv_status;
620+
int cur, i;
621+
622+
local_irq_save(flags);
623+
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
624+
625+
if (unlikely(!input))
626+
goto out;
627+
628+
list_for_each_entry(ent, &hv_list_enc, list) {
629+
for (i = 0, cur = 0; i < ent->count; i++) {
630+
input->gpa_page_list[cur] = ent->pfn + i;
631+
cur++;
632+
633+
if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) {
634+
input->partition_id = HV_PARTITION_ID_SELF;
635+
input->host_visibility = VMBUS_PAGE_NOT_VISIBLE;
636+
input->reserved0 = 0;
637+
input->reserved1 = 0;
638+
hv_status = hv_do_rep_hypercall(
639+
HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY,
640+
cur, 0, input, NULL);
641+
WARN_ON_ONCE(!hv_result_success(hv_status));
642+
cur = 0;
643+
}
644+
}
645+
646+
}
647+
648+
out:
649+
local_irq_restore(flags);
650+
}
651+
463652
/*
464653
* hv_mark_gpa_visibility - Set pages visible to host via hvcall.
465654
*
@@ -473,6 +662,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
473662
struct hv_gpa_range_for_visibility *input;
474663
u64 hv_status;
475664
unsigned long flags;
665+
int ret;
476666

477667
/* no-op if partition isolation is not enabled */
478668
if (!hv_is_isolation_supported())
@@ -484,6 +674,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
484674
return -EINVAL;
485675
}
486676

677+
if (visibility == VMBUS_PAGE_NOT_VISIBLE)
678+
ret = hv_list_enc_remove(pfn, count);
679+
else
680+
ret = hv_list_enc_add(pfn, count);
681+
if (ret)
682+
return ret;
683+
487684
local_irq_save(flags);
488685
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
489686

@@ -504,8 +701,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
504701

505702
if (hv_result_success(hv_status))
506703
return 0;
704+
705+
if (visibility == VMBUS_PAGE_NOT_VISIBLE)
706+
ret = hv_list_enc_add(pfn, count);
507707
else
508-
return -EFAULT;
708+
ret = hv_list_enc_remove(pfn, count);
709+
/*
710+
* There's no good way to recover from -ENOMEM here, the accounting is
711+
* wrong either way.
712+
*/
713+
WARN_ON_ONCE(ret);
714+
715+
return -EFAULT;
509716
}
510717

511718
/*
@@ -667,6 +874,8 @@ void __init hv_vtom_init(void)
667874
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
668875
x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
669876
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
877+
x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin;
878+
x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish;
670879

671880
/* Set WB as the default cache mode. */
672881
mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);

0 commit comments

Comments
 (0)