Diffstat (limited to 'arch/x86/hyperv')
-rw-r--r--  arch/x86/hyperv/irqdomain.c | 111
-rw-r--r--  arch/x86/hyperv/ivm.c       | 211
2 files changed, 286 insertions(+), 36 deletions(-)
diff --git a/arch/x86/hyperv/irqdomain.c b/arch/x86/hyperv/irqdomain.c
index 090f5ac9f492..c3ba12b1bc07 100644
--- a/arch/x86/hyperv/irqdomain.c
+++ b/arch/x86/hyperv/irqdomain.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/irq.h>
#include <linux/export.h>
+#include <linux/irqchip/irq-msi-lib.h>
#include <asm/mshyperv.h>
static int hv_map_interrupt(union hv_device_id device_id, bool level,
@@ -289,59 +290,99 @@ static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
(void)hv_unmap_msi_interrupt(dev, &old_entry);
}
-static void hv_msi_free_irq(struct irq_domain *domain,
- struct msi_domain_info *info, unsigned int virq)
-{
- struct irq_data *irqd = irq_get_irq_data(virq);
- struct msi_desc *desc;
-
- if (!irqd)
- return;
-
- desc = irq_data_get_msi_desc(irqd);
- if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
- return;
-
- hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
-}
-
/*
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
* which implement the MSI or MSI-X Capability Structure.
*/
static struct irq_chip hv_pci_msi_controller = {
.name = "HV-PCI-MSI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
.irq_ack = irq_chip_ack_parent,
- .irq_retrigger = irq_chip_retrigger_hierarchy,
.irq_compose_msi_msg = hv_irq_compose_msi_msg,
- .irq_set_affinity = msi_domain_set_affinity,
- .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED,
+ .irq_set_affinity = irq_chip_set_affinity_parent,
};
-static struct msi_domain_ops pci_msi_domain_ops = {
- .msi_free = hv_msi_free_irq,
- .msi_prepare = pci_msi_prepare,
+static bool hv_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
+ struct irq_domain *real_parent, struct msi_domain_info *info)
+{
+ struct irq_chip *chip = info->chip;
+
+ if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
+ return false;
+
+ chip->flags |= IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MOVE_DEFERRED;
+
+ info->ops->msi_prepare = pci_msi_prepare;
+
+ return true;
+}
+
+#define HV_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX)
+#define HV_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS)
+
+static struct msi_parent_ops hv_msi_parent_ops = {
+ .supported_flags = HV_MSI_FLAGS_SUPPORTED,
+ .required_flags = HV_MSI_FLAGS_REQUIRED,
+ .bus_select_token = DOMAIN_BUS_NEXUS,
+ .bus_select_mask = MATCH_PCI_MSI,
+ .chip_flags = MSI_CHIP_FLAG_SET_ACK,
+ .prefix = "HV-",
+ .init_dev_msi_info = hv_init_dev_msi_info,
};
-static struct msi_domain_info hv_pci_msi_domain_info = {
- .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
- MSI_FLAG_PCI_MSIX,
- .ops = &pci_msi_domain_ops,
- .chip = &hv_pci_msi_controller,
- .handler = handle_edge_irq,
- .handler_name = "edge",
+static int hv_msi_domain_alloc(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs,
+ void *arg)
+{
+ /*
+ * TODO: The allocation bits of hv_irq_compose_msi_msg(), i.e. everything except
+ * entry_to_msi_msg() should be in here.
+ */
+
+ int ret;
+
+ ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
+ if (ret)
+ return ret;
+
+ for (int i = 0; i < nr_irqs; ++i) {
+ irq_domain_set_info(d, virq + i, 0, &hv_pci_msi_controller, NULL,
+ handle_edge_irq, NULL, "edge");
+ }
+ return 0;
+}
+
+static void hv_msi_domain_free(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs)
+{
+ for (int i = 0; i < nr_irqs; ++i) {
+ struct irq_data *irqd = irq_domain_get_irq_data(d, virq + i);
+ struct msi_desc *desc;
+
+ desc = irq_data_get_msi_desc(irqd);
+ if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
+ continue;
+
+ hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
+ }
+ irq_domain_free_irqs_top(d, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops hv_msi_domain_ops = {
+ .select = msi_lib_irq_domain_select,
+ .alloc = hv_msi_domain_alloc,
+ .free = hv_msi_domain_free,
};
struct irq_domain * __init hv_create_pci_msi_domain(void)
{
struct irq_domain *d = NULL;
- struct fwnode_handle *fn;
- fn = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
- if (fn)
- d = pci_msi_create_irq_domain(fn, &hv_pci_msi_domain_info, x86_vector_domain);
+ struct irq_domain_info info = {
+ .fwnode = irq_domain_alloc_named_fwnode("HV-PCI-MSI"),
+ .ops = &hv_msi_domain_ops,
+ .parent = x86_vector_domain,
+ };
+
+ if (info.fwnode)
+ d = msi_create_parent_irq_domain(&info, &hv_msi_parent_ops);
/* No point in going further if we can't get an irq domain */
BUG_ON(!d);
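
The new allocation path delegates to the parent (x86 vector) domain first and then stamps each Linux interrupt with the Hyper-V chip and the edge handler. Below is a minimal userspace sketch of that two-step flow; the types and helpers are illustrative stand-ins for the kernel's irq_domain machinery, not the real API.

#include <stdio.h>

#define NR 3

struct virq_info {
	const char *chip;
	const char *handler;
};

static struct virq_info table[16];

/* stand-in for irq_domain_alloc_irqs_parent(): the vector allocation */
static int parent_alloc(unsigned int virq, unsigned int nr)
{
	printf("parent allocated virqs %u..%u\n", virq, virq + nr - 1);
	return 0;
}

static int leaf_alloc(unsigned int virq, unsigned int nr)
{
	int ret = parent_alloc(virq, nr);

	if (ret)
		return ret;
	/* stand-in for the irq_domain_set_info() loop in hv_msi_domain_alloc() */
	for (unsigned int i = 0; i < nr; i++) {
		table[virq + i].chip = "HV-PCI-MSI";
		table[virq + i].handler = "edge";
	}
	return 0;
}

int main(void)
{
	leaf_alloc(4, NR);
	for (unsigned int i = 4; i < 4 + NR; i++)
		printf("virq %u: chip=%s handler=%s\n",
		       i, table[i].chip, table[i].handler);
	return 0;
}
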
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index ade6c665c97e..a4615b889f3e 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -463,6 +463,195 @@ void hv_ivm_msr_read(u64 msr, u64 *value)
}
/*
+ * Keep track of the PFN regions which were shared with the host. The access
+ * must be revoked upon kexec/kdump (see hv_vtom_kexec_finish()).
+ */
+struct hv_enc_pfn_region {
+ struct list_head list;
+ u64 pfn;
+ int count;
+};
+
+static LIST_HEAD(hv_list_enc);
+static DEFINE_RAW_SPINLOCK(hv_list_enc_lock);
+
+static int hv_list_enc_add(const u64 *pfn_list, int count)
+{
+ struct hv_enc_pfn_region *ent;
+ unsigned long flags;
+ u64 pfn;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ pfn = pfn_list[i];
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ /* Check if the PFN already exists in some region first */
+ list_for_each_entry(ent, &hv_list_enc, list) {
+ if ((ent->pfn <= pfn) && (ent->pfn + ent->count - 1 >= pfn))
+ /* Nothing to do - pfn is already in the list */
+ goto unlock_done;
+ }
+
+ /*
+ * Check if the PFN is adjacent to an existing region. Growing
+ * a region can make it adjacent to another one but merging is
+ * not (yet) implemented for simplicity. A PFN cannot be added
+ * to two regions to keep the logic in hv_list_enc_remove()
+ * correct.
+ */
+ list_for_each_entry(ent, &hv_list_enc, list) {
+ if (ent->pfn + ent->count == pfn) {
+ /* Grow existing region up */
+ ent->count++;
+ goto unlock_done;
+ } else if (pfn + 1 == ent->pfn) {
+ /* Grow existing region down */
+ ent->pfn--;
+ ent->count++;
+ goto unlock_done;
+ }
+ }
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+
+ /* No adjacent region found -- create a new one */
+ ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->pfn = pfn;
+ ent->count = 1;
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ list_add(&ent->list, &hv_list_enc);
+
+unlock_done:
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+ }
+
+ return 0;
+}
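
Since hv_list_enc_add() only grows or creates regions and never merges them, the resulting list is easy to model. A small userspace sketch of the same bookkeeping follows, with illustrative names and without the kernel's locking or GFP_KERNEL allocation semantics; note it keeps the kernel's two-pass order (containment check first, adjacency second) so a PFN never ends up in two regions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct region {
	struct region *next;
	uint64_t pfn;	/* first PFN in the region */
	int count;	/* number of consecutive PFNs */
};

static struct region *head;

static int region_add(uint64_t pfn)
{
	struct region *r;

	/* pass 1: is the PFN already covered by some region? */
	for (r = head; r; r = r->next)
		if (pfn >= r->pfn && pfn <= r->pfn + r->count - 1)
			return 0;

	/* pass 2: is it adjacent to a region it can grow? */
	for (r = head; r; r = r->next) {
		if (r->pfn + r->count == pfn) {		/* grow up */
			r->count++;
			return 0;
		}
		if (pfn + 1 == r->pfn) {		/* grow down */
			r->pfn--;
			r->count++;
			return 0;
		}
	}

	/* no adjacent region: start a new one */
	r = calloc(1, sizeof(*r));
	if (!r)
		return -1;
	r->pfn = pfn;
	r->count = 1;
	r->next = head;
	head = r;
	return 0;
}

int main(void)
{
	/* 10, 11, 12 coalesce into one region; 20 starts another */
	const uint64_t pfns[] = { 10, 11, 12, 20 };
	int regions = 0;

	for (unsigned int i = 0; i < sizeof(pfns) / sizeof(pfns[0]); i++)
		assert(region_add(pfns[i]) == 0);

	for (struct region *r = head; r; r = r->next, regions++)
		printf("region: pfn %llu, count %d\n",
		       (unsigned long long)r->pfn, r->count);
	assert(regions == 2);
	return 0;
}
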
+
+static int hv_list_enc_remove(const u64 *pfn_list, int count)
+{
+ struct hv_enc_pfn_region *ent, *t;
+ struct hv_enc_pfn_region new_region;
+ unsigned long flags;
+ u64 pfn;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ pfn = pfn_list[i];
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ list_for_each_entry_safe(ent, t, &hv_list_enc, list) {
+ if (pfn == ent->pfn + ent->count - 1) {
+ /* Removing tail pfn */
+ ent->count--;
+ if (!ent->count) {
+ list_del(&ent->list);
+ kfree(ent);
+ }
+ goto unlock_done;
+ } else if (pfn == ent->pfn) {
+ /* Removing head pfn */
+ ent->count--;
+ ent->pfn++;
+ if (!ent->count) {
+ list_del(&ent->list);
+ kfree(ent);
+ }
+ goto unlock_done;
+ } else if (pfn > ent->pfn && pfn < ent->pfn + ent->count - 1) {
+ /*
+ * Removing a pfn in the middle. Cut off the tail
+ * of the existing region and create a template for
+ * the new one.
+ */
+ new_region.pfn = pfn + 1;
+ new_region.count = ent->count - (pfn - ent->pfn + 1);
+ ent->count = pfn - ent->pfn;
+ goto unlock_split;
+ }
+
+ }
+unlock_done:
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+ continue;
+
+unlock_split:
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+
+ ent = kzalloc(sizeof(struct hv_enc_pfn_region), GFP_KERNEL);
+ if (!ent)
+ return -ENOMEM;
+
+ ent->pfn = new_region.pfn;
+ ent->count = new_region.count;
+
+ raw_spin_lock_irqsave(&hv_list_enc_lock, flags);
+ list_add(&ent->list, &hv_list_enc);
+ raw_spin_unlock_irqrestore(&hv_list_enc_lock, flags);
+ }
+
+ return 0;
+}
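
The middle-removal branch is the only subtle arithmetic: cutting PFN p out of [pfn, pfn + count - 1] must leave [pfn, p - 1] and [p + 1, pfn + count - 1]. A standalone spot check of exactly those expressions (the struct here is a local sketch, not the kernel's):

#include <assert.h>
#include <stdint.h>

struct region { uint64_t pfn; int count; };

int main(void)
{
	struct region ent = { .pfn = 100, .count = 10 };	/* 100..109 */
	struct region new_region;
	uint64_t pfn = 104;					/* middle PFN */

	assert(pfn > ent.pfn && pfn < ent.pfn + ent.count - 1);
	new_region.pfn = pfn + 1;				  /* tail: 105.. */
	new_region.count = ent.count - (int)(pfn - ent.pfn + 1);  /* ..109 -> 5 */
	ent.count = (int)(pfn - ent.pfn);			  /* head: 100..103 */

	assert(ent.pfn == 100 && ent.count == 4);
	assert(new_region.pfn == 105 && new_region.count == 5);
	return 0;
}
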
+
+/* Stop new private<->shared conversions */
+static void hv_vtom_kexec_begin(void)
+{
+ if (!IS_ENABLED(CONFIG_KEXEC_CORE))
+ return;
+
+ /*
+ * Crash kernel reaches here with interrupts disabled: can't wait for
+ * conversions to finish.
+ *
+ * If a race happened, just report and proceed.
+ */
+ if (!set_memory_enc_stop_conversion())
+ pr_warn("Failed to stop shared<->private conversions\n");
+}
+
+static void hv_vtom_kexec_finish(void)
+{
+ struct hv_gpa_range_for_visibility *input;
+ struct hv_enc_pfn_region *ent;
+ unsigned long flags;
+ u64 hv_status;
+ int cur, i;
+
+ local_irq_save(flags);
+ input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
+ if (unlikely(!input))
+ goto out;
+
+ list_for_each_entry(ent, &hv_list_enc, list) {
+ for (i = 0, cur = 0; i < ent->count; i++) {
+ input->gpa_page_list[cur] = ent->pfn + i;
+ cur++;
+
+ if (cur == HV_MAX_MODIFY_GPA_REP_COUNT || i == ent->count - 1) {
+ input->partition_id = HV_PARTITION_ID_SELF;
+ input->host_visibility = VMBUS_PAGE_NOT_VISIBLE;
+ input->reserved0 = 0;
+ input->reserved1 = 0;
+ hv_status = hv_do_rep_hypercall(
+ HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY,
+ cur, 0, input, NULL);
+ WARN_ON_ONCE(!hv_result_success(hv_status));
+ cur = 0;
+ }
+ }
+
+ }
+
+out:
+ local_irq_restore(flags);
+}
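
hv_vtom_kexec_finish() flushes each region through the rep hypercall in chunks of at most HV_MAX_MODIFY_GPA_REP_COUNT entries, issuing a call whenever the batch fills up or the region ends. A userspace model of that chunking, where MAX_REP and fake_rep_hypercall() are illustrative stand-ins for the kernel's limit and hv_do_rep_hypercall():

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_REP 64	/* illustrative batch limit */

static int calls;

/* stand-in for the HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY call */
static void fake_rep_hypercall(const uint64_t *pages, int n)
{
	calls++;
	printf("batch %d: %d pages starting at %llu\n",
	       calls, n, (unsigned long long)pages[0]);
}

int main(void)
{
	uint64_t batch[MAX_REP];
	const uint64_t pfn = 1000;
	const int count = 150;	/* one region of 150 pages */
	int cur = 0;

	for (int i = 0; i < count; i++) {
		batch[cur++] = pfn + i;
		/* flush when the batch is full or the region ends */
		if (cur == MAX_REP || i == count - 1) {
			fake_rep_hypercall(batch, cur);
			cur = 0;
		}
	}
	assert(calls == 3);	/* 64 + 64 + 22 */
	return 0;
}
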
+
+/*
* hv_mark_gpa_visibility - Set pages visible to host via hvcall.
*
* In Isolation VM, all guest memory is encrypted from host and guest
@@ -475,6 +664,7 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
struct hv_gpa_range_for_visibility *input;
u64 hv_status;
unsigned long flags;
+ int ret;
/* no-op if partition isolation is not enabled */
if (!hv_is_isolation_supported())
@@ -486,6 +676,13 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
return -EINVAL;
}
+ if (visibility == VMBUS_PAGE_NOT_VISIBLE)
+ ret = hv_list_enc_remove(pfn, count);
+ else
+ ret = hv_list_enc_add(pfn, count);
+ if (ret)
+ return ret;
+
local_irq_save(flags);
input = *this_cpu_ptr(hyperv_pcpu_input_arg);
@@ -506,8 +703,18 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
if (hv_result_success(hv_status))
return 0;
+
+ if (visibility == VMBUS_PAGE_NOT_VISIBLE)
+ ret = hv_list_enc_add(pfn, count);
else
- return -EFAULT;
+ ret = hv_list_enc_remove(pfn, count);
+ /*
+ * There's no good way to recover from -ENOMEM here, the accounting is
+ * wrong either way.
+ */
+ WARN_ON_ONCE(ret);
+
+ return -EFAULT;
}
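
The hunk above records the visibility change before issuing the hypercall and compensates by undoing the record if the hypercall fails; as the comment notes, a failing compensation leaves the accounting wrong either way. A minimal sketch of that record-then-compensate shape, with all names illustrative:

#include <stdbool.h>
#include <stdio.h>

static int tracked;			/* stand-in for hv_list_enc */

static int track(void)   { tracked++; return 0; }
static int untrack(void) { tracked--; return 0; }

static int make_visible(bool hypercall_ok)
{
	int ret = track();		/* record the change first */

	if (ret)
		return ret;
	if (hypercall_ok)		/* stand-in for the hvcall */
		return 0;
	ret = untrack();		/* compensate on failure */
	if (ret)			/* accounting wrong either way */
		fprintf(stderr, "rollback failed\n");
	return -1;
}

int main(void)
{
	make_visible(false);
	printf("tracked after failed call: %d\n", tracked);	/* 0 */
	return 0;
}
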
/*
@@ -669,6 +876,8 @@ void __init hv_vtom_init(void)
x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required;
x86_platform.guest.enc_status_change_prepare = hv_vtom_clear_present;
x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility;
+ x86_platform.guest.enc_kexec_begin = hv_vtom_kexec_begin;
+ x86_platform.guest.enc_kexec_finish = hv_vtom_kexec_finish;
/* Set WB as the default cache mode. */
guest_force_mtrr_state(NULL, 0, MTRR_TYPE_WRBACK);