summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-27 15:26:06 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-27 15:26:06 -0800
commitdeee7487f5d495d0d9e5ab40d866d69ad524c46a (patch)
tree771e875dcbc8fa7d5a74af283e194ea08c09dfb8
parent805ba04cb7ccfc7d72e834ebd796e043142156ba (diff)
parent5820a3b08987951e3e4a89fca8ab6e1448f672e1 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin: "A small number of improvements all over the place: - vdpa/octeon support for multiple interrupts - virtio-pci support for error recovery - vp_vdpa support for notification with data - vhost/net fix to set num_buffers for spec compliance - virtio-mem now works with kdump on s390 And small cleanups all over the place" * tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (23 commits) virtio_blk: Add support for transport error recovery virtio_pci: Add support for PCIe Function Level Reset vhost/net: Set num_buffers for virtio 1.0 vdpa/octeon_ep: read vendor-specific PCI capability virtio-pci: define type and header for PCI vendor data vdpa/octeon_ep: handle device config change events vdpa/octeon_ep: enable support for multiple interrupts per device vdpa: solidrun: Replace deprecated PCI functions s390/kdump: virtio-mem kdump support (CONFIG_PROC_VMCORE_DEVICE_RAM) virtio-mem: support CONFIG_PROC_VMCORE_DEVICE_RAM virtio-mem: remember usable region size virtio-mem: mark device ready before registering callbacks in kdump mode fs/proc/vmcore: introduce PROC_VMCORE_DEVICE_RAM to detect device RAM ranges in 2nd kernel fs/proc/vmcore: factor out freeing a list of vmcore ranges fs/proc/vmcore: factor out allocating a vmcore range and adding it to a list fs/proc/vmcore: move vmcore definitions out of kcore.h fs/proc/vmcore: prefix all pr_* with "vmcore:" fs/proc/vmcore: disallow vmcore modifications while the vmcore is open fs/proc/vmcore: replace vmcoredd_mutex by vmcore_mutex fs/proc/vmcore: convert vmcore_cb_lock into vmcore_mutex ...
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/kernel/crash_dump.c39
-rw-r--r--drivers/block/virtio_blk.c28
-rw-r--r--drivers/vdpa/octeon_ep/octep_vdpa.h32
-rw-r--r--drivers/vdpa/octeon_ep/octep_vdpa_hw.c38
-rw-r--r--drivers/vdpa/octeon_ep/octep_vdpa_main.c99
-rw-r--r--drivers/vdpa/solidrun/snet_main.c57
-rw-r--r--drivers/vdpa/virtio_pci/vp_vdpa.c9
-rw-r--r--drivers/vhost/net.c5
-rw-r--r--drivers/virtio/virtio.c94
-rw-r--r--drivers/virtio/virtio_balloon.c2
-rw-r--r--drivers/virtio/virtio_mem.c103
-rw-r--r--drivers/virtio/virtio_pci_common.c41
-rw-r--r--fs/proc/Kconfig19
-rw-r--r--fs/proc/vmcore.c283
-rw-r--r--include/linux/crash_dump.h41
-rw-r--r--include/linux/kcore.h13
-rw-r--r--include/linux/virtio.h8
-rw-r--r--include/uapi/linux/vduse.h2
-rw-r--r--include/uapi/linux/virtio_pci.h14
20 files changed, 735 insertions, 193 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 570558a99359d..6e9545d8b0c72 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -244,6 +244,7 @@ config S390
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_PER_CPU_EMBED_FIRST_CHUNK
+ select NEED_PROC_VMCORE_DEVICE_RAM if PROC_VMCORE
select NEED_SG_DMA_LENGTH if PCI
select OLD_SIGACTION
select OLD_SIGSUSPEND3
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index dc7328fd2ec4d..276cb4c1e11be 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -506,6 +506,19 @@ static int get_mem_chunk_cnt(void)
return cnt;
}
+static void fill_ptload(Elf64_Phdr *phdr, unsigned long paddr,
+ unsigned long vaddr, unsigned long size)
+{
+ phdr->p_type = PT_LOAD;
+ phdr->p_vaddr = vaddr;
+ phdr->p_offset = paddr;
+ phdr->p_paddr = paddr;
+ phdr->p_filesz = size;
+ phdr->p_memsz = size;
+ phdr->p_flags = PF_R | PF_W | PF_X;
+ phdr->p_align = PAGE_SIZE;
+}
+
/*
* Initialize ELF loads (new kernel)
*/
@@ -518,14 +531,8 @@ static void loads_init(Elf64_Phdr *phdr, bool os_info_has_vm)
if (os_info_has_vm)
old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
for_each_physmem_range(idx, &oldmem_type, &start, &end) {
- phdr->p_type = PT_LOAD;
- phdr->p_vaddr = old_identity_base + start;
- phdr->p_offset = start;
- phdr->p_paddr = start;
- phdr->p_filesz = end - start;
- phdr->p_memsz = end - start;
- phdr->p_flags = PF_R | PF_W | PF_X;
- phdr->p_align = PAGE_SIZE;
+ fill_ptload(phdr, start, old_identity_base + start,
+ end - start);
phdr++;
}
}
@@ -535,6 +542,22 @@ static bool os_info_has_vm(void)
return os_info_old_value(OS_INFO_KASLR_OFFSET);
}
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+/*
+ * Fill PT_LOAD for a physical memory range owned by a device and detected by
+ * its device driver.
+ */
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+ unsigned long long paddr, unsigned long long size)
+{
+ unsigned long old_identity_base = 0;
+
+ if (os_info_has_vm())
+ old_identity_base = os_info_old_value(OS_INFO_IDENTITY_BASE);
+ fill_ptload(phdr, paddr, old_identity_base + paddr, size);
+}
+#endif
+
/*
* Prepare PT_LOAD type program header for kernel image region
*/
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index bbaa26b523b8d..bfbe391c20fee 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -1579,8 +1579,7 @@ static void virtblk_remove(struct virtio_device *vdev)
put_disk(vblk->disk);
}
-#ifdef CONFIG_PM_SLEEP
-static int virtblk_freeze(struct virtio_device *vdev)
+static int virtblk_freeze_priv(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
struct request_queue *q = vblk->disk->queue;
@@ -1602,7 +1601,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
return 0;
}
-static int virtblk_restore(struct virtio_device *vdev)
+static int virtblk_restore_priv(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
int ret;
@@ -1616,8 +1615,29 @@ static int virtblk_restore(struct virtio_device *vdev)
return 0;
}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtblk_freeze(struct virtio_device *vdev)
+{
+ return virtblk_freeze_priv(vdev);
+}
+
+static int virtblk_restore(struct virtio_device *vdev)
+{
+ return virtblk_restore_priv(vdev);
+}
#endif
+static int virtblk_reset_prepare(struct virtio_device *vdev)
+{
+ return virtblk_freeze_priv(vdev);
+}
+
+static int virtblk_reset_done(struct virtio_device *vdev)
+{
+ return virtblk_restore_priv(vdev);
+}
+
static const struct virtio_device_id id_table[] = {
{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -1653,6 +1673,8 @@ static struct virtio_driver virtio_blk = {
.freeze = virtblk_freeze,
.restore = virtblk_restore,
#endif
+ .reset_prepare = virtblk_reset_prepare,
+ .reset_done = virtblk_reset_done,
};
static int __init virtio_blk_init(void)
diff --git a/drivers/vdpa/octeon_ep/octep_vdpa.h b/drivers/vdpa/octeon_ep/octep_vdpa.h
index 046710ec4d424..53b020b019f73 100644
--- a/drivers/vdpa/octeon_ep/octep_vdpa.h
+++ b/drivers/vdpa/octeon_ep/octep_vdpa.h
@@ -8,6 +8,7 @@
#include <linux/pci_regs.h>
#include <linux/vdpa.h>
#include <linux/virtio_pci_modern.h>
+#include <uapi/linux/virtio_crypto.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_config.h>
@@ -29,12 +30,12 @@
#define OCTEP_EPF_RINFO(x) (0x000209f0 | ((x) << 25))
#define OCTEP_VF_MBOX_DATA(x) (0x00010210 | ((x) << 17))
#define OCTEP_PF_MBOX_DATA(x) (0x00022000 | ((x) << 4))
-
-#define OCTEP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF)
-#define OCTEP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0x7F)
+#define OCTEP_VF_IN_CTRL(x) (0x00010000 | ((x) << 17))
+#define OCTEP_VF_IN_CTRL_RPVF(val) (((val) >> 48) & 0xF)
#define OCTEP_FW_READY_SIGNATURE0 0xFEEDFEED
#define OCTEP_FW_READY_SIGNATURE1 0x3355ffaa
+#define OCTEP_MAX_CB_INTR 8
enum octep_vdpa_dev_status {
OCTEP_VDPA_DEV_STATUS_INVALID,
@@ -48,9 +49,26 @@ enum octep_vdpa_dev_status {
struct octep_vring_info {
struct vdpa_callback cb;
void __iomem *notify_addr;
- u32 __iomem *cb_notify_addr;
+ void __iomem *cb_notify_addr;
phys_addr_t notify_pa;
- char msix_name[256];
+};
+
+enum octep_pci_vndr_cfg_type {
+ OCTEP_PCI_VNDR_CFG_TYPE_VIRTIO_ID,
+ OCTEP_PCI_VNDR_CFG_TYPE_MAX,
+};
+
+struct octep_pci_vndr_data {
+ struct virtio_pci_vndr_data hdr;
+ u8 id;
+ u8 bar;
+ union {
+ u64 data;
+ struct {
+ u32 offset;
+ u32 length;
+ };
+ };
};
struct octep_hw {
@@ -68,7 +86,9 @@ struct octep_hw {
u64 features;
u16 nr_vring;
u32 config_size;
- int irq;
+ int nb_irqs;
+ int *irqs;
+ u8 dev_id;
};
u8 octep_hw_get_status(struct octep_hw *oct_hw);
diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c
index 1d4767b33315e..74240101c5052 100644
--- a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c
+++ b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2024 Marvell. */
#include <linux/iopoll.h>
+#include <linux/build_bug.h>
#include "octep_vdpa.h"
@@ -358,7 +359,14 @@ u16 octep_get_vq_size(struct octep_hw *oct_hw)
static u32 octep_get_config_size(struct octep_hw *oct_hw)
{
- return sizeof(struct virtio_net_config);
+ switch (oct_hw->dev_id) {
+ case VIRTIO_ID_NET:
+ return sizeof(struct virtio_net_config);
+ case VIRTIO_ID_CRYPTO:
+ return sizeof(struct virtio_crypto_config);
+ default:
+ return 0;
+ }
}
static void __iomem *octep_get_cap_addr(struct octep_hw *oct_hw, struct virtio_pci_cap *cap)
@@ -416,8 +424,25 @@ static int octep_pci_signature_verify(struct octep_hw *oct_hw)
return 0;
}
+static void octep_vndr_data_process(struct octep_hw *oct_hw,
+ struct octep_pci_vndr_data *vndr_data)
+{
+ BUILD_BUG_ON(sizeof(struct octep_pci_vndr_data) % 4 != 0);
+
+ switch (vndr_data->id) {
+ case OCTEP_PCI_VNDR_CFG_TYPE_VIRTIO_ID:
+ oct_hw->dev_id = (u8)vndr_data->data;
+ break;
+ default:
+ dev_err(&oct_hw->pdev->dev, "Invalid vendor data id %u\n",
+ vndr_data->id);
+ break;
+ }
+}
+
int octep_hw_caps_read(struct octep_hw *oct_hw, struct pci_dev *pdev)
{
+ struct octep_pci_vndr_data vndr_data;
struct octep_mbox __iomem *mbox;
struct device *dev = &pdev->dev;
struct virtio_pci_cap cap;
@@ -466,6 +491,15 @@ int octep_hw_caps_read(struct octep_hw *oct_hw, struct pci_dev *pdev)
case VIRTIO_PCI_CAP_ISR_CFG:
oct_hw->isr = octep_get_cap_addr(oct_hw, &cap);
break;
+ case VIRTIO_PCI_CAP_VENDOR_CFG:
+ octep_pci_caps_read(oct_hw, &vndr_data, sizeof(vndr_data), pos);
+ if (vndr_data.hdr.vendor_id != PCI_VENDOR_ID_CAVIUM) {
+ dev_err(dev, "Invalid vendor data\n");
+ return -EINVAL;
+ }
+
+ octep_vndr_data_process(oct_hw, &vndr_data);
+ break;
}
pos = cap.cap_next;
@@ -495,8 +529,6 @@ int octep_hw_caps_read(struct octep_hw *oct_hw, struct pci_dev *pdev)
if (!oct_hw->vqs)
return -ENOMEM;
- oct_hw->irq = -1;
-
dev_info(&pdev->dev, "Device features : %llx\n", oct_hw->features);
dev_info(&pdev->dev, "Maximum queues : %u\n", oct_hw->nr_vring);
diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_main.c b/drivers/vdpa/octeon_ep/octep_vdpa_main.c
index cd55b1aac1512..f3d4dda4e04cd 100644
--- a/drivers/vdpa/octeon_ep/octep_vdpa_main.c
+++ b/drivers/vdpa/octeon_ep/octep_vdpa_main.c
@@ -49,58 +49,89 @@ static irqreturn_t octep_vdpa_intr_handler(int irq, void *data)
struct octep_hw *oct_hw = data;
int i;
- for (i = 0; i < oct_hw->nr_vring; i++) {
- if (oct_hw->vqs[i].cb.callback && ioread32(oct_hw->vqs[i].cb_notify_addr)) {
- /* Acknowledge the per queue notification to the device */
- iowrite32(0, oct_hw->vqs[i].cb_notify_addr);
- oct_hw->vqs[i].cb.callback(oct_hw->vqs[i].cb.private);
+ /* Each device has multiple interrupts (nb_irqs) shared among rings
+ * (nr_vring). Device interrupts are mapped to the rings in a
+ * round-robin fashion.
+ *
+ * For example, if nb_irqs = 8 and nr_vring = 64:
+ * 0 -> 0, 8, 16, 24, 32, 40, 48, 56;
+ * 1 -> 1, 9, 17, 25, 33, 41, 49, 57;
+ * ...
+ * 7 -> 7, 15, 23, 31, 39, 47, 55, 63;
+ */
+
+ for (i = irq - oct_hw->irqs[0]; i < oct_hw->nr_vring; i += oct_hw->nb_irqs) {
+ if (ioread8(oct_hw->vqs[i].cb_notify_addr)) {
+ /* Acknowledge the per ring notification to the device */
+ iowrite8(0, oct_hw->vqs[i].cb_notify_addr);
+
+ if (likely(oct_hw->vqs[i].cb.callback))
+ oct_hw->vqs[i].cb.callback(oct_hw->vqs[i].cb.private);
+ break;
}
}
+ /* Check for config interrupt. Config uses the first interrupt */
+ if (unlikely(irq == oct_hw->irqs[0] && ioread8(oct_hw->isr))) {
+ iowrite8(0, oct_hw->isr);
+
+ if (oct_hw->config_cb.callback)
+ oct_hw->config_cb.callback(oct_hw->config_cb.private);
+ }
+
return IRQ_HANDLED;
}
static void octep_free_irqs(struct octep_hw *oct_hw)
{
struct pci_dev *pdev = oct_hw->pdev;
+ int irq;
- if (oct_hw->irq != -1) {
- devm_free_irq(&pdev->dev, oct_hw->irq, oct_hw);
- oct_hw->irq = -1;
+ if (!oct_hw->irqs)
+ return;
+
+ for (irq = 0; irq < oct_hw->nb_irqs; irq++) {
+ if (!oct_hw->irqs[irq])
+ break;
+
+ devm_free_irq(&pdev->dev, oct_hw->irqs[irq], oct_hw);
}
+
pci_free_irq_vectors(pdev);
+ devm_kfree(&pdev->dev, oct_hw->irqs);
+ oct_hw->irqs = NULL;
}
static int octep_request_irqs(struct octep_hw *oct_hw)
{
struct pci_dev *pdev = oct_hw->pdev;
- int ret, irq;
+ int ret, irq, idx;
- /* Currently HW device provisions one IRQ per VF, hence
- * allocate one IRQ for all virtqueues call interface.
- */
- ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
+ oct_hw->irqs = devm_kcalloc(&pdev->dev, oct_hw->nb_irqs, sizeof(int), GFP_KERNEL);
+ if (!oct_hw->irqs)
+ return -ENOMEM;
+
+ ret = pci_alloc_irq_vectors(pdev, 1, oct_hw->nb_irqs, PCI_IRQ_MSIX);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to alloc msix vector");
return ret;
}
- snprintf(oct_hw->vqs->msix_name, sizeof(oct_hw->vqs->msix_name),
- OCTEP_VDPA_DRIVER_NAME "-vf-%d", pci_iov_vf_id(pdev));
-
- irq = pci_irq_vector(pdev, 0);
- ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0,
- oct_hw->vqs->msix_name, oct_hw);
- if (ret) {
- dev_err(&pdev->dev, "Failed to register interrupt handler\n");
- goto free_irq_vec;
+ for (idx = 0; idx < oct_hw->nb_irqs; idx++) {
+ irq = pci_irq_vector(pdev, idx);
+ ret = devm_request_irq(&pdev->dev, irq, octep_vdpa_intr_handler, 0,
+ dev_name(&pdev->dev), oct_hw);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register interrupt handler\n");
+ goto free_irqs;
+ }
+ oct_hw->irqs[idx] = irq;
}
- oct_hw->irq = irq;
return 0;
-free_irq_vec:
- pci_free_irq_vectors(pdev);
+free_irqs:
+ octep_free_irqs(oct_hw);
return ret;
}
@@ -271,7 +302,9 @@ static u32 octep_vdpa_get_generation(struct vdpa_device *vdpa_dev)
static u32 octep_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
{
- return VIRTIO_ID_NET;
+ struct octep_hw *oct_hw = vdpa_to_octep_hw(vdpa_dev);
+
+ return oct_hw->dev_id;
}
static u32 octep_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
@@ -559,6 +592,7 @@ static void octep_vdpa_setup_task(struct work_struct *work)
struct device *dev = &pdev->dev;
struct octep_hw *oct_hw;
unsigned long timeout;
+ u64 val;
int ret;
oct_hw = &mgmt_dev->oct_hw;
@@ -590,6 +624,13 @@ static void octep_vdpa_setup_task(struct work_struct *work)
if (ret)
return;
+ val = readq(oct_hw->base[OCTEP_HW_MBOX_BAR] + OCTEP_VF_IN_CTRL(0));
+ oct_hw->nb_irqs = OCTEP_VF_IN_CTRL_RPVF(val);
+ if (!oct_hw->nb_irqs || oct_hw->nb_irqs > OCTEP_MAX_CB_INTR) {
+ dev_err(dev, "Invalid number of interrupts %d\n", oct_hw->nb_irqs);
+ goto unmap_region;
+ }
+
ret = octep_hw_caps_read(oct_hw, pdev);
if (ret < 0)
goto unmap_region;
@@ -768,12 +809,6 @@ static int octep_vdpa_pf_setup(struct octep_pf *octpf)
return -EINVAL;
}
- if (OCTEP_EPF_RINFO_RPVF(val) != BIT_ULL(0)) {
- val &= ~GENMASK_ULL(35, 32);
- val |= BIT_ULL(32);
- writeq(val, addr + OCTEP_EPF_RINFO(0));
- }
-
len = pci_resource_len(pdev, OCTEP_HW_CAPS_BAR);
octpf->vf_stride = len / totalvfs;
diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c
index c8b74980dbd17..55ec51c17ab35 100644
--- a/drivers/vdpa/solidrun/snet_main.c
+++ b/drivers/vdpa/solidrun/snet_main.c
@@ -556,36 +556,38 @@ static const struct vdpa_config_ops snet_config_ops = {
static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
{
char *name;
- int ret, i, mask = 0;
+ unsigned short i;
+ bool bars_found = false;
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev));
+ if (!name)
+ return -ENOMEM;
+
/* We don't know which BAR will be used to communicate..
* We will map every bar with len > 0.
*
* Later, we will discover the BAR and unmap all other BARs.
*/
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
- if (pci_resource_len(pdev, i))
- mask |= (1 << i);
- }
+ void __iomem *io;
- /* No BAR can be used.. */
- if (!mask) {
- SNET_ERR(pdev, "Failed to find a PCI BAR\n");
- return -ENODEV;
- }
+ if (pci_resource_len(pdev, i) == 0)
+ continue;
- name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "psnet[%s]-bars", pci_name(pdev));
- if (!name)
- return -ENOMEM;
+ io = pcim_iomap_region(pdev, i, name);
+ if (IS_ERR(io)) {
+ SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
+ return PTR_ERR(io);
+ }
- ret = pcim_iomap_regions(pdev, mask, name);
- if (ret) {
- SNET_ERR(pdev, "Failed to request and map PCI BARs\n");
- return ret;
+ psnet->bars[i] = io;
+ bars_found = true;
}
- for (i = 0; i < PCI_STD_NUM_BARS; i++) {
- if (mask & (1 << i))
- psnet->bars[i] = pcim_iomap_table(pdev)[i];
+ /* No BAR can be used.. */
+ if (!bars_found) {
+ SNET_ERR(pdev, "Failed to find a PCI BAR\n");
+ return -ENODEV;
}
return 0;
@@ -594,20 +596,20 @@ static int psnet_open_pf_bar(struct pci_dev *pdev, struct psnet *psnet)
static int snet_open_vf_bar(struct pci_dev *pdev, struct snet *snet)
{
char *name;
- int ret;
+ void __iomem *io;
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "snet[%s]-bars", pci_name(pdev));
if (!name)
return -ENOMEM;
/* Request and map BAR */
- ret = pcim_iomap_regions(pdev, BIT(snet->psnet->cfg.vf_bar), name);
- if (ret) {
+ io = pcim_iomap_region(pdev, snet->psnet->cfg.vf_bar, name);
+ if (IS_ERR(io)) {
SNET_ERR(pdev, "Failed to request and map PCI BAR for a VF\n");
- return ret;
+ return PTR_ERR(io);
}
- snet->bar = pcim_iomap_table(pdev)[snet->psnet->cfg.vf_bar];
+ snet->bar = io;
return 0;
}
@@ -656,15 +658,12 @@ static int psnet_detect_bar(struct psnet *psnet, u32 off)
static void psnet_unmap_unused_bars(struct pci_dev *pdev, struct psnet *psnet)
{
- int i, mask = 0;
+ unsigned short i;
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (psnet->bars[i] && i != psnet->barno)
- mask |= (1 << i);
+ pcim_iounmap_region(pdev, i);
}
-
- if (mask)
- pcim_iounmap_regions(pdev, mask);
}
/* Read SNET config from PCI BAR */
diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index 16380764275ea..8787407f75b06 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -367,6 +367,14 @@ static void vp_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid)
vp_iowrite16(qid, vp_vdpa->vring[qid].notify);
}
+static void vp_vdpa_kick_vq_with_data(struct vdpa_device *vdpa, u32 data)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ u16 qid = data & 0xFFFF;
+
+ vp_iowrite32(data, vp_vdpa->vring[qid].notify);
+}
+
static u32 vp_vdpa_get_generation(struct vdpa_device *vdpa)
{
struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
@@ -472,6 +480,7 @@ static const struct vdpa_config_ops vp_vdpa_ops = {
.get_vq_size = vp_vdpa_get_vq_size,
.set_vq_address = vp_vdpa_set_vq_address,
.kick_vq = vp_vdpa_kick_vq,
+ .kick_vq_with_data = vp_vdpa_kick_vq_with_data,
.get_generation = vp_vdpa_get_generation,
.get_device_id = vp_vdpa_get_device_id,
.get_vendor_id = vp_vdpa_get_vendor_id,
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9ad37c0121890..b9b9e9d409518 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1107,6 +1107,7 @@ static void handle_rx(struct vhost_net *net)
size_t vhost_hlen, sock_hlen;
size_t vhost_len, sock_len;
bool busyloop_intr = false;
+ bool set_num_buffers;
struct socket *sock;
struct iov_iter fixup;
__virtio16 num_buffers;
@@ -1129,6 +1130,8 @@ static void handle_rx(struct vhost_net *net)
vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
+ set_num_buffers = mergeable ||
+ vhost_has_feature(vq, VIRTIO_F_VERSION_1);
do {
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
@@ -1205,7 +1208,7 @@ static void handle_rx(struct vhost_net *net)
/* TODO: Should check and handle checksum. */
num_buffers = cpu_to_vhost16(vq, headcount);
- if (likely(mergeable) &&
+ if (likely(set_num_buffers) &&
copy_to_iter(&num_buffers, sizeof num_buffers,
&fixup) != sizeof num_buffers) {
vq_err(vq, "Failed num_buffers write");
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index b10ed9f5b5435..ba37665188b51 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -546,29 +546,7 @@ void unregister_virtio_device(struct virtio_device *dev)
}
EXPORT_SYMBOL_GPL(unregister_virtio_device);
-#ifdef CONFIG_PM_SLEEP
-int virtio_device_freeze(struct virtio_device *dev)
-{
- struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
- int ret;
-
- virtio_config_core_disable(dev);
-
- dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
-
- if (drv && drv->freeze) {
- ret = drv->freeze(dev);
- if (ret) {
- virtio_config_core_enable(dev);
- return ret;
- }
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(virtio_device_freeze);
-
-int virtio_device_restore(struct virtio_device *dev)
+static int virtio_device_restore_priv(struct virtio_device *dev, bool restore)
{
struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
int ret;
@@ -599,8 +577,14 @@ int virtio_device_restore(struct virtio_device *dev)
if (ret)
goto err;
- if (drv->restore) {
- ret = drv->restore(dev);
+ if (restore) {
+ if (drv->restore) {
+ ret = drv->restore(dev);
+ if (ret)
+ goto err;
+ }
+ } else {
+ ret = drv->reset_done(dev);
if (ret)
goto err;
}
@@ -617,9 +601,69 @@ err:
virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
return ret;
}
+
+#ifdef CONFIG_PM_SLEEP
+int virtio_device_freeze(struct virtio_device *dev)
+{
+ struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+ int ret;
+
+ virtio_config_core_disable(dev);
+
+ dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
+
+ if (drv && drv->freeze) {
+ ret = drv->freeze(dev);
+ if (ret) {
+ virtio_config_core_enable(dev);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtio_device_freeze);
+
+int virtio_device_restore(struct virtio_device *dev)
+{
+ return virtio_device_restore_priv(dev, true);
+}
EXPORT_SYMBOL_GPL(virtio_device_restore);
#endif
+int virtio_device_reset_prepare(struct virtio_device *dev)
+{
+ struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+ int ret;
+
+ if (!drv || !drv->reset_prepare)
+ return -EOPNOTSUPP;
+
+ virtio_config_core_disable(dev);
+
+ dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
+
+ ret = drv->reset_prepare(dev);
+ if (ret) {
+ virtio_config_core_enable(dev);
+ return ret;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtio_device_reset_prepare);
+
+int virtio_device_reset_done(struct virtio_device *dev)
+{
+ struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
+
+ if (!drv || !drv->reset_done)
+ return -EOPNOTSUPP;
+
+ return virtio_device_restore_priv(dev, false);
+}
+EXPORT_SYMBOL_GPL(virtio_device_reset_done);
+
static int virtio_init(void)
{
if (bus_register(&virtio_bus) != 0)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index b36d2803674ef..89da052f4f687 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -251,7 +251,7 @@ static unsigned int fill_balloon(struct virtio_balloon *vb, size_t num)
for (num_pfns = 0; num_pfns < num;
num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
- struct page *page = balloon_page_alloc();
+ page = balloon_page_alloc();
if (!page) {
dev_info_ratelimited(&vb->vdev->dev,
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index b0b8714415783..8a294b9cbcf68 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -133,6 +133,8 @@ struct virtio_mem {
uint64_t addr;
/* Maximum region size in bytes. */
uint64_t region_size;
+ /* Usable region size in bytes. */
+ uint64_t usable_region_size;
/* The parent resource for all memory added via this device. */
struct resource *parent_resource;
@@ -2368,7 +2370,7 @@ static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
static void virtio_mem_refresh_config(struct virtio_mem *vm)
{
const struct range pluggable_range = mhp_get_pluggable_range(true);
- uint64_t new_plugged_size, usable_region_size, end_addr;
+ uint64_t new_plugged_size, end_addr;
/* the plugged_size is just a reflection of what _we_ did previously */
virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
@@ -2378,8 +2380,8 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm)
/* calculate the last usable memory block id */
virtio_cread_le(vm->vdev, struct virtio_mem_config,
- usable_region_size, &usable_region_size);
- end_addr = min(vm->addr + usable_region_size - 1,
+ usable_region_size, &vm->usable_region_size);
+ end_addr = min(vm->addr + vm->usable_region_size - 1,
pluggable_range.end);
if (vm->in_sbm) {
@@ -2648,6 +2650,7 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm)
if (rc)
goto out_unreg_pm;
+ virtio_device_ready(vm->vdev);
return 0;
out_unreg_pm:
unregister_pm_notifier(&vm->pm_notifier);
@@ -2725,13 +2728,103 @@ static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb,
mutex_unlock(&vm->hotplug_mutex);
return is_ram;
}
+
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+static int virtio_mem_vmcore_add_device_ram(struct virtio_mem *vm,
+ struct list_head *list, uint64_t start, uint64_t end)
+{
+ int rc;
+
+ rc = vmcore_alloc_add_range(list, start, end - start);
+ if (rc)
+ dev_err(&vm->vdev->dev,
+ "Error adding device RAM range: %d\n", rc);
+ return rc;
+}
+
+static int virtio_mem_vmcore_get_device_ram(struct vmcore_cb *cb,
+ struct list_head *list)
+{
+ struct virtio_mem *vm = container_of(cb, struct virtio_mem,
+ vmcore_cb);
+ const uint64_t device_start = vm->addr;
+ const uint64_t device_end = vm->addr + vm->usable_region_size;
+ uint64_t chunk_size, cur_start, cur_end, plugged_range_start = 0;
+ LIST_HEAD(tmp_list);
+ int rc;
+
+ if (!vm->plugged_size)
+ return 0;
+
+ /* Process memory sections, unless the device block size is bigger. */
+ chunk_size = max_t(uint64_t, PFN_PHYS(PAGES_PER_SECTION),
+ vm->device_block_size);
+
+ mutex_lock(&vm->hotplug_mutex);
+
+ /*
+ * We process larger chunks and indicate the complete chunk if any
+ * block in there is plugged. This reduces the number of pfn_is_ram()
+ * callbacks and mimic what is effectively being done when the old
+ * kernel would add complete memory sections/blocks to the elfcore hdr.
+ */
+ cur_start = device_start;
+ for (cur_start = device_start; cur_start < device_end; cur_start = cur_end) {
+ cur_end = ALIGN_DOWN(cur_start + chunk_size, chunk_size);
+ cur_end = min_t(uint64_t, cur_end, device_end);
+
+ rc = virtio_mem_send_state_request(vm, cur_start,
+ cur_end - cur_start);
+
+ if (rc < 0) {
+ dev_err(&vm->vdev->dev,
+ "Error querying block states: %d\n", rc);
+ goto out;
+ } else if (rc != VIRTIO_MEM_STATE_UNPLUGGED) {
+ /* Merge ranges with plugged memory. */
+ if (!plugged_range_start)
+ plugged_range_start = cur_start;
+ continue;
+ }
+
+ /* Flush any plugged range. */
+ if (plugged_range_start) {
+ rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list,
+ plugged_range_start,
+ cur_start);
+ if (rc)
+ goto out;
+ plugged_range_start = 0;
+ }
+ }
+
+ /* Flush any plugged range. */
+ if (plugged_range_start)
+ rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list,
+ plugged_range_start,
+ cur_start);
+out:
+ mutex_unlock(&vm->hotplug_mutex);
+ if (rc < 0) {
+ vmcore_free_ranges(&tmp_list);
+ return rc;
+ }
+ list_splice_tail(&tmp_list, list);
+ return 0;
+}
+#endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */
#endif /* CONFIG_PROC_VMCORE */
static int virtio_mem_init_kdump(struct virtio_mem *vm)
{
+ /* We must be prepared to receive a callback immediately. */
+ virtio_device_ready(vm->vdev);
#ifdef CONFIG_PROC_VMCORE
dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n");
vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram;
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+ vm->vmcore_cb.get_device_ram = virtio_mem_vmcore_get_device_ram;
+#endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */
register_vmcore_cb(&vm->vmcore_cb);
return 0;
#else /* CONFIG_PROC_VMCORE */
@@ -2760,6 +2853,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
&vm->region_size);
+ virtio_cread_le(vm->vdev, struct virtio_mem_config, usable_region_size,
+ &vm->usable_region_size);
/* Determine the nid for the device based on the lowest address. */
if (vm->nid == NUMA_NO_NODE)
@@ -2870,8 +2965,6 @@ static int virtio_mem_probe(struct virtio_device *vdev)
if (rc)
goto out_del_vq;
- virtio_device_ready(vdev);
-
/* trigger a config update to start processing the requested_size */
if (!vm->in_kdump) {
atomic_set(&vm->config_changed, 1);
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 88074451dd615..d6d79af44569b 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -794,6 +794,46 @@ static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs)
return num_vfs;
}
+static void virtio_pci_reset_prepare(struct pci_dev *pci_dev)
+{
+ struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+ int ret = 0;
+
+ ret = virtio_device_reset_prepare(&vp_dev->vdev);
+ if (ret) {
+ if (ret != -EOPNOTSUPP)
+ dev_warn(&pci_dev->dev, "Reset prepare failure: %d",
+ ret);
+ return;
+ }
+
+ if (pci_is_enabled(pci_dev))
+ pci_disable_device(pci_dev);
+}
+
+static void virtio_pci_reset_done(struct pci_dev *pci_dev)
+{
+ struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+ int ret;
+
+ if (pci_is_enabled(pci_dev))
+ return;
+
+ ret = pci_enable_device(pci_dev);
+ if (!ret) {
+ pci_set_master(pci_dev);
+ ret = virtio_device_reset_done(&vp_dev->vdev);
+ }
+
+ if (ret && ret != -EOPNOTSUPP)
+ dev_warn(&pci_dev->dev, "Reset done failure: %d", ret);
+}
+
+static const struct pci_error_handlers virtio_pci_err_handler = {
+ .reset_prepare = virtio_pci_reset_prepare,
+ .reset_done = virtio_pci_reset_done,
+};
+
static struct pci_driver virtio_pci_driver = {
.name = "virtio-pci",
.id_table = virtio_pci_id_table,
@@ -803,6 +843,7 @@ static struct pci_driver virtio_pci_driver = {
.driver.pm = &virtio_pci_pm_ops,
#endif
.sriov_configure = virtio_pci_sriov_configure,
+ .err_handler = &virtio_pci_err_handler,
};
struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev)
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index d80a1431ef7be..6ae966c561e73 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -61,6 +61,25 @@ config PROC_VMCORE_DEVICE_DUMP
as ELF notes to /proc/vmcore. You can still disable device
dump using the kernel command line option 'novmcoredd'.
+config NEED_PROC_VMCORE_DEVICE_RAM
+ bool
+
+config PROC_VMCORE_DEVICE_RAM
+ def_bool y
+ depends on PROC_VMCORE && NEED_PROC_VMCORE_DEVICE_RAM
+ depends on VIRTIO_MEM
+ help
+ If the elfcore hdr is allocated and prepared by the dump kernel
+ ("2nd kernel") instead of the crashed kernel, RAM provided by memory
+ devices such as virtio-mem will not be included in the dump
+ image, because only the device driver can properly detect them.
+
+ With this config enabled, these RAM ranges will be queried from the
+ device drivers once the device gets probed, so they can be included
+ in the crash dump.
+
+ Relevant architectures should select NEED_PROC_VMCORE_DEVICE_RAM.
+
config PROC_SYSCTL
bool "Sysctl support (/proc/sys)" if EXPERT
depends on PROC_FS
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 658bf199d4247..a00120a3c0994 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -8,6 +8,8 @@
*
*/
+#define pr_fmt(fmt) "vmcore: " fmt
+
#include <linux/mm.h>
#include <linux/kcore.h>
#include <linux/user.h>
@@ -51,9 +53,14 @@ static u64 vmcore_size;
static struct proc_dir_entry *proc_vmcore;
#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
+struct vmcoredd_node {
+ struct list_head list; /* List of dumps */
+ void *buf; /* Buffer containing device's dump */
+ unsigned int size; /* Size of the buffer */
+};
+
/* Device Dump list and mutex to synchronize access to list */
static LIST_HEAD(vmcoredd_list);
-static DEFINE_MUTEX(vmcoredd_mutex);
static bool vmcoredd_disabled;
core_param(novmcoredd, vmcoredd_disabled, bool, 0);
@@ -62,17 +69,22 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
/* Device Dump Size */
static size_t vmcoredd_orig_sz;
-static DEFINE_SPINLOCK(vmcore_cb_lock);
+static DEFINE_MUTEX(vmcore_mutex);
+
DEFINE_STATIC_SRCU(vmcore_cb_srcu);
/* List of registered vmcore callbacks. */
static LIST_HEAD(vmcore_cb_list);
/* Whether the vmcore has been opened once. */
static bool vmcore_opened;
+/* Whether the vmcore is currently open. */
+static unsigned int vmcore_open;
+
+static void vmcore_process_device_ram(struct vmcore_cb *cb);
void register_vmcore_cb(struct vmcore_cb *cb)
{
INIT_LIST_HEAD(&cb->next);
- spin_lock(&vmcore_cb_lock);
+ mutex_lock(&vmcore_mutex);
list_add_tail(&cb->next, &vmcore_cb_list);
/*
* Registering a vmcore callback after the vmcore was opened is
@@ -80,13 +92,15 @@ void register_vmcore_cb(struct vmcore_cb *cb)
*/
if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback registration\n");
- spin_unlock(&vmcore_cb_lock);
+ if (!vmcore_open && cb->get_device_ram)
+ vmcore_process_device_ram(cb);
+ mutex_unlock(&vmcore_mutex);
}
EXPORT_SYMBOL_GPL(register_vmcore_cb);
void unregister_vmcore_cb(struct vmcore_cb *cb)
{
- spin_lock(&vmcore_cb_lock);
+ mutex_lock(&vmcore_mutex);
list_del_rcu(&cb->next);
/*
* Unregistering a vmcore callback after the vmcore was opened is
@@ -95,7 +109,7 @@ void unregister_vmcore_cb(struct vmcore_cb *cb)
*/
if (vmcore_opened)
pr_warn_once("Unexpected vmcore callback unregistration\n");
- spin_unlock(&vmcore_cb_lock);
+ mutex_unlock(&vmcore_mutex);
synchronize_srcu(&vmcore_cb_srcu);
}
@@ -120,9 +134,23 @@ static bool pfn_is_ram(unsigned long pfn)
static int open_vmcore(struct inode *inode, struct file *file)
{
- spin_lock(&vmcore_cb_lock);
+ mutex_lock(&vmcore_mutex);
vmcore_opened = true;
- spin_unlock(&vmcore_cb_lock);
+ if (vmcore_open + 1 == 0) {
+ mutex_unlock(&vmcore_mutex);
+ return -EBUSY;
+ }
+ vmcore_open++;
+ mutex_unlock(&vmcore_mutex);
+
+ return 0;
+}
+
+static int release_vmcore(struct inode *inode, struct file *file)
+{
+ mutex_lock(&vmcore_mutex);
+ vmcore_open--;
+ mutex_unlock(&vmcore_mutex);
return 0;
}
@@ -243,33 +271,27 @@ static int vmcoredd_copy_dumps(struct iov_iter *iter, u64 start, size_t size)
{
struct vmcoredd_node *dump;
u64 offset = 0;
- int ret = 0;
size_t tsz;
char *buf;
- mutex_lock(&vmcoredd_mutex);
list_for_each_entry(dump, &vmcoredd_list, list) {
if (start < offset + dump->size) {
tsz = min(offset + (u64)dump->size - start, (u64)size);
buf = dump->buf + start - offset;
- if (copy_to_iter(buf, tsz, iter) < tsz) {
- ret = -EFAULT;
- goto out_unlock;
- }
+ if (copy_to_iter(buf, tsz, iter) < tsz)
+ return -EFAULT;
size -= tsz;
start += tsz;
/* Leave now if buffer filled already */
if (!size)
- goto out_unlock;
+ return 0;
}
offset += dump->size;
}
-out_unlock:
- mutex_unlock(&vmcoredd_mutex);
- return ret;
+ return 0;
}
#ifdef CONFIG_MMU
@@ -278,20 +300,16 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
{
struct vmcoredd_node *dump;
u64 offset = 0;
- int ret = 0;
size_t tsz;
char *buf;
- mutex_lock(&vmcoredd_mutex);
list_for_each_entry(dump, &vmcoredd_list, list) {
if (start < offset + dump->size) {
tsz = min(offset + (u64)dump->size - start, (u64)size);
buf = dump->buf + start - offset;
if (remap_vmalloc_range_partial(vma, dst, buf, 0,
- tsz)) {
- ret = -EFAULT;
- goto out_unlock;
- }
+ tsz))
+ return -EFAULT;
size -= tsz;
start += tsz;
@@ -299,14 +317,12 @@ static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
/* Leave now if buffer filled already */
if (!size)
- goto out_unlock;
+ return 0;
}
offset += dump->size;
}
-out_unlock:
- mutex_unlock(&vmcoredd_mutex);
- return ret;
+ return 0;
}
#endif /* CONFIG_MMU */
#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
@@ -316,10 +332,10 @@ out_unlock:
*/
static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos)
{
+ struct vmcore_range *m = NULL;
ssize_t acc = 0, tmp;
size_t tsz;
u64 start;
- struct vmcore *m = NULL;
if (!iov_iter_count(iter) || *fpos >= vmcore_size)
return 0;
@@ -576,7 +592,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
{
size_t size = vma->vm_end - vma->vm_start;
u64 start, end, len, tsz;
- struct vmcore *m;
+ struct vmcore_range *m;
start = (u64)vma->vm_pgoff << PAGE_SHIFT;
end = start + size;
@@ -693,21 +709,17 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
static const struct proc_ops vmcore_proc_ops = {
.proc_open = open_vmcore,
+ .proc_release = release_vmcore,
.proc_read_iter = read_vmcore,
.proc_lseek = default_llseek,
.proc_mmap = mmap_vmcore,
};
-static struct vmcore* __init get_new_element(void)
-{
- return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
-}
-
static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
struct list_head *vc_list)
{
+ struct vmcore_range *m;
u64 size;
- struct vmcore *m;
size = elfsz + elfnotesegsz;
list_for_each_entry(m, vc_list, list) {
@@ -1109,7 +1121,6 @@ static int __init process_ptload_program_headers_elf64(char *elfptr,
Elf64_Ehdr *ehdr_ptr;
Elf64_Phdr *phdr_ptr;
loff_t vmcore_off;
- struct vmcore *new;
ehdr_ptr = (Elf64_Ehdr *)elfptr;
phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
@@ -1128,13 +1139,8 @@ static int __init process_ptload_program_headers_elf64(char *elfptr,
end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
size = end - start;
- /* Add this contiguous chunk of memory to vmcore list.*/
- new = get_new_element();
- if (!new)
+ if (vmcore_alloc_add_range(vc_list, start, size))
return -ENOMEM;
- new->paddr = start;
- new->size = size;
- list_add_tail(&new->list, vc_list);
/* Update the program header offset. */
phdr_ptr->p_offset = vmcore_off + (paddr - start);
@@ -1152,7 +1158,6 @@ static int __init process_ptload_program_headers_elf32(char *elfptr,
Elf32_Ehdr *ehdr_ptr;
Elf32_Phdr *phdr_ptr;
loff_t vmcore_off;
- struct vmcore *new;
ehdr_ptr = (Elf32_Ehdr *)elfptr;
phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
@@ -1171,13 +1176,8 @@ static int __init process_ptload_program_headers_elf32(char *elfptr,
end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
size = end - start;
- /* Add this contiguous chunk of memory to vmcore list.*/
- new = get_new_element();
- if (!new)
+ if (vmcore_alloc_add_range(vc_list, start, size))
return -ENOMEM;
- new->paddr = start;
- new->size = size;
- list_add_tail(&new->list, vc_list);
/* Update the program header offset */
phdr_ptr->p_offset = vmcore_off + (paddr - start);
@@ -1190,8 +1190,8 @@ static int __init process_ptload_program_headers_elf32(char *elfptr,
static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
struct list_head *vc_list)
{
+ struct vmcore_range *m;
loff_t vmcore_off;
- struct vmcore *m;
/* Skip ELF header, program headers and ELF note segment. */
vmcore_off = elfsz + elfnotes_sz;
@@ -1518,12 +1518,18 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
dump->buf = buf;
dump->size = data_size;
- /* Add the dump to driver sysfs list */
- mutex_lock(&vmcoredd_mutex);
- list_add_tail(&dump->list, &vmcoredd_list);
- mutex_unlock(&vmcoredd_mutex);
+ /* Add the dump to driver sysfs list and update the elfcore hdr */
+ mutex_lock(&vmcore_mutex);
+ if (vmcore_opened)
+ pr_warn_once("Unexpected adding of device dump\n");
+ if (vmcore_open) {
+ ret = -EBUSY;
+ goto out_err;
+ }
+ list_add_tail(&dump->list, &vmcoredd_list);
vmcoredd_update_size(data_size);
+ mutex_unlock(&vmcore_mutex);
return 0;
out_err:
@@ -1535,11 +1541,163 @@ out_err:
EXPORT_SYMBOL(vmcore_add_device_dump);
#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+#ifdef CONFIG_PROC_VMCORE_DEVICE_RAM
+static int vmcore_realloc_elfcore_buffer_elf64(size_t new_size)
+{
+ char *elfcorebuf_new;
+
+ if (WARN_ON_ONCE(new_size < elfcorebuf_sz))
+ return -EINVAL;
+ if (get_order(elfcorebuf_sz_orig) == get_order(new_size)) {
+ elfcorebuf_sz_orig = new_size;
+ return 0;
+ }
+
+ elfcorebuf_new = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(new_size));
+ if (!elfcorebuf_new)
+ return -ENOMEM;
+ memcpy(elfcorebuf_new, elfcorebuf, elfcorebuf_sz);
+ free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig));
+ elfcorebuf = elfcorebuf_new;
+ elfcorebuf_sz_orig = new_size;
+ return 0;
+}
+
+static void vmcore_reset_offsets_elf64(void)
+{
+ Elf64_Phdr *phdr_start = (Elf64_Phdr *)(elfcorebuf + sizeof(Elf64_Ehdr));
+ loff_t vmcore_off = elfcorebuf_sz + elfnotes_sz;
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfcorebuf;
+ Elf64_Phdr *phdr;
+ int i;
+
+ for (i = 0, phdr = phdr_start; i < ehdr->e_phnum; i++, phdr++) {
+ u64 start, end;
+
+ /*
+ * After merge_note_headers_elf64() we should only have a single
+ * PT_NOTE entry that starts immediately after elfcorebuf_sz.
+ */
+ if (phdr->p_type == PT_NOTE) {
+ phdr->p_offset = elfcorebuf_sz;
+ continue;
+ }
+
+ start = rounddown(phdr->p_offset, PAGE_SIZE);
+ end = roundup(phdr->p_offset + phdr->p_memsz, PAGE_SIZE);
+ phdr->p_offset = vmcore_off + (phdr->p_offset - start);
+ vmcore_off = vmcore_off + end - start;
+ }
+ set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
+}
+
+static int vmcore_add_device_ram_elf64(struct list_head *list, size_t count)
+{
+ Elf64_Phdr *phdr_start = (Elf64_Phdr *)(elfcorebuf + sizeof(Elf64_Ehdr));
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfcorebuf;
+ struct vmcore_range *cur;
+ Elf64_Phdr *phdr;
+ size_t new_size;
+ int rc;
+
+ if ((Elf32_Half)(ehdr->e_phnum + count) != ehdr->e_phnum + count) {
+ pr_err("too many device ram ranges\n");
+ return -ENOSPC;
+ }
+
+ /* elfcorebuf_sz must always cover full pages. */
+ new_size = sizeof(Elf64_Ehdr) +
+ (ehdr->e_phnum + count) * sizeof(Elf64_Phdr);
+ new_size = roundup(new_size, PAGE_SIZE);
+
+ /*
+ * Make sure we have sufficient space to include the new PT_LOAD
+ * entries.
+ */
+ rc = vmcore_realloc_elfcore_buffer_elf64(new_size);
+ if (rc) {
+ pr_err("resizing elfcore failed\n");
+ return rc;
+ }
+
+ /* Modify our used elfcore buffer size to cover the new entries. */
+ elfcorebuf_sz = new_size;
+
+ /* Fill the added PT_LOAD entries. */
+ phdr = phdr_start + ehdr->e_phnum;
+ list_for_each_entry(cur, list, list) {
+ WARN_ON_ONCE(!IS_ALIGNED(cur->paddr | cur->size, PAGE_SIZE));
+ elfcorehdr_fill_device_ram_ptload_elf64(phdr, cur->paddr, cur->size);
+
+ /* p_offset will be adjusted later. */
+ phdr++;
+ ehdr->e_phnum++;
+ }
+ list_splice_tail(list, &vmcore_list);
+
+ /* We changed elfcorebuf_sz and added new entries; reset all offsets. */
+ vmcore_reset_offsets_elf64();
+
+ /* Finally, recalculate the total vmcore size. */
+ vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
+ &vmcore_list);
+ proc_vmcore->size = vmcore_size;
+ return 0;
+}
+
+static void vmcore_process_device_ram(struct vmcore_cb *cb)
+{
+ unsigned char *e_ident = (unsigned char *)elfcorebuf;
+ struct vmcore_range *first, *m;
+ LIST_HEAD(list);
+ int count;
+
+ /* We only support Elf64 dumps for now. */
+ if (WARN_ON_ONCE(e_ident[EI_CLASS] != ELFCLASS64)) {
+ pr_err("device ram ranges only support Elf64\n");
+ return;
+ }
+
+ if (cb->get_device_ram(cb, &list)) {
+ pr_err("obtaining device ram ranges failed\n");
+ return;
+ }
+ count = list_count_nodes(&list);
+ if (!count)
+ return;
+
+ /*
+ * For some reason these ranges are already know? Might happen
+ * with unusual register->unregister->register sequences; we'll simply
+ * sanity check using the first range.
+ */
+ first = list_first_entry(&list, struct vmcore_range, list);
+ list_for_each_entry(m, &vmcore_list, list) {
+ unsigned long long m_end = m->paddr + m->size;
+ unsigned long long first_end = first->paddr + first->size;
+
+ if (first->paddr < m_end && m->paddr < first_end)
+ goto out_free;
+ }
+
+ /* If adding the mem nodes succeeds, they must not be freed. */
+ if (!vmcore_add_device_ram_elf64(&list, count))
+ return;
+out_free:
+ vmcore_free_ranges(&list);
+}
+#else /* !CONFIG_PROC_VMCORE_DEVICE_RAM */
+static void vmcore_process_device_ram(struct vmcore_cb *cb)
+{
+}
+#endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */
+
/* Free all dumps in vmcore device dump list */
static void vmcore_free_device_dumps(void)
{
#ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
- mutex_lock(&vmcoredd_mutex);
+ mutex_lock(&vmcore_mutex);
while (!list_empty(&vmcoredd_list)) {
struct vmcoredd_node *dump;
@@ -1549,7 +1707,7 @@ static void vmcore_free_device_dumps(void)
vfree(dump->buf);
vfree(dump);
}
- mutex_unlock(&vmcoredd_mutex);
+ mutex_unlock(&vmcore_mutex);
#endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
}
@@ -1571,7 +1729,7 @@ static int __init vmcore_init(void)
rc = parse_crash_elf_headers();
if (rc) {
elfcorehdr_free(elfcorehdr_addr);
- pr_warn("Kdump: vmcore not initialized\n");
+ pr_warn("not initialized\n");
return rc;
}
elfcorehdr_free(elfcorehdr_addr);
@@ -1592,14 +1750,7 @@ void vmcore_cleanup(void)
proc_vmcore = NULL;
}
- /* clear the vmcore list. */
- while (!list_empty(&vmcore_list)) {
- struct vmcore *m;
-
- m = list_first_entry(&vmcore_list, struct vmcore, list);
- list_del(&m->list);
- kfree(m);
- }
+ vmcore_free_ranges(&vmcore_list);
free_elfcorebuf();
/* clear vmcore device dump list */
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index acc55626afdcd..2f2555e6407ce 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -20,6 +20,8 @@ extern int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size);
extern void elfcorehdr_free(unsigned long long addr);
extern ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos);
extern ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
+void elfcorehdr_fill_device_ram_ptload_elf64(Elf64_Phdr *phdr,
+ unsigned long long paddr, unsigned long long size);
extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
unsigned long from, unsigned long pfn,
unsigned long size, pgprot_t prot);
@@ -99,6 +101,12 @@ static inline void vmcore_unusable(void)
* indicated in the vmcore instead. For example, a ballooned page
* contains no data and reading from such a page will cause high
* load in the hypervisor.
+ * @get_device_ram: query RAM ranges that can only be detected by device
+ * drivers, such as the virtio-mem driver, so they can be included in
+ * the crash dump on architectures that allocate the elfcore hdr in the dump
+ * ("2nd") kernel. Indicated RAM ranges may contain holes to reduce the
+ * total number of ranges; such holes can be detected using the pfn_is_ram
+ * callback just like for other RAM.
* @next: List head to manage registered callbacks internally; initialized by
* register_vmcore_cb().
*
@@ -109,11 +117,44 @@ static inline void vmcore_unusable(void)
*/
struct vmcore_cb {
bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
+ int (*get_device_ram)(struct vmcore_cb *cb, struct list_head *list);
struct list_head next;
};
extern void register_vmcore_cb(struct vmcore_cb *cb);
extern void unregister_vmcore_cb(struct vmcore_cb *cb);
+struct vmcore_range {
+ struct list_head list;
+ unsigned long long paddr;
+ unsigned long long size;
+ loff_t offset;
+};
+
+/* Allocate a vmcore range and add it to the list. */
+static inline int vmcore_alloc_add_range(struct list_head *list,
+ unsigned long long paddr, unsigned long long size)
+{
+ struct vmcore_range *m = kzalloc(sizeof(*m), GFP_KERNEL);
+
+ if (!m)
+ return -ENOMEM;
+ m->paddr = paddr;
+ m->size = size;
+ list_add_tail(&m->list, list);
+ return 0;
+}
+
+/* Free a list of vmcore ranges. */
+static inline void vmcore_free_ranges(struct list_head *list)
+{
+ struct vmcore_range *m, *tmp;
+
+ list_for_each_entry_safe(m, tmp, list, list) {
+ list_del(&m->list);
+ kfree(m);
+ }
+}
+
#else /* !CONFIG_CRASH_DUMP */
static inline bool is_kdump_kernel(void) { return false; }
#endif /* CONFIG_CRASH_DUMP */
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 86c0f1d189988..9a2fa013c91de 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -20,19 +20,6 @@ struct kcore_list {
int type;
};
-struct vmcore {
- struct list_head list;
- unsigned long long paddr;
- unsigned long long size;
- loff_t offset;
-};
-
-struct vmcoredd_node {
- struct list_head list; /* List of dumps */
- void *buf; /* Buffer containing device's dump */
- unsigned int size; /* Size of the buffer */
-};
-
#ifdef CONFIG_PROC_KCORE
void __init kclist_add(struct kcore_list *, void *, size_t, int type);
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index dd88682e27e31..4d16c13d0df58 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -190,6 +190,8 @@ int virtio_device_freeze(struct virtio_device *dev);
int virtio_device_restore(struct virtio_device *dev);
#endif
void virtio_reset_device(struct virtio_device *dev);
+int virtio_device_reset_prepare(struct virtio_device *dev);
+int virtio_device_reset_done(struct virtio_device *dev);
size_t virtio_max_dma_size(const struct virtio_device *vdev);
@@ -214,6 +216,10 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
* changes; may be called in interrupt context.
* @freeze: optional function to call during suspend/hibernation.
* @restore: optional function to call on resume.
+ * @reset_prepare: optional function to call when a transport specific reset
+ * occurs.
+ * @reset_done: optional function to call after transport specific reset
+ * operation has finished.
*/
struct virtio_driver {
struct device_driver driver;
@@ -229,6 +235,8 @@ struct virtio_driver {
void (*config_changed)(struct virtio_device *dev);
int (*freeze)(struct virtio_device *dev);
int (*restore)(struct virtio_device *dev);
+ int (*reset_prepare)(struct virtio_device *dev);
+ int (*reset_done)(struct virtio_device *dev);
};
#define drv_to_virtio(__drv) container_of_const(__drv, struct virtio_driver, driver)
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
index 11bd48c72c6cc..68a627d04afa1 100644
--- a/include/uapi/linux/vduse.h
+++ b/include/uapi/linux/vduse.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
#ifndef _UAPI_VDUSE_H_
#define _UAPI_VDUSE_H_
diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index 1beb317df1b9b..8549d45712571 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h
@@ -116,6 +116,8 @@
#define VIRTIO_PCI_CAP_PCI_CFG 5
/* Additional shared memory capability */
#define VIRTIO_PCI_CAP_SHARED_MEMORY_CFG 8
+/* PCI vendor data configuration */
+#define VIRTIO_PCI_CAP_VENDOR_CFG 9
/* This is the PCI capability header: */
struct virtio_pci_cap {
@@ -130,6 +132,18 @@ struct virtio_pci_cap {
__le32 length; /* Length of the structure, in bytes. */
};
+/* This is the PCI vendor data capability header: */
+struct virtio_pci_vndr_data {
+ __u8 cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */
+ __u8 cap_next; /* Generic PCI field: next ptr. */
+ __u8 cap_len; /* Generic PCI field: capability length */
+ __u8 cfg_type; /* Identifies the structure. */
+ __u16 vendor_id; /* Identifies the vendor-specific format. */
+ /* For Vendor Definition */
+ /* Pads structure to a multiple of 4 bytes */
+ /* Reads must not have side effects */
+};
+
struct virtio_pci_cap64 {
struct virtio_pci_cap cap;
__le32 offset_hi; /* Most sig 32 bits of offset */