diff options
author | Alan Brady <alan.brady@intel.com> | 2016-09-14 16:24:38 -0700 |
---|---|---|
committer | Jeff Kirsher <jeffrey.t.kirsher@intel.com> | 2016-10-28 23:28:39 -0700 |
commit | 96db776a368263bcce9f7eb12e878b0aef1a1974 (patch) | |
tree | 76ac13abdff2dc9d80d17f51ce89fdb569864ab1 /drivers/net/ethernet/intel/i40e/i40e_main.c | |
parent | 6d6fd1be2a1052cbe5b04c3678b30925a9344ac7 (diff) |
i40e/i40evf: fix interrupt affinity bug
There exists a bug in which a 'perfect storm' can occur and cause
interrupts to fail to be correctly affinitized. This causes unexpected
behavior and has a substantial impact on performance when it happens.
The bug occurs if there is heavy traffic, any number of CPUs that have
an i40e interrupt are pegged at 100%, and the interrupt afffinity for
those CPUs is changed. Instead of moving to the new CPU, the interrupt
continues to be polled while there is heavy traffic.
The bug is most readily realized as the driver is first brought up and
all interrupts start on CPU0. If there is heavy traffic and the
interrupt starts polling before the interrupt is affinitized, the
interrupt will be stuck on CPU0 until traffic stops. The bug, however,
can also be wrought out more simply by affinitizing all the interrupts
to a single CPU and then attempting to move any of those interrupts off
while there is heavy traffic.
This patch fixes the bug by registering for update notifications from
the kernel when the interrupt affinity changes. When that fires, we
cache the intended affinity mask. Then, while polling, if the cpu is
pegged at 100% and we failed to clean the rings, we check to make sure
we have the correct affinity and stop polling if we're firing on the
wrong CPU. When the kernel successfully moves the interrupt, it will
start polling on the correct CPU. The performance impact is minimal
since the only time this section gets executed is when performance is
already compromised by the CPU.
Change-ID: I4410a880159b9dba1f8297aa72bef36dca34e830
Signed-off-by: Alan Brady <alan.brady@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_main.c')
-rw-r--r-- | drivers/net/ethernet/intel/i40e/i40e_main.c | 64 |
1 files changed, 51 insertions, 13 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 83edbe8e36182..9382ba80b70c6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3317,6 +3317,33 @@ static irqreturn_t i40e_msix_clean_rings(int irq, void *data) } /** + * i40e_irq_affinity_notify - Callback for affinity changes + * @notify: context as to what irq was changed + * @mask: the new affinity mask + * + * This is a callback function used by the irq_set_affinity_notifier function + * so that we may register to receive changes to the irq affinity masks. + **/ +static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + struct i40e_q_vector *q_vector = + container_of(notify, struct i40e_q_vector, affinity_notify); + + q_vector->affinity_mask = *mask; +} + +/** + * i40e_irq_affinity_release - Callback for affinity notifier release + * @ref: internal core kernel usage + * + * This is a callback function used by the irq_set_affinity_notifier function + * to inform the current notification subscriber that they will no longer + * receive notifications. + **/ +static void i40e_irq_affinity_release(struct kref *ref) {} + +/** * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts * @vsi: the VSI being configured * @basename: name for the vector @@ -3331,10 +3358,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; + int irq_num; for (vector = 0; vector < q_vectors; vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[vector]; + irq_num = pf->msix_entries[base + vector].vector; + if (q_vector->tx.ring && q_vector->rx.ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "TxRx", rx_int_idx++); @@ -3349,7 +3379,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) /* skip this unused q_vector */ continue; } - err = request_irq(pf->msix_entries[base + vector].vector, + err = request_irq(irq_num, vsi->irq_handler, 0, q_vector->name, @@ -3359,9 +3389,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) "MSIX request_irq failed, error: %d\n", err); goto free_queue_irqs; } + + /* register for affinity change notifications */ + q_vector->affinity_notify.notify = i40e_irq_affinity_notify; + q_vector->affinity_notify.release = i40e_irq_affinity_release; + irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); /* assign the mask for this irq */ - irq_set_affinity_hint(pf->msix_entries[base + vector].vector, - &q_vector->affinity_mask); + irq_set_affinity_hint(irq_num, &q_vector->affinity_mask); } vsi->irqs_ready = true; @@ -3370,10 +3404,10 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename) free_queue_irqs: while (vector) { vector--; - irq_set_affinity_hint(pf->msix_entries[base + vector].vector, - NULL); - free_irq(pf->msix_entries[base + vector].vector, - &(vsi->q_vectors[vector])); + irq_num = pf->msix_entries[base + vector].vector; + irq_set_affinity_notifier(irq_num, NULL); + irq_set_affinity_hint(irq_num, NULL); + free_irq(irq_num, &vsi->q_vectors[vector]); } return err; } @@ -4012,19 +4046,23 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi) vsi->irqs_ready = false; for (i = 0; i < vsi->num_q_vectors; i++) { - u16 vector = i + base; + int irq_num; + u16 vector; + + vector = i + base; + irq_num = pf->msix_entries[vector].vector; /* free only the irqs that were actually requested */ if (!vsi->q_vectors[i] || !vsi->q_vectors[i]->num_ringpairs) continue; + /* clear the affinity notifier in the IRQ descriptor */ + irq_set_affinity_notifier(irq_num, NULL); /* clear the affinity_mask in the IRQ descriptor */ - irq_set_affinity_hint(pf->msix_entries[vector].vector, - NULL); - synchronize_irq(pf->msix_entries[vector].vector); - free_irq(pf->msix_entries[vector].vector, - vsi->q_vectors[i]); + irq_set_affinity_hint(irq_num, NULL); + synchronize_irq(irq_num); + free_irq(irq_num, vsi->q_vectors[i]); /* Tear down the interrupt queue link list * |