Diffstat (limited to 'arch/x86/kernel/irq.c')
-rw-r--r--  arch/x86/kernel/irq.c | 126
1 file changed, 58 insertions(+), 68 deletions(-)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 81f9b78e0f7ba..10721a1252269 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -256,26 +256,59 @@ static __always_inline void handle_irq(struct irq_desc *desc,
 		__handle_irq(desc, regs);
 }
 
-static __always_inline int call_irq_handler(int vector, struct pt_regs *regs)
+static struct irq_desc *reevaluate_vector(int vector)
 {
-	struct irq_desc *desc;
-	int ret = 0;
+	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
+
+	if (!IS_ERR_OR_NULL(desc))
+		return desc;
+
+	if (desc == VECTOR_UNUSED)
+		pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector);
+	else
+		__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+	return NULL;
+}
+
+static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs)
+{
+	struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
 
-	desc = __this_cpu_read(vector_irq[vector]);
 	if (likely(!IS_ERR_OR_NULL(desc))) {
 		handle_irq(desc, regs);
-	} else {
-		ret = -EINVAL;
-		if (desc == VECTOR_UNUSED) {
-			pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
-					     __func__, smp_processor_id(),
-					     vector);
-		} else {
-			__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
-		}
+		return true;
 	}
-	return ret;
+
+	/*
+	 * Reevaluate with vector_lock held to prevent a race against
+	 * request_irq() setting up the vector:
+	 *
+	 * CPU0				CPU1
+	 *				interrupt is raised in APIC IRR
+	 *				but not handled
+	 *			free_irq()
+	 *				per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;
+	 *
+	 * request_irq()		common_interrupt()
+	 *				  d = this_cpu_read(vector_irq[vector]);
+	 *
+	 * per_cpu(vector_irq, CPU1)[vector] = desc;
+	 *
+	 *				  if (d == VECTOR_SHUTDOWN)
+	 *				    this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+	 *
+	 * This requires that the same vector on the same target CPU is
+	 * handed out or that a spurious interrupt hits that CPU/vector.
+	 */
+	lock_vector_lock();
+	desc = reevaluate_vector(vector);
+	unlock_vector_lock();
+
+	if (!desc)
+		return false;
+
+	handle_irq(desc, regs);
+	return true;
 }
 
 /*
@@ -289,7 +322,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
 	/* entry code tells RCU that we're not quiescent. Check it. */
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
 
-	if (unlikely(call_irq_handler(vector, regs)))
+	if (unlikely(!call_irq_handler(vector, regs)))
 		apic_eoi();
 
 	set_irq_regs(old_regs);
@@ -380,61 +413,18 @@ void intel_posted_msi_init(void)
 	this_cpu_write(posted_msi_pi_desc.ndst, destination);
 }
 
-/*
- * De-multiplexing posted interrupts is on the performance path, the code
- * below is written to optimize the cache performance based on the following
- * considerations:
- * 1.Posted interrupt descriptor (PID) fits in a cache line that is frequently
- *   accessed by both CPU and IOMMU.
- * 2.During posted MSI processing, the CPU needs to do 64-bit read and xchg
- *   for checking and clearing posted interrupt request (PIR), a 256 bit field
- *   within the PID.
- * 3.On the other side, the IOMMU does atomic swaps of the entire PID cache
- *   line when posting interrupts and setting control bits.
- * 4.The CPU can access the cache line a magnitude faster than the IOMMU.
- * 5.Each time the IOMMU does interrupt posting to the PIR will evict the PID
- *   cache line. The cache line states after each operation are as follows:
- *   CPU		IOMMU			PID Cache line state
- *   ---------------------------------------------------------------
- *   read64					exclusive
- *   lock xchg64				modified
- *			post/atomic swap	invalid
- *   -------------------------------------------------------------
- *
- * To reduce L1 data cache miss, it is important to avoid contention with
- * IOMMU's interrupt posting/atomic swap. Therefore, a copy of PIR is used
- * to dispatch interrupt handlers.
- *
- * In addition, the code is trying to keep the cache line state consistent
- * as much as possible. e.g. when making a copy and clearing the PIR
- * (assuming non-zero PIR bits are present in the entire PIR), it does:
- *     read, read, read, read, xchg, xchg, xchg, xchg
- * instead of:
- *     read, xchg, read, xchg, read, xchg, read, xchg
- */
-static __always_inline bool handle_pending_pir(u64 *pir, struct pt_regs *regs)
+static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs)
 {
-	int i, vec = FIRST_EXTERNAL_VECTOR;
-	unsigned long pir_copy[4];
-	bool handled = false;
-
-	for (i = 0; i < 4; i++)
-		pir_copy[i] = pir[i];
-
-	for (i = 0; i < 4; i++) {
-		if (!pir_copy[i])
-			continue;
+	unsigned long pir_copy[NR_PIR_WORDS];
+	int vec = FIRST_EXTERNAL_VECTOR;
 
-		pir_copy[i] = arch_xchg(&pir[i], 0);
-		handled = true;
-	}
+	if (!pi_harvest_pir(pir, pir_copy))
+		return false;
 
-	if (handled) {
-		for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR)
-			call_irq_handler(vec, regs);
-	}
+	for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR)
+		call_irq_handler(vec, regs);
 
-	return handled;
+	return true;
 }
 
 /*
@@ -464,7 +454,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
 	 * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here.
 	 */
 	while (++i < MAX_POSTED_MSI_COALESCING_LOOP) {
-		if (!handle_pending_pir(pid->pir64, regs))
+		if (!handle_pending_pir(pid->pir, regs))
 			break;
 	}
 
@@ -479,7 +469,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification)
	 * process PIR bits one last time such that handling the new interrupts
	 * are not delayed until the next IRQ.
	 */
-	handle_pending_pir(pid->pir64, regs);
+	handle_pending_pir(pid->pir, regs);
 	apic_eoi();
 
 	irq_exit();
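
Note: the rewritten call_irq_handler() changes the contract from an error code to a
boolean "handled" result, and common_interrupt() now issues apic_eoi() only when both
the lockless lookup and the locked reevaluation fail. Below is a minimal user-space
model of that fast-path/slow-path shape, for illustration only; dispatch(), slot and
slot_lock are made-up names, not kernel APIs.

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define NVECS 256

	typedef void (*handler_t)(int);

	static _Atomic handler_t slot[NVECS];		/* vector_irq analogue */
	static pthread_mutex_t slot_lock =		/* vector_lock analogue */
		PTHREAD_MUTEX_INITIALIZER;

	static bool dispatch(int vec)
	{
		handler_t h = atomic_load_explicit(&slot[vec], memory_order_acquire);

		if (h) {			/* fast path: no lock taken */
			h(vec);
			return true;
		}

		/* Slow path: reevaluate under the lock that installers hold,
		 * so a handler set up concurrently is never lost. */
		pthread_mutex_lock(&slot_lock);
		h = atomic_load_explicit(&slot[vec], memory_order_relaxed);
		pthread_mutex_unlock(&slot_lock);

		if (!h) {
			fprintf(stderr, "no handler for vector %d\n", vec);
			return false;	/* caller still EOIs, like common_interrupt() */
		}
		h(vec);
		return true;
	}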
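The deleted comment block documents the copy-then-clear access pattern that the new
pi_harvest_pir() helper presumably encapsulates; its body is not part of this diff.
Below is a self-contained sketch of that pattern, assuming it mirrors the removed
open-coded loops; harvest_pir, PIR_WORDS and PIR_BITS are illustrative names only.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	#define PIR_BITS	256
	#define PIR_WORDS	(PIR_BITS / 64)	/* stands in for NR_PIR_WORDS */

	static bool harvest_pir(_Atomic uint64_t *pir, uint64_t *copy)
	{
		uint64_t pending = 0;
		int i;

		/* Pass 1: plain reads; the PID cache line stays unmodified. */
		for (i = 0; i < PIR_WORDS; i++) {
			copy[i] = atomic_load_explicit(&pir[i], memory_order_relaxed);
			pending |= copy[i];
		}
		if (!pending)
			return false;

		/* Pass 2: atomic xchg only on words that had bits pending,
		 * i.e. read,read,read,read,xchg,xchg,xchg,xchg rather than
		 * interleaving, to limit cache-line ping-pong with the IOMMU. */
		for (i = 0; i < PIR_WORDS; i++) {
			if (copy[i])
				copy[i] = atomic_exchange(&pir[i], 0);
		}
		return true;
	}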