kern/xcall: optimize and improve documentation

Don't uselessly align to the CPU cache line size. Remove the useless array of preallocated cross-calls, and allocate the cross-call on the caller's stack instead. Since that array was the only object protected by disabling preemption, leave preemption enabled accordingly. Document locking keys, and describe the memory ordering guarantees and how they are enforced. Add a TODO entry about asynchronous cross-call support.
author: Richard Braun <rbraun@sceen.net> 2018-02-09 21:56:47 +0100
committer: Richard Braun <rbraun@sceen.net> 2018-02-12 21:34:32 +0100
commit: e63c4bcecf5966661862e4fa29281b3c00319c78 (patch)
tree: ed19767eb20f586f9e6edffab99448f4bf5ea105
parent: 9656d4234d97ec434d6efbcf1a33c53652faa9f2 (diff)
2 files changed, 60 insertions, 59 deletions
diff --git a/kern/xcall.c b/kern/xcall.c
index fb60bacc..2096f619 100644
--- a/kern/xcall.c
+++ b/kern/xcall.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2017 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,45 +32,58 @@
 #include <machine/cpu.h>
 
 struct xcall {
-    alignas(CPU_L1_SIZE) xcall_fn_t fn;
+    xcall_fn_t fn;
     void *arg;
 };
 
 /*
  * Per-CPU data.
  *
- * Send calls are sent to remote processors. Their access is synchronized
- * by disabling preemption.
+ * The lock is used to serialize cross-calls from different processors
+ * to the same processor. It is held during the complete cross-call
+ * sequence. Inside the critical section, accesses to the receive call
+ * are used to enforce release-acquire ordering between the sending
+ * and receiving processors.
  *
- * The received call points to either NULL if there is no call to process,
- * or a remote send call otherwise. The lock serializes the complete
- * inter-processor operation, i.e. setting the received call pointer,
- * communication through an IPI, and waiting for the processor to
- * acknowledge execution. By serializing interrupts, it is certain that
- * there is a 1:1 mapping between interrupts and cross-calls, allowing
- * the handler to process only one cross-call instead of iterating over
- * a queue. This way, interrupts with higher priority can be handled
- * between multiple cross-calls.
+ * Locking keys :
+ * (a) atomic
+ * (c) cpu_data
  */
 struct xcall_cpu_data {
-    alignas(CPU_L1_SIZE) struct xcall send_calls[CONFIG_MAX_CPUS];
-
-    struct syscnt sc_sent;
-    struct syscnt sc_received;
-    struct xcall *recv_call;
-    struct spinlock lock;
+    alignas(CPU_L1_SIZE) struct spinlock lock;
+    struct xcall *recv_call;    /* (c) */
+    struct syscnt sc_sent;      /* (a) */
+    struct syscnt sc_received;  /* (a) */
 };
 
 static struct xcall_cpu_data xcall_cpu_data __percpu;
 
-static inline void
-xcall_set(struct xcall *call, xcall_fn_t fn, void *arg)
+static struct xcall_cpu_data *
+xcall_get_local_cpu_data(void)
+{
+    return cpu_local_ptr(xcall_cpu_data);
+}
+
+static struct xcall_cpu_data *
+xcall_get_cpu_data(unsigned int cpu)
+{
+    return percpu_ptr(xcall_cpu_data, cpu);
+}
+
+static void
+xcall_init(struct xcall *call, xcall_fn_t fn, void *arg)
 {
     call->fn = fn;
     call->arg = arg;
 }
 
 static void
+xcall_process(struct xcall *call)
+{
+    call->fn(call->arg);
+}
+
+static void
 xcall_cpu_data_init(struct xcall_cpu_data *cpu_data, unsigned int cpu)
 {
     char name[SYSCNT_NAME_SIZE];
@@ -83,20 +96,6 @@ xcall_cpu_data_init(struct xcall_cpu_data *cpu_data, unsigned int cpu)
     spinlock_init(&cpu_data->lock);
 }
 
-static struct xcall_cpu_data *
-xcall_cpu_data_get(void)
-{
-    assert(!thread_preempt_enabled());
-    return cpu_local_ptr(xcall_cpu_data);
-}
-
-static struct xcall *
-xcall_cpu_data_get_send_call(struct xcall_cpu_data *cpu_data, unsigned int cpu)
-{
-    assert(cpu < ARRAY_SIZE(cpu_data->send_calls));
-    return &cpu_data->send_calls[cpu];
-}
-
 static struct xcall *
 xcall_cpu_data_get_recv_call(const struct xcall_cpu_data *cpu_data)
 {
@@ -122,7 +121,7 @@ xcall_setup(void)
     unsigned int i;
 
     for (i = 0; i < cpu_count(); i++) {
-        xcall_cpu_data_init(percpu_ptr(xcall_cpu_data, i), i);
+        xcall_cpu_data_init(xcall_get_cpu_data(i), i);
     }
 
     return 0;
@@ -136,34 +135,30 @@ INIT_OP_DEFINE(xcall_setup,
 void
 xcall_call(xcall_fn_t fn, void *arg, unsigned int cpu)
 {
-    struct xcall_cpu_data *local_data, *remote_data;
-    struct xcall *call;
+    struct xcall_cpu_data *cpu_data;
+    struct xcall call;
 
     assert(cpu_intr_enabled());
-    assert(fn != NULL);
-
-    remote_data = percpu_ptr(xcall_cpu_data, cpu);
-
-    thread_preempt_disable();
+    assert(fn);
 
-    local_data = xcall_cpu_data_get();
-    call = xcall_cpu_data_get_send_call(local_data, cpu);
-    xcall_set(call, fn, arg);
+    xcall_init(&call, fn, arg);
+    cpu_data = xcall_get_cpu_data(cpu);
 
-    spinlock_lock(&remote_data->lock);
+    spinlock_lock(&cpu_data->lock);
 
-    xcall_cpu_data_set_recv_call(remote_data, call);
+    /* Enforce release ordering on the receive call */
+    xcall_cpu_data_set_recv_call(cpu_data, &call);
 
     cpu_send_xcall(cpu);
-    syscnt_inc(&remote_data->sc_sent);
 
-    while (xcall_cpu_data_get_recv_call(remote_data) != NULL) {
+    /* Enforce acquire ordering on the receive call */
+    while (xcall_cpu_data_get_recv_call(cpu_data) != NULL) {
         cpu_pause();
     }
 
-    spinlock_unlock(&remote_data->lock);
+    spinlock_unlock(&cpu_data->lock);
 
-    thread_preempt_enable();
+    syscnt_inc(&cpu_data->sc_sent);
 }
 
 void
@@ -174,17 +169,19 @@ xcall_intr(void)
 
     assert(thread_check_intr_context());
 
-    cpu_data = xcall_cpu_data_get();
+    cpu_data = xcall_get_local_cpu_data();
 
+    /* Enforce acquire ordering on the receive call */
     call = xcall_cpu_data_get_recv_call(cpu_data);
 
     if (call) {
-        call->fn(call->arg);
+        xcall_process(call);
     } else {
         log_warning("xcall: spurious interrupt on cpu%u", cpu_id());
     }
 
     syscnt_inc(&cpu_data->sc_received);
 
+    /* Enforce release ordering on the receive call */
     xcall_cpu_data_clear_recv_call(cpu_data);
 }
diff --git a/kern/xcall.h b/kern/xcall.h
index 0e954f97..47331526 100644
--- a/kern/xcall.h
+++ b/kern/xcall.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -19,6 +19,8 @@
  *
  * This module provides the ability to run functions, called cross-calls,
  * on specific processors.
+ *
+ * TODO Asynchronous cross-calls.
  */
 
 #ifndef _KERN_XCALL_H
@@ -35,16 +37,18 @@ typedef void (*xcall_fn_t)(void *arg);
  * Run the given cross-call function on a specific processor.
  *
  * The operation is completely synchronous, returning only when the function
- * has finished running on the target processor, with the side effects of
- * the function visible.
+ * has finished running on the target processor. Release-acquire ordering is
+ * enforced both before and after the function runs on the target processor,
+ * so that side effects produced by the caller are visible to the function,
+ * and vice-versa on return.
  *
- * The function is run in interrupt context. Interrupts must be enabled
+ * The callback function runs in interrupt context. Interrupts must be enabled
  * when calling this function.
  */
 void xcall_call(xcall_fn_t fn, void *arg, unsigned int cpu);
 
 /*
- * Report a cross-call interrupt from a remote processor.
+ * Handle a cross-call interrupt from a remote processor.
  *
  * Called from interrupt context.
  */
author	Richard Braun <rbraun@sceen.net>	2018-02-09 21:56:47 +0100
committer	Richard Braun <rbraun@sceen.net>	2018-02-12 21:34:32 +0100
commit	e63c4bcecf5966661862e4fa29281b3c00319c78 (patch)
tree	ed19767eb20f586f9e6edffab99448f4bf5ea105
parent	9656d4234d97ec434d6efbcf1a33c53652faa9f2 (diff)