-rw-r--r--   arch/x86/machine/atomic.h | 63
-rw-r--r--   kern/atomic.h             | 80
2 files changed, 79 insertions, 64 deletions
diff --git a/arch/x86/machine/atomic.h b/arch/x86/machine/atomic.h
index 3deface6..c9ec05e4 100644
--- a/arch/x86/machine/atomic.h
+++ b/arch/x86/machine/atomic.h
@@ -16,27 +16,29 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  *
  *
- * Architecture-specific atomic operations and definitions.
- *
+ * Architecture-specific definitions for atomic operations.
  */
 
 #ifndef _X86_ATOMIC_H
 #define _X86_ATOMIC_H
 
-#ifdef __LP64__
+#ifndef _KERN_ATOMIC_H
+#error "don't include <machine/atomic.h> directly, use <kern/atomic.h> instead"
+#endif
+
+#include <stdbool.h>
 
-#define atomic_load(ptr, mo)          __atomic_load_n((ptr), mo)
-#define atomic_store(ptr, val, mo)    __atomic_store_n((ptr), (val), mo)
+#include <kern/macros.h>
 
-#else /* __LP64__ */
+#ifndef __LP64__
 
 /*
- * On x86, the compiler generates either an FP-stack read/write, or an SSE2
+ * On i386, the compiler generates either an FP-stack read/write, or an SSE2
  * store/load to implement these 64-bit atomic operations. Since that's not
- * feasible on kernel-land, we fallback to cmpxchg8b. Note that this means
- * that 'atomic_load' cannot be used on a const pointer. However, if it's
- * being accessed by an atomic operation, then it's very likely that it can
- * also be modified, so it should be OK.
+ * feasible in the kernel, fall back to cmpxchg8b. Note that, in this case,
+ * loading becomes a potentially mutating operation, but it's not expected
+ * to be a problem since atomic operations are normally not used on read-only
+ * memory. Also note that this assumes the processor is at least an i586.
  */
 
 #define atomic_load(ptr, mo)                                               \
@@ -44,11 +46,11 @@ MACRO_BEGIN                                            \
     typeof(*(ptr)) ___ret;                                                 \
                                                                            \
     if (sizeof(___ret) != 8) {                                             \
-        ___ret = __atomic_load_n((ptr), mo);                               \
+        ___ret = __atomic_load_n(ptr, mo);                                 \
     } else {                                                               \
         ___ret = 0;                                                        \
-        __atomic_compare_exchange_n((uint64_t *)(ptr), &___ret, ___ret,    \
-                                    0, mo, __ATOMIC_RELAXED);              \
+        __atomic_compare_exchange_n(ptr, &___ret, ___ret,                  \
+                                    false, mo, __ATOMIC_RELAXED);          \
     }                                                                      \
                                                                            \
     ___ret;                                                                \
@@ -57,29 +59,34 @@ MACRO_END
 #define atomic_store(ptr, val, mo)                                         \
 MACRO_BEGIN                                                                \
     if (sizeof(*(ptr) != 8)) {                                             \
-        __atomic_store_n((ptr), (val), mo);                                \
+        __atomic_store_n(ptr, val, mo);                                    \
     } else {                                                               \
-        typeof(ptr) ___ptr;                                                \
-        typeof(val) ___val, ___exp;                                        \
+        typeof(val) ___oval, ___nval;                                      \
+        bool ___done;                                                      \
                                                                            \
-        ___ptr = (uint64_t *)(ptr);                                        \
-        ___val = (val);                                                    \
-        ___exp = *___ptr;                                                  \
+        ___oval = *(ptr);                                                  \
+        ___nval = (val);                                                   \
                                                                            \
-        while (!__atomic_compare_exchange_n(___ptr, &___exp, ___val, 0,    \
-                                            momo, __ATOMIC_RELAXED)) {     \
-        }                                                                  \
+        do {                                                               \
+            ___done = __atomic_compare_exchange_n(ptr, &___oval, ___nval,  \
+                                                  false, mo,               \
+                                                  __ATOMIC_RELAXED);       \
+        } while (!___done);                                                \
                                                                            \
     }                                                                      \
 MACRO_END
 
-#endif /* __LP64__ */
+/*
+ * Report that load and store are architecture-specific.
+ */
+#define ATOMIC_ARCH_SPECIFIC_LOAD
+#define ATOMIC_ARCH_SPECIFIC_STORE
 
-/* Notify the generic header that we implemented loads and stores */
-#define ATOMIC_LOAD_DEFINED
-#define ATOMIC_STORE_DEFINED
+#endif /* __LP64__ */
 
-/* Both x86 and x86_64 can use atomic operations on 64-bit values */
+/*
+ * Report that 64-bits operations are supported.
+ */
 #define ATOMIC_HAVE_64B_OPS
 
 #endif /* _X86_ATOMIC_H */
diff --git a/kern/atomic.h b/kern/atomic.h
index 1f0430e3..67f775db 100644
--- a/kern/atomic.h
+++ b/kern/atomic.h
@@ -13,18 +13,22 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Type-generic memory-model aware atomic operations.
  */
 
 #ifndef _KERN_ATOMIC_H
 #define _KERN_ATOMIC_H
 
+#include <stdbool.h>
+
 #include <kern/macros.h>
 #include <machine/atomic.h>
 
 /*
  * Supported memory orders.
  */
-
 #define ATOMIC_RELAXED   __ATOMIC_RELAXED
 #define ATOMIC_ACQUIRE   __ATOMIC_ACQUIRE
 #define ATOMIC_RELEASE   __ATOMIC_RELEASE
@@ -34,85 +38,89 @@
 /*
  * Type-generic atomic operations.
  */
+#define atomic_fetch_add(ptr, val, mo)  __atomic_fetch_add(ptr, val, mo)
 
-#define atomic_fetch_add(ptr, val, mo)  __atomic_fetch_add((ptr), (val), mo)
+#define atomic_fetch_sub(ptr, val, mo)  __atomic_fetch_sub(ptr, val, mo)
 
-#define atomic_fetch_sub(ptr, val, mo)  __atomic_fetch_sub((ptr), (val), mo)
+#define atomic_fetch_and(ptr, val, mo)  __atomic_fetch_and(ptr, val, mo)
 
-#define atomic_fetch_and(ptr, val, mo)  __atomic_fetch_and((ptr), (val), mo)
+#define atomic_fetch_or(ptr, val, mo)   __atomic_fetch_or(ptr, val, mo)
 
-#define atomic_fetch_or(ptr, val, mo)   __atomic_fetch_or((ptr), (val), mo)
+#define atomic_fetch_xor(ptr, val, mo)  __atomic_fetch_xor(ptr, val, mo)
 
-#define atomic_fetch_xor(ptr, val, mo)  __atomic_fetch_xor((ptr), (val), mo)
+#define atomic_add(ptr, val, mo)        (void)__atomic_add_fetch(ptr, val, mo)
 
-#define atomic_add(ptr, val, mo)        ((void)__atomic_add_fetch((ptr), (val), mo))
+#define atomic_sub(ptr, val, mo)        (void)__atomic_sub_fetch(ptr, val, mo)
 
-#define atomic_sub(ptr, val, mo)        ((void)__atomic_sub_fetch((ptr), (val), mo))
+#define atomic_and(ptr, val, mo)        (void)__atomic_and_fetch(ptr, val, mo)
 
-#define atomic_and(ptr, val, mo)        ((void)__atomic_and_fetch((ptr), (val), mo))
+#define atomic_or(ptr, val, mo)         (void)__atomic_or_fetch(ptr, val, mo)
 
-#define atomic_or(ptr, val, mo)         ((void)__atomic_or_fetch((ptr), (val), mo))
+#define atomic_xor(ptr, val, mo)        (void)__atomic_xor_fetch(ptr, val, mo)
 
-#define atomic_xor(ptr, val, mo)        ((void)__atomic_xor_fetch((ptr), (val), mo))
-
-#define atomic_swap(ptr, val, mo)       __atomic_exchange_n((ptr), (val), mo)
+#define atomic_swap(ptr, val, mo)       __atomic_exchange_n(ptr, val, mo)
 
 /*
- * For compare-and-swap, we deviate a little from the standard, and only
+ * For compare-and-swap, deviate a little from the standard, and only
  * return the value before the comparison, leaving it up to the user to
  * determine whether the swap was actually performed or not.
+ *
  * Also, note that the memory order in case of failure is relaxed. This is
  * because atomic CAS is typically used in a loop. However, if a different
  * code path is taken on failure (rather than retrying), then the user
  * should be aware that a memory fence might be necessary.
+ *
+ * Finally, although a local variable isn't strictly needed for the new
+ * value, some compilers seem to have trouble when all parameters don't
+ * have the same type.
  */
-
-#define atomic_cas(ptr, exp, nval, mo)                              \
+#define atomic_cas(ptr, oval, nval, mo)                             \
 MACRO_BEGIN                                                         \
-    typeof(*(ptr)) ___exp, ___nval;                                 \
+    typeof(*(ptr)) ___oval, ___nval;                                \
                                                                     \
-    ___exp = (exp);                                                 \
+    ___oval = (oval);                                               \
     ___nval = (nval);                                               \
-    __atomic_compare_exchange_n((ptr), &___exp, ___nval, 0, mo,     \
-                                ATOMIC_RELAXED);                    \
-    ___exp;                                                         \
+    __atomic_compare_exchange_n(ptr, &___oval, ___nval, false,      \
+                                mo, ATOMIC_RELAXED);                \
+    ___oval;                                                        \
 MACRO_END
 
 /*
  * Some architectures may need specific definitions for loads and stores,
  * in order to prevent the compiler from emitting unsupported instructions.
- * As such, we only define these if the arch header didn't already.
+ * As such, only define these if the architecture-specific part of the
+ * module didn't already.
  */
-#ifndef ATOMIC_LOAD_DEFINED
-#define atomic_load(ptr, mo) __atomic_load_n((ptr), mo)
-#endif /* ATOMIC_LOAD_DEFINED */
+#ifndef ATOMIC_ARCH_SPECIFIC_LOAD
+#define atomic_load(ptr, mo) __atomic_load_n(ptr, mo)
+#endif
 
-#ifndef ATOMIC_STORE_DEFINED
-#define atomic_store(ptr, val, mo) __atomic_store_n((ptr), (val), mo)
-#endif /* ATOMIC_STORE_DEFINED */
+#ifndef ATOMIC_ARCH_SPECIFIC_STORE
+#define atomic_store(ptr, val, mo) __atomic_store_n(ptr, val, mo)
+#endif
 
 /*
  * Common shortcuts.
  */
 
-#define atomic_cas_acquire(ptr, exp, val) \
-  atomic_cas(ptr, exp, val, ATOMIC_ACQUIRE)
+#define atomic_cas_acquire(ptr, oval, nval) \
+  atomic_cas(ptr, oval, nval, ATOMIC_ACQUIRE)
 
-#define atomic_cas_release(ptr, exp, val) \
-  atomic_cas(ptr, exp, val, ATOMIC_RELEASE)
+#define atomic_cas_release(ptr, oval, nval) \
+  atomic_cas(ptr, oval, nval, ATOMIC_RELEASE)
 
-#define atomic_cas_seq_cst(ptr, exp, val) \
-  atomic_cas(ptr, exp, val, ATOMIC_SEQ_CST)
+#define atomic_cas_seq_cst(ptr, oval, nval) \
+  atomic_cas(ptr, oval, nval, ATOMIC_SEQ_CST)
 
 #define atomic_swap_acquire(ptr, val)   atomic_swap(ptr, val, ATOMIC_ACQUIRE)
 #define atomic_swap_release(ptr, val)   atomic_swap(ptr, val, ATOMIC_RELEASE)
 #define atomic_swap_seq_cst(ptr, val)   atomic_swap(ptr, val, ATOMIC_SEQ_CST)
 
-#define atomic_fetch_add_acq_rel(ptr, val) \
+#define atomic_fetch_add_acq_rel(ptr, val)  \
   atomic_fetch_add(ptr, val, ATOMIC_ACQ_REL)
 
-#define atomic_fetch_sub_acq_rel(ptr, val) \
+#define atomic_fetch_sub_acq_rel(ptr, val)  \
   atomic_fetch_sub(ptr, val, ATOMIC_ACQ_REL)
 
 #define atomic_or_acq_rel(ptr, val)     atomic_or(ptr, val, ATOMIC_ACQ_REL)
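
Note (not part of the patch): the i386 fallback above turns both 64-bit loads and stores into compare-and-swap operations, so the compiler emits cmpxchg8b instead of FP-stack or SSE2 instructions. The standalone C sketch below illustrates the same technique with the GCC/Clang __atomic builtins the macros rely on; the function names store_64_sketch/load_64_sketch and the fixed uint64_t type are illustrative only, since the real code is the type-generic macro shown in the diff.

    #include <stdbool.h>
    #include <stdint.h>

    /* 64-bit store built from a CAS loop: keep retrying until the CAS
       succeeds, which is what the do/while in the patched macro does. */
    static inline void
    store_64_sketch(uint64_t *ptr, uint64_t nval, int mo)
    {
        uint64_t oval;
        bool done;

        oval = *ptr;    /* possibly stale; the CAS loop corrects it */

        do {
            /* On failure, oval is updated with the value currently
               stored at ptr, so the next attempt uses fresh data. */
            done = __atomic_compare_exchange_n(ptr, &oval, nval, false,
                                               mo, __ATOMIC_RELAXED);
        } while (!done);
    }

    /* 64-bit load built from a single CAS: exchanging a value with
       itself never changes the memory contents, but it does return the
       current value, and it may perform a write cycle. That is why the
       patch comment calls the load potentially mutating and why it
       cannot target read-only memory. */
    static inline uint64_t
    load_64_sketch(uint64_t *ptr, int mo)
    {
        uint64_t ret = 0;

        __atomic_compare_exchange_n(ptr, &ret, ret, false, mo,
                                    __ATOMIC_RELAXED);
        return ret;
    }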
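
Note (not part of the patch): since atomic_cas() returns the value observed before the exchange rather than a boolean, callers detect success by comparing that value against the one they expected. A minimal usage sketch follows, assuming it is built inside the X15 tree where <kern/atomic.h> is available; the try-lock function and its lock-word convention (0 free, 1 taken) are hypothetical.

    #include <stdbool.h>

    #include <kern/atomic.h>

    static bool
    example_try_lock(unsigned long *lock)
    {
        unsigned long prev;

        /* Attempt to change the lock word from 0 (free) to 1 (taken). */
        prev = atomic_cas_acquire(lock, 0, 1);

        /* The exchange happened only if the previous value was the
           expected one. The relaxed ordering on failure documented in
           the header is fine here, since no protected data is accessed
           when the lock isn't taken. */
        return prev == 0;
    }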