/*
 * Copyright (c) 2018 Agustina Arzille.
 * Copyright (c) 2018 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * Architecture-specific definitions for local atomic operations.
 */

#ifndef X86_LATOMIC_H
#define X86_LATOMIC_H

#ifndef KERN_LATOMIC_H
#error "don't include directly, use <kern/latomic.h> instead"
#endif

#include
#include

/*
 * Memory ordering is implemented with compiler barriers on entry, exit,
 * both, or neither, according to the specified ordering.
 */
#define latomic_x86_enter(memorder)                                        \
MACRO_BEGIN                                                                \
    if ((memorder) != LATOMIC_RELAXED && (memorder) != LATOMIC_ACQUIRE) {  \
        barrier();                                                         \
    }                                                                      \
MACRO_END

#define latomic_x86_leave(memorder)                                        \
MACRO_BEGIN                                                                \
    if ((memorder) != LATOMIC_RELAXED && (memorder) != LATOMIC_RELEASE) {  \
        barrier();                                                         \
    }                                                                      \
MACRO_END

#define latomic_x86_cas_n(ptr, oval, nval)                                 \
MACRO_BEGIN                                                                \
    typeof(oval) prev_;                                                    \
                                                                           \
    asm volatile("cmpxchg %3, %1"                                          \
                 : "=a" (prev_), "+m" (*(ptr))                             \
                 : "0" (oval), "r" (nval));                                \
    prev_;                                                                 \
MACRO_END

static inline unsigned int
latomic_x86_cas_ui(unsigned int *ptr, unsigned int oval, unsigned int nval)
{
    return latomic_x86_cas_n(ptr, oval, nval);
}

/*
 * 64-bit local atomic operations on i386 are implemented with loops using
 * the cmpxchg8b instruction. This assumes the processor is at least an i586.
 */
static inline unsigned long long
latomic_x86_cas_ull(unsigned long long *ptr, unsigned long long oval,
                    unsigned long long nval)
{
#ifdef __LP64__
    return latomic_x86_cas_n(ptr, oval, nval);
#else /* __LP64__ */
    asm volatile("cmpxchg8b %0"
                 : "+m" (*ptr), "+A" (oval)
                 : "b" ((unsigned long)nval),
                   "c" ((unsigned long)(nval >> 32)));
    return oval;
#endif /* __LP64__ */
}

/*
 * Helper for operations implemented with a CAS loop.
 */
#define latomic_x86_cas_loop_n(ptr, cas, op, val)                          \
MACRO_BEGIN                                                                \
    typeof(val) prev_, oval_, nval_;                                       \
                                                                           \
    do {                                                                   \
        oval_ = *(ptr);                                                    \
        nval_ = oval_ op (val);                                            \
        prev_ = cas(ptr, oval_, nval_);                                    \
    } while (prev_ != oval_);                                              \
                                                                           \
    prev_;                                                                 \
MACRO_END
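
/*
 * Illustration of the resulting barrier placement, using latomic_x86_cas_ui()
 * wrapped in latomic_x86_enter()/latomic_x86_leave() as an example. This is
 * only a sketch of what the macros above produce, not additional code:
 *
 *   LATOMIC_RELAXED:                 cmpxchg
 *   LATOMIC_ACQUIRE:                 cmpxchg; barrier()
 *   LATOMIC_RELEASE:      barrier(); cmpxchg
 *   any other ordering:   barrier(); cmpxchg; barrier()
 *
 * Local atomic operations only have to be atomic and ordered with respect
 * to the local processor (e.g. against interrupts), which is why unlocked
 * instructions and compiler-only barriers are sufficient here.
 */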

/* latomic_load */

#ifndef __LP64__
static inline unsigned long long
latomic_i386_load_64(union atomic_constptr_64 ptr, int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_ull((unsigned long long *)ptr.ull_ptr, 0ULL, 0ULL);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_load_64 latomic_i386_load_64
#endif /* __LP64__ */

/* latomic_store */

#ifndef __LP64__
static inline void
latomic_i386_store_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                      int memorder)
{
    unsigned long long oval, prev;

    latomic_x86_enter(memorder);

    do {
        oval = *ptr.ull_ptr;
        prev = latomic_x86_cas_ull(ptr.ull_ptr, oval, val.ull);
    } while (prev != oval);

    latomic_x86_leave(memorder);
}
#define latomic_store_64 latomic_i386_store_64
#endif /* __LP64__ */

/* latomic_swap */

/*
 * The swap operation is implemented with the xchg instruction, which
 * implies the lock prefix. As a result, simply reuse the built-in
 * provided by the compiler.
 */
static inline unsigned int
latomic_x86_swap_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                    int memorder)
{
    return __atomic_exchange_n(ptr.ui_ptr, val.ui, memorder);
}
#define latomic_swap_32 latomic_x86_swap_32

#ifdef __LP64__
static inline unsigned long long
latomic_amd64_swap_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                      int memorder)
{
    return __atomic_exchange_n(ptr.ull_ptr, val.ull, memorder);
}
#define latomic_swap_64 latomic_amd64_swap_64
#else /* __LP64__ */
static inline unsigned long long
latomic_i386_swap_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                     int memorder)
{
    unsigned long long oval, prev;

    latomic_x86_enter(memorder);

    do {
        oval = *ptr.ull_ptr;
        prev = latomic_x86_cas_ull(ptr.ull_ptr, oval, val.ull);
    } while (prev != oval);

    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_swap_64 latomic_i386_swap_64
#endif /* __LP64__ */

/* latomic_cas */

static inline unsigned int
latomic_x86_cas_32(union atomic_ptr_32 ptr, union atomic_val_32 oval,
                   union atomic_val_32 nval, int memorder)
{
    unsigned int prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_ui(ptr.ui_ptr, oval.ui, nval.ui);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_cas_32 latomic_x86_cas_32

static inline unsigned long long
latomic_x86_cas_64(union atomic_ptr_64 ptr, union atomic_val_64 oval,
                   union atomic_val_64 nval, int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_ull(ptr.ull_ptr, oval.ull, nval.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_cas_64 latomic_x86_cas_64

/* latomic_fetch_add */

/*
 * The fetch_add and fetch_sub operations are the only fetch_xxx operations
 * that don't require a CAS loop, since they can be implemented with the
 * xadd instruction instead.
 */
#define latomic_x86_fetch_add_n(ptr, val)                                  \
MACRO_BEGIN                                                                \
    typeof(val) prev_;                                                     \
                                                                           \
    asm volatile("xadd %0, %1"                                             \
                 : "=r" (prev_), "+m" (*(ptr))                             \
                 : "0" (val));                                             \
    prev_;                                                                 \
MACRO_END

static inline unsigned int
latomic_x86_fetch_add_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                         int memorder)
{
    unsigned int prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_fetch_add_n(ptr.ui_ptr, val.ui);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_add_32 latomic_x86_fetch_add_32

#ifdef __LP64__
static inline unsigned long long
latomic_amd64_fetch_add_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                           int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_fetch_add_n(ptr.ull_ptr, val.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_add_64 latomic_amd64_fetch_add_64
#else /* __LP64__ */
static inline unsigned long long
latomic_i386_fetch_add_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                          int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ull_ptr, latomic_x86_cas_ull, +, val.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_add_64 latomic_i386_fetch_add_64
#endif /* __LP64__ */
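
/*
 * Note on the two strategies above: xadd loads the previous value of the
 * memory operand into its register operand and stores the sum back, so a
 * single unlocked instruction both updates memory and returns the old
 * value. When no such instruction exists (fetch_and/or/xor, and the 64-bit
 * variants on i386), latomic_x86_cas_loop_n() provides the same semantics
 * by retrying the read-modify-CAS sequence until the CAS succeeds. As a
 * purely illustrative example, the line below is equivalent to the 32-bit
 * xadd-based implementation:
 *
 *     prev = latomic_x86_cas_loop_n(ptr.ui_ptr, latomic_x86_cas_ui, +, val.ui);
 */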

/* latomic_fetch_sub */

static inline unsigned int
latomic_x86_fetch_sub_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                         int memorder)
{
    unsigned int prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_fetch_add_n(ptr.ui_ptr, -val.ui);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_sub_32 latomic_x86_fetch_sub_32

#ifdef __LP64__
static inline unsigned long long
latomic_amd64_fetch_sub_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                           int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_fetch_add_n(ptr.ull_ptr, -val.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_sub_64 latomic_amd64_fetch_sub_64
#else /* __LP64__ */
static inline unsigned long long
latomic_i386_fetch_sub_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                          int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ull_ptr, latomic_x86_cas_ull, -, val.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_sub_64 latomic_i386_fetch_sub_64
#endif /* __LP64__ */

/* latomic_fetch_and */

static inline unsigned int
latomic_x86_fetch_and_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                         int memorder)
{
    unsigned int prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ui_ptr, latomic_x86_cas_ui, &, val.ui);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_and_32 latomic_x86_fetch_and_32

static inline unsigned long long
latomic_x86_fetch_and_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                         int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ull_ptr, latomic_x86_cas_ull, &, val.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_and_64 latomic_x86_fetch_and_64

/* latomic_fetch_or */

static inline unsigned int
latomic_x86_fetch_or_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                        int memorder)
{
    unsigned int prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ui_ptr, latomic_x86_cas_ui, |, val.ui);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_or_32 latomic_x86_fetch_or_32

static inline unsigned long long
latomic_x86_fetch_or_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                        int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ull_ptr, latomic_x86_cas_ull, |, val.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_or_64 latomic_x86_fetch_or_64

/* latomic_fetch_xor */

static inline unsigned int
latomic_x86_fetch_xor_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                         int memorder)
{
    unsigned int prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ui_ptr, latomic_x86_cas_ui, ^, val.ui);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_xor_32 latomic_x86_fetch_xor_32

static inline unsigned long long
latomic_x86_fetch_xor_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                         int memorder)
{
    unsigned long long prev;

    latomic_x86_enter(memorder);
    prev = latomic_x86_cas_loop_n(ptr.ull_ptr, latomic_x86_cas_ull, ^, val.ull);
    latomic_x86_leave(memorder);

    return prev;
}
#define latomic_fetch_xor_64 latomic_x86_fetch_xor_64

/* latomic_add */

#define latomic_x86_add_n(ptr, val, suffix)                                \
MACRO_BEGIN                                                                \
    asm volatile("add" suffix " %1, %0"                                    \
                 : "+m" (*(ptr))                                           \
                 : "ir" (val));                                            \
MACRO_END

static inline void
latomic_x86_add_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                   int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_add_n(ptr.ui_ptr, val.ui, "l");
    latomic_x86_leave(memorder);
}
#define latomic_add_32 latomic_x86_add_32

#ifdef __LP64__
static inline void
latomic_amd64_add_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                     int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_add_n(ptr.ull_ptr, val.ull, "q");
    latomic_x86_leave(memorder);
}
#define latomic_add_64 latomic_amd64_add_64
#else /* __LP64__ */
static inline void
latomic_i386_add_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                    int memorder)
{
    latomic_fetch_add_64(ptr, val, memorder);
}
#define latomic_add_64 latomic_i386_add_64
#endif /* __LP64__ */

/* latomic_sub */

static inline void
latomic_x86_sub_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                   int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_add_n(ptr.ui_ptr, -val.ui, "l");
    latomic_x86_leave(memorder);
}
#define latomic_sub_32 latomic_x86_sub_32

#ifdef __LP64__
static inline void
latomic_amd64_sub_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                     int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_add_n(ptr.ull_ptr, -val.ull, "q");
    latomic_x86_leave(memorder);
}
#define latomic_sub_64 latomic_amd64_sub_64
#else /* __LP64__ */
static inline void
latomic_i386_sub_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                    int memorder)
{
    latomic_fetch_sub_64(ptr, val, memorder);
}
#define latomic_sub_64 latomic_i386_sub_64
#endif /* __LP64__ */

/* latomic_and */

#define latomic_x86_and_n(ptr, val, suffix)                                \
MACRO_BEGIN                                                                \
    asm volatile("and" suffix " %1, %0"                                    \
                 : "+m" (*(ptr))                                           \
                 : "ir" (val));                                            \
MACRO_END

static inline void
latomic_x86_and_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                   int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_and_n(ptr.ui_ptr, val.ui, "l");
    latomic_x86_leave(memorder);
}
#define latomic_and_32 latomic_x86_and_32

#ifdef __LP64__
static inline void
latomic_amd64_and_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                     int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_and_n(ptr.ull_ptr, val.ull, "q");
    latomic_x86_leave(memorder);
}
#define latomic_and_64 latomic_amd64_and_64
#else /* __LP64__ */
static inline void
latomic_i386_and_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                    int memorder)
{
    latomic_fetch_and_64(ptr, val, memorder);
}
#define latomic_and_64 latomic_i386_and_64
#endif /* __LP64__ */

/* latomic_or */

#define latomic_x86_or_n(ptr, val, suffix)                                 \
MACRO_BEGIN                                                                \
    asm volatile("or" suffix " %1, %0"                                     \
                 : "+m" (*(ptr))                                           \
                 : "ir" (val));                                            \
MACRO_END

static inline void
latomic_x86_or_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                  int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_or_n(ptr.ui_ptr, val.ui, "l");
    latomic_x86_leave(memorder);
}
#define latomic_or_32 latomic_x86_or_32

#ifdef __LP64__
static inline void
latomic_amd64_or_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                    int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_or_n(ptr.ull_ptr, val.ull, "q");
    latomic_x86_leave(memorder);
}
#define latomic_or_64 latomic_amd64_or_64
#else /* __LP64__ */
static inline void
latomic_i386_or_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                   int memorder)
{
    latomic_fetch_or_64(ptr, val, memorder);
}
#define latomic_or_64 latomic_i386_or_64
#endif /* __LP64__ */

/* latomic_xor */

#define latomic_x86_xor_n(ptr, val, suffix)                                \
MACRO_BEGIN                                                                \
    asm volatile("xor" suffix " %1, %0"                                    \
                 : "+m" (*(ptr))                                           \
                 : "ir" (val));                                            \
MACRO_END

static inline void
latomic_x86_xor_32(union atomic_ptr_32 ptr, union atomic_val_32 val,
                   int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_xor_n(ptr.ui_ptr, val.ui, "l");
    latomic_x86_leave(memorder);
}
#define latomic_xor_32 latomic_x86_xor_32

#ifdef __LP64__
static inline void
latomic_amd64_xor_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                     int memorder)
{
    latomic_x86_enter(memorder);
    latomic_x86_xor_n(ptr.ull_ptr, val.ull, "q");
    latomic_x86_leave(memorder);
}
#define latomic_xor_64 latomic_amd64_xor_64
#else /* __LP64__ */
static inline void
latomic_i386_xor_64(union atomic_ptr_64 ptr, union atomic_val_64 val,
                    int memorder)
{
    latomic_fetch_xor_64(ptr, val, memorder);
}
#define latomic_xor_64 latomic_i386_xor_64
#endif /* __LP64__ */
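
/*
 * Example of the calling convention used throughout this file, with a
 * hypothetical counter. These functions are normally only reached through
 * the architecture-independent latomic_* interface; the direct call below
 * is purely illustrative:
 *
 *     static unsigned int counter;
 *
 *     unsigned int
 *     counter_inc(void)
 *     {
 *         return latomic_fetch_add_32((union atomic_ptr_32){ .ui_ptr = &counter },
 *                                     (union atomic_val_32){ .ui = 1 },
 *                                     LATOMIC_RELAXED);
 *     }
 */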

#endif /* X86_LATOMIC_H */