/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#ifndef _RTE_ATOMIC_X86_H_
#define _RTE_ATOMIC_X86_H_

#include <stdint.h>
#include <rte_common.h>
#include <rte_config.h>
#include <emmintrin.h>
#include "generic/rte_atomic.h"

#if RTE_MAX_LCORE == 1
#define MPLOCKED                        /**< No need to insert MP lock prefix. */
#else
#define MPLOCKED        "lock ; "       /**< Insert MP lock prefix. */
#endif

#define rte_mb() _mm_mfence()

#define rte_wmb() _mm_sfence()

#define rte_rmb() _mm_lfence()

#define rte_smp_wmb() rte_compiler_barrier()

#define rte_smp_rmb() rte_compiler_barrier()

#ifdef __cplusplus
extern "C" {
#endif

/*
 * From Intel Software Development Manual; Vol 3;
 * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
 * ...
 * . Reads are not reordered with other reads.
 * . Writes are not reordered with older reads.
 * . Writes to memory are not reordered with other writes,
 *   with the following exceptions:
 *   . streaming stores (writes) executed with the non-temporal move
 *     instructions (MOVNTI, MOVNTQ, MOVNTDQ, MOVNTPS, and MOVNTPD); and
 *   . string operations (see Section 8.2.4.1).
 * ...
 * . Reads may be reordered with older writes to different locations but not
 *   with older writes to the same location.
 * . Reads or writes cannot be reordered with I/O instructions,
 *   locked instructions, or serializing instructions.
 * . Reads cannot pass earlier LFENCE and MFENCE instructions.
 * . Writes ... cannot pass earlier LFENCE, SFENCE, and MFENCE instructions.
 * . LFENCE instructions cannot pass earlier reads.
 * . SFENCE instructions cannot pass earlier writes ...
 * . MFENCE instructions cannot pass earlier reads, writes ...
 *
 * As pointed out by the Java guys, this makes it possible to use
 * lock-prefixed instructions to get the same effect as mfence, and on most
 * modern HW that gives better performance than using mfence:
 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
 * The basic idea is to use a lock-prefixed add with some dummy memory
 * location as the destination. From their experiments, 128B (2 cache lines)
 * below the current stack pointer looks like a good candidate.
 * So below we use that technique for the rte_smp_mb() implementation.
 */

static __rte_always_inline void
rte_smp_mb(void)
{
#ifdef RTE_TOOLCHAIN_MSVC
	_mm_mfence();
#else
#ifdef RTE_ARCH_I686
	asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
#else
	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
#endif
#endif
}
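
/*
 * Illustrative sketch, not part of this header's API: the full barrier is
 * what makes a Dekker-style handshake correct on x86, because a store may
 * be reordered after a later load to a different location. The flag
 * variables below are hypothetical.
 *
 *	volatile int flag0, flag1;
 *
 *	// thread 0:			// thread 1:
 *	flag0 = 1;			flag1 = 1;
 *	rte_smp_mb();			rte_smp_mb();
 *	if (flag1 == 0)			if (flag0 == 0)
 *		// thread 1 absent		// thread 0 absent
 *
 * Without the barriers, each thread's load could pass its own earlier
 * store, so both threads could observe the other's flag as 0.
 */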

#define rte_io_mb() rte_mb()

#define rte_io_wmb() rte_compiler_barrier()

#define rte_io_rmb() rte_compiler_barrier()

/**
 * Synchronization fence between threads based on the specified memory order.
 *
 * On x86 the __rte_atomic_thread_fence(rte_memory_order_seq_cst) generates
 * a full 'mfence' which is quite expensive. The optimized implementation of
 * rte_smp_mb is used instead.
 */
static __rte_always_inline void
rte_atomic_thread_fence(rte_memory_order memorder)
{
	if (memorder == rte_memory_order_seq_cst)
		rte_smp_mb();
	else
		__rte_atomic_thread_fence(memorder);
}
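
/*
 * Illustrative sketch, hypothetical variables: only the seq_cst order takes
 * the optimized lock-add path above; weaker orders fall through to the
 * compiler builtin, which on x86 (a TSO machine) emits no instruction.
 *
 *	data = compute();
 *	// compile-time ordering constraint only on x86
 *	rte_atomic_thread_fence(rte_memory_order_release);
 *	ready = 1;
 *
 *	// emits 'lock addl' via rte_smp_mb(), not the slower 'mfence'
 *	rte_atomic_thread_fence(rte_memory_order_seq_cst);
 */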

#ifdef __cplusplus
}
#endif

#ifndef RTE_TOOLCHAIN_MSVC

/*------------------------- 16 bit atomic operations -------------------------*/

#ifdef __cplusplus
extern "C" {
#endif

#ifndef RTE_FORCE_INTRINSICS
static inline int
rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
{
	uint8_t res;

	asm volatile(
			MPLOCKED
			"cmpxchgw %[src], %[dst];"
			"sete %[res];"
			: [res] "=a" (res),     /* output */
			  [dst] "=m" (*dst)
			: [src] "r" (src),      /* input */
			  "a" (exp),
			  "m" (*dst)
			: "memory");            /* clobber list */
	return res;
}

static inline uint16_t
rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
{
	asm volatile(
			MPLOCKED
			"xchgw %0, %1;"
			: "=r" (val), "=m" (*dst)
			: "0" (val), "m" (*dst)
			: "memory");            /* clobber list */
	return val;
}

static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
{
	return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
}

static inline void
rte_atomic16_inc(rte_atomic16_t *v)
{
	asm volatile(
			MPLOCKED
			"incw %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline void
rte_atomic16_dec(rte_atomic16_t *v)
{
	asm volatile(
			MPLOCKED
			"decw %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
{
	uint8_t ret;

	asm volatile(
			MPLOCKED
			"incw %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}

static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
{
	uint8_t ret;

	asm volatile(MPLOCKED
			"decw %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}
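
/*
 * Illustrative sketch, hypothetical counter: rte_atomic16_cmpset() returns
 * non-zero when *dst matched exp and was replaced by src, so a lock-free
 * read-modify-write retries until its compare-and-set wins.
 *
 *	static volatile uint16_t level;
 *
 *	static inline void
 *	saturating_inc16(void)
 *	{
 *		uint16_t old;
 *
 *		do {
 *			old = level;
 *			if (old == UINT16_MAX)
 *				return;	// already saturated, nothing to do
 *		} while (rte_atomic16_cmpset(&level, old, old + 1) == 0);
 *	}
 */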

/*------------------------- 32 bit atomic operations -------------------------*/

static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile(
			MPLOCKED
			"cmpxchgl %[src], %[dst];"
			"sete %[res];"
			: [res] "=a" (res),     /* output */
			  [dst] "=m" (*dst)
			: [src] "r" (src),      /* input */
			  "a" (exp),
			  "m" (*dst)
			: "memory");            /* clobber list */
	return res;
}

static inline uint32_t
rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
{
	asm volatile(
			MPLOCKED
			"xchgl %0, %1;"
			: "=r" (val), "=m" (*dst)
			: "0" (val), "m" (*dst)
			: "memory");            /* clobber list */
	return val;
}

static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
{
	return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
}

static inline void
rte_atomic32_inc(rte_atomic32_t *v)
{
	asm volatile(
			MPLOCKED
			"incl %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline void
rte_atomic32_dec(rte_atomic32_t *v)
{
	asm volatile(
			MPLOCKED
			"decl %[cnt]"
			: [cnt] "=m" (v->cnt)   /* output */
			: "m" (v->cnt)          /* input */
			);
}

static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
{
	uint8_t ret;

	asm volatile(
			MPLOCKED
			"incl %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}

static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
{
	uint8_t ret;

	asm volatile(MPLOCKED
			"decl %[cnt] ; "
			"sete %[ret]"
			: [cnt] "+m" (v->cnt),  /* output */
			  [ret] "=qm" (ret)
			);
	return ret != 0;
}
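
/*
 * Illustrative sketch, hypothetical structure: dec_and_test() returns 1 only
 * in the thread whose decrement reaches zero, so exactly one caller performs
 * the teardown of a reference-counted object.
 *
 *	struct obj {
 *		rte_atomic32_t refcnt;
 *		// payload ...
 *	};
 *
 *	static inline void
 *	obj_put(struct obj *o)
 *	{
 *		if (rte_atomic32_dec_and_test(&o->refcnt))
 *			free(o);	// last reference dropped
 *	}
 */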

#endif /* !RTE_FORCE_INTRINSICS */

#ifdef __cplusplus
}
#endif

#ifdef RTE_ARCH_I686
#include "rte_atomic_32.h"
#else
#include "rte_atomic_64.h"
#endif

#endif /* !RTE_TOOLCHAIN_MSVC */

#endif /* _RTE_ATOMIC_X86_H_ */