/* TILE atomics.
   Copyright (C) 2011-2013 Free Software Foundation, Inc.
   Contributed by Walter Lee (walt@tilera.com)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "tconfig.h"
#include "coretypes.h"
#include "atomic.h"

/* This code should be inlined by the compiler, but for now support
   it as out-of-line methods in libgcc.  */

static inline void
pre_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

static inline void
post_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

#define __unused __attribute__((unused))

#define __fetch_and_do(proto, type, size, opname, top, bottom) \
proto \
{ \
  top; \
  type rv = arch_atomic_##opname(p, i); \
  bottom; \
  return rv; \
}

#define __atomic_fetch_and_do(type, size, opname) \
  __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
                 type, size, opname, \
                 pre_atomic_barrier(model), \
                 post_atomic_barrier(model)) \

__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)

#define __sync_fetch_and_do(type, size, opname) \
  __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \
                 type, size, opname, \
                 arch_atomic_write_barrier(), \
                 arch_atomic_read_barrier())

__sync_fetch_and_do (int, 4, add)
__sync_fetch_and_do (int, 4, sub)
__sync_fetch_and_do (int, 4, or)
__sync_fetch_and_do (int, 4, and)
__sync_fetch_and_do (int, 4, xor)
__sync_fetch_and_do (int, 4, nand)
__sync_fetch_and_do (long long, 8, add)
__sync_fetch_and_do (long long, 8, sub)
__sync_fetch_and_do (long long, 8, or)
__sync_fetch_and_do (long long, 8, and)
__sync_fetch_and_do (long long, 8, xor)
__sync_fetch_and_do (long long, 8, nand)
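/* For illustration only: the first instantiation above,
   __atomic_fetch_and_do (int, 4, add), expands to roughly the
   function below (shown under "#if 0" so it is never compiled; the
   real definition comes from the macro).  The arch_atomic_*
   primitive from atomic.h performs the update and returns the old
   value, bracketed by whatever fences the memory model requires.  */
#if 0
int
__atomic_fetch_add_4 (int *p, int i, int model)
{
  pre_atomic_barrier (model);       /* fence for RELEASE/ACQ_REL/SEQ_CST */
  int rv = arch_atomic_add (p, i);  /* atomic update; returns old value */
  post_atomic_barrier (model);      /* fence for ACQUIRE/ACQ_REL/SEQ_CST */
  return rv;
}
#endif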
#define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom) \
proto \
{ \
  top; \
  type rv = op2 (arch_atomic_##opname(p, i) op i); \
  bottom; \
  return rv; \
}

#define __atomic_do_and_fetch(type, size, opname, op, op2) \
  __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
                 type, size, opname, op, op2, \
                 pre_atomic_barrier(model), \
                 post_atomic_barrier(model)) \

__atomic_do_and_fetch (int, 4, add, +, )
__atomic_do_and_fetch (int, 4, sub, -, )
__atomic_do_and_fetch (int, 4, or, |, )
__atomic_do_and_fetch (int, 4, and, &, )
__atomic_do_and_fetch (int, 4, xor, ^, )
__atomic_do_and_fetch (int, 4, nand, &, ~)
__atomic_do_and_fetch (long long, 8, add, +, )
__atomic_do_and_fetch (long long, 8, sub, -, )
__atomic_do_and_fetch (long long, 8, or, |, )
__atomic_do_and_fetch (long long, 8, and, &, )
__atomic_do_and_fetch (long long, 8, xor, ^, )
__atomic_do_and_fetch (long long, 8, nand, &, ~)

#define __sync_do_and_fetch(type, size, opname, op, op2) \
  __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \
                 type, size, opname, op, op2, \
                 arch_atomic_write_barrier(), \
                 arch_atomic_read_barrier()) \

__sync_do_and_fetch (int, 4, add, +, )
__sync_do_and_fetch (int, 4, sub, -, )
__sync_do_and_fetch (int, 4, or, |, )
__sync_do_and_fetch (int, 4, and, &, )
__sync_do_and_fetch (int, 4, xor, ^, )
__sync_do_and_fetch (int, 4, nand, &, ~)
__sync_do_and_fetch (long long, 8, add, +, )
__sync_do_and_fetch (long long, 8, sub, -, )
__sync_do_and_fetch (long long, 8, or, |, )
__sync_do_and_fetch (long long, 8, and, &, )
__sync_do_and_fetch (long long, 8, xor, ^, )
__sync_do_and_fetch (long long, 8, nand, &, ~)

#define __atomic_exchange_methods(type, size) \
  bool \
  __atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp, \
                                   type newval, bool weak __unused, \
                                   int models, int modelf __unused) \
  { \
    type oldval = *oldvalp; \
    pre_atomic_barrier(models); \
    type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
    post_atomic_barrier(models); \
    bool success = (retval == oldval); \
    *oldvalp = retval; \
    return success; \
  } \
  \
  type \
  __atomic_exchange_##size(volatile type* ptr, type val, int model) \
  { \
    pre_atomic_barrier(model); \
    type retval = arch_atomic_exchange(ptr, val); \
    post_atomic_barrier(model); \
    return retval; \
  }

__atomic_exchange_methods (int, 4)
__atomic_exchange_methods (long long, 8)

#define __sync_exchange_methods(type, size) \
  type \
  __sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval) \
  { \
    arch_atomic_write_barrier(); \
    type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
    arch_atomic_read_barrier(); \
    return retval; \
  } \
  \
  bool \
  __sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \
  { \
    arch_atomic_write_barrier(); \
    bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \
    arch_atomic_read_barrier(); \
    return retval; \
  } \
  \
  type \
  __sync_lock_test_and_set_##size(type* ptr, type val) \
  { \
    type retval = arch_atomic_exchange(ptr, val); \
    arch_atomic_acquire_barrier_value(retval); \
    return retval; \
  }

__sync_exchange_methods (int, 4)
__sync_exchange_methods (long long, 8)
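/* For illustration only: __atomic_exchange_methods (int, 4) above
   defines, among others, roughly the function below (under "#if 0",
   never compiled).  Note that on failure the value actually read is
   stored back through oldvalp, matching the protocol of the
   __atomic_compare_exchange builtins.  */
#if 0
bool
__atomic_compare_exchange_4 (volatile int *ptr, int *oldvalp,
                             int newval, bool weak __unused,
                             int models, int modelf __unused)
{
  int oldval = *oldvalp;
  pre_atomic_barrier (models);
  int retval = arch_atomic_val_compare_and_exchange (ptr, oldval, newval);
  post_atomic_barrier (models);
  bool success = (retval == oldval);
  *oldvalp = retval;  /* report the observed value on failure */
  return success;
}
#endif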
#ifdef __LITTLE_ENDIAN__
#define BIT_OFFSET(n, type) ((n) * 8)
#else
#define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8)
#endif

/* Subword methods require the same approach for both TILEPro and
   TILE-Gx.  We load the background data for the word, insert the
   desired subword piece, then compare-and-exchange it into place.  */
#define u8 unsigned char
#define u16 unsigned short

#define __subword_cmpxchg_body(type, size, ptr, guess, val) \
  ({ \
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \
    const unsigned int bgmask = ~(valmask << shift); \
    unsigned int oldword = *p; \
    type oldval = (oldword >> shift) & valmask; \
    if (__builtin_expect((oldval == guess), 1)) { \
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      oldword = arch_atomic_val_compare_and_exchange(p, oldword, word); \
      oldval = (oldword >> shift) & valmask; \
    } \
    oldval; \
  }) \

#define __atomic_subword_cmpxchg(type, size) \
  \
  bool \
  __atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr, \
                                   type val, bool weak __unused, int models, \
                                   int modelf __unused) \
  { \
    pre_atomic_barrier(models); \
    type guess = *guess_ptr; \
    type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \
    post_atomic_barrier(models); \
    bool success = (oldval == guess); \
    *guess_ptr = oldval; \
    return success; \
  }

__atomic_subword_cmpxchg (u8, 1)
__atomic_subword_cmpxchg (u16, 2)

#define __sync_subword_cmpxchg(type, size) \
  \
  type \
  __sync_val_compare_and_swap_##size(type* ptr, type guess, type val) \
  { \
    arch_atomic_write_barrier(); \
    type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val); \
    arch_atomic_read_barrier(); \
    return oldval; \
  } \
  \
  bool \
  __sync_bool_compare_and_swap_##size(type* ptr, type guess, type val) \
  { \
    type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val); \
    return oldval == guess; \
  }

__sync_subword_cmpxchg (u8, 1)
__sync_subword_cmpxchg (u16, 2)
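/* A worked example of the masking arithmetic above (illustrative
   only): for a u16 at byte offset 2 within its aligned word on a
   little-endian target, BIT_OFFSET gives shift = 16, so
   valmask = 0xffff and bgmask = ~(0xffff << 16) = 0x0000ffff.  The
   candidate word is then
   (oldword & 0x0000ffff) | ((val & 0xffff) << 16), i.e. the low
   halfword is preserved as background and the new value occupies
   the high halfword.  */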
/* For the atomic-update subword methods, we use the same approach as
   above, but we retry until we succeed if the compare-and-exchange
   fails.  */
#define __subword(type, proto, top, expr, bottom) \
  proto \
  { \
    top \
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type); \
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \
    const unsigned int bgmask = ~(valmask << shift); \
    unsigned int oldword, xword = *p; \
    type val, oldval; \
    do { \
      oldword = xword; \
      oldval = (oldword >> shift) & valmask; \
      val = expr; \
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      xword = arch_atomic_val_compare_and_exchange(p, oldword, word); \
    } while (__builtin_expect(xword != oldword, 0)); \
    bottom \
  }

#define __atomic_subword_fetch(type, funcname, expr, retval) \
  __subword(type, \
            type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
            pre_atomic_barrier(model);, \
            expr, \
            post_atomic_barrier(model); return retval;)

__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)

__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)

__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)

__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)
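/* For illustration only: __atomic_subword_fetch (u8, fetch_add_1,
   oldval + i, oldval) expands to roughly the function below (under
   "#if 0", never compiled).  The loop retries with the freshly
   observed word whenever another thread changed the containing word
   between the read and the compare-and-exchange.  */
#if 0
u8
__atomic_fetch_add_1 (volatile u8 *ptr, u8 i, int model)
{
  pre_atomic_barrier (model);
  unsigned int *p = (unsigned int *) ((unsigned long) ptr & ~3UL);
  const int shift = BIT_OFFSET ((unsigned long) ptr & 3UL, u8);
  const unsigned int valmask = (1 << (sizeof (u8) * 8)) - 1;
  const unsigned int bgmask = ~(valmask << shift);
  unsigned int oldword, xword = *p;
  u8 val, oldval;
  do {
    oldword = xword;
    oldval = (oldword >> shift) & valmask;
    val = oldval + i;  /* the "expr" argument */
    unsigned int word = (oldword & bgmask) | ((val & valmask) << shift);
    xword = arch_atomic_val_compare_and_exchange (p, oldword, word);
  } while (__builtin_expect (xword != oldword, 0));
  post_atomic_barrier (model);
  return oldval;  /* the "retval" argument; add_fetch_1 returns val instead */
}
#endif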
#define __sync_subword_fetch(type, funcname, expr, retval) \
  __subword(type, \
            type __sync_ ## funcname(type *ptr, type i), \
            arch_atomic_read_barrier();, \
            expr, \
            arch_atomic_write_barrier(); return retval;)

__sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval)
__sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval)
__sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval)
__sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval)
__sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval)
__sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval)

__sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval)
__sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval)
__sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval)
__sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval)
__sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval)
__sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval)

__sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val)
__sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val)
__sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val)
__sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val)
__sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val)
__sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val)

__sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val)
__sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val)
__sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val)
__sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val)
__sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val)
__sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val)

#define __atomic_subword_lock(type, size) \
  __subword(type, \
            type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
            pre_atomic_barrier(model);, \
            nval, \
            post_atomic_barrier(model); return oldval;)

__atomic_subword_lock (u8, 1)
__atomic_subword_lock (u16, 2)

#define __sync_subword_lock(type, size) \
  __subword(type, \
            type __sync_lock_test_and_set_##size(type* ptr, type nval), \
            , \
            nval, \
            arch_atomic_acquire_barrier_value(oldval); return oldval;)

__sync_subword_lock (u8, 1)
__sync_subword_lock (u16, 2)
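/* Caller-side sketch (hypothetical user code, not part of this
   file): when the compiler does not expand a subword atomic inline,
   a builtin such as

     unsigned short v;
     unsigned short old = __atomic_fetch_add (&v, 1, __ATOMIC_SEQ_CST);

   is emitted as a call to the out-of-line __atomic_fetch_add_2
   defined above, and __sync_lock_test_and_set on a one-byte object
   likewise becomes a call to __sync_lock_test_and_set_1.  */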