/* TILE atomics.
   Copyright (C) 2011-2013 Free Software Foundation, Inc.
   Contributed by Walter Lee (walt@tilera.com)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "system.h"
#include "coretypes.h"
#include "atomic.h"

/* This code should be inlined by the compiler, but for now support
   it as out-of-line methods in libgcc.  */

static inline void
pre_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

static inline void
post_atomic_barrier (int model)
{
  switch ((enum memmodel) model)
    {
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

#define __unused __attribute__((unused))

#define __fetch_and_do(proto, type, size, opname, top, bottom)	\
proto								\
{								\
  top;								\
  type rv = arch_atomic_##opname(p, i);				\
  bottom;							\
  return rv;							\
}

#define __atomic_fetch_and_do(type, size, opname)			\
  __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
		 type, size, opname,					\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))				\

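/* Each instantiation below defines one out-of-line entry point.  As a
   rough sketch, __atomic_fetch_and_do (int, 4, add) expands to
   something like:

     int
     __atomic_fetch_add_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = arch_atomic_add (p, i);
       post_atomic_barrier (model);
       return rv;
     }
*/
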
__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)

#define __sync_fetch_and_do(type, size, opname)				\
  __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \
		 type, size, opname,					\
		 arch_atomic_write_barrier(),				\
		 arch_atomic_read_barrier())

__sync_fetch_and_do (int, 4, add)
__sync_fetch_and_do (int, 4, sub)
__sync_fetch_and_do (int, 4, or)
__sync_fetch_and_do (int, 4, and)
__sync_fetch_and_do (int, 4, xor)
__sync_fetch_and_do (int, 4, nand)
__sync_fetch_and_do (long long, 8, add)
__sync_fetch_and_do (long long, 8, sub)
__sync_fetch_and_do (long long, 8, or)
__sync_fetch_and_do (long long, 8, and)
__sync_fetch_and_do (long long, 8, xor)
__sync_fetch_and_do (long long, 8, nand)

#define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom)	\
proto									\
{									\
  top;									\
  type rv = op2 (arch_atomic_##opname(p, i) op i);			\
  bottom;								\
  return rv;								\
}

#define __atomic_do_and_fetch(type, size, opname, op, op2)		\
  __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
		 type, size, opname, op, op2,				\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))				\

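/* OP recomputes the new value from the fetched old value, and OP2 is an
   optional prefix applied to the whole result (only nand uses it).  As a
   rough sketch, __atomic_do_and_fetch (int, 4, nand, &, ~) expands to
   something like:

     int
     __atomic_nand_fetch_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = ~ (arch_atomic_nand (p, i) & i);
       post_atomic_barrier (model);
       return rv;
     }
*/
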
__atomic_do_and_fetch (int, 4, add, +, )
__atomic_do_and_fetch (int, 4, sub, -, )
__atomic_do_and_fetch (int, 4, or, |, )
__atomic_do_and_fetch (int, 4, and, &, )
__atomic_do_and_fetch (int, 4, xor, ^, )
__atomic_do_and_fetch (int, 4, nand, &, ~)
__atomic_do_and_fetch (long long, 8, add, +, )
__atomic_do_and_fetch (long long, 8, sub, -, )
__atomic_do_and_fetch (long long, 8, or, |, )
__atomic_do_and_fetch (long long, 8, and, &, )
__atomic_do_and_fetch (long long, 8, xor, ^, )
__atomic_do_and_fetch (long long, 8, nand, &, ~)

#define __sync_do_and_fetch(type, size, opname, op, op2)		\
  __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \
		 type, size, opname, op, op2,				\
		 arch_atomic_write_barrier(),				\
		 arch_atomic_read_barrier())				\

__sync_do_and_fetch (int, 4, add, +, )
__sync_do_and_fetch (int, 4, sub, -, )
__sync_do_and_fetch (int, 4, or, |, )
__sync_do_and_fetch (int, 4, and, &, )
__sync_do_and_fetch (int, 4, xor, ^, )
__sync_do_and_fetch (int, 4, nand, &, ~)
__sync_do_and_fetch (long long, 8, add, +, )
__sync_do_and_fetch (long long, 8, sub, -, )
__sync_do_and_fetch (long long, 8, or, |, )
__sync_do_and_fetch (long long, 8, and, &, )
__sync_do_and_fetch (long long, 8, xor, ^, )
__sync_do_and_fetch (long long, 8, nand, &, ~)

#define __atomic_exchange_methods(type, size)				\
bool									\
__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp,	\
				 type newval, bool weak __unused,	\
				 int models, int modelf __unused)	\
{									\
  type oldval = *oldvalp;						\
  pre_atomic_barrier(models);						\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  post_atomic_barrier(models);						\
  bool success = (retval == oldval);					\
  *oldvalp = retval;							\
  return success;							\
}									\
									\
type									\
__atomic_exchange_##size(volatile type* ptr, type val, int model)	\
{									\
  pre_atomic_barrier(model);						\
  type retval = arch_atomic_exchange(ptr, val);				\
  post_atomic_barrier(model);						\
  return retval;							\
}

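/* The "weak" flag is ignored: weak and strong requests both go through the
   same full compare-and-exchange, and on failure the caller's expected value
   is refreshed from memory.  As a rough usage sketch (the counter variable
   is illustrative), a CAS-based increment built on the 4-byte method would
   look something like:

     int expected = *counter;
     while (!__atomic_compare_exchange_4 (counter, &expected, expected + 1,
					  false, __ATOMIC_SEQ_CST,
					  __ATOMIC_SEQ_CST))
       ;

   On failure the loop simply retries, because "expected" has already been
   reloaded with the value currently in memory.  */
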
__atomic_exchange_methods (int, 4)
__atomic_exchange_methods (long long, 8)

#define __sync_exchange_methods(type, size)				\
type									\
__sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval)	\
{									\
  arch_atomic_write_barrier();						\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();						\
  return retval;							\
}									\
									\
bool									\
__sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \
{									\
  arch_atomic_write_barrier();						\
  bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();						\
  return retval;							\
}									\
									\
type									\
__sync_lock_test_and_set_##size(type* ptr, type val)			\
{									\
  type retval = arch_atomic_exchange(ptr, val);				\
  arch_atomic_acquire_barrier_value(retval);				\
  return retval;							\
}

__sync_exchange_methods (int, 4)
__sync_exchange_methods (long long, 8)

#ifdef __LITTLE_ENDIAN__
#define BIT_OFFSET(n, type) ((n) * 8)
#else
#define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8)
#endif

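/* BIT_OFFSET maps a byte offset within a 4-byte word to the bit position of
   the corresponding subword value.  For example, a u16 at byte offset 2 of
   its word sits at bit offset (2) * 8 == 16 on little-endian, and at
   (4 - 2 - 2) * 8 == 0 on big-endian, matching where that halfword actually
   lives within the word.  */
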
/* Subword methods require the same approach for both TILEPro and
   TILE-Gx.  We load the background data for the word, insert the
   desired subword piece, then compare-and-exchange it into place.  */
#define u8 unsigned char
#define u16 unsigned short

#define __subword_cmpxchg_body(type, size, ptr, guess, val)		\
  ({									\
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);	\
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);	\
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;		\
    const unsigned int bgmask = ~(valmask << shift);			\
    unsigned int oldword = *p;						\
    type oldval = (oldword >> shift) & valmask;				\
    if (__builtin_expect((oldval == guess), 1)) {			\
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      oldword = arch_atomic_val_compare_and_exchange(p, oldword, word);	\
      oldval = (oldword >> shift) & valmask;				\
    }									\
    oldval;								\
  })									\

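/* As a concrete sketch of the splicing above: for a u8 at the illustrative
   address 0x1002 on a little-endian machine, p points at the aligned word
   0x1000, shift is BIT_OFFSET(2, u8) == 16, valmask is 0xff and bgmask is
   ~0x00ff0000, so the new byte is inserted into bits 16..23 of the word
   before the word-sized compare-and-exchange.  */
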
#define __atomic_subword_cmpxchg(type, size)				\
  									\
bool									\
__atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr,	\
				 type val, bool weak __unused, int models, \
				 int modelf __unused)			\
{									\
  pre_atomic_barrier(models);						\
  type guess = *guess_ptr;						\
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);	\
  post_atomic_barrier(models);						\
  bool success = (oldval == guess);					\
  *guess_ptr = oldval;							\
  return success;							\
}

__atomic_subword_cmpxchg (u8, 1)
__atomic_subword_cmpxchg (u16, 2)

#define __sync_subword_cmpxchg(type, size)				\
  									\
type									\
__sync_val_compare_and_swap_##size(type* ptr, type guess, type val)	\
{									\
  arch_atomic_write_barrier();						\
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);	\
  arch_atomic_read_barrier();						\
  return oldval;							\
}									\
									\
bool									\
__sync_bool_compare_and_swap_##size(type* ptr, type guess, type val)	\
{									\
  type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val);	\
  return oldval == guess;						\
}

__sync_subword_cmpxchg (u8, 1)
__sync_subword_cmpxchg (u16, 2)

/* For the atomic-update subword methods, we use the same approach as
   above, but we retry until we succeed if the compare-and-exchange
   fails.  */
#define __subword(type, proto, top, expr, bottom)			\
proto									\
{									\
  top									\
  unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);	\
  const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);		\
  const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;		\
  const unsigned int bgmask = ~(valmask << shift);			\
  unsigned int oldword, xword = *p;					\
  type val, oldval;							\
  do {									\
    oldword = xword;							\
    oldval = (oldword >> shift) & valmask;				\
    val = expr;								\
    unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
    xword = arch_atomic_val_compare_and_exchange(p, oldword, word);	\
  } while (__builtin_expect(xword != oldword, 0));			\
  bottom								\
}

#define __atomic_subword_fetch(type, funcname, expr, retval)		\
  __subword(type,							\
	    type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
	    pre_atomic_barrier(model);,					\
	    expr,							\
	    post_atomic_barrier(model); return retval;)

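/* As a rough sketch, __atomic_subword_fetch (u8, fetch_add_1, oldval + i,
   oldval) expands to something like:

     u8
     __atomic_fetch_add_1 (volatile u8 *ptr, u8 i, int model)
     {
       pre_atomic_barrier (model);
       unsigned int *p = (unsigned int *) ((unsigned long) ptr & ~3UL);
       const int shift = BIT_OFFSET ((unsigned long) ptr & 3UL, u8);
       const unsigned int valmask = (1 << (sizeof (u8) * 8)) - 1;
       const unsigned int bgmask = ~(valmask << shift);
       unsigned int oldword, xword = *p;
       u8 val, oldval;
       do {
         oldword = xword;
         oldval = (oldword >> shift) & valmask;
         val = oldval + i;
         unsigned int word = (oldword & bgmask) | ((val & valmask) << shift);
         xword = arch_atomic_val_compare_and_exchange (p, oldword, word);
       } while (__builtin_expect (xword != oldword, 0));
       post_atomic_barrier (model);
       return oldval;
     }
*/
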
__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)

__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)

__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)

__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)

#define __sync_subword_fetch(type, funcname, expr, retval)	\
  __subword(type,						\
	    type __sync_ ## funcname(type *ptr, type i),	\
	    arch_atomic_read_barrier();,			\
	    expr,						\
	    arch_atomic_write_barrier(); return retval;)

__sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval)
__sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval)
__sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval)
__sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval)
__sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval)
__sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval)

__sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval)
__sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval)
__sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval)
__sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval)
__sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval)
__sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval)

__sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val)
__sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val)
__sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val)
__sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val)
__sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val)
__sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val)

__sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val)
__sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val)
__sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val)
__sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val)
__sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val)
__sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val)

#define __atomic_subword_lock(type, size)				\
  __subword(type,							\
	    type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
	    pre_atomic_barrier(model);,					\
	    nval,							\
	    post_atomic_barrier(model); return oldval;)

__atomic_subword_lock (u8, 1)
__atomic_subword_lock (u16, 2)

#define __sync_subword_lock(type, size)					\
  __subword(type,							\
	    type __sync_lock_test_and_set_##size(type* ptr, type nval), \
	    ,								\
	    nval,							\
	    arch_atomic_acquire_barrier_value(oldval); return oldval;)

__sync_subword_lock (u8, 1)
__sync_subword_lock (u16, 2)