/* TILE atomics.
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Walter Lee (walt@tilera.com)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "tconfig.h"
#include "coretypes.h"
#include "atomic.h"

#define bool unsigned char

/* This code should be inlined by the compiler, but for now support
   it as out-of-line methods in libgcc.  */

static inline void
pre_atomic_barrier (int model)
{
  switch (model)
    {
    case __ATOMIC_RELEASE:
    case __ATOMIC_ACQ_REL:
    case __ATOMIC_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

static inline void
post_atomic_barrier (int model)
{
  switch (model)
    {
    case __ATOMIC_ACQUIRE:
    case __ATOMIC_ACQ_REL:
    case __ATOMIC_SEQ_CST:
      __atomic_thread_fence (model);
      break;
    default:
      break;
    }
  return;
}

#define __unused __attribute__((unused))

#define __fetch_and_do(proto, type, size, opname, top, bottom)	\
proto								\
{								\
  top;								\
  type rv = arch_atomic_##opname(p, i);				\
  bottom;							\
  return rv;							\
}

#define __atomic_fetch_and_do(type, size, opname)			\
  __fetch_and_do(type __atomic_fetch_##opname##_##size(type* p, type i, int model), \
		 type, size, opname,					\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))

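/* For illustration (not an exhaustive list of the generated functions),
   __atomic_fetch_and_do (int, 4, add) expands to roughly:

     int __atomic_fetch_add_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = arch_atomic_add (p, i);
       post_atomic_barrier (model);
       return rv;
     }

   The arch_atomic_* helpers come from the included atomic.h header and
   return the value the location held before the update.  */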
__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)

#define __sync_fetch_and_do(type, size, opname)				\
  __fetch_and_do(type __sync_fetch_and_##opname##_##size(type* p, type i), \
		 type, size, opname,					\
		 arch_atomic_write_barrier(),				\
		 arch_atomic_read_barrier())

__sync_fetch_and_do (int, 4, add)
__sync_fetch_and_do (int, 4, sub)
__sync_fetch_and_do (int, 4, or)
__sync_fetch_and_do (int, 4, and)
__sync_fetch_and_do (int, 4, xor)
__sync_fetch_and_do (int, 4, nand)
__sync_fetch_and_do (long long, 8, add)
__sync_fetch_and_do (long long, 8, sub)
__sync_fetch_and_do (long long, 8, or)
__sync_fetch_and_do (long long, 8, and)
__sync_fetch_and_do (long long, 8, xor)
__sync_fetch_and_do (long long, 8, nand)

#define __do_and_fetch(proto, type, size, opname, op, op2, top, bottom)	\
proto									\
{									\
  top;									\
  type rv = op2 (arch_atomic_##opname(p, i) op i);			\
  bottom;								\
  return rv;								\
}

#define __atomic_do_and_fetch(type, size, opname, op, op2)		\
  __do_and_fetch(type __atomic_##opname##_fetch_##size(type* p, type i, int model), \
		 type, size, opname, op, op2,				\
		 pre_atomic_barrier(model),				\
		 post_atomic_barrier(model))

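/* The op/op2 arguments let each wrapper recompute the post-update value
   locally: the arch helper still returns the pre-update contents, and
   "op2 (old op i)" rebuilds what was stored.  For illustration,
   __atomic_do_and_fetch (int, 4, nand, &, ~) expands to roughly:

     int __atomic_nand_fetch_4 (int *p, int i, int model)
     {
       pre_atomic_barrier (model);
       int rv = ~ (arch_atomic_nand (p, i) & i);
       post_atomic_barrier (model);
       return rv;
     }
*/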
__atomic_do_and_fetch (int, 4, add, +, )
__atomic_do_and_fetch (int, 4, sub, -, )
__atomic_do_and_fetch (int, 4, or, |, )
__atomic_do_and_fetch (int, 4, and, &, )
__atomic_do_and_fetch (int, 4, xor, ^, )
__atomic_do_and_fetch (int, 4, nand, &, ~)
__atomic_do_and_fetch (long long, 8, add, +, )
__atomic_do_and_fetch (long long, 8, sub, -, )
__atomic_do_and_fetch (long long, 8, or, |, )
__atomic_do_and_fetch (long long, 8, and, &, )
__atomic_do_and_fetch (long long, 8, xor, ^, )
__atomic_do_and_fetch (long long, 8, nand, &, ~)

#define __sync_do_and_fetch(type, size, opname, op, op2)		\
  __do_and_fetch(type __sync_##opname##_and_fetch_##size(type* p, type i), \
		 type, size, opname, op, op2,				\
		 arch_atomic_write_barrier(),				\
		 arch_atomic_read_barrier())

__sync_do_and_fetch (int, 4, add, +, )
__sync_do_and_fetch (int, 4, sub, -, )
__sync_do_and_fetch (int, 4, or, |, )
__sync_do_and_fetch (int, 4, and, &, )
__sync_do_and_fetch (int, 4, xor, ^, )
__sync_do_and_fetch (int, 4, nand, &, ~)
__sync_do_and_fetch (long long, 8, add, +, )
__sync_do_and_fetch (long long, 8, sub, -, )
__sync_do_and_fetch (long long, 8, or, |, )
__sync_do_and_fetch (long long, 8, and, &, )
__sync_do_and_fetch (long long, 8, xor, ^, )
__sync_do_and_fetch (long long, 8, nand, &, ~)

#define __atomic_exchange_methods(type, size)				\
bool									\
__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp,	\
				 type newval, bool weak __unused,	\
				 int models, int modelf __unused)	\
{									\
  type oldval = *oldvalp;						\
  pre_atomic_barrier(models);						\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  post_atomic_barrier(models);						\
  bool success = (retval == oldval);					\
  *oldvalp = retval;							\
  return success;							\
}									\
									\
type									\
__atomic_exchange_##size(volatile type* ptr, type val, int model)	\
{									\
  pre_atomic_barrier(model);						\
  type retval = arch_atomic_exchange(ptr, val);				\
  post_atomic_barrier(model);						\
  return retval;							\
}

__atomic_exchange_methods (int, 4)
__atomic_exchange_methods (long long, 8)
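
/* Caller-side sketch, for illustration only: when the compiler does not
   expand the builtin inline it calls the out-of-line functions above,
   and a compare-and-swap retry loop built on them looks like

     int atomic_increment (volatile int *counter)
     {
       int old = *counter;
       while (!__atomic_compare_exchange_4 (counter, &old, old + 1,
					    0, __ATOMIC_SEQ_CST,
					    __ATOMIC_SEQ_CST))
	 ;
       return old + 1;
     }

   On failure, __atomic_compare_exchange_4 stores the freshly observed
   value back through its second argument, so each retry starts from an
   up-to-date expectation.  */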

#define __sync_exchange_methods(type, size)				\
type									\
__sync_val_compare_and_swap_##size(type* ptr, type oldval, type newval)	\
{									\
  arch_atomic_write_barrier();						\
  type retval = arch_atomic_val_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();						\
  return retval;							\
}									\
									\
bool									\
__sync_bool_compare_and_swap_##size(type* ptr, type oldval, type newval) \
{									\
  arch_atomic_write_barrier();						\
  bool retval = arch_atomic_bool_compare_and_exchange(ptr, oldval, newval); \
  arch_atomic_read_barrier();						\
  return retval;							\
}									\
									\
type									\
__sync_lock_test_and_set_##size(type* ptr, type val)			\
{									\
  type retval = arch_atomic_exchange(ptr, val);				\
  arch_atomic_acquire_barrier_value(retval);				\
  return retval;							\
}

__sync_exchange_methods (int, 4)
__sync_exchange_methods (long long, 8)

#ifdef __LITTLE_ENDIAN__
#define BIT_OFFSET(n, type) ((n) * 8)
#else
#define BIT_OFFSET(n, type) ((4 - sizeof(type) - (n)) * 8)
#endif
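
/* Worked example: a u8 at byte offset 1 within its aligned 32-bit word
   gets a shift of 1 * 8 = 8 on little-endian, but (4 - 1 - 1) * 8 = 16
   on big-endian, where byte 1 holds bits 23..16 of the word.  A u16 at
   byte offset 2 gets 2 * 8 = 16 on little-endian and (4 - 2 - 2) * 8 = 0
   on big-endian.  Either way the result is the bit position of the
   subword's least significant bit within the 32-bit word that the
   compare-and-exchange below operates on.  */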

/* Subword methods require the same approach for both TILEPro and
   TILE-Gx.  We load the background data for the word, insert the
   desired subword piece, then compare-and-exchange it into place.  */
#define u8 unsigned char
#define u16 unsigned short

#define __subword_cmpxchg_body(type, size, ptr, guess, val)		\
  ({									\
    unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);	\
    const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);	\
    const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;		\
    const unsigned int bgmask = ~(valmask << shift);			\
    unsigned int oldword = *p;						\
    type oldval = (oldword >> shift) & valmask;				\
    if (__builtin_expect((oldval == guess), 1)) {			\
      unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
      oldword = arch_atomic_val_compare_and_exchange(p, oldword, word);	\
      oldval = (oldword >> shift) & valmask;				\
    }									\
    oldval;								\
  })

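/* Note the fast path above: if the subword's current contents already
   differ from the caller's guess, the word-wide compare-and-exchange is
   skipped and the observed value is returned as-is; the wrappers below
   then report that as failure.  */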
#define __atomic_subword_cmpxchg(type, size)				\
  									\
bool									\
__atomic_compare_exchange_##size(volatile type* ptr, type* guess_ptr,	\
				 type val, bool weak __unused, int models, \
				 int modelf __unused)			\
{									\
  pre_atomic_barrier(models);						\
  type guess = *guess_ptr;						\
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);	\
  post_atomic_barrier(models);						\
  bool success = (oldval == guess);					\
  *guess_ptr = oldval;							\
  return success;							\
}

__atomic_subword_cmpxchg (u8, 1)
__atomic_subword_cmpxchg (u16, 2)

#define __sync_subword_cmpxchg(type, size)				\
  									\
type									\
__sync_val_compare_and_swap_##size(type* ptr, type guess, type val)	\
{									\
  arch_atomic_write_barrier();						\
  type oldval = __subword_cmpxchg_body(type, size, ptr, guess, val);	\
  arch_atomic_read_barrier();						\
  return oldval;							\
}									\
									\
bool									\
__sync_bool_compare_and_swap_##size(type* ptr, type guess, type val)	\
{									\
  type oldval = __sync_val_compare_and_swap_##size(ptr, guess, val);	\
  return oldval == guess;						\
}

__sync_subword_cmpxchg (u8, 1)
__sync_subword_cmpxchg (u16, 2)

/* For the atomic-update subword methods, we use the same approach as
   above, but we retry until we succeed if the compare-and-exchange
   fails.  */
#define __subword(type, proto, top, expr, bottom)			\
proto									\
{									\
  top									\
  unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);	\
  const int shift = BIT_OFFSET((unsigned long)ptr & 3UL, type);		\
  const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1;		\
  const unsigned int bgmask = ~(valmask << shift);			\
  unsigned int oldword, xword = *p;					\
  type val, oldval;							\
  do {									\
    oldword = xword;							\
    oldval = (oldword >> shift) & valmask;				\
    val = expr;								\
    unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
    xword = arch_atomic_val_compare_and_exchange(p, oldword, word);	\
  } while (__builtin_expect(xword != oldword, 0));			\
  bottom								\
}

#define __atomic_subword_fetch(type, funcname, expr, retval)		\
  __subword(type,							\
	    type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
	    pre_atomic_barrier(model);,					\
	    expr,							\
	    post_atomic_barrier(model); return retval;)

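/* For illustration, __atomic_subword_fetch (u8, fetch_add_1, oldval + i,
   oldval) generates roughly:

     u8 __atomic_fetch_add_1 (volatile u8 *ptr, u8 i, int model)
     {
       pre_atomic_barrier (model);
       unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL);
       const int shift = BIT_OFFSET ((unsigned long)ptr & 3UL, u8);
       const unsigned int valmask = (1 << (sizeof (u8) * 8)) - 1;
       const unsigned int bgmask = ~(valmask << shift);
       unsigned int oldword, xword = *p;
       u8 val, oldval;
       do {
	 oldword = xword;
	 oldval = (oldword >> shift) & valmask;
	 val = oldval + i;
	 unsigned int word = (oldword & bgmask) | ((val & valmask) << shift);
	 xword = arch_atomic_val_compare_and_exchange (p, oldword, word);
       } while (__builtin_expect (xword != oldword, 0));
       post_atomic_barrier (model);
       return oldval;
     }

   The fetch_* entries below pass "oldval" as retval and so return the
   pre-update value; the *_fetch entries (add_fetch_1 etc.) pass "val"
   and return the updated value.  */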
__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)

__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)

__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)

__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)

#define __sync_subword_fetch(type, funcname, expr, retval)	\
  __subword(type,						\
	    type __sync_ ## funcname(type *ptr, type i),	\
	    arch_atomic_read_barrier();,			\
	    expr,						\
	    arch_atomic_write_barrier(); return retval;)

__sync_subword_fetch (u8, fetch_and_add_1, oldval + i, oldval)
__sync_subword_fetch (u8, fetch_and_sub_1, oldval - i, oldval)
__sync_subword_fetch (u8, fetch_and_or_1, oldval | i, oldval)
__sync_subword_fetch (u8, fetch_and_and_1, oldval & i, oldval)
__sync_subword_fetch (u8, fetch_and_xor_1, oldval ^ i, oldval)
__sync_subword_fetch (u8, fetch_and_nand_1, ~(oldval & i), oldval)

__sync_subword_fetch (u16, fetch_and_add_2, oldval + i, oldval)
__sync_subword_fetch (u16, fetch_and_sub_2, oldval - i, oldval)
__sync_subword_fetch (u16, fetch_and_or_2, oldval | i, oldval)
__sync_subword_fetch (u16, fetch_and_and_2, oldval & i, oldval)
__sync_subword_fetch (u16, fetch_and_xor_2, oldval ^ i, oldval)
__sync_subword_fetch (u16, fetch_and_nand_2, ~(oldval & i), oldval)

__sync_subword_fetch (u8, add_and_fetch_1, oldval + i, val)
__sync_subword_fetch (u8, sub_and_fetch_1, oldval - i, val)
__sync_subword_fetch (u8, or_and_fetch_1, oldval | i, val)
__sync_subword_fetch (u8, and_and_fetch_1, oldval & i, val)
__sync_subword_fetch (u8, xor_and_fetch_1, oldval ^ i, val)
__sync_subword_fetch (u8, nand_and_fetch_1, ~(oldval & i), val)

__sync_subword_fetch (u16, add_and_fetch_2, oldval + i, val)
__sync_subword_fetch (u16, sub_and_fetch_2, oldval - i, val)
__sync_subword_fetch (u16, or_and_fetch_2, oldval | i, val)
__sync_subword_fetch (u16, and_and_fetch_2, oldval & i, val)
__sync_subword_fetch (u16, xor_and_fetch_2, oldval ^ i, val)
__sync_subword_fetch (u16, nand_and_fetch_2, ~(oldval & i), val)

#define __atomic_subword_lock(type, size)				\
  __subword(type,							\
	    type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
	    pre_atomic_barrier(model);,					\
	    nval,							\
	    post_atomic_barrier(model); return oldval;)

__atomic_subword_lock (u8, 1)
__atomic_subword_lock (u16, 2)

#define __sync_subword_lock(type, size)					\
  __subword(type,							\
	    type __sync_lock_test_and_set_##size(type* ptr, type nval), \
	    ,								\
	    nval,							\
	    arch_atomic_acquire_barrier_value(oldval); return oldval;)

__sync_subword_lock (u8, 1)
__sync_subword_lock (u16, 2)