xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/config/nvptx/doacross.h (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1 /* Copyright (C) 2015-2020 Free Software Foundation, Inc.
2    Contributed by Alexander Monakov <amonakov@ispras.ru>
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
/* This is the NVPTX implementation of doacross spinning.  */
27 
28 #ifndef GOMP_DOACROSS_H
29 #define GOMP_DOACROSS_H 1
30 
31 #include "libgomp.h"
32 
/* Load target for cpu_relax.  Nothing in this header stores to it, so as
   a static object without an initializer it starts out as 0.  */
static int zero;
34 
35 static inline int
cpu_relax(void)36 cpu_relax (void)
37 {
38   int r;
39   /* Here we need a long-latency operation to make the current warp yield.
40      We could use ld.cv, uncached load from system (host) memory, but that
41      would require allocating locked memory in the plugin.  Alternatively,
42      we can use ld.cg, which evicts from L1 and caches in L2.  */
43   asm volatile ("ld.cg.s32 %0, [%1];" : "=r" (r) : "i" (&zero) : "memory");
44   return r;
45 }
46 
doacross_spin(unsigned long * addr,unsigned long expected,unsigned long cur)47 static inline void doacross_spin (unsigned long *addr, unsigned long expected,
48 				  unsigned long cur)
49 {
50   /* Prevent compiler from optimizing based on bounds of containing object.  */
51   asm ("" : "+r" (addr));
52   do
53     {
54       int i = cpu_relax ();
55       cur = addr[i];
56     }
57   while (cur <= expected);
58 }
59 
60 #endif /* GOMP_DOACROSS_H */
61