xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/config/nvptx/bar.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* Copyright (C) 2015-2022 Free Software Foundation, Inc.
2    Contributed by Alexander Monakov <amonakov@ispras.ru>
3 
4    This file is part of the GNU Offloading and Multi Processing Library
5    (libgomp).
6 
7    Libgomp is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15    more details.
16 
17    Under Section 7 of GPL version 3, you are granted additional
18    permissions described in the GCC Runtime Library Exception, version
19    3.1, as published by the Free Software Foundation.
20 
21    You should have received a copy of the GNU General Public License and
22    a copy of the GCC Runtime Library Exception along with this program;
23    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24    <http://www.gnu.org/licenses/>.  */
25 
26 /* This is an NVPTX specific implementation of a barrier synchronization
27    mechanism for libgomp.  This type is private to the library.  This
28    implementation uses atomic instructions and bar.sync instruction.  */
29 
30 #include <limits.h>
31 #include "libgomp.h"
32 
33 /* For cpu_relax.  */
34 #include "doacross.h"
35 
36 /* Assuming ADDR is &bar->generation, return bar.  Copied from
37    rtems/bar.c.  */
38 
39 static gomp_barrier_t *
generation_to_barrier(int * addr)40 generation_to_barrier (int *addr)
41 {
42   char *bar
43     = (char *) addr - __builtin_offsetof (gomp_barrier_t, generation);
44   return (gomp_barrier_t *)bar;
45 }
46 
47 /* Implement futex_wait-like behaviour to plug into the linux/bar.c
48    implementation.  Assumes ADDR is &bar->generation.   */
49 
50 static inline void
futex_wait(int * addr,int val)51 futex_wait (int *addr, int val)
52 {
53   gomp_barrier_t *bar = generation_to_barrier (addr);
54 
55   if (bar->total < 2)
56     /* A barrier with less than two threads, nop.  */
57     return;
58 
59   gomp_mutex_lock (&bar->lock);
60 
61   /* Futex semantics: only go to sleep if *addr == val.  */
62   if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_ACQUIRE) != val, 0))
63     {
64       gomp_mutex_unlock (&bar->lock);
65       return;
66     }
67 
68   /* Register as waiter.  */
69   unsigned int waiters
70     = __atomic_add_fetch (&bar->waiters, 1, MEMMODEL_ACQ_REL);
71   if (waiters == 0)
72     __builtin_abort ();
73   unsigned int waiter_id = waiters;
74 
75   if (waiters > 1)
76     {
77       /* Wake other threads in bar.sync.  */
78       asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters));
79 
80       /* Ensure that they have updated waiters.  */
81       asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters));
82     }
83 
84   gomp_mutex_unlock (&bar->lock);
85 
86   while (1)
87     {
88       /* Wait for next thread in barrier.  */
89       asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
90 
91       /* Get updated waiters.  */
92       unsigned int updated_waiters
93 	= __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE);
94 
95       /* Notify that we have updated waiters.  */
96       asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
97 
98       waiters = updated_waiters;
99 
100       if (waiter_id > waiters)
101 	/* A wake happened, and we're in the group of woken threads.  */
102 	break;
103 
104       /* Continue waiting.  */
105     }
106 }
107 
108 /* Implement futex_wake-like behaviour to plug into the linux/bar.c
109    implementation.  Assumes ADDR is &bar->generation.  */
110 
111 static inline void
futex_wake(int * addr,int count)112 futex_wake (int *addr, int count)
113 {
114   gomp_barrier_t *bar = generation_to_barrier (addr);
115 
116   if (bar->total < 2)
117     /* A barrier with less than two threads, nop.  */
118     return;
119 
120   gomp_mutex_lock (&bar->lock);
121   unsigned int waiters = __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE);
122   if (waiters == 0)
123     {
124       /* No threads to wake.  */
125       gomp_mutex_unlock (&bar->lock);
126       return;
127     }
128 
129   if (count == INT_MAX)
130     /* Release all threads.  */
131     __atomic_store_n (&bar->waiters, 0, MEMMODEL_RELEASE);
132   else if (count < bar->total)
133     /* Release count threads.  */
134     __atomic_add_fetch (&bar->waiters, -count, MEMMODEL_ACQ_REL);
135   else
136     /* Count has an illegal value.  */
137     __builtin_abort ();
138 
139   /* Wake other threads in bar.sync.  */
140   asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
141 
142   /* Let them get the updated waiters.  */
143   asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
144 
145   gomp_mutex_unlock (&bar->lock);
146 }
147 
/* Spin phase of a wait, modelled on linux/wait.h.  This implementation
   never spins: it ignores ADDR and VAL and always returns 1, which makes
   do_wait proceed to futex_wait.  */

static inline int
do_spin (int *addr, int val)
{
  /* Both arguments are intentionally unused.  */
  (void) addr;
  (void) val;
  return 1;
}
155 
/* Wait on ADDR while it holds VAL, modelled on linux/wait.h: run the spin
   phase first and fall back to futex_wait only if do_spin returns
   nonzero.  */

static inline void
do_wait (int *addr, int val)
{
  if (!do_spin (addr, val))
    return;

  futex_wait (addr, val);
}
163 
164 /* Reuse the linux implementation.  */
165 #define GOMP_WAIT_H 1
166 #include "../linux/bar.c"
167