1 /* Copyright (C) 2015-2022 Free Software Foundation, Inc.
2 Contributed by Alexander Monakov <amonakov@ispras.ru>
3
4 This file is part of the GNU Offloading and Multi Processing Library
5 (libgomp).
6
7 Libgomp is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 /* This is an NVPTX specific implementation of a barrier synchronization
27 mechanism for libgomp. This type is private to the library. This
28 implementation uses atomic instructions and bar.sync instruction. */
29
30 #include <limits.h>
31 #include "libgomp.h"
32
33 /* For cpu_relax. */
34 #include "doacross.h"
35
36 /* Assuming ADDR is &bar->generation, return bar. Copied from
37 rtems/bar.c. */
38
39 static gomp_barrier_t *
generation_to_barrier(int * addr)40 generation_to_barrier (int *addr)
41 {
42 char *bar
43 = (char *) addr - __builtin_offsetof (gomp_barrier_t, generation);
44 return (gomp_barrier_t *)bar;
45 }
46
47 /* Implement futex_wait-like behaviour to plug into the linux/bar.c
48 implementation. Assumes ADDR is &bar->generation. */
49
50 static inline void
futex_wait(int * addr,int val)51 futex_wait (int *addr, int val)
52 {
53 gomp_barrier_t *bar = generation_to_barrier (addr);
54
55 if (bar->total < 2)
56 /* A barrier with less than two threads, nop. */
57 return;
58
59 gomp_mutex_lock (&bar->lock);
60
61 /* Futex semantics: only go to sleep if *addr == val. */
62 if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_ACQUIRE) != val, 0))
63 {
64 gomp_mutex_unlock (&bar->lock);
65 return;
66 }
67
68 /* Register as waiter. */
69 unsigned int waiters
70 = __atomic_add_fetch (&bar->waiters, 1, MEMMODEL_ACQ_REL);
71 if (waiters == 0)
72 __builtin_abort ();
73 unsigned int waiter_id = waiters;
74
75 if (waiters > 1)
76 {
77 /* Wake other threads in bar.sync. */
78 asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters));
79
80 /* Ensure that they have updated waiters. */
81 asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters));
82 }
83
84 gomp_mutex_unlock (&bar->lock);
85
86 while (1)
87 {
88 /* Wait for next thread in barrier. */
89 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
90
91 /* Get updated waiters. */
92 unsigned int updated_waiters
93 = __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE);
94
95 /* Notify that we have updated waiters. */
96 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
97
98 waiters = updated_waiters;
99
100 if (waiter_id > waiters)
101 /* A wake happened, and we're in the group of woken threads. */
102 break;
103
104 /* Continue waiting. */
105 }
106 }
107
108 /* Implement futex_wake-like behaviour to plug into the linux/bar.c
109 implementation. Assumes ADDR is &bar->generation. */
110
111 static inline void
futex_wake(int * addr,int count)112 futex_wake (int *addr, int count)
113 {
114 gomp_barrier_t *bar = generation_to_barrier (addr);
115
116 if (bar->total < 2)
117 /* A barrier with less than two threads, nop. */
118 return;
119
120 gomp_mutex_lock (&bar->lock);
121 unsigned int waiters = __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE);
122 if (waiters == 0)
123 {
124 /* No threads to wake. */
125 gomp_mutex_unlock (&bar->lock);
126 return;
127 }
128
129 if (count == INT_MAX)
130 /* Release all threads. */
131 __atomic_store_n (&bar->waiters, 0, MEMMODEL_RELEASE);
132 else if (count < bar->total)
133 /* Release count threads. */
134 __atomic_add_fetch (&bar->waiters, -count, MEMMODEL_ACQ_REL);
135 else
136 /* Count has an illegal value. */
137 __builtin_abort ();
138
139 /* Wake other threads in bar.sync. */
140 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
141
142 /* Let them get the updated waiters. */
143 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1)));
144
145 gomp_mutex_unlock (&bar->lock);
146 }
147
/* Stand-in for the spinning helper of linux/wait.h.  This port does no
   spinning at all: ADDR and VAL are ignored and the result is always 1,
   which makes do_wait go on to futex_wait.  */

static inline int
do_spin (int *addr, int val)
{
  (void) addr;
  (void) val;

  return 1;
}
155
/* Wait helper mirroring the one in linux/wait.h: ask do_spin first and
   fall back to futex_wait on ADDR/VAL when it returns nonzero.  */

static inline void
do_wait (int *addr, int val)
{
  if (!do_spin (addr, val))
    return;

  futex_wait (addr, val);
}
163
164 /* Reuse the linux implementation. */
165 #define GOMP_WAIT_H 1
166 #include "../linux/bar.c"
167