1 /* Copyright (C) 2015-2022 Free Software Foundation, Inc. 2 Contributed by Alexander Monakov <amonakov@ispras.ru> 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26 /* This is an NVPTX specific implementation of a barrier synchronization 27 mechanism for libgomp. This type is private to the library. This 28 implementation uses atomic instructions and bar.sync instruction. */ 29 30 #include <limits.h> 31 #include "libgomp.h" 32 33 /* For cpu_relax. */ 34 #include "doacross.h" 35 36 /* Assuming ADDR is &bar->generation, return bar. Copied from 37 rtems/bar.c. */ 38 39 static gomp_barrier_t * 40 generation_to_barrier (int *addr) 41 { 42 char *bar 43 = (char *) addr - __builtin_offsetof (gomp_barrier_t, generation); 44 return (gomp_barrier_t *)bar; 45 } 46 47 /* Implement futex_wait-like behaviour to plug into the linux/bar.c 48 implementation. Assumes ADDR is &bar->generation. */ 49 50 static inline void 51 futex_wait (int *addr, int val) 52 { 53 gomp_barrier_t *bar = generation_to_barrier (addr); 54 55 if (bar->total < 2) 56 /* A barrier with less than two threads, nop. */ 57 return; 58 59 gomp_mutex_lock (&bar->lock); 60 61 /* Futex semantics: only go to sleep if *addr == val. */ 62 if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_ACQUIRE) != val, 0)) 63 { 64 gomp_mutex_unlock (&bar->lock); 65 return; 66 } 67 68 /* Register as waiter. */ 69 unsigned int waiters 70 = __atomic_add_fetch (&bar->waiters, 1, MEMMODEL_ACQ_REL); 71 if (waiters == 0) 72 __builtin_abort (); 73 unsigned int waiter_id = waiters; 74 75 if (waiters > 1) 76 { 77 /* Wake other threads in bar.sync. */ 78 asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters)); 79 80 /* Ensure that they have updated waiters. */ 81 asm volatile ("bar.sync 1, %0;" : : "r" (32 * waiters)); 82 } 83 84 gomp_mutex_unlock (&bar->lock); 85 86 while (1) 87 { 88 /* Wait for next thread in barrier. */ 89 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); 90 91 /* Get updated waiters. */ 92 unsigned int updated_waiters 93 = __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE); 94 95 /* Notify that we have updated waiters. */ 96 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); 97 98 waiters = updated_waiters; 99 100 if (waiter_id > waiters) 101 /* A wake happened, and we're in the group of woken threads. */ 102 break; 103 104 /* Continue waiting. */ 105 } 106 } 107 108 /* Implement futex_wake-like behaviour to plug into the linux/bar.c 109 implementation. Assumes ADDR is &bar->generation. */ 110 111 static inline void 112 futex_wake (int *addr, int count) 113 { 114 gomp_barrier_t *bar = generation_to_barrier (addr); 115 116 if (bar->total < 2) 117 /* A barrier with less than two threads, nop. */ 118 return; 119 120 gomp_mutex_lock (&bar->lock); 121 unsigned int waiters = __atomic_load_n (&bar->waiters, MEMMODEL_ACQUIRE); 122 if (waiters == 0) 123 { 124 /* No threads to wake. */ 125 gomp_mutex_unlock (&bar->lock); 126 return; 127 } 128 129 if (count == INT_MAX) 130 /* Release all threads. */ 131 __atomic_store_n (&bar->waiters, 0, MEMMODEL_RELEASE); 132 else if (count < bar->total) 133 /* Release count threads. */ 134 __atomic_add_fetch (&bar->waiters, -count, MEMMODEL_ACQ_REL); 135 else 136 /* Count has an illegal value. */ 137 __builtin_abort (); 138 139 /* Wake other threads in bar.sync. */ 140 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); 141 142 /* Let them get the updated waiters. */ 143 asm volatile ("bar.sync 1, %0;" : : "r" (32 * (waiters + 1))); 144 145 gomp_mutex_unlock (&bar->lock); 146 } 147 148 /* Copied from linux/wait.h. */ 149 150 static inline int do_spin (int *addr, int val) 151 { 152 /* The current implementation doesn't spin. */ 153 return 1; 154 } 155 156 /* Copied from linux/wait.h. */ 157 158 static inline void do_wait (int *addr, int val) 159 { 160 if (do_spin (addr, val)) 161 futex_wait (addr, val); 162 } 163 164 /* Reuse the linux implementation. */ 165 #define GOMP_WAIT_H 1 166 #include "../linux/bar.c" 167