1 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc. 2 Contributed by Richard Henderson <rth@redhat.com>. 3 4 This file is part of the GNU OpenMP Library (libgomp). 5 6 Libgomp is free software; you can redistribute it and/or modify it 7 under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3, or (at your option) 9 any later version. 10 11 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14 more details. 15 16 Under Section 7 of GPL version 3, you are granted additional 17 permissions described in the GCC Runtime Library Exception, version 18 3.1, as published by the Free Software Foundation. 19 20 You should have received a copy of the GNU General Public License and 21 a copy of the GCC Runtime Library Exception along with this program; 22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 <http://www.gnu.org/licenses/>. */ 24 25 /* This file contains routines for managing work-share iteration, both 26 for loops and sections. */ 27 28 #include "libgomp.h" 29 #include <stdlib.h> 30 31 32 /* This function implements the STATIC scheduling method. The caller should 33 iterate *pstart <= x < *pend. Return zero if there are more iterations 34 to perform; nonzero if not. Return less than 0 if this thread had 35 received the absolutely last iteration. */ 36 37 int 38 gomp_iter_static_next (long *pstart, long *pend) 39 { 40 struct gomp_thread *thr = gomp_thread (); 41 struct gomp_team *team = thr->ts.team; 42 struct gomp_work_share *ws = thr->ts.work_share; 43 unsigned long nthreads = team ? team->nthreads : 1; 44 45 if (thr->ts.static_trip == -1) 46 return -1; 47 48 /* Quick test for degenerate teams and orphaned constructs. */ 49 if (nthreads == 1) 50 { 51 *pstart = ws->next; 52 *pend = ws->end; 53 thr->ts.static_trip = -1; 54 return ws->next == ws->end; 55 } 56 57 /* We interpret chunk_size zero as "unspecified", which means that we 58 should break up the iterations such that each thread makes only one 59 trip through the outer loop. */ 60 if (ws->chunk_size == 0) 61 { 62 unsigned long n, q, i; 63 unsigned long s0, e0; 64 long s, e; 65 66 if (thr->ts.static_trip > 0) 67 return 1; 68 69 /* Compute the total number of iterations. */ 70 s = ws->incr + (ws->incr > 0 ? -1 : 1); 71 n = (ws->end - ws->next + s) / ws->incr; 72 i = thr->ts.team_id; 73 74 /* Compute the "zero-based" start and end points. That is, as 75 if the loop began at zero and incremented by one. */ 76 q = n / nthreads; 77 q += (q * nthreads != n); 78 s0 = q * i; 79 e0 = s0 + q; 80 if (e0 > n) 81 e0 = n; 82 83 /* Notice when no iterations allocated for this thread. */ 84 if (s0 >= e0) 85 { 86 thr->ts.static_trip = 1; 87 return 1; 88 } 89 90 /* Transform these to the actual start and end numbers. */ 91 s = (long)s0 * ws->incr + ws->next; 92 e = (long)e0 * ws->incr + ws->next; 93 94 *pstart = s; 95 *pend = e; 96 thr->ts.static_trip = (e0 == n ? -1 : 1); 97 return 0; 98 } 99 else 100 { 101 unsigned long n, s0, e0, i, c; 102 long s, e; 103 104 /* Otherwise, each thread gets exactly chunk_size iterations 105 (if available) each time through the loop. */ 106 107 s = ws->incr + (ws->incr > 0 ? -1 : 1); 108 n = (ws->end - ws->next + s) / ws->incr; 109 i = thr->ts.team_id; 110 c = ws->chunk_size; 111 112 /* Initial guess is a C sized chunk positioned nthreads iterations 113 in, offset by our thread number. */ 114 s0 = (thr->ts.static_trip * nthreads + i) * c; 115 e0 = s0 + c; 116 117 /* Detect overflow. */ 118 if (s0 >= n) 119 return 1; 120 if (e0 > n) 121 e0 = n; 122 123 /* Transform these to the actual start and end numbers. */ 124 s = (long)s0 * ws->incr + ws->next; 125 e = (long)e0 * ws->incr + ws->next; 126 127 *pstart = s; 128 *pend = e; 129 130 if (e0 == n) 131 thr->ts.static_trip = -1; 132 else 133 thr->ts.static_trip++; 134 return 0; 135 } 136 } 137 138 139 /* This function implements the DYNAMIC scheduling method. Arguments are 140 as for gomp_iter_static_next. This function must be called with ws->lock 141 held. */ 142 143 bool 144 gomp_iter_dynamic_next_locked (long *pstart, long *pend) 145 { 146 struct gomp_thread *thr = gomp_thread (); 147 struct gomp_work_share *ws = thr->ts.work_share; 148 long start, end, chunk, left; 149 150 start = ws->next; 151 if (start == ws->end) 152 return false; 153 154 chunk = ws->chunk_size; 155 left = ws->end - start; 156 if (ws->incr < 0) 157 { 158 if (chunk < left) 159 chunk = left; 160 } 161 else 162 { 163 if (chunk > left) 164 chunk = left; 165 } 166 end = start + chunk; 167 168 ws->next = end; 169 *pstart = start; 170 *pend = end; 171 return true; 172 } 173 174 175 #ifdef HAVE_SYNC_BUILTINS 176 /* Similar, but doesn't require the lock held, and uses compare-and-swap 177 instead. Note that the only memory value that changes is ws->next. */ 178 179 bool 180 gomp_iter_dynamic_next (long *pstart, long *pend) 181 { 182 struct gomp_thread *thr = gomp_thread (); 183 struct gomp_work_share *ws = thr->ts.work_share; 184 long start, end, nend, chunk, incr; 185 186 end = ws->end; 187 incr = ws->incr; 188 chunk = ws->chunk_size; 189 190 if (__builtin_expect (ws->mode, 1)) 191 { 192 long tmp = __sync_fetch_and_add (&ws->next, chunk); 193 if (incr > 0) 194 { 195 if (tmp >= end) 196 return false; 197 nend = tmp + chunk; 198 if (nend > end) 199 nend = end; 200 *pstart = tmp; 201 *pend = nend; 202 return true; 203 } 204 else 205 { 206 if (tmp <= end) 207 return false; 208 nend = tmp + chunk; 209 if (nend < end) 210 nend = end; 211 *pstart = tmp; 212 *pend = nend; 213 return true; 214 } 215 } 216 217 start = ws->next; 218 while (1) 219 { 220 long left = end - start; 221 long tmp; 222 223 if (start == end) 224 return false; 225 226 if (incr < 0) 227 { 228 if (chunk < left) 229 chunk = left; 230 } 231 else 232 { 233 if (chunk > left) 234 chunk = left; 235 } 236 nend = start + chunk; 237 238 tmp = __sync_val_compare_and_swap (&ws->next, start, nend); 239 if (__builtin_expect (tmp == start, 1)) 240 break; 241 242 start = tmp; 243 } 244 245 *pstart = start; 246 *pend = nend; 247 return true; 248 } 249 #endif /* HAVE_SYNC_BUILTINS */ 250 251 252 /* This function implements the GUIDED scheduling method. Arguments are 253 as for gomp_iter_static_next. This function must be called with the 254 work share lock held. */ 255 256 bool 257 gomp_iter_guided_next_locked (long *pstart, long *pend) 258 { 259 struct gomp_thread *thr = gomp_thread (); 260 struct gomp_work_share *ws = thr->ts.work_share; 261 struct gomp_team *team = thr->ts.team; 262 unsigned long nthreads = team ? team->nthreads : 1; 263 unsigned long n, q; 264 long start, end; 265 266 if (ws->next == ws->end) 267 return false; 268 269 start = ws->next; 270 n = (ws->end - start) / ws->incr; 271 q = (n + nthreads - 1) / nthreads; 272 273 if (q < ws->chunk_size) 274 q = ws->chunk_size; 275 if (q <= n) 276 end = start + q * ws->incr; 277 else 278 end = ws->end; 279 280 ws->next = end; 281 *pstart = start; 282 *pend = end; 283 return true; 284 } 285 286 #ifdef HAVE_SYNC_BUILTINS 287 /* Similar, but doesn't require the lock held, and uses compare-and-swap 288 instead. Note that the only memory value that changes is ws->next. */ 289 290 bool 291 gomp_iter_guided_next (long *pstart, long *pend) 292 { 293 struct gomp_thread *thr = gomp_thread (); 294 struct gomp_work_share *ws = thr->ts.work_share; 295 struct gomp_team *team = thr->ts.team; 296 unsigned long nthreads = team ? team->nthreads : 1; 297 long start, end, nend, incr; 298 unsigned long chunk_size; 299 300 start = ws->next; 301 end = ws->end; 302 incr = ws->incr; 303 chunk_size = ws->chunk_size; 304 305 while (1) 306 { 307 unsigned long n, q; 308 long tmp; 309 310 if (start == end) 311 return false; 312 313 n = (end - start) / incr; 314 q = (n + nthreads - 1) / nthreads; 315 316 if (q < chunk_size) 317 q = chunk_size; 318 if (__builtin_expect (q <= n, 1)) 319 nend = start + q * incr; 320 else 321 nend = end; 322 323 tmp = __sync_val_compare_and_swap (&ws->next, start, nend); 324 if (__builtin_expect (tmp == start, 1)) 325 break; 326 327 start = tmp; 328 } 329 330 *pstart = start; 331 *pend = nend; 332 return true; 333 } 334 #endif /* HAVE_SYNC_BUILTINS */ 335