xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/oacc-async.c (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* OpenACC Runtime Library Definitions.
2 
3    Copyright (C) 2013-2022 Free Software Foundation, Inc.
4 
5    Contributed by Mentor Embedded.
6 
7    This file is part of the GNU Offloading and Multi Processing Library
8    (libgomp).
9 
10    Libgomp is free software; you can redistribute it and/or modify it
11    under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 3, or (at your option)
13    any later version.
14 
15    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18    more details.
19 
20    Under Section 7 of GPL version 3, you are granted additional
21    permissions described in the GCC Runtime Library Exception, version
22    3.1, as published by the Free Software Foundation.
23 
24    You should have received a copy of the GNU General Public License and
25    a copy of the GCC Runtime Library Exception along with this program;
26    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27    <http://www.gnu.org/licenses/>.  */
28 
29 #include <assert.h>
30 #include <string.h>
31 #include "openacc.h"
32 #include "libgomp.h"
33 #include "oacc-int.h"
34 
35 static struct goacc_thread *
get_goacc_thread(void)36 get_goacc_thread (void)
37 {
38   struct goacc_thread *thr = goacc_thread ();
39 
40   if (!thr || !thr->dev)
41     gomp_fatal ("no device active");
42 
43   return thr;
44 }
45 
46 static int
validate_async_val(int async)47 validate_async_val (int async)
48 {
49   if (!async_valid_p (async))
50     gomp_fatal ("invalid async-argument: %d", async);
51 
52   if (async == acc_async_sync)
53     return -1;
54 
55   if (async == acc_async_noval)
56     return 0;
57 
58   if (async >= 0)
59     /* TODO: we reserve 0 for acc_async_noval before we can clarify the
60        semantics of "default_async".  */
61     return 1 + async;
62   else
63     __builtin_unreachable ();
64 }
65 
66 /* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
67    might return NULL if no asyncqueue is to be used.  Otherwise, if CREATE,
68    create the asyncqueue if it doesn't exist yet.
69 
70    Unless CREATE, this will not generate any OpenACC Profiling Interface
71    events.  */
72 
73 attribute_hidden struct goacc_asyncqueue *
lookup_goacc_asyncqueue(struct goacc_thread * thr,bool create,int async)74 lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
75 {
76   async = validate_async_val (async);
77   if (async < 0)
78     return NULL;
79 
80   struct goacc_asyncqueue *ret_aq = NULL;
81   struct gomp_device_descr *dev = thr->dev;
82 
83   gomp_mutex_lock (&dev->openacc.async.lock);
84 
85   if (!create
86       && (async >= dev->openacc.async.nasyncqueue
87 	  || !dev->openacc.async.asyncqueue[async]))
88     goto end;
89 
90   if (async >= dev->openacc.async.nasyncqueue)
91     {
92       int diff = async + 1 - dev->openacc.async.nasyncqueue;
93       dev->openacc.async.asyncqueue
94 	= gomp_realloc (dev->openacc.async.asyncqueue,
95 			sizeof (goacc_aq) * (async + 1));
96       memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
97 	      0, sizeof (goacc_aq) * diff);
98       dev->openacc.async.nasyncqueue = async + 1;
99     }
100 
101   if (!dev->openacc.async.asyncqueue[async])
102     {
103       dev->openacc.async.asyncqueue[async]
104 	= dev->openacc.async.construct_func (dev->target_id);
105 
106       if (!dev->openacc.async.asyncqueue[async])
107 	{
108 	  gomp_mutex_unlock (&dev->openacc.async.lock);
109 	  gomp_fatal ("async %d creation failed", async);
110 	}
111 
112       /* Link new async queue into active list.  */
113       goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
114       n->aq = dev->openacc.async.asyncqueue[async];
115       n->next = dev->openacc.async.active;
116       dev->openacc.async.active = n;
117     }
118 
119   ret_aq = dev->openacc.async.asyncqueue[async];
120 
121  end:
122   gomp_mutex_unlock (&dev->openacc.async.lock);
123   return ret_aq;
124 }
125 
126 /* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
127    might return NULL if no asyncqueue is to be used.  Otherwise, create the
128    asyncqueue if it doesn't exist yet.  */
129 
130 attribute_hidden struct goacc_asyncqueue *
get_goacc_asyncqueue(int async)131 get_goacc_asyncqueue (int async)
132 {
133   struct goacc_thread *thr = get_goacc_thread ();
134   return lookup_goacc_asyncqueue (thr, true, async);
135 }
136 
137 int
acc_async_test(int async)138 acc_async_test (int async)
139 {
140   struct goacc_thread *thr = goacc_thread ();
141 
142   if (!thr || !thr->dev)
143     gomp_fatal ("no device active");
144 
145   goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
146   if (!aq)
147     return 1;
148 
149   acc_prof_info prof_info;
150   acc_api_info api_info;
151   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
152   if (profiling_p)
153     {
154       prof_info.async = async;
155       prof_info.async_queue = prof_info.async;
156     }
157 
158   int res = thr->dev->openacc.async.test_func (aq);
159 
160   if (profiling_p)
161     {
162       thr->prof_info = NULL;
163       thr->api_info = NULL;
164     }
165 
166   return res;
167 }
168 
169 int
acc_async_test_all(void)170 acc_async_test_all (void)
171 {
172   struct goacc_thread *thr = get_goacc_thread ();
173 
174   acc_prof_info prof_info;
175   acc_api_info api_info;
176   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
177 
178   int ret = 1;
179   gomp_mutex_lock (&thr->dev->openacc.async.lock);
180   for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
181     if (!thr->dev->openacc.async.test_func (l->aq))
182       {
183 	ret = 0;
184 	break;
185       }
186   gomp_mutex_unlock (&thr->dev->openacc.async.lock);
187 
188   if (profiling_p)
189     {
190       thr->prof_info = NULL;
191       thr->api_info = NULL;
192     }
193 
194   return ret;
195 }
196 
197 void
acc_wait(int async)198 acc_wait (int async)
199 {
200   struct goacc_thread *thr = get_goacc_thread ();
201 
202   goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
203   if (!aq)
204     return;
205 
206   acc_prof_info prof_info;
207   acc_api_info api_info;
208   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
209   if (profiling_p)
210     {
211       prof_info.async = async;
212       prof_info.async_queue = prof_info.async;
213     }
214 
215   if (!thr->dev->openacc.async.synchronize_func (aq))
216     gomp_fatal ("wait on %d failed", async);
217 
218   if (profiling_p)
219     {
220       thr->prof_info = NULL;
221       thr->api_info = NULL;
222     }
223 }
224 
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.  It is
   emitted as a strong alias where HAVE_ATTRIBUTE_ALIAS is defined, and as a
   forwarding function otherwise.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#else
strong_alias (acc_wait, acc_async_wait)
#endif
235 
236 void
237 acc_wait_async (int async1, int async2)
238 {
239   struct goacc_thread *thr = get_goacc_thread ();
240 
241   goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
242   /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
243      we'll always be synchronous anyways?  */
244   if (!aq1)
245     return;
246 
247   acc_prof_info prof_info;
248   acc_api_info api_info;
249   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
250   if (profiling_p)
251     {
252       prof_info.async = async2;
253       prof_info.async_queue = prof_info.async;
254     }
255 
256   goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
257   /* An async queue is always synchronized with itself.  */
258   if (aq1 == aq2)
259     goto out_prof;
260 
261   if (aq2)
262     {
263       if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
264 	gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
265     }
266   else
267     {
268       /* TODO: Local thread synchronization.
269 	 Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
270       if (!thr->dev->openacc.async.synchronize_func (aq1))
271 	gomp_fatal ("wait on %d failed", async1);
272     }
273 
274  out_prof:
275   if (profiling_p)
276     {
277       thr->prof_info = NULL;
278       thr->api_info = NULL;
279     }
280 }
281 
282 void
acc_wait_all(void)283 acc_wait_all (void)
284 {
285   struct goacc_thread *thr = goacc_thread ();
286 
287   acc_prof_info prof_info;
288   acc_api_info api_info;
289   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
290 
291   bool ret = true;
292   gomp_mutex_lock (&thr->dev->openacc.async.lock);
293   for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
294     ret &= thr->dev->openacc.async.synchronize_func (l->aq);
295   gomp_mutex_unlock (&thr->dev->openacc.async.lock);
296 
297   if (profiling_p)
298     {
299       thr->prof_info = NULL;
300       thr->api_info = NULL;
301     }
302 
303   if (!ret)
304     gomp_fatal ("wait all failed");
305 }
306 
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.
   It is emitted as a strong alias where HAVE_ATTRIBUTE_ALIAS is defined, and
   as a forwarding function otherwise.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#else
strong_alias (acc_wait_all, acc_async_wait_all)
#endif
317 
318 void
319 acc_wait_all_async (int async)
320 {
321   struct goacc_thread *thr = get_goacc_thread ();
322 
323   acc_prof_info prof_info;
324   acc_api_info api_info;
325   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
326   if (profiling_p)
327     {
328       prof_info.async = async;
329       prof_info.async_queue = prof_info.async;
330     }
331 
332   goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
333 
334   bool ret = true;
335   gomp_mutex_lock (&thr->dev->openacc.async.lock);
336   for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
337     {
338       if (waiting_queue)
339 	ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
340       else
341 	/* TODO: Local thread synchronization.
342 	   Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
343 	ret &= thr->dev->openacc.async.synchronize_func (l->aq);
344     }
345   gomp_mutex_unlock (&thr->dev->openacc.async.lock);
346 
347   if (profiling_p)
348     {
349       thr->prof_info = NULL;
350       thr->api_info = NULL;
351     }
352 
353   if (!ret)
354     gomp_fatal ("wait all async(%d) failed", async);
355 }
356 
357 void
GOACC_wait(int async,int num_waits,...)358 GOACC_wait (int async, int num_waits, ...)
359 {
360   goacc_lazy_initialize ();
361 
362   struct goacc_thread *thr = goacc_thread ();
363 
364   /* No nesting.  */
365   assert (thr->prof_info == NULL);
366   assert (thr->api_info == NULL);
367   acc_prof_info prof_info;
368   acc_api_info api_info;
369   bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
370   if (profiling_p)
371     {
372       prof_info.async = async;
373       prof_info.async_queue = prof_info.async;
374     }
375 
376   if (num_waits)
377     {
378       va_list ap;
379 
380       va_start (ap, num_waits);
381       goacc_wait (async, num_waits, &ap);
382       va_end (ap);
383     }
384   else if (async == acc_async_sync)
385     acc_wait_all ();
386   else
387     acc_wait_all_async (async);
388 
389   if (profiling_p)
390     {
391       thr->prof_info = NULL;
392       thr->api_info = NULL;
393     }
394 }
395 
396 attribute_hidden void
goacc_wait(int async,int num_waits,va_list * ap)397 goacc_wait (int async, int num_waits, va_list *ap)
398 {
399   while (num_waits--)
400     {
401       int qid = va_arg (*ap, int);
402 
403       /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'.  */
404       if (qid == acc_async_noval)
405 	{
406 	  if (async == acc_async_sync)
407 	    acc_wait_all ();
408 	  else
409 	    acc_wait_all_async (async);
410 	  break;
411 	}
412 
413       if (acc_async_test (qid))
414 	continue;
415 
416       if (async == acc_async_sync)
417 	acc_wait (qid);
418       else if (qid == async)
419 	/* If we're waiting on the same asynchronous queue as we're
420 	   launching on, the queue itself will order work as
421 	   required, so there's no need to wait explicitly.  */
422 	;
423       else
424 	acc_wait_async (qid, async);
425     }
426 }
427 
428 attribute_hidden void
goacc_async_free(struct gomp_device_descr * devicep,struct goacc_asyncqueue * aq,void * ptr)429 goacc_async_free (struct gomp_device_descr *devicep,
430 		  struct goacc_asyncqueue *aq, void *ptr)
431 {
432   if (!aq)
433     free (ptr);
434   else
435     devicep->openacc.async.queue_callback_func (aq, free, ptr);
436 }
437 
438 /* This function initializes the asyncqueues for the device specified by
439    DEVICEP.  TODO DEVICEP must be locked on entry, and remains locked on
440    return.  */
441 
442 attribute_hidden void
goacc_init_asyncqueues(struct gomp_device_descr * devicep)443 goacc_init_asyncqueues (struct gomp_device_descr *devicep)
444 {
445   devicep->openacc.async.nasyncqueue = 0;
446   devicep->openacc.async.asyncqueue = NULL;
447   devicep->openacc.async.active = NULL;
448   gomp_mutex_init (&devicep->openacc.async.lock);
449 }
450 
451 /* This function finalizes the asyncqueues for the device specified by DEVICEP.
452    TODO DEVICEP must be locked on entry, and remains locked on return.  */
453 
454 attribute_hidden bool
goacc_fini_asyncqueues(struct gomp_device_descr * devicep)455 goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
456 {
457   bool ret = true;
458   gomp_mutex_lock (&devicep->openacc.async.lock);
459   if (devicep->openacc.async.nasyncqueue > 0)
460     {
461       goacc_aq_list next;
462       for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
463 	{
464 	  ret &= devicep->openacc.async.destruct_func (l->aq);
465 	  next = l->next;
466 	  free (l);
467 	}
468       free (devicep->openacc.async.asyncqueue);
469       devicep->openacc.async.nasyncqueue = 0;
470       devicep->openacc.async.asyncqueue = NULL;
471       devicep->openacc.async.active = NULL;
472     }
473   gomp_mutex_unlock (&devicep->openacc.async.lock);
474   gomp_mutex_destroy (&devicep->openacc.async.lock);
475   return ret;
476 }
477