xref: /netbsd-src/external/gpl3/gcc/dist/libgomp/oacc-parallel.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /* Copyright (C) 2013-2016 Free Software Foundation, Inc.
2 
3    Contributed by Mentor Embedded.
4 
5    This file is part of the GNU Offloading and Multi Processing Library
6    (libgomp).
7 
8    Libgomp is free software; you can redistribute it and/or modify it
9    under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3, or (at your option)
11    any later version.
12 
13    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16    more details.
17 
18    Under Section 7 of GPL version 3, you are granted additional
19    permissions described in the GCC Runtime Library Exception, version
20    3.1, as published by the Free Software Foundation.
21 
22    You should have received a copy of the GNU General Public License and
23    a copy of the GCC Runtime Library Exception along with this program;
24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25    <http://www.gnu.org/licenses/>.  */
26 
27 /* This file handles OpenACC constructs.  */
28 
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h>  /* For PRIu64.  */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
40 
41 static int
42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
43 {
44   if (pos + 1 >= mapnum)
45     return 0;
46 
47   unsigned char kind = kinds[pos+1] & 0xff;
48 
49   return kind == GOMP_MAP_TO_PSET;
50 }
51 
52 static void goacc_wait (int async, int num_waits, va_list *ap);
53 
54 
55 /* Launch a possibly offloaded function on DEVICE.  FN is the host fn
56    address.  MAPNUM, HOSTADDRS, SIZES & KINDS  describe the memory
57    blocks to be copied to/from the device.  Varadic arguments are
58    keyed optional parameters terminated with a zero.  */
59 
60 void
61 GOACC_parallel_keyed (int device, void (*fn) (void *),
62 		      size_t mapnum, void **hostaddrs, size_t *sizes,
63 		      unsigned short *kinds, ...)
64 {
65   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
66   va_list ap;
67   struct goacc_thread *thr;
68   struct gomp_device_descr *acc_dev;
69   struct target_mem_desc *tgt;
70   void **devaddrs;
71   unsigned int i;
72   struct splay_tree_key_s k;
73   splay_tree_key tgt_fn_key;
74   void (*tgt_fn);
75   int async = GOMP_ASYNC_SYNC;
76   unsigned dims[GOMP_DIM_MAX];
77   unsigned tag;
78 
79 #ifdef HAVE_INTTYPES_H
80   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
81 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
82 #else
83   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
84 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
85 #endif
86   goacc_lazy_initialize ();
87 
88   thr = goacc_thread ();
89   acc_dev = thr->dev;
90 
91   /* Host fallback if "if" clause is false or if the current device is set to
92      the host.  */
93   if (host_fallback)
94     {
95       goacc_save_and_set_bind (acc_device_host);
96       fn (hostaddrs);
97       goacc_restore_bind ();
98       return;
99     }
100   else if (acc_device_type (acc_dev->type) == acc_device_host)
101     {
102       fn (hostaddrs);
103       return;
104     }
105 
106   /* Default: let the runtime choose.  */
107   for (i = 0; i != GOMP_DIM_MAX; i++)
108     dims[i] = 0;
109 
110   va_start (ap, kinds);
111   /* TODO: This will need amending when device_type is implemented.  */
112   while ((tag = va_arg (ap, unsigned)) != 0)
113     {
114       if (GOMP_LAUNCH_DEVICE (tag))
115 	gomp_fatal ("device_type '%d' offload parameters, libgomp is too old",
116 		    GOMP_LAUNCH_DEVICE (tag));
117 
118       switch (GOMP_LAUNCH_CODE (tag))
119 	{
120 	case GOMP_LAUNCH_DIM:
121 	  {
122 	    unsigned mask = GOMP_LAUNCH_OP (tag);
123 
124 	    for (i = 0; i != GOMP_DIM_MAX; i++)
125 	      if (mask & GOMP_DIM_MASK (i))
126 		dims[i] = va_arg (ap, unsigned);
127 	  }
128 	  break;
129 
130 	case GOMP_LAUNCH_ASYNC:
131 	  {
132 	    /* Small constant values are encoded in the operand.  */
133 	    async = GOMP_LAUNCH_OP (tag);
134 
135 	    if (async == GOMP_LAUNCH_OP_MAX)
136 	      async = va_arg (ap, unsigned);
137 	    break;
138 	  }
139 
140 	case GOMP_LAUNCH_WAIT:
141 	  {
142 	    unsigned num_waits = GOMP_LAUNCH_OP (tag);
143 
144 	    if (num_waits)
145 	      goacc_wait (async, num_waits, &ap);
146 	    break;
147 	  }
148 
149 	default:
150 	  gomp_fatal ("unrecognized offload code '%d',"
151 		      " libgomp is too old", GOMP_LAUNCH_CODE (tag));
152 	}
153     }
154   va_end (ap);
155 
156   acc_dev->openacc.async_set_async_func (async);
157 
158   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
159     {
160       k.host_start = (uintptr_t) fn;
161       k.host_end = k.host_start + 1;
162       gomp_mutex_lock (&acc_dev->lock);
163       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
164       gomp_mutex_unlock (&acc_dev->lock);
165 
166       if (tgt_fn_key == NULL)
167 	gomp_fatal ("target function wasn't mapped");
168 
169       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
170     }
171   else
172     tgt_fn = (void (*)) fn;
173 
174   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
175 		       GOMP_MAP_VARS_OPENACC);
176 
177   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
178   for (i = 0; i < mapnum; i++)
179     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
180 			    + tgt->list[i].key->tgt_offset);
181 
182   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
183 			      async, dims, tgt);
184 
185   /* If running synchronously, unmap immediately.  */
186   if (async < acc_async_noval)
187     gomp_unmap_vars (tgt, true);
188   else
189     {
190       gomp_copy_from_async (tgt);
191       acc_dev->openacc.register_async_cleanup_func (tgt);
192     }
193 
194   acc_dev->openacc.async_set_async_func (acc_async_sync);
195 }
196 
197 /* Legacy entry point, only provide host execution.  */
198 
199 void
200 GOACC_parallel (int device, void (*fn) (void *),
201 		size_t mapnum, void **hostaddrs, size_t *sizes,
202 		unsigned short *kinds,
203 		int num_gangs, int num_workers, int vector_length,
204 		int async, int num_waits, ...)
205 {
206   goacc_save_and_set_bind (acc_device_host);
207   fn (hostaddrs);
208   goacc_restore_bind ();
209 }
210 
211 void
212 GOACC_data_start (int device, size_t mapnum,
213 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
214 {
215   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
216   struct target_mem_desc *tgt;
217 
218 #ifdef HAVE_INTTYPES_H
219   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
220 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
221 #else
222   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
223 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
224 #endif
225 
226   goacc_lazy_initialize ();
227 
228   struct goacc_thread *thr = goacc_thread ();
229   struct gomp_device_descr *acc_dev = thr->dev;
230 
231   /* Host fallback or 'do nothing'.  */
232   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
233       || host_fallback)
234     {
235       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
236 			   GOMP_MAP_VARS_OPENACC);
237       tgt->prev = thr->mapped_data;
238       thr->mapped_data = tgt;
239 
240       return;
241     }
242 
243   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
244   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
245 		       GOMP_MAP_VARS_OPENACC);
246   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
247   tgt->prev = thr->mapped_data;
248   thr->mapped_data = tgt;
249 }
250 
251 void
252 GOACC_data_end (void)
253 {
254   struct goacc_thread *thr = goacc_thread ();
255   struct target_mem_desc *tgt = thr->mapped_data;
256 
257   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
258   thr->mapped_data = tgt->prev;
259   gomp_unmap_vars (tgt, true);
260   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
261 }
262 
263 void
264 GOACC_enter_exit_data (int device, size_t mapnum,
265 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
266 		       int async, int num_waits, ...)
267 {
268   struct goacc_thread *thr;
269   struct gomp_device_descr *acc_dev;
270   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
271   bool data_enter = false;
272   size_t i;
273 
274   goacc_lazy_initialize ();
275 
276   thr = goacc_thread ();
277   acc_dev = thr->dev;
278 
279   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
280       || host_fallback)
281     return;
282 
283   if (num_waits)
284     {
285       va_list ap;
286 
287       va_start (ap, num_waits);
288       goacc_wait (async, num_waits, &ap);
289       va_end (ap);
290     }
291 
292   acc_dev->openacc.async_set_async_func (async);
293 
294   /* Determine if this is an "acc enter data".  */
295   for (i = 0; i < mapnum; ++i)
296     {
297       unsigned char kind = kinds[i] & 0xff;
298 
299       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
300 	continue;
301 
302       if (kind == GOMP_MAP_FORCE_ALLOC
303 	  || kind == GOMP_MAP_FORCE_PRESENT
304 	  || kind == GOMP_MAP_FORCE_TO)
305 	{
306 	  data_enter = true;
307 	  break;
308 	}
309 
310       if (kind == GOMP_MAP_DELETE
311 	  || kind == GOMP_MAP_FORCE_FROM)
312 	break;
313 
314       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
315 		      kind);
316     }
317 
318   if (data_enter)
319     {
320       for (i = 0; i < mapnum; i++)
321 	{
322 	  unsigned char kind = kinds[i] & 0xff;
323 
324 	  /* Scan for PSETs.  */
325 	  int psets = find_pset (i, mapnum, kinds);
326 
327 	  if (!psets)
328 	    {
329 	      switch (kind)
330 		{
331 		case GOMP_MAP_POINTER:
332 		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
333 					&kinds[i]);
334 		  break;
335 		case GOMP_MAP_FORCE_ALLOC:
336 		  acc_create (hostaddrs[i], sizes[i]);
337 		  break;
338 		case GOMP_MAP_FORCE_PRESENT:
339 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
340 		  break;
341 		case GOMP_MAP_FORCE_TO:
342 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
343 		  break;
344 		default:
345 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
346 			      kind);
347 		  break;
348 		}
349 	    }
350 	  else
351 	    {
352 	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
353 	      /* Increment 'i' by two because OpenACC requires fortran
354 		 arrays to be contiguous, so each PSET is associated with
355 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
356 		 one MAP_POINTER.  */
357 	      i += 2;
358 	    }
359 	}
360     }
361   else
362     for (i = 0; i < mapnum; ++i)
363       {
364 	unsigned char kind = kinds[i] & 0xff;
365 
366 	int psets = find_pset (i, mapnum, kinds);
367 
368 	if (!psets)
369 	  {
370 	    switch (kind)
371 	      {
372 	      case GOMP_MAP_POINTER:
373 		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
374 					 == GOMP_MAP_FORCE_FROM,
375 					 async, 1);
376 		break;
377 	      case GOMP_MAP_DELETE:
378 		acc_delete (hostaddrs[i], sizes[i]);
379 		break;
380 	      case GOMP_MAP_FORCE_FROM:
381 		acc_copyout (hostaddrs[i], sizes[i]);
382 		break;
383 	      default:
384 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
385 			    kind);
386 		break;
387 	      }
388 	  }
389 	else
390 	  {
391 	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
392 				     == GOMP_MAP_FORCE_FROM, async, 3);
393 	    /* See the above comment.  */
394 	    i += 2;
395 	  }
396       }
397 
398   acc_dev->openacc.async_set_async_func (acc_async_sync);
399 }
400 
401 static void
402 goacc_wait (int async, int num_waits, va_list *ap)
403 {
404   struct goacc_thread *thr = goacc_thread ();
405   struct gomp_device_descr *acc_dev = thr->dev;
406 
407   while (num_waits--)
408     {
409       int qid = va_arg (*ap, int);
410 
411       if (acc_async_test (qid))
412 	continue;
413 
414       if (async == acc_async_sync)
415 	acc_wait (qid);
416       else if (qid == async)
417 	;/* If we're waiting on the same asynchronous queue as we're
418 	    launching on, the queue itself will order work as
419 	    required, so there's no need to wait explicitly.  */
420       else
421 	acc_dev->openacc.async_wait_async_func (qid, async);
422     }
423 }
424 
425 void
426 GOACC_update (int device, size_t mapnum,
427 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
428 	      int async, int num_waits, ...)
429 {
430   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
431   size_t i;
432 
433   goacc_lazy_initialize ();
434 
435   struct goacc_thread *thr = goacc_thread ();
436   struct gomp_device_descr *acc_dev = thr->dev;
437 
438   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
439       || host_fallback)
440     return;
441 
442   if (num_waits)
443     {
444       va_list ap;
445 
446       va_start (ap, num_waits);
447       goacc_wait (async, num_waits, &ap);
448       va_end (ap);
449     }
450 
451   acc_dev->openacc.async_set_async_func (async);
452 
453   for (i = 0; i < mapnum; ++i)
454     {
455       unsigned char kind = kinds[i] & 0xff;
456 
457       switch (kind)
458 	{
459 	case GOMP_MAP_POINTER:
460 	case GOMP_MAP_TO_PSET:
461 	  break;
462 
463 	case GOMP_MAP_FORCE_TO:
464 	  acc_update_device (hostaddrs[i], sizes[i]);
465 	  break;
466 
467 	case GOMP_MAP_FORCE_FROM:
468 	  acc_update_self (hostaddrs[i], sizes[i]);
469 	  break;
470 
471 	default:
472 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
473 	  break;
474 	}
475     }
476 
477   acc_dev->openacc.async_set_async_func (acc_async_sync);
478 }
479 
480 void
481 GOACC_wait (int async, int num_waits, ...)
482 {
483   if (num_waits)
484     {
485       va_list ap;
486 
487       va_start (ap, num_waits);
488       goacc_wait (async, num_waits, &ap);
489       va_end (ap);
490     }
491   else if (async == acc_async_sync)
492     acc_wait_all ();
493   else if (async == acc_async_noval)
494     goacc_thread ()->dev->openacc.async_wait_all_async_func (acc_async_noval);
495 }
496 
/* Return the number of threads; this implementation always answers 1.  */

int
GOACC_get_num_threads (void)
{
  enum { fixed_thread_count = 1 };

  return fixed_thread_count;
}
502 
/* Return the number of the calling thread; this implementation always
   answers 0.  */

int
GOACC_get_thread_num (void)
{
  enum { fixed_thread_id = 0 };

  return fixed_thread_id;
}
508 
509 void
510 GOACC_declare (int device, size_t mapnum,
511 	       void **hostaddrs, size_t *sizes, unsigned short *kinds)
512 {
513   int i;
514 
515   for (i = 0; i < mapnum; i++)
516     {
517       unsigned char kind = kinds[i] & 0xff;
518 
519       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
520 	continue;
521 
522       switch (kind)
523 	{
524 	  case GOMP_MAP_FORCE_ALLOC:
525 	  case GOMP_MAP_FORCE_FROM:
526 	  case GOMP_MAP_FORCE_TO:
527 	  case GOMP_MAP_POINTER:
528 	  case GOMP_MAP_DELETE:
529 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
530 				   &kinds[i], 0, 0);
531 	    break;
532 
533 	  case GOMP_MAP_FORCE_DEVICEPTR:
534 	    break;
535 
536 	  case GOMP_MAP_ALLOC:
537 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
538 	      GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
539 				     &kinds[i], 0, 0);
540 	    break;
541 
542 	  case GOMP_MAP_TO:
543 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
544 				   &kinds[i], 0, 0);
545 
546 	    break;
547 
548 	  case GOMP_MAP_FROM:
549 	    kinds[i] = GOMP_MAP_FORCE_FROM;
550 	    GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
551 				   &kinds[i], 0, 0);
552 	    break;
553 
554 	  case GOMP_MAP_FORCE_PRESENT:
555 	    if (!acc_is_present (hostaddrs[i], sizes[i]))
556 	      gomp_fatal ("[%p,%ld] is not mapped", hostaddrs[i],
557 			  (unsigned long) sizes[i]);
558 	    break;
559 
560 	  default:
561 	    assert (0);
562 	    break;
563 	}
564     }
565 }
566