xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/oacc-parallel.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2 
3    Contributed by Mentor Embedded.
4 
5    This file is part of the GNU Offloading and Multi Processing Library
6    (libgomp).
7 
8    Libgomp is free software; you can redistribute it and/or modify it
9    under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3, or (at your option)
11    any later version.
12 
13    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
14    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
16    more details.
17 
18    Under Section 7 of GPL version 3, you are granted additional
19    permissions described in the GCC Runtime Library Exception, version
20    3.1, as published by the Free Software Foundation.
21 
22    You should have received a copy of the GNU General Public License and
23    a copy of the GCC Runtime Library Exception along with this program;
24    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
25    <http://www.gnu.org/licenses/>.  */
26 
27 /* This file handles OpenACC constructs.  */
28 
29 #include "openacc.h"
30 #include "libgomp.h"
31 #include "libgomp_g.h"
32 #include "gomp-constants.h"
33 #include "oacc-int.h"
34 #ifdef HAVE_INTTYPES_H
35 # include <inttypes.h>  /* For PRIu64.  */
36 #endif
37 #include <string.h>
38 #include <stdarg.h>
39 #include <assert.h>
40 
41 static int
42 find_pset (int pos, size_t mapnum, unsigned short *kinds)
43 {
44   if (pos + 1 >= mapnum)
45     return 0;
46 
47   unsigned char kind = kinds[pos+1] & 0xff;
48 
49   return kind == GOMP_MAP_TO_PSET;
50 }
51 
52 static void goacc_wait (int async, int num_waits, va_list ap);
53 
54 void
55 GOACC_parallel (int device, void (*fn) (void *),
56 		size_t mapnum, void **hostaddrs, size_t *sizes,
57 		unsigned short *kinds,
58 		int num_gangs, int num_workers, int vector_length,
59 		int async, int num_waits, ...)
60 {
61   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
62   va_list ap;
63   struct goacc_thread *thr;
64   struct gomp_device_descr *acc_dev;
65   struct target_mem_desc *tgt;
66   void **devaddrs;
67   unsigned int i;
68   struct splay_tree_key_s k;
69   splay_tree_key tgt_fn_key;
70   void (*tgt_fn);
71 
72   if (num_gangs != 1)
73     gomp_fatal ("num_gangs (%d) different from one is not yet supported",
74 		num_gangs);
75   if (num_workers != 1)
76     gomp_fatal ("num_workers (%d) different from one is not yet supported",
77 		num_workers);
78 
79 #ifdef HAVE_INTTYPES_H
80   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p, "
81 		 "async = %d\n",
82 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds, async);
83 #else
84   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p, async=%d\n",
85 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds,
86 	      async);
87 #endif
88   goacc_lazy_initialize ();
89 
90   thr = goacc_thread ();
91   acc_dev = thr->dev;
92 
93   /* Host fallback if "if" clause is false or if the current device is set to
94      the host.  */
95   if (host_fallback)
96     {
97       goacc_save_and_set_bind (acc_device_host);
98       fn (hostaddrs);
99       goacc_restore_bind ();
100       return;
101     }
102   else if (acc_device_type (acc_dev->type) == acc_device_host)
103     {
104       fn (hostaddrs);
105       return;
106     }
107 
108   va_start (ap, num_waits);
109 
110   if (num_waits > 0)
111     goacc_wait (async, num_waits, ap);
112 
113   va_end (ap);
114 
115   acc_dev->openacc.async_set_async_func (async);
116 
117   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
118     {
119       k.host_start = (uintptr_t) fn;
120       k.host_end = k.host_start + 1;
121       gomp_mutex_lock (&acc_dev->lock);
122       tgt_fn_key = splay_tree_lookup (&acc_dev->mem_map, &k);
123       gomp_mutex_unlock (&acc_dev->lock);
124 
125       if (tgt_fn_key == NULL)
126 	gomp_fatal ("target function wasn't mapped");
127 
128       tgt_fn = (void (*)) tgt_fn_key->tgt_offset;
129     }
130   else
131     tgt_fn = (void (*)) fn;
132 
133   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
134 		       false);
135 
136   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
137   for (i = 0; i < mapnum; i++)
138     devaddrs[i] = (void *) (tgt->list[i]->tgt->tgt_start
139 			    + tgt->list[i]->tgt_offset);
140 
141   acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, sizes, kinds,
142 			      num_gangs, num_workers, vector_length, async,
143 			      tgt);
144 
145   /* If running synchronously, unmap immediately.  */
146   if (async < acc_async_noval)
147     gomp_unmap_vars (tgt, true);
148   else
149     {
150       gomp_copy_from_async (tgt);
151       acc_dev->openacc.register_async_cleanup_func (tgt);
152     }
153 
154   acc_dev->openacc.async_set_async_func (acc_async_sync);
155 }
156 
157 void
158 GOACC_data_start (int device, size_t mapnum,
159 		  void **hostaddrs, size_t *sizes, unsigned short *kinds)
160 {
161   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
162   struct target_mem_desc *tgt;
163 
164 #ifdef HAVE_INTTYPES_H
165   gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n",
166 	      __FUNCTION__, (uint64_t) mapnum, hostaddrs, sizes, kinds);
167 #else
168   gomp_debug (0, "%s: mapnum=%lu, hostaddrs=%p, sizes=%p, kinds=%p\n",
169 	      __FUNCTION__, (unsigned long) mapnum, hostaddrs, sizes, kinds);
170 #endif
171 
172   goacc_lazy_initialize ();
173 
174   struct goacc_thread *thr = goacc_thread ();
175   struct gomp_device_descr *acc_dev = thr->dev;
176 
177   /* Host fallback or 'do nothing'.  */
178   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
179       || host_fallback)
180     {
181       tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
182       tgt->prev = thr->mapped_data;
183       thr->mapped_data = tgt;
184 
185       return;
186     }
187 
188   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
189   tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
190 		       false);
191   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
192   tgt->prev = thr->mapped_data;
193   thr->mapped_data = tgt;
194 }
195 
196 void
197 GOACC_data_end (void)
198 {
199   struct goacc_thread *thr = goacc_thread ();
200   struct target_mem_desc *tgt = thr->mapped_data;
201 
202   gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
203   thr->mapped_data = tgt->prev;
204   gomp_unmap_vars (tgt, true);
205   gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
206 }
207 
208 void
209 GOACC_enter_exit_data (int device, size_t mapnum,
210 		       void **hostaddrs, size_t *sizes, unsigned short *kinds,
211 		       int async, int num_waits, ...)
212 {
213   struct goacc_thread *thr;
214   struct gomp_device_descr *acc_dev;
215   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
216   bool data_enter = false;
217   size_t i;
218 
219   goacc_lazy_initialize ();
220 
221   thr = goacc_thread ();
222   acc_dev = thr->dev;
223 
224   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
225       || host_fallback)
226     return;
227 
228   if (num_waits > 0)
229     {
230       va_list ap;
231 
232       va_start (ap, num_waits);
233 
234       goacc_wait (async, num_waits, ap);
235 
236       va_end (ap);
237     }
238 
239   acc_dev->openacc.async_set_async_func (async);
240 
241   /* Determine if this is an "acc enter data".  */
242   for (i = 0; i < mapnum; ++i)
243     {
244       unsigned char kind = kinds[i] & 0xff;
245 
246       if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
247 	continue;
248 
249       if (kind == GOMP_MAP_FORCE_ALLOC
250 	  || kind == GOMP_MAP_FORCE_PRESENT
251 	  || kind == GOMP_MAP_FORCE_TO)
252 	{
253 	  data_enter = true;
254 	  break;
255 	}
256 
257       if (kind == GOMP_MAP_FORCE_DEALLOC
258 	  || kind == GOMP_MAP_FORCE_FROM)
259 	break;
260 
261       gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
262 		      kind);
263     }
264 
265   if (data_enter)
266     {
267       for (i = 0; i < mapnum; i++)
268 	{
269 	  unsigned char kind = kinds[i] & 0xff;
270 
271 	  /* Scan for PSETs.  */
272 	  int psets = find_pset (i, mapnum, kinds);
273 
274 	  if (!psets)
275 	    {
276 	      switch (kind)
277 		{
278 		case GOMP_MAP_POINTER:
279 		  gomp_acc_insert_pointer (1, &hostaddrs[i], &sizes[i],
280 					&kinds[i]);
281 		  break;
282 		case GOMP_MAP_FORCE_ALLOC:
283 		  acc_create (hostaddrs[i], sizes[i]);
284 		  break;
285 		case GOMP_MAP_FORCE_PRESENT:
286 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
287 		  break;
288 		case GOMP_MAP_FORCE_TO:
289 		  acc_present_or_copyin (hostaddrs[i], sizes[i]);
290 		  break;
291 		default:
292 		  gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
293 			      kind);
294 		  break;
295 		}
296 	    }
297 	  else
298 	    {
299 	      gomp_acc_insert_pointer (3, &hostaddrs[i], &sizes[i], &kinds[i]);
300 	      /* Increment 'i' by two because OpenACC requires fortran
301 		 arrays to be contiguous, so each PSET is associated with
302 		 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
303 		 one MAP_POINTER.  */
304 	      i += 2;
305 	    }
306 	}
307     }
308   else
309     for (i = 0; i < mapnum; ++i)
310       {
311 	unsigned char kind = kinds[i] & 0xff;
312 
313 	int psets = find_pset (i, mapnum, kinds);
314 
315 	if (!psets)
316 	  {
317 	    switch (kind)
318 	      {
319 	      case GOMP_MAP_POINTER:
320 		gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
321 					 == GOMP_MAP_FORCE_FROM,
322 					 async, 1);
323 		break;
324 	      case GOMP_MAP_FORCE_DEALLOC:
325 		acc_delete (hostaddrs[i], sizes[i]);
326 		break;
327 	      case GOMP_MAP_FORCE_FROM:
328 		acc_copyout (hostaddrs[i], sizes[i]);
329 		break;
330 	      default:
331 		gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
332 			    kind);
333 		break;
334 	      }
335 	  }
336 	else
337 	  {
338 	    gomp_acc_remove_pointer (hostaddrs[i], (kinds[i] & 0xff)
339 				     == GOMP_MAP_FORCE_FROM, async, 3);
340 	    /* See the above comment.  */
341 	    i += 2;
342 	  }
343       }
344 
345   acc_dev->openacc.async_set_async_func (acc_async_sync);
346 }
347 
348 static void
349 goacc_wait (int async, int num_waits, va_list ap)
350 {
351   struct goacc_thread *thr = goacc_thread ();
352   struct gomp_device_descr *acc_dev = thr->dev;
353   int i;
354 
355   assert (num_waits >= 0);
356 
357   if (async == acc_async_sync && num_waits == 0)
358     {
359       acc_wait_all ();
360       return;
361     }
362 
363   if (async == acc_async_sync && num_waits)
364     {
365       for (i = 0; i < num_waits; i++)
366         {
367           int qid = va_arg (ap, int);
368 
369           if (acc_async_test (qid))
370             continue;
371 
372           acc_wait (qid);
373         }
374       return;
375     }
376 
377   if (async == acc_async_noval && num_waits == 0)
378     {
379       acc_dev->openacc.async_wait_all_async_func (acc_async_noval);
380       return;
381     }
382 
383   for (i = 0; i < num_waits; i++)
384     {
385       int qid = va_arg (ap, int);
386 
387       if (acc_async_test (qid))
388 	continue;
389 
390       /* If we're waiting on the same asynchronous queue as we're launching on,
391          the queue itself will order work as required, so there's no need to
392 	 wait explicitly.  */
393       if (qid != async)
394 	acc_dev->openacc.async_wait_async_func (qid, async);
395     }
396 }
397 
398 void
399 GOACC_update (int device, size_t mapnum,
400 	      void **hostaddrs, size_t *sizes, unsigned short *kinds,
401 	      int async, int num_waits, ...)
402 {
403   bool host_fallback = device == GOMP_DEVICE_HOST_FALLBACK;
404   size_t i;
405 
406   goacc_lazy_initialize ();
407 
408   struct goacc_thread *thr = goacc_thread ();
409   struct gomp_device_descr *acc_dev = thr->dev;
410 
411   if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
412       || host_fallback)
413     return;
414 
415   if (num_waits > 0)
416     {
417       va_list ap;
418 
419       va_start (ap, num_waits);
420 
421       goacc_wait (async, num_waits, ap);
422 
423       va_end (ap);
424     }
425 
426   acc_dev->openacc.async_set_async_func (async);
427 
428   for (i = 0; i < mapnum; ++i)
429     {
430       unsigned char kind = kinds[i] & 0xff;
431 
432       switch (kind)
433 	{
434 	case GOMP_MAP_POINTER:
435 	case GOMP_MAP_TO_PSET:
436 	  break;
437 
438 	case GOMP_MAP_FORCE_TO:
439 	  acc_update_device (hostaddrs[i], sizes[i]);
440 	  break;
441 
442 	case GOMP_MAP_FORCE_FROM:
443 	  acc_update_self (hostaddrs[i], sizes[i]);
444 	  break;
445 
446 	default:
447 	  gomp_fatal (">>>> GOACC_update UNHANDLED kind 0x%.2x", kind);
448 	  break;
449 	}
450     }
451 
452   acc_dev->openacc.async_set_async_func (acc_async_sync);
453 }
454 
/* Handle an OpenACC "wait" directive: collect the variadic arguments that
   follow NUM_WAITS and forward them, together with ASYNC, to
   goacc_wait.  */

void
GOACC_wait (int async, int num_waits, ...)
{
  va_list queues;

  va_start (queues, num_waits);
  goacc_wait (async, num_waits, queues);
  va_end (queues);
}
466 
/* Return the number of threads; this implementation always reports
   one.  */

int
GOACC_get_num_threads (void)
{
  enum { num_threads = 1 };

  return num_threads;
}
472 
/* Return the number of the calling thread; this implementation always
   reports zero.  */

int
GOACC_get_thread_num (void)
{
  enum { thread_num = 0 };

  return thread_num;
}
478