xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/plugin/plugin-hsa.c (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1 /* Plugin for HSAIL execution.
2 
3    Copyright (C) 2013-2017 Free Software Foundation, Inc.
4 
5    Contributed by Martin Jambor <mjambor@suse.cz> and
6    Martin Liska <mliska@suse.cz>.
7 
8    This file is part of the GNU Offloading and Multi Processing Library
9    (libgomp).
10 
11    Libgomp is free software; you can redistribute it and/or modify it
12    under the terms of the GNU General Public License as published by
13    the Free Software Foundation; either version 3, or (at your option)
14    any later version.
15 
16    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
19    more details.
20 
21    Under Section 7 of GPL version 3, you are granted additional
22    permissions described in the GCC Runtime Library Exception, version
23    3.1, as published by the Free Software Foundation.
24 
25    You should have received a copy of the GNU General Public License and
26    a copy of the GCC Runtime Library Exception along with this program;
27    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
28    <http://www.gnu.org/licenses/>.  */
29 
30 #include "config.h"
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <pthread.h>
35 #include <inttypes.h>
36 #include <stdbool.h>
37 #include <hsa.h>
38 #include <plugin/hsa_ext_finalize.h>
39 #include <dlfcn.h>
40 #include "libgomp-plugin.h"
41 #include "gomp-constants.h"
42 
43 /* Secure getenv() which returns NULL if running as SUID/SGID.  */
44 #ifndef HAVE_SECURE_GETENV
45 #ifdef HAVE___SECURE_GETENV
46 #define secure_getenv __secure_getenv
47 #elif defined (HAVE_UNISTD_H) && defined(HAVE_GETUID) && defined(HAVE_GETEUID) \
48   && defined(HAVE_GETGID) && defined(HAVE_GETEGID)
49 
50 #include <unistd.h>
51 
52 /* Implementation of secure_getenv() for targets where it is not provided but
53    we have at least means to test real and effective IDs. */
54 
55 static char *
56 secure_getenv (const char *name)
57 {
58   if ((getuid () == geteuid ()) && (getgid () == getegid ()))
59     return getenv (name);
60   else
61     return NULL;
62 }
63 
64 #else
65 #define secure_getenv getenv
66 #endif
67 #endif
68 
69 /* As an HSA runtime is dlopened, following structure defines function
70    pointers utilized by the HSA plug-in.  */
71 
72 struct hsa_runtime_fn_info
73 {
74   /* HSA runtime.  */
75   hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
76 					const char **status_string);
77   hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
78 					 hsa_agent_info_t attribute,
79 					 void *value);
80   hsa_status_t (*hsa_init_fn) (void);
81   hsa_status_t (*hsa_iterate_agents_fn)
82     (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
83   hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
84 					  hsa_region_info_t attribute,
85 					  void *value);
86   hsa_status_t (*hsa_queue_create_fn)
87     (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
88      void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
89      void *data, uint32_t private_segment_size,
90      uint32_t group_segment_size, hsa_queue_t **queue);
91   hsa_status_t (*hsa_agent_iterate_regions_fn)
92     (hsa_agent_t agent,
93      hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
94   hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
95   hsa_status_t (*hsa_executable_create_fn)
96     (hsa_profile_t profile, hsa_executable_state_t executable_state,
97      const char *options, hsa_executable_t *executable);
98   hsa_status_t (*hsa_executable_global_variable_define_fn)
99     (hsa_executable_t executable, const char *variable_name, void *address);
100   hsa_status_t (*hsa_executable_load_code_object_fn)
101     (hsa_executable_t executable, hsa_agent_t agent,
102      hsa_code_object_t code_object, const char *options);
103   hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
104 					   const char *options);
105   hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
106 					uint32_t num_consumers,
107 					const hsa_agent_t *consumers,
108 					hsa_signal_t *signal);
109   hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
110 					  void **ptr);
111   hsa_status_t (*hsa_memory_free_fn) (void *ptr);
112   hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
113   hsa_status_t (*hsa_executable_get_symbol_fn)
114     (hsa_executable_t executable, const char *module_name,
115      const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
116      hsa_executable_symbol_t *symbol);
117   hsa_status_t (*hsa_executable_symbol_get_info_fn)
118     (hsa_executable_symbol_t executable_symbol,
119      hsa_executable_symbol_info_t attribute, void *value);
120   uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
121 						    uint64_t value);
122   uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
123   void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
124 				       hsa_signal_value_t value);
125   void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
126 				       hsa_signal_value_t value);
127   hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
128     (hsa_signal_t signal, hsa_signal_condition_t condition,
129      hsa_signal_value_t compare_value, uint64_t timeout_hint,
130      hsa_wait_state_t wait_state_hint);
131   hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
132   hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
133 
134   /* HSA finalizer.  */
135   hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
136 						 hsa_ext_module_t module);
137   hsa_status_t (*hsa_ext_program_create_fn)
138     (hsa_machine_model_t machine_model, hsa_profile_t profile,
139      hsa_default_float_rounding_mode_t default_float_rounding_mode,
140      const char *options, hsa_ext_program_t *program);
141   hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
142   hsa_status_t (*hsa_ext_program_finalize_fn)
143     (hsa_ext_program_t program,hsa_isa_t isa,
144      int32_t call_convention, hsa_ext_control_directives_t control_directives,
145      const char *options, hsa_code_object_type_t code_object_type,
146      hsa_code_object_t *code_object);
147 };
148 
149 /* HSA runtime functions that are initialized in init_hsa_context.  */
150 
151 static struct hsa_runtime_fn_info hsa_fns;
152 
153 /* Keep the following GOMP prefixed structures in sync with respective parts of
154    the compiler.  */
155 
156 /* Structure describing the run-time and grid properties of an HSA kernel
157    lauch.  */
158 
159 struct GOMP_kernel_launch_attributes
160 {
161   /* Number of dimensions the workload has.  Maximum number is 3.  */
162   uint32_t ndim;
163   /* Size of the grid in the three respective dimensions.  */
164   uint32_t gdims[3];
165   /* Size of work-groups in the respective dimensions.  */
166   uint32_t wdims[3];
167 };
168 
169 /* Collection of information needed for a dispatch of a kernel from a
170    kernel.  */
171 
172 struct GOMP_hsa_kernel_dispatch
173 {
174   /* Pointer to a command queue associated with a kernel dispatch agent.  */
175   void *queue;
176   /* Pointer to reserved memory for OMP data struct copying.  */
177   void *omp_data_memory;
178   /* Pointer to a memory space used for kernel arguments passing.  */
179   void *kernarg_address;
180   /* Kernel object.  */
181   uint64_t object;
182   /* Synchronization signal used for dispatch synchronization.  */
183   uint64_t signal;
184   /* Private segment size.  */
185   uint32_t private_segment_size;
186   /* Group segment size.  */
187   uint32_t group_segment_size;
188   /* Number of children kernel dispatches.  */
189   uint64_t kernel_dispatch_count;
190   /* Debug purpose argument.  */
191   uint64_t debug;
192   /* Levels-var ICV.  */
193   uint64_t omp_level;
194   /* Kernel dispatch structures created for children kernel dispatches.  */
195   struct GOMP_hsa_kernel_dispatch **children_dispatches;
196   /* Number of threads.  */
197   uint32_t omp_num_threads;
198 };
199 
200 /* Part of the libgomp plugin interface.  Return the name of the accelerator,
201    which is "hsa".  */
202 
203 const char *
204 GOMP_OFFLOAD_get_name (void)
205 {
206   return "hsa";
207 }
208 
209 /* Part of the libgomp plugin interface.  Return the specific capabilities the
210    HSA accelerator have.  */
211 
212 unsigned int
213 GOMP_OFFLOAD_get_caps (void)
214 {
215   return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
216 }
217 
218 /* Part of the libgomp plugin interface.  Identify as HSA accelerator.  */
219 
220 int
221 GOMP_OFFLOAD_get_type (void)
222 {
223   return OFFLOAD_TARGET_TYPE_HSA;
224 }
225 
226 /* Return the libgomp version number we're compatible with.  There is
227    no requirement for cross-version compatibility.  */
228 
229 unsigned
230 GOMP_OFFLOAD_version (void)
231 {
232   return GOMP_VERSION;
233 }
234 
235 /* Flag to decide whether print to stderr information about what is going on.
236    Set in init_debug depending on environment variables.  */
237 
238 static bool debug;
239 
240 /* Flag to decide if the runtime should suppress a possible fallback to host
241    execution.  */
242 
243 static bool suppress_host_fallback;
244 
245 /* Flag to locate HSA runtime shared library that is dlopened
246    by this plug-in.  */
247 
248 static const char *hsa_runtime_lib;
249 
250 /* Flag to decide if the runtime should support also CPU devices (can be
251    a simulator).  */
252 
253 static bool support_cpu_devices;
254 
255 /* Initialize debug and suppress_host_fallback according to the environment.  */
256 
257 static void
258 init_enviroment_variables (void)
259 {
260   if (secure_getenv ("HSA_DEBUG"))
261     debug = true;
262   else
263     debug = false;
264 
265   if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
266     suppress_host_fallback = true;
267   else
268     suppress_host_fallback = false;
269 
270   hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
271   if (hsa_runtime_lib == NULL)
272     hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
273 
274   support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
275 }
276 
277 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
278    is set to true.  */
279 
280 #define HSA_LOG(prefix, ...) \
281   do \
282   { \
283     if (debug) \
284       { \
285 	fprintf (stderr, prefix); \
286 	fprintf (stderr, __VA_ARGS__); \
287       } \
288   } \
289   while (false);
290 
291 /* Print a debugging message to stderr.  */
292 
293 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
294 
295 /* Print a warning message to stderr.  */
296 
297 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
298 
299 /* Print HSA warning STR with an HSA STATUS code.  */
300 
301 static void
302 hsa_warn (const char *str, hsa_status_t status)
303 {
304   if (!debug)
305     return;
306 
307   const char *hsa_error_msg;
308   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
309 
310   fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
311 }
312 
313 /* Report a fatal error STR together with the HSA error corresponding to STATUS
314    and terminate execution of the current process.  */
315 
316 static void
317 hsa_fatal (const char *str, hsa_status_t status)
318 {
319   const char *hsa_error_msg;
320   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
321   GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
322 		     hsa_error_msg);
323 }
324 
325 /* Like hsa_fatal, except only report error message, and return FALSE
326    for propagating error processing to outside of plugin.  */
327 
328 static bool
329 hsa_error (const char *str, hsa_status_t status)
330 {
331   const char *hsa_error_msg;
332   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
333   GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
334 		     hsa_error_msg);
335   return false;
336 }
337 
338 struct hsa_kernel_description
339 {
340   const char *name;
341   unsigned omp_data_size;
342   bool gridified_kernel_p;
343   unsigned kernel_dependencies_count;
344   const char **kernel_dependencies;
345 };
346 
347 struct global_var_info
348 {
349   const char *name;
350   void *address;
351 };
352 
353 /* Data passed by the static initializer of a compilation unit containing BRIG
354    to GOMP_offload_register.  */
355 
356 struct brig_image_desc
357 {
358   hsa_ext_module_t brig_module;
359   const unsigned kernel_count;
360   struct hsa_kernel_description *kernel_infos;
361   const unsigned global_variable_count;
362   struct global_var_info *global_variables;
363 };
364 
365 struct agent_info;
366 
367 /* Information required to identify, finalize and run any given kernel.  */
368 
369 struct kernel_info
370 {
371   /* Name of the kernel, required to locate it within the brig module.  */
372   const char *name;
373   /* Size of memory space for OMP data.  */
374   unsigned omp_data_size;
375   /* The specific agent the kernel has been or will be finalized for and run
376      on.  */
377   struct agent_info *agent;
378   /* The specific module where the kernel takes place.  */
379   struct module_info *module;
380   /* Mutex enforcing that at most once thread ever initializes a kernel for
381      use.  A thread should have locked agent->modules_rwlock for reading before
382      acquiring it.  */
383   pthread_mutex_t init_mutex;
384   /* Flag indicating whether the kernel has been initialized and all fields
385      below it contain valid data.  */
386   bool initialized;
387   /* Flag indicating that the kernel has a problem that blocks an execution.  */
388   bool initialization_failed;
389   /* The object to be put into the dispatch queue.  */
390   uint64_t object;
391   /* Required size of kernel arguments.  */
392   uint32_t kernarg_segment_size;
393   /* Required size of group segment.  */
394   uint32_t group_segment_size;
395   /* Required size of private segment.  */
396   uint32_t private_segment_size;
397   /* List of all kernel dependencies.  */
398   const char **dependencies;
399   /* Number of dependencies.  */
400   unsigned dependencies_count;
401   /* Maximum OMP data size necessary for kernel from kernel dispatches.  */
402   unsigned max_omp_data_size;
403   /* True if the kernel is gridified.  */
404   bool gridified_kernel_p;
405 };
406 
407 /* Information about a particular brig module, its image and kernels.  */
408 
409 struct module_info
410 {
411   /* The next and previous module in the linked list of modules of an agent.  */
412   struct module_info *next, *prev;
413   /* The description with which the program has registered the image.  */
414   struct brig_image_desc *image_desc;
415 
416   /* Number of kernels in this module.  */
417   int kernel_count;
418   /* An array of kernel_info structures describing each kernel in this
419      module.  */
420   struct kernel_info kernels[];
421 };
422 
423 /* Information about shared brig library.  */
424 
425 struct brig_library_info
426 {
427   char *file_name;
428   hsa_ext_module_t image;
429 };
430 
431 /* Description of an HSA GPU agent and the program associated with it.  */
432 
433 struct agent_info
434 {
435   /* The HSA ID of the agent.  Assigned when hsa_context is initialized.  */
436   hsa_agent_t id;
437   /* Whether the agent has been initialized.  The fields below are usable only
438      if it has been.  */
439   bool initialized;
440   /* The HSA ISA of this agent.  */
441   hsa_isa_t isa;
442   /* Command queue of the agent.  */
443   hsa_queue_t *command_q;
444   /* Kernel from kernel dispatch command queue.  */
445   hsa_queue_t *kernel_dispatch_command_q;
446   /* The HSA memory region from which to allocate kernel arguments.  */
447   hsa_region_t kernarg_region;
448 
449   /* Read-write lock that protects kernels which are running or about to be run
450      from interference with loading and unloading of images.  Needs to be
451      locked for reading while a kernel is being run, and for writing if the
452      list of modules is manipulated (and thus the HSA program invalidated).  */
453   pthread_rwlock_t modules_rwlock;
454   /* The first module in a linked list of modules associated with this
455      kernel.  */
456   struct module_info *first_module;
457 
458   /* Mutex enforcing that only one thread will finalize the HSA program.  A
459      thread should have locked agent->modules_rwlock for reading before
460      acquiring it.  */
461   pthread_mutex_t prog_mutex;
462   /* Flag whether the HSA program that consists of all the modules has been
463      finalized.  */
464   bool prog_finalized;
465   /* Flag whether the program was finalized but with a failure.  */
466   bool prog_finalized_error;
467   /* HSA executable - the finalized program that is used to locate kernels.  */
468   hsa_executable_t executable;
469   /* List of BRIG libraries.  */
470   struct brig_library_info **brig_libraries;
471   /* Number of loaded shared BRIG libraries.  */
472   unsigned brig_libraries_count;
473 };
474 
475 /* Information about the whole HSA environment and all of its agents.  */
476 
477 struct hsa_context_info
478 {
479   /* Whether the structure has been initialized.  */
480   bool initialized;
481   /* Number of usable GPU HSA agents in the system.  */
482   int agent_count;
483   /* Array of agent_info structures describing the individual HSA agents.  */
484   struct agent_info *agents;
485 };
486 
487 /* Information about the whole HSA environment and all of its agents.  */
488 
489 static struct hsa_context_info hsa_context;
490 
491 #define DLSYM_FN(function) \
492   hsa_fns.function##_fn = dlsym (handle, #function); \
493   if (hsa_fns.function##_fn == NULL) \
494     return false;
495 
496 static bool
497 init_hsa_runtime_functions (void)
498 {
499   void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
500   if (handle == NULL)
501     return false;
502 
503   DLSYM_FN (hsa_status_string)
504   DLSYM_FN (hsa_agent_get_info)
505   DLSYM_FN (hsa_init)
506   DLSYM_FN (hsa_iterate_agents)
507   DLSYM_FN (hsa_region_get_info)
508   DLSYM_FN (hsa_queue_create)
509   DLSYM_FN (hsa_agent_iterate_regions)
510   DLSYM_FN (hsa_executable_destroy)
511   DLSYM_FN (hsa_executable_create)
512   DLSYM_FN (hsa_executable_global_variable_define)
513   DLSYM_FN (hsa_executable_load_code_object)
514   DLSYM_FN (hsa_executable_freeze)
515   DLSYM_FN (hsa_signal_create)
516   DLSYM_FN (hsa_memory_allocate)
517   DLSYM_FN (hsa_memory_free)
518   DLSYM_FN (hsa_signal_destroy)
519   DLSYM_FN (hsa_executable_get_symbol)
520   DLSYM_FN (hsa_executable_symbol_get_info)
521   DLSYM_FN (hsa_queue_add_write_index_release)
522   DLSYM_FN (hsa_queue_load_read_index_acquire)
523   DLSYM_FN (hsa_signal_wait_acquire)
524   DLSYM_FN (hsa_signal_store_relaxed)
525   DLSYM_FN (hsa_signal_store_release)
526   DLSYM_FN (hsa_signal_load_acquire)
527   DLSYM_FN (hsa_queue_destroy)
528   DLSYM_FN (hsa_ext_program_add_module)
529   DLSYM_FN (hsa_ext_program_create)
530   DLSYM_FN (hsa_ext_program_destroy)
531   DLSYM_FN (hsa_ext_program_finalize)
532   return true;
533 }
534 
535 /* Find kernel for an AGENT by name provided in KERNEL_NAME.  */
536 
537 static struct kernel_info *
538 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
539 {
540   struct module_info *module = agent->first_module;
541 
542   while (module)
543     {
544       for (unsigned i = 0; i < module->kernel_count; i++)
545 	if (strcmp (module->kernels[i].name, kernel_name) == 0)
546 	  return &module->kernels[i];
547 
548       module = module->next;
549     }
550 
551   return NULL;
552 }
553 
554 /* Return true if the agent is a GPU and acceptable of concurrent submissions
555    from different threads.  */
556 
557 static bool
558 suitable_hsa_agent_p (hsa_agent_t agent)
559 {
560   hsa_device_type_t device_type;
561   hsa_status_t status
562     = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
563 				     &device_type);
564   if (status != HSA_STATUS_SUCCESS)
565     return false;
566 
567   switch (device_type)
568     {
569     case HSA_DEVICE_TYPE_GPU:
570       break;
571     case HSA_DEVICE_TYPE_CPU:
572       if (!support_cpu_devices)
573 	return false;
574       break;
575     default:
576       return false;
577     }
578 
579   uint32_t features = 0;
580   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
581 					  &features);
582   if (status != HSA_STATUS_SUCCESS
583       || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
584     return false;
585   hsa_queue_type_t queue_type;
586   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
587 					  &queue_type);
588   if (status != HSA_STATUS_SUCCESS
589       || (queue_type != HSA_QUEUE_TYPE_MULTI))
590     return false;
591 
592   return true;
593 }
594 
595 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
596    agent_count in hsa_context.  */
597 
598 static hsa_status_t
599 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
600 {
601   if (suitable_hsa_agent_p (agent))
602     hsa_context.agent_count++;
603   return HSA_STATUS_SUCCESS;
604 }
605 
606 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
607    id to the describing structure in the hsa context.  The index of the
608    structure is pointed to by DATA, increment it afterwards.  */
609 
610 static hsa_status_t
611 assign_agent_ids (hsa_agent_t agent, void *data)
612 {
613   if (suitable_hsa_agent_p (agent))
614     {
615       int *agent_index = (int *) data;
616       hsa_context.agents[*agent_index].id = agent;
617       ++*agent_index;
618     }
619   return HSA_STATUS_SUCCESS;
620 }
621 
622 /* Initialize hsa_context if it has not already been done.
623    Return TRUE on success.  */
624 
625 static bool
626 init_hsa_context (void)
627 {
628   hsa_status_t status;
629   int agent_index = 0;
630 
631   if (hsa_context.initialized)
632     return true;
633   init_enviroment_variables ();
634   if (!init_hsa_runtime_functions ())
635     {
636       HSA_DEBUG ("Run-time could not be dynamically opened\n");
637       return false;
638     }
639   status = hsa_fns.hsa_init_fn ();
640   if (status != HSA_STATUS_SUCCESS)
641     return hsa_error ("Run-time could not be initialized", status);
642   HSA_DEBUG ("HSA run-time initialized\n");
643   status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
644   if (status != HSA_STATUS_SUCCESS)
645     return hsa_error ("HSA GPU devices could not be enumerated", status);
646   HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
647 
648   hsa_context.agents
649     = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
650 				  * sizeof (struct agent_info));
651   status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
652   if (agent_index != hsa_context.agent_count)
653     {
654       GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
655       return false;
656     }
657   hsa_context.initialized = true;
658   return true;
659 }
660 
661 /* Callback of dispatch queues to report errors.  */
662 
663 static void
664 queue_callback (hsa_status_t status,
665 		hsa_queue_t *queue __attribute__ ((unused)),
666 		void *data __attribute__ ((unused)))
667 {
668   hsa_fatal ("Asynchronous queue error", status);
669 }
670 
671 /* Callback of hsa_agent_iterate_regions.  Determine if a memory REGION can be
672    used for kernarg allocations and if so write it to the memory pointed to by
673    DATA and break the query.  */
674 
675 static hsa_status_t
676 get_kernarg_memory_region (hsa_region_t region, void *data)
677 {
678   hsa_status_t status;
679   hsa_region_segment_t segment;
680 
681   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
682 					   &segment);
683   if (status != HSA_STATUS_SUCCESS)
684     return status;
685   if (segment != HSA_REGION_SEGMENT_GLOBAL)
686     return HSA_STATUS_SUCCESS;
687 
688   uint32_t flags;
689   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
690 					   &flags);
691   if (status != HSA_STATUS_SUCCESS)
692     return status;
693   if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
694     {
695       hsa_region_t *ret = (hsa_region_t *) data;
696       *ret = region;
697       return HSA_STATUS_INFO_BREAK;
698     }
699   return HSA_STATUS_SUCCESS;
700 }
701 
702 /* Part of the libgomp plugin interface.  Return the number of HSA devices on
703    the system.  */
704 
705 int
706 GOMP_OFFLOAD_get_num_devices (void)
707 {
708   if (!init_hsa_context ())
709     return 0;
710   return hsa_context.agent_count;
711 }
712 
713 /* Part of the libgomp plugin interface.  Initialize agent number N so that it
714    can be used for computation.  Return TRUE on success.  */
715 
716 bool
717 GOMP_OFFLOAD_init_device (int n)
718 {
719   if (!init_hsa_context ())
720     return false;
721   if (n >= hsa_context.agent_count)
722     {
723       GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
724       return false;
725     }
726   struct agent_info *agent = &hsa_context.agents[n];
727 
728   if (agent->initialized)
729     return true;
730 
731   if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
732     {
733       GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
734       return false;
735     }
736   if (pthread_mutex_init (&agent->prog_mutex, NULL))
737     {
738       GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
739       return false;
740     }
741 
742   uint32_t queue_size;
743   hsa_status_t status;
744   status = hsa_fns.hsa_agent_get_info_fn (agent->id,
745 					  HSA_AGENT_INFO_QUEUE_MAX_SIZE,
746 					  &queue_size);
747   if (status != HSA_STATUS_SUCCESS)
748     return hsa_error ("Error requesting maximum queue size of the HSA agent",
749     	   	      status);
750   status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
751 					  &agent->isa);
752   if (status != HSA_STATUS_SUCCESS)
753     return hsa_error ("Error querying the ISA of the agent", status);
754   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
755 					HSA_QUEUE_TYPE_MULTI,
756 					queue_callback, NULL, UINT32_MAX,
757 					UINT32_MAX,
758 					&agent->command_q);
759   if (status != HSA_STATUS_SUCCESS)
760     return hsa_error ("Error creating command queue", status);
761 
762   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
763 					HSA_QUEUE_TYPE_MULTI,
764 					queue_callback, NULL, UINT32_MAX,
765 					UINT32_MAX,
766 					&agent->kernel_dispatch_command_q);
767   if (status != HSA_STATUS_SUCCESS)
768     return hsa_error ("Error creating kernel dispatch command queue", status);
769 
770   agent->kernarg_region.handle = (uint64_t) -1;
771   status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
772 						 get_kernarg_memory_region,
773 						 &agent->kernarg_region);
774   if (agent->kernarg_region.handle == (uint64_t) -1)
775     {
776       GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
777 			 "arguments");
778       return false;
779     }
780   HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
781 	     (long long unsigned) agent->command_q->id);
782   HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
783 	     (long long unsigned) agent->kernel_dispatch_command_q->id);
784   agent->initialized = true;
785   return true;
786 }
787 
788 /* Verify that hsa_context has already been initialized and return the
789    agent_info structure describing device number N.  Return NULL on error.  */
790 
791 static struct agent_info *
792 get_agent_info (int n)
793 {
794   if (!hsa_context.initialized)
795     {
796       GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
797       return NULL;
798     }
799   if (n >= hsa_context.agent_count)
800     {
801       GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
802       return NULL;
803     }
804   if (!hsa_context.agents[n].initialized)
805     {
806       GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
807       return NULL;
808     }
809   return &hsa_context.agents[n];
810 }
811 
812 /* Insert MODULE to the linked list of modules of AGENT.  */
813 
814 static void
815 add_module_to_agent (struct agent_info *agent, struct module_info *module)
816 {
817   if (agent->first_module)
818     agent->first_module->prev = module;
819   module->next = agent->first_module;
820   module->prev = NULL;
821   agent->first_module = module;
822 }
823 
824 /* Remove MODULE from the linked list of modules of AGENT.  */
825 
826 static void
827 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
828 {
829   if (agent->first_module == module)
830     agent->first_module = module->next;
831   if (module->prev)
832     module->prev->next = module->next;
833   if (module->next)
834     module->next->prev = module->prev;
835 }
836 
837 /* Free the HSA program in agent and everything associated with it and set
838    agent->prog_finalized and the initialized flags of all kernels to false.
839    Return TRUE on success.  */
840 
841 static bool
842 destroy_hsa_program (struct agent_info *agent)
843 {
844   if (!agent->prog_finalized || agent->prog_finalized_error)
845     return true;
846 
847   hsa_status_t status;
848 
849   HSA_DEBUG ("Destroying the current HSA program.\n");
850 
851   status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
852   if (status != HSA_STATUS_SUCCESS)
853     return hsa_error ("Could not destroy HSA executable", status);
854 
855   struct module_info *module;
856   for (module = agent->first_module; module; module = module->next)
857     {
858       int i;
859       for (i = 0; i < module->kernel_count; i++)
860 	module->kernels[i].initialized = false;
861     }
862   agent->prog_finalized = false;
863   return true;
864 }
865 
866 /* Initialize KERNEL from D and other parameters.  Return true on success. */
867 
868 static bool
869 init_basic_kernel_info (struct kernel_info *kernel,
870 			struct hsa_kernel_description *d,
871 			struct agent_info *agent,
872 			struct module_info *module)
873 {
874   kernel->agent = agent;
875   kernel->module = module;
876   kernel->name = d->name;
877   kernel->omp_data_size = d->omp_data_size;
878   kernel->gridified_kernel_p = d->gridified_kernel_p;
879   kernel->dependencies_count = d->kernel_dependencies_count;
880   kernel->dependencies = d->kernel_dependencies;
881   if (pthread_mutex_init (&kernel->init_mutex, NULL))
882     {
883       GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
884       return false;
885     }
886   return true;
887 }
888 
889 /* Part of the libgomp plugin interface.  Load BRIG module described by struct
890    brig_image_desc in TARGET_DATA and return references to kernel descriptors
891    in TARGET_TABLE.  */
892 
893 int
894 GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
895 			 struct addr_pair **target_table)
896 {
897   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
898     {
899       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
900 			 " (expected %u, received %u)",
901 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
902       return -1;
903     }
904 
905   struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
906   struct agent_info *agent;
907   struct addr_pair *pair;
908   struct module_info *module;
909   struct kernel_info *kernel;
910   int kernel_count = image_desc->kernel_count;
911 
912   agent = get_agent_info (ord);
913   if (!agent)
914     return -1;
915 
916   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
917     {
918       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
919       return -1;
920     }
921   if (agent->prog_finalized
922       && !destroy_hsa_program (agent))
923     return -1;
924 
925   HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
926   pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
927   *target_table = pair;
928   module = (struct module_info *)
929     GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
930 				+ kernel_count * sizeof (struct kernel_info));
931   module->image_desc = image_desc;
932   module->kernel_count = kernel_count;
933 
934   kernel = &module->kernels[0];
935 
936   /* Allocate memory for kernel dependencies.  */
937   for (unsigned i = 0; i < kernel_count; i++)
938     {
939       pair->start = (uintptr_t) kernel;
940       pair->end = (uintptr_t) (kernel + 1);
941 
942       struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
943       if (!init_basic_kernel_info (kernel, d, agent, module))
944 	return -1;
945       kernel++;
946       pair++;
947     }
948 
949   add_module_to_agent (agent, module);
950   if (pthread_rwlock_unlock (&agent->modules_rwlock))
951     {
952       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
953       return -1;
954     }
955   return kernel_count;
956 }
957 
958 /* Add a shared BRIG library from a FILE_NAME to an AGENT.  */
959 
960 static struct brig_library_info *
961 add_shared_library (const char *file_name, struct agent_info *agent)
962 {
963   struct brig_library_info *library = NULL;
964 
965   void *f = dlopen (file_name, RTLD_NOW);
966   void *start = dlsym (f, "__brig_start");
967   void *end = dlsym (f, "__brig_end");
968 
969   if (start == NULL || end == NULL)
970     return NULL;
971 
972   unsigned size = end - start;
973   char *buf = (char *) GOMP_PLUGIN_malloc (size);
974   memcpy (buf, start, size);
975 
976   library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
977   library->file_name = (char *) GOMP_PLUGIN_malloc
978     ((strlen (file_name) + 1));
979   strcpy (library->file_name, file_name);
980   library->image = (hsa_ext_module_t) buf;
981 
982   return library;
983 }
984 
985 /* Release memory used for BRIG shared libraries that correspond
986    to an AGENT.  */
987 
988 static void
989 release_agent_shared_libraries (struct agent_info *agent)
990 {
991   for (unsigned i = 0; i < agent->brig_libraries_count; i++)
992     if (agent->brig_libraries[i])
993       {
994 	free (agent->brig_libraries[i]->file_name);
995 	free (agent->brig_libraries[i]->image);
996 	free (agent->brig_libraries[i]);
997       }
998 
999   free (agent->brig_libraries);
1000 }
1001 
1002 /* Create and finalize the program consisting of all loaded modules.  */
1003 
1004 static void
1005 create_and_finalize_hsa_program (struct agent_info *agent)
1006 {
1007   hsa_status_t status;
1008   hsa_ext_program_t prog_handle;
1009   int mi = 0;
1010 
1011   if (pthread_mutex_lock (&agent->prog_mutex))
1012     GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
1013   if (agent->prog_finalized)
1014     goto final;
1015 
1016   status = hsa_fns.hsa_ext_program_create_fn
1017     (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
1018      HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
1019      NULL, &prog_handle);
1020   if (status != HSA_STATUS_SUCCESS)
1021     hsa_fatal ("Could not create an HSA program", status);
1022 
1023   HSA_DEBUG ("Created a finalized program\n");
1024 
1025   struct module_info *module = agent->first_module;
1026   while (module)
1027     {
1028       status = hsa_fns.hsa_ext_program_add_module_fn
1029 	(prog_handle, module->image_desc->brig_module);
1030       if (status != HSA_STATUS_SUCCESS)
1031 	hsa_fatal ("Could not add a module to the HSA program", status);
1032       module = module->next;
1033       mi++;
1034     }
1035 
1036   /* Load all shared libraries.  */
1037   const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
1038   const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
1039 
1040   agent->brig_libraries_count = libraries_count;
1041   agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
1042     (sizeof (struct brig_library_info) * libraries_count);
1043 
1044   for (unsigned i = 0; i < libraries_count; i++)
1045     {
1046       struct brig_library_info *library = add_shared_library (libraries[i],
1047 							      agent);
1048       if (library == NULL)
1049 	{
1050 	  HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1051 		       libraries[i]);
1052 	  continue;
1053 	}
1054 
1055       status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
1056 						      library->image);
1057       if (status != HSA_STATUS_SUCCESS)
1058 	hsa_warn ("Could not add a shared BRIG library the HSA program",
1059 		  status);
1060       else
1061 	HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1062 		   libraries[i]);
1063     }
1064 
1065   hsa_ext_control_directives_t control_directives;
1066   memset (&control_directives, 0, sizeof (control_directives));
1067   hsa_code_object_t code_object;
1068   status = hsa_fns.hsa_ext_program_finalize_fn
1069     (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
1070      control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
1071   if (status != HSA_STATUS_SUCCESS)
1072     {
1073       hsa_warn ("Finalization of the HSA program failed", status);
1074       goto failure;
1075     }
1076 
1077   HSA_DEBUG ("Finalization done\n");
1078   hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
1079 
1080   status
1081     = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1082 					HSA_EXECUTABLE_STATE_UNFROZEN,
1083 					"", &agent->executable);
1084   if (status != HSA_STATUS_SUCCESS)
1085     hsa_fatal ("Could not create HSA executable", status);
1086 
1087   module = agent->first_module;
1088   while (module)
1089     {
1090       /* Initialize all global variables declared in the module.  */
1091       for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
1092 	{
1093 	  struct global_var_info *var;
1094 	  var = &module->image_desc->global_variables[i];
1095 	  status = hsa_fns.hsa_executable_global_variable_define_fn
1096 	    (agent->executable, var->name, var->address);
1097 
1098 	  HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
1099 		     var->address);
1100 
1101 	  if (status != HSA_STATUS_SUCCESS)
1102 	    hsa_fatal ("Could not define a global variable in the HSA program",
1103 		       status);
1104 	}
1105 
1106       module = module->next;
1107     }
1108 
1109   status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
1110 						       agent->id,
1111 						       code_object, "");
1112   if (status != HSA_STATUS_SUCCESS)
1113     hsa_fatal ("Could not add a code object to the HSA executable", status);
1114   status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
1115   if (status != HSA_STATUS_SUCCESS)
1116     hsa_fatal ("Could not freeze the HSA executable", status);
1117 
1118   HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1119 
1120   /* If all goes good, jump to final.  */
1121   goto final;
1122 
1123 failure:
1124   agent->prog_finalized_error = true;
1125 
1126 final:
1127   agent->prog_finalized = true;
1128 
1129   if (pthread_mutex_unlock (&agent->prog_mutex))
1130     GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1131 }
1132 
1133 /* Create kernel dispatch data structure for given KERNEL.  */
1134 
1135 static struct GOMP_hsa_kernel_dispatch *
1136 create_single_kernel_dispatch (struct kernel_info *kernel,
1137 			       unsigned omp_data_size)
1138 {
1139   struct agent_info *agent = kernel->agent;
1140   struct GOMP_hsa_kernel_dispatch *shadow
1141     = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
1142 
1143   shadow->queue = agent->command_q;
1144   shadow->omp_data_memory
1145     = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
1146   unsigned dispatch_count = kernel->dependencies_count;
1147   shadow->kernel_dispatch_count = dispatch_count;
1148 
1149   shadow->children_dispatches
1150     = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
1151 
1152   shadow->object = kernel->object;
1153 
1154   hsa_signal_t sync_signal;
1155   hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
1156   if (status != HSA_STATUS_SUCCESS)
1157     hsa_fatal ("Error creating the HSA sync signal", status);
1158 
1159   shadow->signal = sync_signal.handle;
1160   shadow->private_segment_size = kernel->private_segment_size;
1161   shadow->group_segment_size = kernel->group_segment_size;
1162 
1163   status
1164     = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
1165 				      kernel->kernarg_segment_size,
1166 				      &shadow->kernarg_address);
1167   if (status != HSA_STATUS_SUCCESS)
1168     hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
1169 
1170   return shadow;
1171 }
1172 
1173 /* Release data structure created for a kernel dispatch in SHADOW argument.  */
1174 
1175 static void
1176 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
1177 {
1178   HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
1179 	     shadow->debug, (void *) shadow->debug);
1180 
1181   hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
1182 
1183   hsa_signal_t s;
1184   s.handle = shadow->signal;
1185   hsa_fns.hsa_signal_destroy_fn (s);
1186 
1187   free (shadow->omp_data_memory);
1188 
1189   for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1190     release_kernel_dispatch (shadow->children_dispatches[i]);
1191 
1192   free (shadow->children_dispatches);
1193   free (shadow);
1194 }
1195 
1196 /* Initialize a KERNEL without its dependencies.  MAX_OMP_DATA_SIZE is used
1197    to calculate maximum necessary memory for OMP data allocation.  */
1198 
1199 static void
1200 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
1201 {
1202   hsa_status_t status;
1203   struct agent_info *agent = kernel->agent;
1204   hsa_executable_symbol_t kernel_symbol;
1205   status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
1206 						 kernel->name, agent->id,
1207 						 0, &kernel_symbol);
1208   if (status != HSA_STATUS_SUCCESS)
1209     {
1210       hsa_warn ("Could not find symbol for kernel in the code object", status);
1211       goto failure;
1212     }
1213   HSA_DEBUG ("Located kernel %s\n", kernel->name);
1214   status = hsa_fns.hsa_executable_symbol_get_info_fn
1215     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
1216   if (status != HSA_STATUS_SUCCESS)
1217     hsa_fatal ("Could not extract a kernel object from its symbol", status);
1218   status = hsa_fns.hsa_executable_symbol_get_info_fn
1219     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1220      &kernel->kernarg_segment_size);
1221   if (status != HSA_STATUS_SUCCESS)
1222     hsa_fatal ("Could not get info about kernel argument size", status);
1223   status = hsa_fns.hsa_executable_symbol_get_info_fn
1224     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1225      &kernel->group_segment_size);
1226   if (status != HSA_STATUS_SUCCESS)
1227     hsa_fatal ("Could not get info about kernel group segment size", status);
1228   status = hsa_fns.hsa_executable_symbol_get_info_fn
1229     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1230      &kernel->private_segment_size);
1231   if (status != HSA_STATUS_SUCCESS)
1232     hsa_fatal ("Could not get info about kernel private segment size",
1233 	       status);
1234 
1235   HSA_DEBUG ("Kernel structure for %s fully initialized with "
1236 	     "following segment sizes: \n", kernel->name);
1237   HSA_DEBUG ("  group_segment_size: %u\n",
1238 	     (unsigned) kernel->group_segment_size);
1239   HSA_DEBUG ("  private_segment_size: %u\n",
1240 	     (unsigned) kernel->private_segment_size);
1241   HSA_DEBUG ("  kernarg_segment_size: %u\n",
1242 	     (unsigned) kernel->kernarg_segment_size);
1243   HSA_DEBUG ("  omp_data_size: %u\n", kernel->omp_data_size);
1244   HSA_DEBUG ("  gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1245 
1246   if (kernel->omp_data_size > *max_omp_data_size)
1247     *max_omp_data_size = kernel->omp_data_size;
1248 
1249   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1250     {
1251       struct kernel_info *dependency
1252 	= get_kernel_for_agent (agent, kernel->dependencies[i]);
1253 
1254       if (dependency == NULL)
1255 	{
1256 	  HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1257 		     "dependency name: %s\n", kernel->name,
1258 		     kernel->dependencies[i]);
1259 	  goto failure;
1260 	}
1261 
1262       if (dependency->dependencies_count > 0)
1263 	{
1264 	  HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1265 		     "a depth bigger than one\n")
1266 	  goto failure;
1267 	}
1268 
1269       init_single_kernel (dependency, max_omp_data_size);
1270     }
1271 
1272   return;
1273 
1274 failure:
1275   kernel->initialization_failed = true;
1276 }
1277 
1278 /* Indent stream F by INDENT spaces.  */
1279 
1280 static void
1281 indent_stream (FILE *f, unsigned indent)
1282 {
1283   fprintf (f, "%*s", indent, "");
1284 }
1285 
1286 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces.  */
1287 
1288 static void
1289 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1290 {
1291   indent_stream (stderr, indent);
1292   fprintf (stderr, "this: %p\n", dispatch);
1293   indent_stream (stderr, indent);
1294   fprintf (stderr, "queue: %p\n", dispatch->queue);
1295   indent_stream (stderr, indent);
1296   fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1297   indent_stream (stderr, indent);
1298   fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1299   indent_stream (stderr, indent);
1300   fprintf (stderr, "object: %lu\n", dispatch->object);
1301   indent_stream (stderr, indent);
1302   fprintf (stderr, "signal: %lu\n", dispatch->signal);
1303   indent_stream (stderr, indent);
1304   fprintf (stderr, "private_segment_size: %u\n",
1305 	   dispatch->private_segment_size);
1306   indent_stream (stderr, indent);
1307   fprintf (stderr, "group_segment_size: %u\n",
1308 	   dispatch->group_segment_size);
1309   indent_stream (stderr, indent);
1310   fprintf (stderr, "children dispatches: %lu\n",
1311 	   dispatch->kernel_dispatch_count);
1312   indent_stream (stderr, indent);
1313   fprintf (stderr, "omp_num_threads: %u\n",
1314 	   dispatch->omp_num_threads);
1315   fprintf (stderr, "\n");
1316 
1317   for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1318     print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1319 }
1320 
1321 /* Create kernel dispatch data structure for a KERNEL and all its
1322    dependencies.  */
1323 
1324 static struct GOMP_hsa_kernel_dispatch *
1325 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1326 {
1327   struct GOMP_hsa_kernel_dispatch *shadow
1328     = create_single_kernel_dispatch (kernel, omp_data_size);
1329   shadow->omp_num_threads = 64;
1330   shadow->debug = 0;
1331   shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1332 
1333   /* Create kernel dispatch data structures.  We do not allow to have
1334      a kernel dispatch with depth bigger than one.  */
1335   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1336     {
1337       struct kernel_info *dependency
1338 	= get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1339       shadow->children_dispatches[i]
1340 	= create_single_kernel_dispatch (dependency, omp_data_size);
1341       shadow->children_dispatches[i]->queue
1342 	= kernel->agent->kernel_dispatch_command_q;
1343       shadow->children_dispatches[i]->omp_level = 1;
1344     }
1345 
1346   return shadow;
1347 }
1348 
1349 /* Do all the work that is necessary before running KERNEL for the first time.
1350    The function assumes the program has been created, finalized and frozen by
1351    create_and_finalize_hsa_program.  */
1352 
1353 static void
1354 init_kernel (struct kernel_info *kernel)
1355 {
1356   if (pthread_mutex_lock (&kernel->init_mutex))
1357     GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1358   if (kernel->initialized)
1359     {
1360       if (pthread_mutex_unlock (&kernel->init_mutex))
1361 	GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1362 			   "mutex");
1363 
1364       return;
1365     }
1366 
1367   /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1368      dispatch operation.  */
1369   init_single_kernel (kernel, &kernel->max_omp_data_size);
1370 
1371   if (!kernel->initialization_failed)
1372     HSA_DEBUG ("\n");
1373 
1374   kernel->initialized = true;
1375   if (pthread_mutex_unlock (&kernel->init_mutex))
1376     GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1377 		       "mutex");
1378 }
1379 
1380 /* Parse the target attributes INPUT provided by the compiler and return true
1381    if we should run anything all.  If INPUT is NULL, fill DEF with default
1382    values, then store INPUT or DEF into *RESULT.  */
1383 
1384 static bool
1385 parse_target_attributes (void **input,
1386 			 struct GOMP_kernel_launch_attributes *def,
1387 			 struct GOMP_kernel_launch_attributes **result)
1388 {
1389   if (!input)
1390     GOMP_PLUGIN_fatal ("No target arguments provided");
1391 
1392   bool attrs_found = false;
1393   while (*input)
1394     {
1395       uintptr_t id = (uintptr_t) *input;
1396       if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1397 	  && ((id & GOMP_TARGET_ARG_ID_MASK)
1398 	      == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1399 	{
1400 	  input++;
1401 	  attrs_found = true;
1402 	  break;
1403 	}
1404 
1405       if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1406 	input++;
1407       input++;
1408     }
1409 
1410   if (!attrs_found)
1411     {
1412       def->ndim = 1;
1413       def->gdims[0] = 1;
1414       def->gdims[1] = 1;
1415       def->gdims[2] = 1;
1416       def->wdims[0] = 1;
1417       def->wdims[1] = 1;
1418       def->wdims[2] = 1;
1419       *result = def;
1420       HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1421       return true;
1422     }
1423 
1424   struct GOMP_kernel_launch_attributes *kla;
1425   kla = (struct GOMP_kernel_launch_attributes *) *input;
1426   *result = kla;
1427   if (kla->ndim == 0 || kla->ndim > 3)
1428     GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
1429 
1430   HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
1431   unsigned i;
1432   for (i = 0; i < kla->ndim; i++)
1433     {
1434       HSA_DEBUG ("  Dimension %u: grid size %u and group size %u\n", i,
1435 		 kla->gdims[i], kla->wdims[i]);
1436       if (kla->gdims[i] == 0)
1437 	return false;
1438     }
1439   return true;
1440 }
1441 
1442 /* Return the group size given the requested GROUP size, GRID size and number
1443    of grid dimensions NDIM.  */
1444 
1445 static uint32_t
1446 get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
1447 {
1448   if (group == 0)
1449     {
1450       /* TODO: Provide a default via environment or device characteristics.  */
1451       if (ndim == 1)
1452 	group = 64;
1453       else if (ndim == 2)
1454 	group = 8;
1455       else
1456 	group = 4;
1457     }
1458 
1459   if (group > grid)
1460     group = grid;
1461   return group;
1462 }
1463 
1464 /* Return true if the HSA runtime can run function FN_PTR.  */
1465 
1466 bool
1467 GOMP_OFFLOAD_can_run (void *fn_ptr)
1468 {
1469   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1470   struct agent_info *agent = kernel->agent;
1471   create_and_finalize_hsa_program (agent);
1472 
1473   if (agent->prog_finalized_error)
1474     goto failure;
1475 
1476   init_kernel (kernel);
1477   if (kernel->initialization_failed)
1478     goto failure;
1479 
1480   return true;
1481 
1482 failure:
1483   if (suppress_host_fallback)
1484     GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1485   HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1486   return false;
1487 }
1488 
1489 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET.  */
1490 
1491 void
1492 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1493 {
1494   __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1495 }
1496 
1497 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1498    launchattributes from KLA.  */
1499 
1500 void
1501 run_kernel (struct kernel_info *kernel, void *vars,
1502 	    struct GOMP_kernel_launch_attributes *kla)
1503 {
1504   struct agent_info *agent = kernel->agent;
1505   if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1506     GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1507 
1508   if (!agent->initialized)
1509     GOMP_PLUGIN_fatal ("Agent must be initialized");
1510 
1511   if (!kernel->initialized)
1512     GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1513 
1514   struct GOMP_hsa_kernel_dispatch *shadow
1515     = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1516 
1517   if (debug)
1518     {
1519       fprintf (stderr, "\nKernel has following dependencies:\n");
1520       print_kernel_dispatch (shadow, 2);
1521     }
1522 
1523   uint64_t index
1524     = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
1525   HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1526 
1527   /* Wait until the queue is not full before writing the packet.   */
1528   while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
1529 	 >= agent->command_q->size)
1530     ;
1531 
1532   hsa_kernel_dispatch_packet_t *packet;
1533   packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1534 	   + index % agent->command_q->size;
1535 
1536   memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1537   packet->grid_size_x = kla->gdims[0];
1538   packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
1539 					     kla->wdims[0]);
1540 
1541   if (kla->ndim >= 2)
1542     {
1543       packet->grid_size_y = kla->gdims[1];
1544       packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
1545 						 kla->wdims[1]);
1546     }
1547   else
1548     {
1549       packet->grid_size_y = 1;
1550       packet->workgroup_size_y = 1;
1551     }
1552 
1553   if (kla->ndim == 3)
1554     {
1555       packet->grid_size_z = kla->gdims[2];
1556       packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
1557 					     kla->wdims[2]);
1558     }
1559   else
1560     {
1561       packet->grid_size_z = 1;
1562       packet->workgroup_size_z = 1;
1563     }
1564 
1565   packet->private_segment_size = kernel->private_segment_size;
1566   packet->group_segment_size = kernel->group_segment_size;
1567   packet->kernel_object = kernel->object;
1568   packet->kernarg_address = shadow->kernarg_address;
1569   hsa_signal_t s;
1570   s.handle = shadow->signal;
1571   packet->completion_signal = s;
1572   hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
1573   memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1574 
1575   /* PR hsa/70337.  */
1576   size_t vars_size = sizeof (vars);
1577   if (kernel->kernarg_segment_size > vars_size)
1578     {
1579       if (kernel->kernarg_segment_size != vars_size
1580 	  + sizeof (struct hsa_kernel_runtime *))
1581 	GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1582       memcpy (packet->kernarg_address + vars_size, &shadow,
1583 	      sizeof (struct hsa_kernel_runtime *));
1584     }
1585 
1586   HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1587 
1588   uint16_t header;
1589   header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1590   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1591   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1592 
1593   HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1594 
1595   packet_store_release ((uint32_t *) packet, header,
1596 			(uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1597 
1598   hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
1599 				       index);
1600 
1601   /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1602      signal wait and signal load operations on their own and we need to
1603      periodically call the hsa_signal_load_acquire on completion signals of
1604      children kernels in the CPU to make that happen.  As soon the
1605      limitation will be resolved, this workaround can be removed.  */
1606 
1607   HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1608 
1609   /* Root signal waits with 1ms timeout.  */
1610   while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
1611 					     1000 * 1000,
1612 					     HSA_WAIT_STATE_BLOCKED) != 0)
1613     for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1614       {
1615 	hsa_signal_t child_s;
1616 	child_s.handle = shadow->children_dispatches[i]->signal;
1617 
1618 	HSA_DEBUG ("Waiting for children completion signal: %lu\n",
1619 		   shadow->children_dispatches[i]->signal);
1620 	hsa_fns.hsa_signal_load_acquire_fn (child_s);
1621       }
1622 
1623   release_kernel_dispatch (shadow);
1624 
1625   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1626     GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1627 }
1628 
1629 /* Part of the libgomp plugin interface.  Run a kernel on device N (the number
1630    is actually ignored, we assume the FN_PTR has been mapped using the correct
1631    device) and pass it an array of pointers in VARS as a parameter.  The kernel
1632    is identified by FN_PTR which must point to a kernel_info structure.  */
1633 
1634 void
1635 GOMP_OFFLOAD_run (int n __attribute__((unused)),
1636 		  void *fn_ptr, void *vars, void **args)
1637 {
1638   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1639   struct GOMP_kernel_launch_attributes def;
1640   struct GOMP_kernel_launch_attributes *kla;
1641   if (!parse_target_attributes (args, &def, &kla))
1642     {
1643       HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1644       return;
1645     }
1646   run_kernel (kernel, vars, kla);
1647 }
1648 
1649 /* Information to be passed to a thread running a kernel asycnronously.  */
1650 
1651 struct async_run_info
1652 {
1653   int device;
1654   void *tgt_fn;
1655   void *tgt_vars;
1656   void **args;
1657   void *async_data;
1658 };
1659 
1660 /* Thread routine to run a kernel asynchronously.  */
1661 
1662 static void *
1663 run_kernel_asynchronously (void *thread_arg)
1664 {
1665   struct async_run_info *info = (struct async_run_info *) thread_arg;
1666   int device = info->device;
1667   void *tgt_fn = info->tgt_fn;
1668   void *tgt_vars = info->tgt_vars;
1669   void **args = info->args;
1670   void *async_data = info->async_data;
1671 
1672   free (info);
1673   GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1674   GOMP_PLUGIN_target_task_completion (async_data);
1675   return NULL;
1676 }
1677 
1678 /* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
1679    does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1680    has finished.  */
1681 
1682 void
1683 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1684 			void **args, void *async_data)
1685 {
1686   pthread_t pt;
1687   struct async_run_info *info;
1688   HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n")
1689   info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1690 
1691   info->device = device;
1692   info->tgt_fn = tgt_fn;
1693   info->tgt_vars = tgt_vars;
1694   info->args = args;
1695   info->async_data = async_data;
1696 
1697   int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1698   if (err != 0)
1699     GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1700 		       strerror (err));
1701   err = pthread_detach (pt);
1702   if (err != 0)
1703     GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1704 		       "asynchronously: %s", strerror (err));
1705 }
1706 
1707 /* Deinitialize all information associated with MODULE and kernels within
1708    it.  Return TRUE on success.  */
1709 
1710 static bool
1711 destroy_module (struct module_info *module)
1712 {
1713   int i;
1714   for (i = 0; i < module->kernel_count; i++)
1715     if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1716       {
1717 	GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1718 			   "mutex");
1719 	return false;
1720       }
1721   return true;
1722 }
1723 
1724 /* Part of the libgomp plugin interface.  Unload BRIG module described by
1725    struct brig_image_desc in TARGET_DATA from agent number N.  Return
1726    TRUE on success.  */
1727 
1728 bool
1729 GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
1730 {
1731   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1732     {
1733       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1734 			 " (expected %u, received %u)",
1735 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1736       return false;
1737     }
1738 
1739   struct agent_info *agent;
1740   agent = get_agent_info (n);
1741   if (!agent)
1742     return false;
1743 
1744   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1745     {
1746       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1747       return false;
1748     }
1749   struct module_info *module = agent->first_module;
1750   while (module)
1751     {
1752       if (module->image_desc == target_data)
1753 	break;
1754       module = module->next;
1755     }
1756   if (!module)
1757     {
1758       GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1759 			 "loaded before");
1760       return false;
1761     }
1762 
1763   remove_module_from_agent (agent, module);
1764   if (!destroy_module (module))
1765     return false;
1766   free (module);
1767   if (!destroy_hsa_program (agent))
1768     return false;
1769   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1770     {
1771       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1772       return false;
1773     }
1774   return true;
1775 }
1776 
1777 /* Part of the libgomp plugin interface.  Deinitialize all information and
1778    status associated with agent number N.  We do not attempt any
1779    synchronization, assuming the user and libgomp will not attempt
1780    deinitialization of a device that is in any way being used at the same
1781    time.  Return TRUE on success.  */
1782 
1783 bool
1784 GOMP_OFFLOAD_fini_device (int n)
1785 {
1786   struct agent_info *agent = get_agent_info (n);
1787   if (!agent)
1788     return false;
1789 
1790   if (!agent->initialized)
1791     return true;
1792 
1793   struct module_info *next_module = agent->first_module;
1794   while (next_module)
1795     {
1796       struct module_info *module = next_module;
1797       next_module = module->next;
1798       if (!destroy_module (module))
1799 	return false;
1800       free (module);
1801     }
1802   agent->first_module = NULL;
1803   if (!destroy_hsa_program (agent))
1804     return false;
1805 
1806   release_agent_shared_libraries (agent);
1807 
1808   hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
1809   if (status != HSA_STATUS_SUCCESS)
1810     return hsa_error ("Error destroying command queue", status);
1811   status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
1812   if (status != HSA_STATUS_SUCCESS)
1813     return hsa_error ("Error destroying kernel dispatch command queue", status);
1814   if (pthread_mutex_destroy (&agent->prog_mutex))
1815     {
1816       GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1817       return false;
1818     }
1819   if (pthread_rwlock_destroy (&agent->modules_rwlock))
1820     {
1821       GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1822       return false;
1823     }
1824   agent->initialized = false;
1825   return true;
1826 }
1827 
1828 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1829    for HSA.  */
1830 
1831 void *
1832 GOMP_OFFLOAD_alloc (int ord, size_t size)
1833 {
1834   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1835 		     "it should never be called");
1836   return NULL;
1837 }
1838 
1839 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1840    for HSA.  */
1841 
1842 bool
1843 GOMP_OFFLOAD_free (int ord, void *ptr)
1844 {
1845   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1846 		     "it should never be called");
1847   return false;
1848 }
1849 
1850 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1851    for HSA.  */
1852 
1853 bool
1854 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1855 {
1856   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1857 		     "it should never be called");
1858   return false;
1859 }
1860 
1861 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1862    for HSA.  */
1863 
1864 bool
1865 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1866 {
1867   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1868 		     "it should never be called");
1869   return false;
1870 }
1871 
1872 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1873    for HSA.  */
1874 
1875 bool
1876 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1877 {
1878   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1879 		     "it should never be called");
1880   return false;
1881 }
1882