xref: /netbsd-src/external/gpl3/gcc.old/dist/libgomp/plugin/plugin-hsa.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
1 /* Plugin for HSAIL execution.
2 
3    Copyright (C) 2013-2020 Free Software Foundation, Inc.
4 
5    Contributed by Martin Jambor <mjambor@suse.cz> and
6    Martin Liska <mliska@suse.cz>.
7 
8    This file is part of the GNU Offloading and Multi Processing Library
9    (libgomp).
10 
11    Libgomp is free software; you can redistribute it and/or modify it
12    under the terms of the GNU General Public License as published by
13    the Free Software Foundation; either version 3, or (at your option)
14    any later version.
15 
16    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
17    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
19    more details.
20 
21    Under Section 7 of GPL version 3, you are granted additional
22    permissions described in the GCC Runtime Library Exception, version
23    3.1, as published by the Free Software Foundation.
24 
25    You should have received a copy of the GNU General Public License and
26    a copy of the GCC Runtime Library Exception along with this program;
27    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
28    <http://www.gnu.org/licenses/>.  */
29 
30 #include "config.h"
31 #include <stdint.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <pthread.h>
36 #ifdef HAVE_INTTYPES_H
37 #include <inttypes.h>
38 #endif
39 #include <stdbool.h>
40 #include <hsa.h>
41 #include <plugin/hsa_ext_finalize.h>
42 #include <dlfcn.h>
43 #include "libgomp-plugin.h"
44 #include "gomp-constants.h"
45 #include "secure_getenv.h"
46 
47 #ifdef HAVE_INTTYPES_H
48 typedef uint64_t print_uint64_t;
49 #else
50 #define PRIu64 "lu"
51 typedef unsigned long print_uint64_t;
52 #endif
53 
54 /* As an HSA runtime is dlopened, following structure defines function
55    pointers utilized by the HSA plug-in.  */
56 
57 struct hsa_runtime_fn_info
58 {
59   /* HSA runtime.  */
60   hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
61 					const char **status_string);
62   hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
63 					 hsa_agent_info_t attribute,
64 					 void *value);
65   hsa_status_t (*hsa_init_fn) (void);
66   hsa_status_t (*hsa_iterate_agents_fn)
67     (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
68   hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
69 					  hsa_region_info_t attribute,
70 					  void *value);
71   hsa_status_t (*hsa_queue_create_fn)
72     (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
73      void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
74      void *data, uint32_t private_segment_size,
75      uint32_t group_segment_size, hsa_queue_t **queue);
76   hsa_status_t (*hsa_agent_iterate_regions_fn)
77     (hsa_agent_t agent,
78      hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
79   hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
80   hsa_status_t (*hsa_executable_create_fn)
81     (hsa_profile_t profile, hsa_executable_state_t executable_state,
82      const char *options, hsa_executable_t *executable);
83   hsa_status_t (*hsa_executable_global_variable_define_fn)
84     (hsa_executable_t executable, const char *variable_name, void *address);
85   hsa_status_t (*hsa_executable_load_code_object_fn)
86     (hsa_executable_t executable, hsa_agent_t agent,
87      hsa_code_object_t code_object, const char *options);
88   hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
89 					   const char *options);
90   hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
91 					uint32_t num_consumers,
92 					const hsa_agent_t *consumers,
93 					hsa_signal_t *signal);
94   hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
95 					  void **ptr);
96   hsa_status_t (*hsa_memory_free_fn) (void *ptr);
97   hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
98   hsa_status_t (*hsa_executable_get_symbol_fn)
99     (hsa_executable_t executable, const char *module_name,
100      const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
101      hsa_executable_symbol_t *symbol);
102   hsa_status_t (*hsa_executable_symbol_get_info_fn)
103     (hsa_executable_symbol_t executable_symbol,
104      hsa_executable_symbol_info_t attribute, void *value);
105   uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
106 						    uint64_t value);
107   uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
108   void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
109 				       hsa_signal_value_t value);
110   void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
111 				       hsa_signal_value_t value);
112   hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
113     (hsa_signal_t signal, hsa_signal_condition_t condition,
114      hsa_signal_value_t compare_value, uint64_t timeout_hint,
115      hsa_wait_state_t wait_state_hint);
116   hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
117   hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
118 
119   /* HSA finalizer.  */
120   hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
121 						 hsa_ext_module_t module);
122   hsa_status_t (*hsa_ext_program_create_fn)
123     (hsa_machine_model_t machine_model, hsa_profile_t profile,
124      hsa_default_float_rounding_mode_t default_float_rounding_mode,
125      const char *options, hsa_ext_program_t *program);
126   hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
127   hsa_status_t (*hsa_ext_program_finalize_fn)
128     (hsa_ext_program_t program,hsa_isa_t isa,
129      int32_t call_convention, hsa_ext_control_directives_t control_directives,
130      const char *options, hsa_code_object_type_t code_object_type,
131      hsa_code_object_t *code_object);
132 };
133 
134 /* HSA runtime functions that are initialized in init_hsa_context.  */
135 
136 static struct hsa_runtime_fn_info hsa_fns;
137 
138 /* Keep the following GOMP prefixed structures in sync with respective parts of
139    the compiler.  */
140 
141 /* Structure describing the run-time and grid properties of an HSA kernel
142    lauch.  */
143 
144 struct GOMP_kernel_launch_attributes
145 {
146   /* Number of dimensions the workload has.  Maximum number is 3.  */
147   uint32_t ndim;
148   /* Size of the grid in the three respective dimensions.  */
149   uint32_t gdims[3];
150   /* Size of work-groups in the respective dimensions.  */
151   uint32_t wdims[3];
152 };
153 
154 /* Collection of information needed for a dispatch of a kernel from a
155    kernel.  */
156 
157 struct GOMP_hsa_kernel_dispatch
158 {
159   /* Pointer to a command queue associated with a kernel dispatch agent.  */
160   void *queue;
161   /* Pointer to reserved memory for OMP data struct copying.  */
162   void *omp_data_memory;
163   /* Pointer to a memory space used for kernel arguments passing.  */
164   void *kernarg_address;
165   /* Kernel object.  */
166   uint64_t object;
167   /* Synchronization signal used for dispatch synchronization.  */
168   uint64_t signal;
169   /* Private segment size.  */
170   uint32_t private_segment_size;
171   /* Group segment size.  */
172   uint32_t group_segment_size;
173   /* Number of children kernel dispatches.  */
174   uint64_t kernel_dispatch_count;
175   /* Debug purpose argument.  */
176   uint64_t debug;
177   /* Levels-var ICV.  */
178   uint64_t omp_level;
179   /* Kernel dispatch structures created for children kernel dispatches.  */
180   struct GOMP_hsa_kernel_dispatch **children_dispatches;
181   /* Number of threads.  */
182   uint32_t omp_num_threads;
183 };
184 
185 /* Part of the libgomp plugin interface.  Return the name of the accelerator,
186    which is "hsa".  */
187 
188 const char *
GOMP_OFFLOAD_get_name(void)189 GOMP_OFFLOAD_get_name (void)
190 {
191   return "hsa";
192 }
193 
194 /* Part of the libgomp plugin interface.  Return the specific capabilities the
195    HSA accelerator have.  */
196 
197 unsigned int
GOMP_OFFLOAD_get_caps(void)198 GOMP_OFFLOAD_get_caps (void)
199 {
200   return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
201 }
202 
203 /* Part of the libgomp plugin interface.  Identify as HSA accelerator.  */
204 
205 int
GOMP_OFFLOAD_get_type(void)206 GOMP_OFFLOAD_get_type (void)
207 {
208   return OFFLOAD_TARGET_TYPE_HSA;
209 }
210 
211 /* Return the libgomp version number we're compatible with.  There is
212    no requirement for cross-version compatibility.  */
213 
214 unsigned
GOMP_OFFLOAD_version(void)215 GOMP_OFFLOAD_version (void)
216 {
217   return GOMP_VERSION;
218 }
219 
220 /* Flag to decide whether print to stderr information about what is going on.
221    Set in init_debug depending on environment variables.  */
222 
223 static bool debug;
224 
225 /* Flag to decide if the runtime should suppress a possible fallback to host
226    execution.  */
227 
228 static bool suppress_host_fallback;
229 
230 /* Flag to locate HSA runtime shared library that is dlopened
231    by this plug-in.  */
232 
233 static const char *hsa_runtime_lib;
234 
235 /* Flag to decide if the runtime should support also CPU devices (can be
236    a simulator).  */
237 
238 static bool support_cpu_devices;
239 
240 /* Initialize debug and suppress_host_fallback according to the environment.  */
241 
242 static void
init_enviroment_variables(void)243 init_enviroment_variables (void)
244 {
245   if (secure_getenv ("HSA_DEBUG"))
246     debug = true;
247   else
248     debug = false;
249 
250   if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
251     suppress_host_fallback = true;
252   else
253     suppress_host_fallback = false;
254 
255   hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
256   if (hsa_runtime_lib == NULL)
257     hsa_runtime_lib = "libhsa-runtime64.so";
258 
259   support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
260 }
261 
262 /* Print a logging message with PREFIX to stderr if HSA_DEBUG value
263    is set to true.  */
264 
265 #define HSA_LOG(prefix, ...) \
266   do \
267   { \
268     if (debug) \
269       { \
270 	fprintf (stderr, prefix); \
271 	fprintf (stderr, __VA_ARGS__); \
272       } \
273   } \
274   while (false)
275 
276 /* Print a debugging message to stderr.  */
277 
278 #define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
279 
280 /* Print a warning message to stderr.  */
281 
282 #define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
283 
284 /* Print HSA warning STR with an HSA STATUS code.  */
285 
286 static void
hsa_warn(const char * str,hsa_status_t status)287 hsa_warn (const char *str, hsa_status_t status)
288 {
289   if (!debug)
290     return;
291 
292   const char *hsa_error_msg = "[unknown]";
293   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
294 
295   fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
296 }
297 
298 /* Report a fatal error STR together with the HSA error corresponding to STATUS
299    and terminate execution of the current process.  */
300 
301 static void
hsa_fatal(const char * str,hsa_status_t status)302 hsa_fatal (const char *str, hsa_status_t status)
303 {
304   const char *hsa_error_msg = "[unknown]";
305   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
306   GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
307 		     hsa_error_msg);
308 }
309 
310 /* Like hsa_fatal, except only report error message, and return FALSE
311    for propagating error processing to outside of plugin.  */
312 
313 static bool
hsa_error(const char * str,hsa_status_t status)314 hsa_error (const char *str, hsa_status_t status)
315 {
316   const char *hsa_error_msg = "[unknown]";
317   hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
318   GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
319 		     hsa_error_msg);
320   return false;
321 }
322 
323 struct hsa_kernel_description
324 {
325   const char *name;
326   unsigned omp_data_size;
327   bool gridified_kernel_p;
328   unsigned kernel_dependencies_count;
329   const char **kernel_dependencies;
330 };
331 
332 struct global_var_info
333 {
334   const char *name;
335   void *address;
336 };
337 
338 /* Data passed by the static initializer of a compilation unit containing BRIG
339    to GOMP_offload_register.  */
340 
341 struct brig_image_desc
342 {
343   hsa_ext_module_t brig_module;
344   const unsigned kernel_count;
345   struct hsa_kernel_description *kernel_infos;
346   const unsigned global_variable_count;
347   struct global_var_info *global_variables;
348 };
349 
350 struct agent_info;
351 
352 /* Information required to identify, finalize and run any given kernel.  */
353 
354 struct kernel_info
355 {
356   /* Name of the kernel, required to locate it within the brig module.  */
357   const char *name;
358   /* Size of memory space for OMP data.  */
359   unsigned omp_data_size;
360   /* The specific agent the kernel has been or will be finalized for and run
361      on.  */
362   struct agent_info *agent;
363   /* The specific module where the kernel takes place.  */
364   struct module_info *module;
365   /* Mutex enforcing that at most once thread ever initializes a kernel for
366      use.  A thread should have locked agent->modules_rwlock for reading before
367      acquiring it.  */
368   pthread_mutex_t init_mutex;
369   /* Flag indicating whether the kernel has been initialized and all fields
370      below it contain valid data.  */
371   bool initialized;
372   /* Flag indicating that the kernel has a problem that blocks an execution.  */
373   bool initialization_failed;
374   /* The object to be put into the dispatch queue.  */
375   uint64_t object;
376   /* Required size of kernel arguments.  */
377   uint32_t kernarg_segment_size;
378   /* Required size of group segment.  */
379   uint32_t group_segment_size;
380   /* Required size of private segment.  */
381   uint32_t private_segment_size;
382   /* List of all kernel dependencies.  */
383   const char **dependencies;
384   /* Number of dependencies.  */
385   unsigned dependencies_count;
386   /* Maximum OMP data size necessary for kernel from kernel dispatches.  */
387   unsigned max_omp_data_size;
388   /* True if the kernel is gridified.  */
389   bool gridified_kernel_p;
390 };
391 
392 /* Information about a particular brig module, its image and kernels.  */
393 
394 struct module_info
395 {
396   /* The next and previous module in the linked list of modules of an agent.  */
397   struct module_info *next, *prev;
398   /* The description with which the program has registered the image.  */
399   struct brig_image_desc *image_desc;
400 
401   /* Number of kernels in this module.  */
402   int kernel_count;
403   /* An array of kernel_info structures describing each kernel in this
404      module.  */
405   struct kernel_info kernels[];
406 };
407 
408 /* Information about shared brig library.  */
409 
410 struct brig_library_info
411 {
412   char *file_name;
413   hsa_ext_module_t image;
414 };
415 
416 /* Description of an HSA GPU agent and the program associated with it.  */
417 
418 struct agent_info
419 {
420   /* The HSA ID of the agent.  Assigned when hsa_context is initialized.  */
421   hsa_agent_t id;
422   /* Whether the agent has been initialized.  The fields below are usable only
423      if it has been.  */
424   bool initialized;
425   /* The HSA ISA of this agent.  */
426   hsa_isa_t isa;
427   /* Command queue of the agent.  */
428   hsa_queue_t *command_q;
429   /* Kernel from kernel dispatch command queue.  */
430   hsa_queue_t *kernel_dispatch_command_q;
431   /* The HSA memory region from which to allocate kernel arguments.  */
432   hsa_region_t kernarg_region;
433 
434   /* Read-write lock that protects kernels which are running or about to be run
435      from interference with loading and unloading of images.  Needs to be
436      locked for reading while a kernel is being run, and for writing if the
437      list of modules is manipulated (and thus the HSA program invalidated).  */
438   pthread_rwlock_t modules_rwlock;
439   /* The first module in a linked list of modules associated with this
440      kernel.  */
441   struct module_info *first_module;
442 
443   /* Mutex enforcing that only one thread will finalize the HSA program.  A
444      thread should have locked agent->modules_rwlock for reading before
445      acquiring it.  */
446   pthread_mutex_t prog_mutex;
447   /* Flag whether the HSA program that consists of all the modules has been
448      finalized.  */
449   bool prog_finalized;
450   /* Flag whether the program was finalized but with a failure.  */
451   bool prog_finalized_error;
452   /* HSA executable - the finalized program that is used to locate kernels.  */
453   hsa_executable_t executable;
454   /* List of BRIG libraries.  */
455   struct brig_library_info **brig_libraries;
456   /* Number of loaded shared BRIG libraries.  */
457   unsigned brig_libraries_count;
458 };
459 
460 /* Information about the whole HSA environment and all of its agents.  */
461 
462 struct hsa_context_info
463 {
464   /* Whether the structure has been initialized.  */
465   bool initialized;
466   /* Number of usable GPU HSA agents in the system.  */
467   int agent_count;
468   /* Array of agent_info structures describing the individual HSA agents.  */
469   struct agent_info *agents;
470 };
471 
472 /* Information about the whole HSA environment and all of its agents.  */
473 
474 static struct hsa_context_info hsa_context;
475 
476 #define DLSYM_FN(function) \
477   hsa_fns.function##_fn = dlsym (handle, #function); \
478   if (hsa_fns.function##_fn == NULL) \
479     goto dl_fail;
480 
481 static bool
init_hsa_runtime_functions(void)482 init_hsa_runtime_functions (void)
483 {
484   void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
485   if (handle == NULL)
486     goto dl_fail;
487 
488   DLSYM_FN (hsa_status_string)
489   DLSYM_FN (hsa_agent_get_info)
490   DLSYM_FN (hsa_init)
491   DLSYM_FN (hsa_iterate_agents)
492   DLSYM_FN (hsa_region_get_info)
493   DLSYM_FN (hsa_queue_create)
494   DLSYM_FN (hsa_agent_iterate_regions)
495   DLSYM_FN (hsa_executable_destroy)
496   DLSYM_FN (hsa_executable_create)
497   DLSYM_FN (hsa_executable_global_variable_define)
498   DLSYM_FN (hsa_executable_load_code_object)
499   DLSYM_FN (hsa_executable_freeze)
500   DLSYM_FN (hsa_signal_create)
501   DLSYM_FN (hsa_memory_allocate)
502   DLSYM_FN (hsa_memory_free)
503   DLSYM_FN (hsa_signal_destroy)
504   DLSYM_FN (hsa_executable_get_symbol)
505   DLSYM_FN (hsa_executable_symbol_get_info)
506   DLSYM_FN (hsa_queue_add_write_index_release)
507   DLSYM_FN (hsa_queue_load_read_index_acquire)
508   DLSYM_FN (hsa_signal_wait_acquire)
509   DLSYM_FN (hsa_signal_store_relaxed)
510   DLSYM_FN (hsa_signal_store_release)
511   DLSYM_FN (hsa_signal_load_acquire)
512   DLSYM_FN (hsa_queue_destroy)
513   DLSYM_FN (hsa_ext_program_add_module)
514   DLSYM_FN (hsa_ext_program_create)
515   DLSYM_FN (hsa_ext_program_destroy)
516   DLSYM_FN (hsa_ext_program_finalize)
517   return true;
518 
519  dl_fail:
520   HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
521   return false;
522 }
523 
524 /* Find kernel for an AGENT by name provided in KERNEL_NAME.  */
525 
526 static struct kernel_info *
get_kernel_for_agent(struct agent_info * agent,const char * kernel_name)527 get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
528 {
529   struct module_info *module = agent->first_module;
530 
531   while (module)
532     {
533       for (unsigned i = 0; i < module->kernel_count; i++)
534 	if (strcmp (module->kernels[i].name, kernel_name) == 0)
535 	  return &module->kernels[i];
536 
537       module = module->next;
538     }
539 
540   return NULL;
541 }
542 
543 /* Return true if the agent is a GPU and acceptable of concurrent submissions
544    from different threads.  */
545 
546 static bool
suitable_hsa_agent_p(hsa_agent_t agent)547 suitable_hsa_agent_p (hsa_agent_t agent)
548 {
549   hsa_device_type_t device_type;
550   hsa_status_t status
551     = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
552 				     &device_type);
553   if (status != HSA_STATUS_SUCCESS)
554     return false;
555 
556   switch (device_type)
557     {
558     case HSA_DEVICE_TYPE_GPU:
559       break;
560     case HSA_DEVICE_TYPE_CPU:
561       if (!support_cpu_devices)
562 	return false;
563       break;
564     default:
565       return false;
566     }
567 
568   uint32_t features = 0;
569   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
570 					  &features);
571   if (status != HSA_STATUS_SUCCESS
572       || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
573     return false;
574   hsa_queue_type_t queue_type;
575   status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
576 					  &queue_type);
577   if (status != HSA_STATUS_SUCCESS
578       || (queue_type != HSA_QUEUE_TYPE_MULTI))
579     return false;
580 
581   return true;
582 }
583 
584 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
585    agent_count in hsa_context.  */
586 
587 static hsa_status_t
count_gpu_agents(hsa_agent_t agent,void * data)588 count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
589 {
590   if (suitable_hsa_agent_p (agent))
591     hsa_context.agent_count++;
592   return HSA_STATUS_SUCCESS;
593 }
594 
595 /* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
596    id to the describing structure in the hsa context.  The index of the
597    structure is pointed to by DATA, increment it afterwards.  */
598 
599 static hsa_status_t
assign_agent_ids(hsa_agent_t agent,void * data)600 assign_agent_ids (hsa_agent_t agent, void *data)
601 {
602   if (suitable_hsa_agent_p (agent))
603     {
604       int *agent_index = (int *) data;
605       hsa_context.agents[*agent_index].id = agent;
606       ++*agent_index;
607     }
608   return HSA_STATUS_SUCCESS;
609 }
610 
611 /* Initialize hsa_context if it has not already been done.
612    Return TRUE on success.  */
613 
614 static bool
init_hsa_context(void)615 init_hsa_context (void)
616 {
617   hsa_status_t status;
618   int agent_index = 0;
619 
620   if (hsa_context.initialized)
621     return true;
622   init_enviroment_variables ();
623   if (!init_hsa_runtime_functions ())
624     {
625       HSA_DEBUG ("Run-time could not be dynamically opened\n");
626       return false;
627     }
628   status = hsa_fns.hsa_init_fn ();
629   if (status != HSA_STATUS_SUCCESS)
630     return hsa_error ("Run-time could not be initialized", status);
631   HSA_DEBUG ("HSA run-time initialized\n");
632   status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
633   if (status != HSA_STATUS_SUCCESS)
634     return hsa_error ("HSA GPU devices could not be enumerated", status);
635   HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
636 
637   hsa_context.agents
638     = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
639 				  * sizeof (struct agent_info));
640   status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
641   if (agent_index != hsa_context.agent_count)
642     {
643       GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
644       return false;
645     }
646   hsa_context.initialized = true;
647   return true;
648 }
649 
650 /* Callback of dispatch queues to report errors.  */
651 
652 static void
queue_callback(hsa_status_t status,hsa_queue_t * queue,void * data)653 queue_callback (hsa_status_t status,
654 		hsa_queue_t *queue __attribute__ ((unused)),
655 		void *data __attribute__ ((unused)))
656 {
657   hsa_fatal ("Asynchronous queue error", status);
658 }
659 
660 /* Callback of hsa_agent_iterate_regions.  Determine if a memory REGION can be
661    used for kernarg allocations and if so write it to the memory pointed to by
662    DATA and break the query.  */
663 
664 static hsa_status_t
get_kernarg_memory_region(hsa_region_t region,void * data)665 get_kernarg_memory_region (hsa_region_t region, void *data)
666 {
667   hsa_status_t status;
668   hsa_region_segment_t segment;
669 
670   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
671 					   &segment);
672   if (status != HSA_STATUS_SUCCESS)
673     return status;
674   if (segment != HSA_REGION_SEGMENT_GLOBAL)
675     return HSA_STATUS_SUCCESS;
676 
677   uint32_t flags;
678   status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
679 					   &flags);
680   if (status != HSA_STATUS_SUCCESS)
681     return status;
682   if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
683     {
684       hsa_region_t *ret = (hsa_region_t *) data;
685       *ret = region;
686       return HSA_STATUS_INFO_BREAK;
687     }
688   return HSA_STATUS_SUCCESS;
689 }
690 
691 /* Part of the libgomp plugin interface.  Return the number of HSA devices on
692    the system.  */
693 
694 int
GOMP_OFFLOAD_get_num_devices(void)695 GOMP_OFFLOAD_get_num_devices (void)
696 {
697   if (!init_hsa_context ())
698     return 0;
699   return hsa_context.agent_count;
700 }
701 
702 /* Part of the libgomp plugin interface.  Initialize agent number N so that it
703    can be used for computation.  Return TRUE on success.  */
704 
705 bool
GOMP_OFFLOAD_init_device(int n)706 GOMP_OFFLOAD_init_device (int n)
707 {
708   if (!init_hsa_context ())
709     return false;
710   if (n >= hsa_context.agent_count)
711     {
712       GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
713       return false;
714     }
715   struct agent_info *agent = &hsa_context.agents[n];
716 
717   if (agent->initialized)
718     return true;
719 
720   if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
721     {
722       GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
723       return false;
724     }
725   if (pthread_mutex_init (&agent->prog_mutex, NULL))
726     {
727       GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
728       return false;
729     }
730 
731   uint32_t queue_size;
732   hsa_status_t status;
733   status = hsa_fns.hsa_agent_get_info_fn (agent->id,
734 					  HSA_AGENT_INFO_QUEUE_MAX_SIZE,
735 					  &queue_size);
736   if (status != HSA_STATUS_SUCCESS)
737     return hsa_error ("Error requesting maximum queue size of the HSA agent",
738     	   	      status);
739   status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
740 					  &agent->isa);
741   if (status != HSA_STATUS_SUCCESS)
742     return hsa_error ("Error querying the ISA of the agent", status);
743   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
744 					HSA_QUEUE_TYPE_MULTI,
745 					queue_callback, NULL, UINT32_MAX,
746 					UINT32_MAX,
747 					&agent->command_q);
748   if (status != HSA_STATUS_SUCCESS)
749     return hsa_error ("Error creating command queue", status);
750 
751   status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
752 					HSA_QUEUE_TYPE_MULTI,
753 					queue_callback, NULL, UINT32_MAX,
754 					UINT32_MAX,
755 					&agent->kernel_dispatch_command_q);
756   if (status != HSA_STATUS_SUCCESS)
757     return hsa_error ("Error creating kernel dispatch command queue", status);
758 
759   agent->kernarg_region.handle = (uint64_t) -1;
760   status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
761 						 get_kernarg_memory_region,
762 						 &agent->kernarg_region);
763   if (agent->kernarg_region.handle == (uint64_t) -1)
764     {
765       GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
766 			 "arguments");
767       return false;
768     }
769   HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
770 	     (long long unsigned) agent->command_q->id);
771   HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
772 	     (long long unsigned) agent->kernel_dispatch_command_q->id);
773   agent->initialized = true;
774   return true;
775 }
776 
777 /* Verify that hsa_context has already been initialized and return the
778    agent_info structure describing device number N.  Return NULL on error.  */
779 
780 static struct agent_info *
get_agent_info(int n)781 get_agent_info (int n)
782 {
783   if (!hsa_context.initialized)
784     {
785       GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
786       return NULL;
787     }
788   if (n >= hsa_context.agent_count)
789     {
790       GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
791       return NULL;
792     }
793   if (!hsa_context.agents[n].initialized)
794     {
795       GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
796       return NULL;
797     }
798   return &hsa_context.agents[n];
799 }
800 
801 /* Insert MODULE to the linked list of modules of AGENT.  */
802 
803 static void
add_module_to_agent(struct agent_info * agent,struct module_info * module)804 add_module_to_agent (struct agent_info *agent, struct module_info *module)
805 {
806   if (agent->first_module)
807     agent->first_module->prev = module;
808   module->next = agent->first_module;
809   module->prev = NULL;
810   agent->first_module = module;
811 }
812 
813 /* Remove MODULE from the linked list of modules of AGENT.  */
814 
815 static void
remove_module_from_agent(struct agent_info * agent,struct module_info * module)816 remove_module_from_agent (struct agent_info *agent, struct module_info *module)
817 {
818   if (agent->first_module == module)
819     agent->first_module = module->next;
820   if (module->prev)
821     module->prev->next = module->next;
822   if (module->next)
823     module->next->prev = module->prev;
824 }
825 
826 /* Free the HSA program in agent and everything associated with it and set
827    agent->prog_finalized and the initialized flags of all kernels to false.
828    Return TRUE on success.  */
829 
830 static bool
destroy_hsa_program(struct agent_info * agent)831 destroy_hsa_program (struct agent_info *agent)
832 {
833   if (!agent->prog_finalized || agent->prog_finalized_error)
834     return true;
835 
836   hsa_status_t status;
837 
838   HSA_DEBUG ("Destroying the current HSA program.\n");
839 
840   status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
841   if (status != HSA_STATUS_SUCCESS)
842     return hsa_error ("Could not destroy HSA executable", status);
843 
844   struct module_info *module;
845   for (module = agent->first_module; module; module = module->next)
846     {
847       int i;
848       for (i = 0; i < module->kernel_count; i++)
849 	module->kernels[i].initialized = false;
850     }
851   agent->prog_finalized = false;
852   return true;
853 }
854 
855 /* Initialize KERNEL from D and other parameters.  Return true on success. */
856 
857 static bool
init_basic_kernel_info(struct kernel_info * kernel,struct hsa_kernel_description * d,struct agent_info * agent,struct module_info * module)858 init_basic_kernel_info (struct kernel_info *kernel,
859 			struct hsa_kernel_description *d,
860 			struct agent_info *agent,
861 			struct module_info *module)
862 {
863   kernel->agent = agent;
864   kernel->module = module;
865   kernel->name = d->name;
866   kernel->omp_data_size = d->omp_data_size;
867   kernel->gridified_kernel_p = d->gridified_kernel_p;
868   kernel->dependencies_count = d->kernel_dependencies_count;
869   kernel->dependencies = d->kernel_dependencies;
870   if (pthread_mutex_init (&kernel->init_mutex, NULL))
871     {
872       GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
873       return false;
874     }
875   return true;
876 }
877 
878 /* Part of the libgomp plugin interface.  Load BRIG module described by struct
879    brig_image_desc in TARGET_DATA and return references to kernel descriptors
880    in TARGET_TABLE.  */
881 
882 int
GOMP_OFFLOAD_load_image(int ord,unsigned version,const void * target_data,struct addr_pair ** target_table)883 GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
884 			 struct addr_pair **target_table)
885 {
886   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
887     {
888       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
889 			 " (expected %u, received %u)",
890 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
891       return -1;
892     }
893 
894   struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
895   struct agent_info *agent;
896   struct addr_pair *pair;
897   struct module_info *module;
898   struct kernel_info *kernel;
899   int kernel_count = image_desc->kernel_count;
900 
901   agent = get_agent_info (ord);
902   if (!agent)
903     return -1;
904 
905   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
906     {
907       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
908       return -1;
909     }
910   if (agent->prog_finalized
911       && !destroy_hsa_program (agent))
912     return -1;
913 
914   HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
915   pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
916   *target_table = pair;
917   module = (struct module_info *)
918     GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
919 				+ kernel_count * sizeof (struct kernel_info));
920   module->image_desc = image_desc;
921   module->kernel_count = kernel_count;
922 
923   kernel = &module->kernels[0];
924 
925   /* Allocate memory for kernel dependencies.  */
926   for (unsigned i = 0; i < kernel_count; i++)
927     {
928       pair->start = (uintptr_t) kernel;
929       pair->end = (uintptr_t) (kernel + 1);
930 
931       struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
932       if (!init_basic_kernel_info (kernel, d, agent, module))
933 	return -1;
934       kernel++;
935       pair++;
936     }
937 
938   add_module_to_agent (agent, module);
939   if (pthread_rwlock_unlock (&agent->modules_rwlock))
940     {
941       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
942       return -1;
943     }
944   return kernel_count;
945 }
946 
947 /* Add a shared BRIG library from a FILE_NAME to an AGENT.  */
948 
949 static struct brig_library_info *
add_shared_library(const char * file_name,struct agent_info * agent)950 add_shared_library (const char *file_name, struct agent_info *agent)
951 {
952   struct brig_library_info *library = NULL;
953 
954   void *f = dlopen (file_name, RTLD_NOW);
955   void *start = dlsym (f, "__brig_start");
956   void *end = dlsym (f, "__brig_end");
957 
958   if (start == NULL || end == NULL)
959     return NULL;
960 
961   unsigned size = end - start;
962   char *buf = (char *) GOMP_PLUGIN_malloc (size);
963   memcpy (buf, start, size);
964 
965   library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
966   library->file_name = (char *) GOMP_PLUGIN_malloc
967     ((strlen (file_name) + 1));
968   strcpy (library->file_name, file_name);
969   library->image = (hsa_ext_module_t) buf;
970 
971   return library;
972 }
973 
974 /* Release memory used for BRIG shared libraries that correspond
975    to an AGENT.  */
976 
977 static void
release_agent_shared_libraries(struct agent_info * agent)978 release_agent_shared_libraries (struct agent_info *agent)
979 {
980   for (unsigned i = 0; i < agent->brig_libraries_count; i++)
981     if (agent->brig_libraries[i])
982       {
983 	free (agent->brig_libraries[i]->file_name);
984 	free (agent->brig_libraries[i]->image);
985 	free (agent->brig_libraries[i]);
986       }
987 
988   free (agent->brig_libraries);
989 }
990 
991 /* Create and finalize the program consisting of all loaded modules.  */
992 
993 static void
create_and_finalize_hsa_program(struct agent_info * agent)994 create_and_finalize_hsa_program (struct agent_info *agent)
995 {
996   hsa_status_t status;
997   hsa_ext_program_t prog_handle;
998   int mi = 0;
999 
1000   if (pthread_mutex_lock (&agent->prog_mutex))
1001     GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
1002   if (agent->prog_finalized)
1003     goto final;
1004 
1005   status = hsa_fns.hsa_ext_program_create_fn
1006     (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
1007      HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
1008      NULL, &prog_handle);
1009   if (status != HSA_STATUS_SUCCESS)
1010     hsa_fatal ("Could not create an HSA program", status);
1011 
1012   HSA_DEBUG ("Created a finalized program\n");
1013 
1014   struct module_info *module = agent->first_module;
1015   while (module)
1016     {
1017       status = hsa_fns.hsa_ext_program_add_module_fn
1018 	(prog_handle, module->image_desc->brig_module);
1019       if (status != HSA_STATUS_SUCCESS)
1020 	hsa_fatal ("Could not add a module to the HSA program", status);
1021       module = module->next;
1022       mi++;
1023     }
1024 
1025   /* Load all shared libraries.  */
1026   const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
1027   const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
1028 
1029   agent->brig_libraries_count = libraries_count;
1030   agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
1031     (sizeof (struct brig_library_info) * libraries_count);
1032 
1033   for (unsigned i = 0; i < libraries_count; i++)
1034     {
1035       struct brig_library_info *library = add_shared_library (libraries[i],
1036 							      agent);
1037       if (library == NULL)
1038 	{
1039 	  HSA_WARNING ("Could not open a shared BRIG library: %s\n",
1040 		       libraries[i]);
1041 	  continue;
1042 	}
1043 
1044       status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
1045 						      library->image);
1046       if (status != HSA_STATUS_SUCCESS)
1047 	hsa_warn ("Could not add a shared BRIG library the HSA program",
1048 		  status);
1049       else
1050 	HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
1051 		   libraries[i]);
1052     }
1053 
1054   hsa_ext_control_directives_t control_directives;
1055   memset (&control_directives, 0, sizeof (control_directives));
1056   hsa_code_object_t code_object;
1057   status = hsa_fns.hsa_ext_program_finalize_fn
1058     (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
1059      control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
1060   if (status != HSA_STATUS_SUCCESS)
1061     {
1062       hsa_warn ("Finalization of the HSA program failed", status);
1063       goto failure;
1064     }
1065 
1066   HSA_DEBUG ("Finalization done\n");
1067   hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
1068 
1069   status
1070     = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
1071 					HSA_EXECUTABLE_STATE_UNFROZEN,
1072 					"", &agent->executable);
1073   if (status != HSA_STATUS_SUCCESS)
1074     hsa_fatal ("Could not create HSA executable", status);
1075 
1076   module = agent->first_module;
1077   while (module)
1078     {
1079       /* Initialize all global variables declared in the module.  */
1080       for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
1081 	{
1082 	  struct global_var_info *var;
1083 	  var = &module->image_desc->global_variables[i];
1084 	  status = hsa_fns.hsa_executable_global_variable_define_fn
1085 	    (agent->executable, var->name, var->address);
1086 
1087 	  HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
1088 		     var->address);
1089 
1090 	  if (status != HSA_STATUS_SUCCESS)
1091 	    hsa_fatal ("Could not define a global variable in the HSA program",
1092 		       status);
1093 	}
1094 
1095       module = module->next;
1096     }
1097 
1098   status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
1099 						       agent->id,
1100 						       code_object, "");
1101   if (status != HSA_STATUS_SUCCESS)
1102     hsa_fatal ("Could not add a code object to the HSA executable", status);
1103   status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
1104   if (status != HSA_STATUS_SUCCESS)
1105     hsa_fatal ("Could not freeze the HSA executable", status);
1106 
1107   HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
1108 
1109   /* If all goes good, jump to final.  */
1110   goto final;
1111 
1112 failure:
1113   agent->prog_finalized_error = true;
1114 
1115 final:
1116   agent->prog_finalized = true;
1117 
1118   if (pthread_mutex_unlock (&agent->prog_mutex))
1119     GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
1120 }
1121 
1122 /* Create kernel dispatch data structure for given KERNEL.  */
1123 
1124 static struct GOMP_hsa_kernel_dispatch *
create_single_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1125 create_single_kernel_dispatch (struct kernel_info *kernel,
1126 			       unsigned omp_data_size)
1127 {
1128   struct agent_info *agent = kernel->agent;
1129   struct GOMP_hsa_kernel_dispatch *shadow
1130     = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
1131 
1132   shadow->queue = agent->command_q;
1133   shadow->omp_data_memory
1134     = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
1135   unsigned dispatch_count = kernel->dependencies_count;
1136   shadow->kernel_dispatch_count = dispatch_count;
1137 
1138   shadow->children_dispatches
1139     = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
1140 
1141   shadow->object = kernel->object;
1142 
1143   hsa_signal_t sync_signal;
1144   hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
1145   if (status != HSA_STATUS_SUCCESS)
1146     hsa_fatal ("Error creating the HSA sync signal", status);
1147 
1148   shadow->signal = sync_signal.handle;
1149   shadow->private_segment_size = kernel->private_segment_size;
1150   shadow->group_segment_size = kernel->group_segment_size;
1151 
1152   status
1153     = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
1154 				      kernel->kernarg_segment_size,
1155 				      &shadow->kernarg_address);
1156   if (status != HSA_STATUS_SUCCESS)
1157     hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
1158 
1159   return shadow;
1160 }
1161 
1162 /* Release data structure created for a kernel dispatch in SHADOW argument.  */
1163 
1164 static void
release_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * shadow)1165 release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
1166 {
1167   HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64 " (%p)\n",
1168 	     shadow, (print_uint64_t) shadow->debug,
1169 	     (void *) (uintptr_t) shadow->debug);
1170 
1171   hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
1172 
1173   hsa_signal_t s;
1174   s.handle = shadow->signal;
1175   hsa_fns.hsa_signal_destroy_fn (s);
1176 
1177   free (shadow->omp_data_memory);
1178 
1179   for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1180     release_kernel_dispatch (shadow->children_dispatches[i]);
1181 
1182   free (shadow->children_dispatches);
1183   free (shadow);
1184 }
1185 
1186 /* Initialize a KERNEL without its dependencies.  MAX_OMP_DATA_SIZE is used
1187    to calculate maximum necessary memory for OMP data allocation.  */
1188 
1189 static void
init_single_kernel(struct kernel_info * kernel,unsigned * max_omp_data_size)1190 init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
1191 {
1192   hsa_status_t status;
1193   struct agent_info *agent = kernel->agent;
1194   hsa_executable_symbol_t kernel_symbol;
1195   status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
1196 						 kernel->name, agent->id,
1197 						 0, &kernel_symbol);
1198   if (status != HSA_STATUS_SUCCESS)
1199     {
1200       hsa_warn ("Could not find symbol for kernel in the code object", status);
1201       goto failure;
1202     }
1203   HSA_DEBUG ("Located kernel %s\n", kernel->name);
1204   status = hsa_fns.hsa_executable_symbol_get_info_fn
1205     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
1206   if (status != HSA_STATUS_SUCCESS)
1207     hsa_fatal ("Could not extract a kernel object from its symbol", status);
1208   status = hsa_fns.hsa_executable_symbol_get_info_fn
1209     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
1210      &kernel->kernarg_segment_size);
1211   if (status != HSA_STATUS_SUCCESS)
1212     hsa_fatal ("Could not get info about kernel argument size", status);
1213   status = hsa_fns.hsa_executable_symbol_get_info_fn
1214     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
1215      &kernel->group_segment_size);
1216   if (status != HSA_STATUS_SUCCESS)
1217     hsa_fatal ("Could not get info about kernel group segment size", status);
1218   status = hsa_fns.hsa_executable_symbol_get_info_fn
1219     (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
1220      &kernel->private_segment_size);
1221   if (status != HSA_STATUS_SUCCESS)
1222     hsa_fatal ("Could not get info about kernel private segment size",
1223 	       status);
1224 
1225   HSA_DEBUG ("Kernel structure for %s fully initialized with "
1226 	     "following segment sizes: \n", kernel->name);
1227   HSA_DEBUG ("  group_segment_size: %u\n",
1228 	     (unsigned) kernel->group_segment_size);
1229   HSA_DEBUG ("  private_segment_size: %u\n",
1230 	     (unsigned) kernel->private_segment_size);
1231   HSA_DEBUG ("  kernarg_segment_size: %u\n",
1232 	     (unsigned) kernel->kernarg_segment_size);
1233   HSA_DEBUG ("  omp_data_size: %u\n", kernel->omp_data_size);
1234   HSA_DEBUG ("  gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
1235 
1236   if (kernel->omp_data_size > *max_omp_data_size)
1237     *max_omp_data_size = kernel->omp_data_size;
1238 
1239   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1240     {
1241       struct kernel_info *dependency
1242 	= get_kernel_for_agent (agent, kernel->dependencies[i]);
1243 
1244       if (dependency == NULL)
1245 	{
1246 	  HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
1247 		     "dependency name: %s\n", kernel->name,
1248 		     kernel->dependencies[i]);
1249 	  goto failure;
1250 	}
1251 
1252       if (dependency->dependencies_count > 0)
1253 	{
1254 	  HSA_DEBUG ("HSA does not allow kernel dispatching code with "
1255 		     "a depth bigger than one\n");
1256 	  goto failure;
1257 	}
1258 
1259       init_single_kernel (dependency, max_omp_data_size);
1260     }
1261 
1262   return;
1263 
1264 failure:
1265   kernel->initialization_failed = true;
1266 }
1267 
1268 /* Indent stream F by INDENT spaces.  */
1269 
1270 static void
indent_stream(FILE * f,unsigned indent)1271 indent_stream (FILE *f, unsigned indent)
1272 {
1273   fprintf (f, "%*s", indent, "");
1274 }
1275 
1276 /* Dump kernel DISPATCH data structure and indent it by INDENT spaces.  */
1277 
1278 static void
print_kernel_dispatch(struct GOMP_hsa_kernel_dispatch * dispatch,unsigned indent)1279 print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
1280 {
1281   indent_stream (stderr, indent);
1282   fprintf (stderr, "this: %p\n", dispatch);
1283   indent_stream (stderr, indent);
1284   fprintf (stderr, "queue: %p\n", dispatch->queue);
1285   indent_stream (stderr, indent);
1286   fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
1287   indent_stream (stderr, indent);
1288   fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
1289   indent_stream (stderr, indent);
1290   fprintf (stderr, "object: %" PRIu64 "\n", (print_uint64_t) dispatch->object);
1291   indent_stream (stderr, indent);
1292   fprintf (stderr, "signal: %" PRIu64 "\n", (print_uint64_t) dispatch->signal);
1293   indent_stream (stderr, indent);
1294   fprintf (stderr, "private_segment_size: %u\n",
1295 	   dispatch->private_segment_size);
1296   indent_stream (stderr, indent);
1297   fprintf (stderr, "group_segment_size: %u\n",
1298 	   dispatch->group_segment_size);
1299   indent_stream (stderr, indent);
1300   fprintf (stderr, "children dispatches: %" PRIu64 "\n",
1301 	   (print_uint64_t) dispatch->kernel_dispatch_count);
1302   indent_stream (stderr, indent);
1303   fprintf (stderr, "omp_num_threads: %u\n",
1304 	   dispatch->omp_num_threads);
1305   fprintf (stderr, "\n");
1306 
1307   for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
1308     print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
1309 }
1310 
1311 /* Create kernel dispatch data structure for a KERNEL and all its
1312    dependencies.  */
1313 
1314 static struct GOMP_hsa_kernel_dispatch *
create_kernel_dispatch(struct kernel_info * kernel,unsigned omp_data_size)1315 create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
1316 {
1317   struct GOMP_hsa_kernel_dispatch *shadow
1318     = create_single_kernel_dispatch (kernel, omp_data_size);
1319   shadow->omp_num_threads = 64;
1320   shadow->debug = 0;
1321   shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
1322 
1323   /* Create kernel dispatch data structures.  We do not allow to have
1324      a kernel dispatch with depth bigger than one.  */
1325   for (unsigned i = 0; i < kernel->dependencies_count; i++)
1326     {
1327       struct kernel_info *dependency
1328 	= get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
1329       shadow->children_dispatches[i]
1330 	= create_single_kernel_dispatch (dependency, omp_data_size);
1331       shadow->children_dispatches[i]->queue
1332 	= kernel->agent->kernel_dispatch_command_q;
1333       shadow->children_dispatches[i]->omp_level = 1;
1334     }
1335 
1336   return shadow;
1337 }
1338 
1339 /* Do all the work that is necessary before running KERNEL for the first time.
1340    The function assumes the program has been created, finalized and frozen by
1341    create_and_finalize_hsa_program.  */
1342 
1343 static void
init_kernel(struct kernel_info * kernel)1344 init_kernel (struct kernel_info *kernel)
1345 {
1346   if (pthread_mutex_lock (&kernel->init_mutex))
1347     GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
1348   if (kernel->initialized)
1349     {
1350       if (pthread_mutex_unlock (&kernel->init_mutex))
1351 	GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1352 			   "mutex");
1353 
1354       return;
1355     }
1356 
1357   /* Precomputed maximum size of OMP data necessary for a kernel from kernel
1358      dispatch operation.  */
1359   init_single_kernel (kernel, &kernel->max_omp_data_size);
1360 
1361   if (!kernel->initialization_failed)
1362     HSA_DEBUG ("\n");
1363 
1364   kernel->initialized = true;
1365   if (pthread_mutex_unlock (&kernel->init_mutex))
1366     GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
1367 		       "mutex");
1368 }
1369 
1370 /* Parse the target attributes INPUT provided by the compiler and return true
1371    if we should run anything all.  If INPUT is NULL, fill DEF with default
1372    values, then store INPUT or DEF into *RESULT.  */
1373 
1374 static bool
parse_target_attributes(void ** input,struct GOMP_kernel_launch_attributes * def,struct GOMP_kernel_launch_attributes ** result)1375 parse_target_attributes (void **input,
1376 			 struct GOMP_kernel_launch_attributes *def,
1377 			 struct GOMP_kernel_launch_attributes **result)
1378 {
1379   if (!input)
1380     GOMP_PLUGIN_fatal ("No target arguments provided");
1381 
1382   bool attrs_found = false;
1383   while (*input)
1384     {
1385       uintptr_t id = (uintptr_t) *input;
1386       if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
1387 	  && ((id & GOMP_TARGET_ARG_ID_MASK)
1388 	      == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
1389 	{
1390 	  input++;
1391 	  attrs_found = true;
1392 	  break;
1393 	}
1394 
1395       if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
1396 	input++;
1397       input++;
1398     }
1399 
1400   if (!attrs_found)
1401     {
1402       def->ndim = 1;
1403       def->gdims[0] = 1;
1404       def->gdims[1] = 1;
1405       def->gdims[2] = 1;
1406       def->wdims[0] = 1;
1407       def->wdims[1] = 1;
1408       def->wdims[2] = 1;
1409       *result = def;
1410       HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
1411       return true;
1412     }
1413 
1414   struct GOMP_kernel_launch_attributes *kla;
1415   kla = (struct GOMP_kernel_launch_attributes *) *input;
1416   *result = kla;
1417   if (kla->ndim == 0 || kla->ndim > 3)
1418     GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
1419 
1420   HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
1421   unsigned i;
1422   for (i = 0; i < kla->ndim; i++)
1423     {
1424       HSA_DEBUG ("  Dimension %u: grid size %u and group size %u\n", i,
1425 		 kla->gdims[i], kla->wdims[i]);
1426       if (kla->gdims[i] == 0)
1427 	return false;
1428     }
1429   return true;
1430 }
1431 
1432 /* Return the group size given the requested GROUP size, GRID size and number
1433    of grid dimensions NDIM.  */
1434 
1435 static uint32_t
get_group_size(uint32_t ndim,uint32_t grid,uint32_t group)1436 get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
1437 {
1438   if (group == 0)
1439     {
1440       /* TODO: Provide a default via environment or device characteristics.  */
1441       if (ndim == 1)
1442 	group = 64;
1443       else if (ndim == 2)
1444 	group = 8;
1445       else
1446 	group = 4;
1447     }
1448 
1449   if (group > grid)
1450     group = grid;
1451   return group;
1452 }
1453 
1454 /* Return true if the HSA runtime can run function FN_PTR.  */
1455 
1456 bool
GOMP_OFFLOAD_can_run(void * fn_ptr)1457 GOMP_OFFLOAD_can_run (void *fn_ptr)
1458 {
1459   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1460   struct agent_info *agent = kernel->agent;
1461   create_and_finalize_hsa_program (agent);
1462 
1463   if (agent->prog_finalized_error)
1464     goto failure;
1465 
1466   init_kernel (kernel);
1467   if (kernel->initialization_failed)
1468     goto failure;
1469 
1470   return true;
1471 
1472 failure:
1473   if (suppress_host_fallback)
1474     GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
1475   HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
1476   return false;
1477 }
1478 
1479 /* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET.  */
1480 
1481 void
packet_store_release(uint32_t * packet,uint16_t header,uint16_t rest)1482 packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
1483 {
1484   __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
1485 }
1486 
1487 /* Run KERNEL on its agent, pass VARS to it as arguments and take
1488    launchattributes from KLA.  */
1489 
1490 void
run_kernel(struct kernel_info * kernel,void * vars,struct GOMP_kernel_launch_attributes * kla)1491 run_kernel (struct kernel_info *kernel, void *vars,
1492 	    struct GOMP_kernel_launch_attributes *kla)
1493 {
1494   struct agent_info *agent = kernel->agent;
1495   if (pthread_rwlock_rdlock (&agent->modules_rwlock))
1496     GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
1497 
1498   if (!agent->initialized)
1499     GOMP_PLUGIN_fatal ("Agent must be initialized");
1500 
1501   if (!kernel->initialized)
1502     GOMP_PLUGIN_fatal ("Called kernel must be initialized");
1503 
1504   struct GOMP_hsa_kernel_dispatch *shadow
1505     = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
1506 
1507   if (debug)
1508     {
1509       fprintf (stderr, "\nKernel has following dependencies:\n");
1510       print_kernel_dispatch (shadow, 2);
1511     }
1512 
1513   uint64_t index
1514     = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
1515   HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
1516 
1517   /* Wait until the queue is not full before writing the packet.   */
1518   while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
1519 	 >= agent->command_q->size)
1520     ;
1521 
1522   hsa_kernel_dispatch_packet_t *packet;
1523   packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
1524 	   + index % agent->command_q->size;
1525 
1526   memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
1527   packet->grid_size_x = kla->gdims[0];
1528   packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
1529 					     kla->wdims[0]);
1530 
1531   if (kla->ndim >= 2)
1532     {
1533       packet->grid_size_y = kla->gdims[1];
1534       packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
1535 						 kla->wdims[1]);
1536     }
1537   else
1538     {
1539       packet->grid_size_y = 1;
1540       packet->workgroup_size_y = 1;
1541     }
1542 
1543   if (kla->ndim == 3)
1544     {
1545       packet->grid_size_z = kla->gdims[2];
1546       packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
1547 					     kla->wdims[2]);
1548     }
1549   else
1550     {
1551       packet->grid_size_z = 1;
1552       packet->workgroup_size_z = 1;
1553     }
1554 
1555   packet->private_segment_size = kernel->private_segment_size;
1556   packet->group_segment_size = kernel->group_segment_size;
1557   packet->kernel_object = kernel->object;
1558   packet->kernarg_address = shadow->kernarg_address;
1559   hsa_signal_t s;
1560   s.handle = shadow->signal;
1561   packet->completion_signal = s;
1562   hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
1563   memcpy (shadow->kernarg_address, &vars, sizeof (vars));
1564 
1565   /* PR hsa/70337.  */
1566   size_t vars_size = sizeof (vars);
1567   if (kernel->kernarg_segment_size > vars_size)
1568     {
1569       if (kernel->kernarg_segment_size != vars_size
1570 	  + sizeof (struct hsa_kernel_runtime *))
1571 	GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
1572       memcpy (packet->kernarg_address + vars_size, &shadow,
1573 	      sizeof (struct hsa_kernel_runtime *));
1574     }
1575 
1576   HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
1577 
1578   uint16_t header;
1579   header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
1580   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
1581   header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
1582 
1583   HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
1584 
1585   packet_store_release ((uint32_t *) packet, header,
1586 			(uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
1587 
1588   hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
1589 				       index);
1590 
1591   /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
1592      signal wait and signal load operations on their own and we need to
1593      periodically call the hsa_signal_load_acquire on completion signals of
1594      children kernels in the CPU to make that happen.  As soon the
1595      limitation will be resolved, this workaround can be removed.  */
1596 
1597   HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
1598 
1599   /* Root signal waits with 1ms timeout.  */
1600   while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
1601 					     1000 * 1000,
1602 					     HSA_WAIT_STATE_BLOCKED) != 0)
1603     for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
1604       {
1605 	hsa_signal_t child_s;
1606 	child_s.handle = shadow->children_dispatches[i]->signal;
1607 
1608 	HSA_DEBUG ("Waiting for children completion signal: %" PRIu64 "\n",
1609 		   (print_uint64_t) shadow->children_dispatches[i]->signal);
1610 	hsa_fns.hsa_signal_load_acquire_fn (child_s);
1611       }
1612 
1613   release_kernel_dispatch (shadow);
1614 
1615   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1616     GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
1617 }
1618 
1619 /* Part of the libgomp plugin interface.  Run a kernel on device N (the number
1620    is actually ignored, we assume the FN_PTR has been mapped using the correct
1621    device) and pass it an array of pointers in VARS as a parameter.  The kernel
1622    is identified by FN_PTR which must point to a kernel_info structure.  */
1623 
1624 void
GOMP_OFFLOAD_run(int n,void * fn_ptr,void * vars,void ** args)1625 GOMP_OFFLOAD_run (int n __attribute__((unused)),
1626 		  void *fn_ptr, void *vars, void **args)
1627 {
1628   struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
1629   struct GOMP_kernel_launch_attributes def;
1630   struct GOMP_kernel_launch_attributes *kla;
1631   if (!parse_target_attributes (args, &def, &kla))
1632     {
1633       HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
1634       return;
1635     }
1636   run_kernel (kernel, vars, kla);
1637 }
1638 
1639 /* Information to be passed to a thread running a kernel asycnronously.  */
1640 
1641 struct async_run_info
1642 {
1643   int device;
1644   void *tgt_fn;
1645   void *tgt_vars;
1646   void **args;
1647   void *async_data;
1648 };
1649 
1650 /* Thread routine to run a kernel asynchronously.  */
1651 
1652 static void *
run_kernel_asynchronously(void * thread_arg)1653 run_kernel_asynchronously (void *thread_arg)
1654 {
1655   struct async_run_info *info = (struct async_run_info *) thread_arg;
1656   int device = info->device;
1657   void *tgt_fn = info->tgt_fn;
1658   void *tgt_vars = info->tgt_vars;
1659   void **args = info->args;
1660   void *async_data = info->async_data;
1661 
1662   free (info);
1663   GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
1664   GOMP_PLUGIN_target_task_completion (async_data);
1665   return NULL;
1666 }
1667 
1668 /* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
1669    does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
1670    has finished.  */
1671 
1672 void
GOMP_OFFLOAD_async_run(int device,void * tgt_fn,void * tgt_vars,void ** args,void * async_data)1673 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
1674 			void **args, void *async_data)
1675 {
1676   pthread_t pt;
1677   struct async_run_info *info;
1678   HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
1679   info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
1680 
1681   info->device = device;
1682   info->tgt_fn = tgt_fn;
1683   info->tgt_vars = tgt_vars;
1684   info->args = args;
1685   info->async_data = async_data;
1686 
1687   int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
1688   if (err != 0)
1689     GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
1690 		       strerror (err));
1691   err = pthread_detach (pt);
1692   if (err != 0)
1693     GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
1694 		       "asynchronously: %s", strerror (err));
1695 }
1696 
1697 /* Deinitialize all information associated with MODULE and kernels within
1698    it.  Return TRUE on success.  */
1699 
1700 static bool
destroy_module(struct module_info * module)1701 destroy_module (struct module_info *module)
1702 {
1703   int i;
1704   for (i = 0; i < module->kernel_count; i++)
1705     if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
1706       {
1707 	GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
1708 			   "mutex");
1709 	return false;
1710       }
1711   return true;
1712 }
1713 
1714 /* Part of the libgomp plugin interface.  Unload BRIG module described by
1715    struct brig_image_desc in TARGET_DATA from agent number N.  Return
1716    TRUE on success.  */
1717 
1718 bool
GOMP_OFFLOAD_unload_image(int n,unsigned version,const void * target_data)1719 GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
1720 {
1721   if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
1722     {
1723       GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
1724 			 " (expected %u, received %u)",
1725 			 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
1726       return false;
1727     }
1728 
1729   struct agent_info *agent;
1730   agent = get_agent_info (n);
1731   if (!agent)
1732     return false;
1733 
1734   if (pthread_rwlock_wrlock (&agent->modules_rwlock))
1735     {
1736       GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
1737       return false;
1738     }
1739   struct module_info *module = agent->first_module;
1740   while (module)
1741     {
1742       if (module->image_desc == target_data)
1743 	break;
1744       module = module->next;
1745     }
1746   if (!module)
1747     {
1748       GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
1749 			 "loaded before");
1750       return false;
1751     }
1752 
1753   remove_module_from_agent (agent, module);
1754   if (!destroy_module (module))
1755     return false;
1756   free (module);
1757   if (!destroy_hsa_program (agent))
1758     return false;
1759   if (pthread_rwlock_unlock (&agent->modules_rwlock))
1760     {
1761       GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
1762       return false;
1763     }
1764   return true;
1765 }
1766 
1767 /* Part of the libgomp plugin interface.  Deinitialize all information and
1768    status associated with agent number N.  We do not attempt any
1769    synchronization, assuming the user and libgomp will not attempt
1770    deinitialization of a device that is in any way being used at the same
1771    time.  Return TRUE on success.  */
1772 
1773 bool
GOMP_OFFLOAD_fini_device(int n)1774 GOMP_OFFLOAD_fini_device (int n)
1775 {
1776   struct agent_info *agent = get_agent_info (n);
1777   if (!agent)
1778     return false;
1779 
1780   if (!agent->initialized)
1781     return true;
1782 
1783   struct module_info *next_module = agent->first_module;
1784   while (next_module)
1785     {
1786       struct module_info *module = next_module;
1787       next_module = module->next;
1788       if (!destroy_module (module))
1789 	return false;
1790       free (module);
1791     }
1792   agent->first_module = NULL;
1793   if (!destroy_hsa_program (agent))
1794     return false;
1795 
1796   release_agent_shared_libraries (agent);
1797 
1798   hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
1799   if (status != HSA_STATUS_SUCCESS)
1800     return hsa_error ("Error destroying command queue", status);
1801   status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
1802   if (status != HSA_STATUS_SUCCESS)
1803     return hsa_error ("Error destroying kernel dispatch command queue", status);
1804   if (pthread_mutex_destroy (&agent->prog_mutex))
1805     {
1806       GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
1807       return false;
1808     }
1809   if (pthread_rwlock_destroy (&agent->modules_rwlock))
1810     {
1811       GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
1812       return false;
1813     }
1814   agent->initialized = false;
1815   return true;
1816 }
1817 
1818 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1819    for HSA.  */
1820 
1821 void *
GOMP_OFFLOAD_alloc(int ord,size_t size)1822 GOMP_OFFLOAD_alloc (int ord, size_t size)
1823 {
1824   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
1825 		     "it should never be called");
1826   return NULL;
1827 }
1828 
1829 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1830    for HSA.  */
1831 
1832 bool
GOMP_OFFLOAD_free(int ord,void * ptr)1833 GOMP_OFFLOAD_free (int ord, void *ptr)
1834 {
1835   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
1836 		     "it should never be called");
1837   return false;
1838 }
1839 
1840 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1841    for HSA.  */
1842 
1843 bool
GOMP_OFFLOAD_dev2host(int ord,void * dst,const void * src,size_t n)1844 GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
1845 {
1846   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
1847 		     "it should never be called");
1848   return false;
1849 }
1850 
1851 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1852    for HSA.  */
1853 
1854 bool
GOMP_OFFLOAD_host2dev(int ord,void * dst,const void * src,size_t n)1855 GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
1856 {
1857   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
1858 		     "it should never be called");
1859   return false;
1860 }
1861 
1862 /* Part of the libgomp plugin interface.  Not implemented as it is not required
1863    for HSA.  */
1864 
1865 bool
GOMP_OFFLOAD_dev2dev(int ord,void * dst,const void * src,size_t n)1866 GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
1867 {
1868   GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
1869 		     "it should never be called");
1870   return false;
1871 }
1872