xref: /dflybsd-src/contrib/gcc-8.0/gcc/ipa-hsa.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Interprocedural HSA pass: creation of HSA clones of functions.
2*38fd1498Szrj    Copyright (C) 2015-2018 Free Software Foundation, Inc.
3*38fd1498Szrj    Contributed by Martin Liska <mliska@suse.cz>
4*38fd1498Szrj 
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj 
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj 
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj 
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
20*38fd1498Szrj 
21*38fd1498Szrj /* Interprocedural HSA pass is responsible for creation of HSA clones.
22*38fd1498Szrj    For all these HSA clones, we emit HSAIL instructions and pass processing
23*38fd1498Szrj    is terminated.  */
24*38fd1498Szrj 
25*38fd1498Szrj #include "config.h"
26*38fd1498Szrj #include "system.h"
27*38fd1498Szrj #include "coretypes.h"
28*38fd1498Szrj #include "tm.h"
29*38fd1498Szrj #include "is-a.h"
30*38fd1498Szrj #include "hash-set.h"
31*38fd1498Szrj #include "vec.h"
32*38fd1498Szrj #include "tree.h"
33*38fd1498Szrj #include "tree-pass.h"
34*38fd1498Szrj #include "function.h"
35*38fd1498Szrj #include "basic-block.h"
36*38fd1498Szrj #include "gimple.h"
37*38fd1498Szrj #include "dumpfile.h"
38*38fd1498Szrj #include "gimple-pretty-print.h"
39*38fd1498Szrj #include "tree-streamer.h"
40*38fd1498Szrj #include "stringpool.h"
41*38fd1498Szrj #include "cgraph.h"
42*38fd1498Szrj #include "print-tree.h"
43*38fd1498Szrj #include "symbol-summary.h"
44*38fd1498Szrj #include "hsa-common.h"
45*38fd1498Szrj 
46*38fd1498Szrj namespace {
47*38fd1498Szrj 
48*38fd1498Szrj /* If NODE is not versionable, warn about not emiting HSAIL and return false.
49*38fd1498Szrj    Otherwise return true.  */
50*38fd1498Szrj 
51*38fd1498Szrj static bool
check_warn_node_versionable(cgraph_node * node)52*38fd1498Szrj check_warn_node_versionable (cgraph_node *node)
53*38fd1498Szrj {
54*38fd1498Szrj   if (!node->local.versionable)
55*38fd1498Szrj     {
56*38fd1498Szrj       warning_at (EXPR_LOCATION (node->decl), OPT_Whsa,
57*38fd1498Szrj 		  "could not emit HSAIL for function %s: function cannot be "
58*38fd1498Szrj 		  "cloned", node->name ());
59*38fd1498Szrj       return false;
60*38fd1498Szrj     }
61*38fd1498Szrj   return true;
62*38fd1498Szrj }
63*38fd1498Szrj 
64*38fd1498Szrj /* The function creates HSA clones for all functions that were either
65*38fd1498Szrj    marked as HSA kernels or are callable HSA functions.  Apart from that,
66*38fd1498Szrj    we redirect all edges that come from an HSA clone and end in another
67*38fd1498Szrj    HSA clone to connect these two functions.  */
68*38fd1498Szrj 
69*38fd1498Szrj static unsigned int
process_hsa_functions(void)70*38fd1498Szrj process_hsa_functions (void)
71*38fd1498Szrj {
72*38fd1498Szrj   struct cgraph_node *node;
73*38fd1498Szrj 
74*38fd1498Szrj   if (hsa_summaries == NULL)
75*38fd1498Szrj     hsa_summaries = new hsa_summary_t (symtab);
76*38fd1498Szrj 
77*38fd1498Szrj   FOR_EACH_DEFINED_FUNCTION (node)
78*38fd1498Szrj     {
79*38fd1498Szrj       hsa_function_summary *s = hsa_summaries->get (node);
80*38fd1498Szrj 
81*38fd1498Szrj       /* A linked function is skipped.  */
82*38fd1498Szrj       if (s->m_bound_function != NULL)
83*38fd1498Szrj 	continue;
84*38fd1498Szrj 
85*38fd1498Szrj       if (s->m_kind != HSA_NONE)
86*38fd1498Szrj 	{
87*38fd1498Szrj 	  if (!check_warn_node_versionable (node))
88*38fd1498Szrj 	    continue;
89*38fd1498Szrj 	  cgraph_node *clone
90*38fd1498Szrj 	    = node->create_virtual_clone (vec <cgraph_edge *> (),
91*38fd1498Szrj 					  NULL, NULL, "hsa");
92*38fd1498Szrj 	  TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
93*38fd1498Szrj 	  clone->externally_visible = node->externally_visible;
94*38fd1498Szrj 
95*38fd1498Szrj 	  clone->force_output = true;
96*38fd1498Szrj 	  hsa_summaries->link_functions (clone, node, s->m_kind, false);
97*38fd1498Szrj 
98*38fd1498Szrj 	  if (dump_file)
99*38fd1498Szrj 	    fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n",
100*38fd1498Szrj 		     clone->name (),
101*38fd1498Szrj 		     s->m_kind == HSA_KERNEL ? "kernel" : "function");
102*38fd1498Szrj 	}
103*38fd1498Szrj       else if (hsa_callable_function_p (node->decl)
104*38fd1498Szrj 	       /* At this point, this is enough to identify clones for
105*38fd1498Szrj 		  parallel, which for HSA would need to be kernels anyway.  */
106*38fd1498Szrj 	       && !DECL_ARTIFICIAL (node->decl))
107*38fd1498Szrj 	{
108*38fd1498Szrj 	  if (!check_warn_node_versionable (node))
109*38fd1498Szrj 	    continue;
110*38fd1498Szrj 	  cgraph_node *clone
111*38fd1498Szrj 	    = node->create_virtual_clone (vec <cgraph_edge *> (),
112*38fd1498Szrj 					  NULL, NULL, "hsa");
113*38fd1498Szrj 	  TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
114*38fd1498Szrj 	  clone->externally_visible = node->externally_visible;
115*38fd1498Szrj 
116*38fd1498Szrj 	  if (!cgraph_local_p (node))
117*38fd1498Szrj 	    clone->force_output = true;
118*38fd1498Szrj 	  hsa_summaries->link_functions (clone, node, HSA_FUNCTION, false);
119*38fd1498Szrj 
120*38fd1498Szrj 	  if (dump_file)
121*38fd1498Szrj 	    fprintf (dump_file, "Created a new HSA function clone: %s\n",
122*38fd1498Szrj 		     clone->name ());
123*38fd1498Szrj 	}
124*38fd1498Szrj     }
125*38fd1498Szrj 
126*38fd1498Szrj   /* Redirect all edges that are between HSA clones.  */
127*38fd1498Szrj   FOR_EACH_DEFINED_FUNCTION (node)
128*38fd1498Szrj     {
129*38fd1498Szrj       cgraph_edge *e = node->callees;
130*38fd1498Szrj 
131*38fd1498Szrj       while (e)
132*38fd1498Szrj 	{
133*38fd1498Szrj 	  hsa_function_summary *src = hsa_summaries->get (node);
134*38fd1498Szrj 	  if (src->m_kind != HSA_NONE && src->m_gpu_implementation_p)
135*38fd1498Szrj 	    {
136*38fd1498Szrj 	      hsa_function_summary *dst = hsa_summaries->get (e->callee);
137*38fd1498Szrj 	      if (dst->m_kind != HSA_NONE && !dst->m_gpu_implementation_p)
138*38fd1498Szrj 		{
139*38fd1498Szrj 		  e->redirect_callee (dst->m_bound_function);
140*38fd1498Szrj 		  if (dump_file)
141*38fd1498Szrj 		    fprintf (dump_file,
142*38fd1498Szrj 			     "Redirecting edge to HSA function: %s->%s\n",
143*38fd1498Szrj 			     xstrdup_for_dump (e->caller->name ()),
144*38fd1498Szrj 			     xstrdup_for_dump (e->callee->name ()));
145*38fd1498Szrj 		}
146*38fd1498Szrj 	    }
147*38fd1498Szrj 
148*38fd1498Szrj 	  e = e->next_callee;
149*38fd1498Szrj 	}
150*38fd1498Szrj     }
151*38fd1498Szrj 
152*38fd1498Szrj   return 0;
153*38fd1498Szrj }
154*38fd1498Szrj 
155*38fd1498Szrj /* Iterate all HSA functions and stream out HSA function summary.  */
156*38fd1498Szrj 
157*38fd1498Szrj static void
ipa_hsa_write_summary(void)158*38fd1498Szrj ipa_hsa_write_summary (void)
159*38fd1498Szrj {
160*38fd1498Szrj   struct bitpack_d bp;
161*38fd1498Szrj   struct cgraph_node *node;
162*38fd1498Szrj   struct output_block *ob;
163*38fd1498Szrj   unsigned int count = 0;
164*38fd1498Szrj   lto_symtab_encoder_iterator lsei;
165*38fd1498Szrj   lto_symtab_encoder_t encoder;
166*38fd1498Szrj 
167*38fd1498Szrj   if (!hsa_summaries)
168*38fd1498Szrj     return;
169*38fd1498Szrj 
170*38fd1498Szrj   ob = create_output_block (LTO_section_ipa_hsa);
171*38fd1498Szrj   encoder = ob->decl_state->symtab_node_encoder;
172*38fd1498Szrj   ob->symbol = NULL;
173*38fd1498Szrj   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
174*38fd1498Szrj        lsei_next_function_in_partition (&lsei))
175*38fd1498Szrj     {
176*38fd1498Szrj       node = lsei_cgraph_node (lsei);
177*38fd1498Szrj       hsa_function_summary *s = hsa_summaries->get (node);
178*38fd1498Szrj 
179*38fd1498Szrj       if (s->m_kind != HSA_NONE)
180*38fd1498Szrj 	count++;
181*38fd1498Szrj     }
182*38fd1498Szrj 
183*38fd1498Szrj   streamer_write_uhwi (ob, count);
184*38fd1498Szrj 
185*38fd1498Szrj   /* Process all of the functions.  */
186*38fd1498Szrj   for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
187*38fd1498Szrj        lsei_next_function_in_partition (&lsei))
188*38fd1498Szrj     {
189*38fd1498Szrj       node = lsei_cgraph_node (lsei);
190*38fd1498Szrj       hsa_function_summary *s = hsa_summaries->get (node);
191*38fd1498Szrj 
192*38fd1498Szrj       if (s->m_kind != HSA_NONE)
193*38fd1498Szrj 	{
194*38fd1498Szrj 	  encoder = ob->decl_state->symtab_node_encoder;
195*38fd1498Szrj 	  int node_ref = lto_symtab_encoder_encode (encoder, node);
196*38fd1498Szrj 	  streamer_write_uhwi (ob, node_ref);
197*38fd1498Szrj 
198*38fd1498Szrj 	  bp = bitpack_create (ob->main_stream);
199*38fd1498Szrj 	  bp_pack_value (&bp, s->m_kind, 2);
200*38fd1498Szrj 	  bp_pack_value (&bp, s->m_gpu_implementation_p, 1);
201*38fd1498Szrj 	  bp_pack_value (&bp, s->m_bound_function != NULL, 1);
202*38fd1498Szrj 	  streamer_write_bitpack (&bp);
203*38fd1498Szrj 	  if (s->m_bound_function)
204*38fd1498Szrj 	    stream_write_tree (ob, s->m_bound_function->decl, true);
205*38fd1498Szrj 	}
206*38fd1498Szrj     }
207*38fd1498Szrj 
208*38fd1498Szrj   streamer_write_char_stream (ob->main_stream, 0);
209*38fd1498Szrj   produce_asm (ob, NULL);
210*38fd1498Szrj   destroy_output_block (ob);
211*38fd1498Szrj }
212*38fd1498Szrj 
213*38fd1498Szrj /* Read section in file FILE_DATA of length LEN with data DATA.  */
214*38fd1498Szrj 
215*38fd1498Szrj static void
ipa_hsa_read_section(struct lto_file_decl_data * file_data,const char * data,size_t len)216*38fd1498Szrj ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data,
217*38fd1498Szrj 		       size_t len)
218*38fd1498Szrj {
219*38fd1498Szrj   const struct lto_function_header *header
220*38fd1498Szrj     = (const struct lto_function_header *) data;
221*38fd1498Szrj   const int cfg_offset = sizeof (struct lto_function_header);
222*38fd1498Szrj   const int main_offset = cfg_offset + header->cfg_size;
223*38fd1498Szrj   const int string_offset = main_offset + header->main_size;
224*38fd1498Szrj   struct data_in *data_in;
225*38fd1498Szrj   unsigned int i;
226*38fd1498Szrj   unsigned int count;
227*38fd1498Szrj 
228*38fd1498Szrj   lto_input_block ib_main ((const char *) data + main_offset,
229*38fd1498Szrj 			   header->main_size, file_data->mode_table);
230*38fd1498Szrj 
231*38fd1498Szrj   data_in
232*38fd1498Szrj     = lto_data_in_create (file_data, (const char *) data + string_offset,
233*38fd1498Szrj 			  header->string_size, vNULL);
234*38fd1498Szrj   count = streamer_read_uhwi (&ib_main);
235*38fd1498Szrj 
236*38fd1498Szrj   for (i = 0; i < count; i++)
237*38fd1498Szrj     {
238*38fd1498Szrj       unsigned int index;
239*38fd1498Szrj       struct cgraph_node *node;
240*38fd1498Szrj       lto_symtab_encoder_t encoder;
241*38fd1498Szrj 
242*38fd1498Szrj       index = streamer_read_uhwi (&ib_main);
243*38fd1498Szrj       encoder = file_data->symtab_node_encoder;
244*38fd1498Szrj       node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
245*38fd1498Szrj 								index));
246*38fd1498Szrj       gcc_assert (node->definition);
247*38fd1498Szrj       hsa_function_summary *s = hsa_summaries->get (node);
248*38fd1498Szrj 
249*38fd1498Szrj       struct bitpack_d bp = streamer_read_bitpack (&ib_main);
250*38fd1498Szrj       s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2);
251*38fd1498Szrj       s->m_gpu_implementation_p = bp_unpack_value (&bp, 1);
252*38fd1498Szrj       bool has_tree = bp_unpack_value (&bp, 1);
253*38fd1498Szrj 
254*38fd1498Szrj       if (has_tree)
255*38fd1498Szrj 	{
256*38fd1498Szrj 	  tree decl = stream_read_tree (&ib_main, data_in);
257*38fd1498Szrj 	  s->m_bound_function = cgraph_node::get_create (decl);
258*38fd1498Szrj 	}
259*38fd1498Szrj     }
260*38fd1498Szrj   lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data,
261*38fd1498Szrj 			 len);
262*38fd1498Szrj   lto_data_in_delete (data_in);
263*38fd1498Szrj }
264*38fd1498Szrj 
265*38fd1498Szrj /* Load streamed HSA functions summary and assign the summary to a function.  */
266*38fd1498Szrj 
267*38fd1498Szrj static void
ipa_hsa_read_summary(void)268*38fd1498Szrj ipa_hsa_read_summary (void)
269*38fd1498Szrj {
270*38fd1498Szrj   struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
271*38fd1498Szrj   struct lto_file_decl_data *file_data;
272*38fd1498Szrj   unsigned int j = 0;
273*38fd1498Szrj 
274*38fd1498Szrj   if (hsa_summaries == NULL)
275*38fd1498Szrj     hsa_summaries = new hsa_summary_t (symtab);
276*38fd1498Szrj 
277*38fd1498Szrj   while ((file_data = file_data_vec[j++]))
278*38fd1498Szrj     {
279*38fd1498Szrj       size_t len;
280*38fd1498Szrj       const char *data = lto_get_section_data (file_data, LTO_section_ipa_hsa,
281*38fd1498Szrj 					       NULL, &len);
282*38fd1498Szrj 
283*38fd1498Szrj       if (data)
284*38fd1498Szrj 	ipa_hsa_read_section (file_data, data, len);
285*38fd1498Szrj     }
286*38fd1498Szrj }
287*38fd1498Szrj 
288*38fd1498Szrj const pass_data pass_data_ipa_hsa =
289*38fd1498Szrj {
290*38fd1498Szrj   IPA_PASS, /* type */
291*38fd1498Szrj   "hsa", /* name */
292*38fd1498Szrj   OPTGROUP_OMP, /* optinfo_flags */
293*38fd1498Szrj   TV_IPA_HSA, /* tv_id */
294*38fd1498Szrj   0, /* properties_required */
295*38fd1498Szrj   0, /* properties_provided */
296*38fd1498Szrj   0, /* properties_destroyed */
297*38fd1498Szrj   0, /* todo_flags_start */
298*38fd1498Szrj   TODO_dump_symtab, /* todo_flags_finish */
299*38fd1498Szrj };
300*38fd1498Szrj 
301*38fd1498Szrj class pass_ipa_hsa : public ipa_opt_pass_d
302*38fd1498Szrj {
303*38fd1498Szrj public:
pass_ipa_hsa(gcc::context * ctxt)304*38fd1498Szrj   pass_ipa_hsa (gcc::context *ctxt)
305*38fd1498Szrj     : ipa_opt_pass_d (pass_data_ipa_hsa, ctxt,
306*38fd1498Szrj 		      NULL, /* generate_summary */
307*38fd1498Szrj 		      ipa_hsa_write_summary, /* write_summary */
308*38fd1498Szrj 		      ipa_hsa_read_summary, /* read_summary */
309*38fd1498Szrj 		      ipa_hsa_write_summary, /* write_optimization_summary */
310*38fd1498Szrj 		      ipa_hsa_read_summary, /* read_optimization_summary */
311*38fd1498Szrj 		      NULL, /* stmt_fixup */
312*38fd1498Szrj 		      0, /* function_transform_todo_flags_start */
313*38fd1498Szrj 		      NULL, /* function_transform */
314*38fd1498Szrj 		      NULL) /* variable_transform */
315*38fd1498Szrj     {}
316*38fd1498Szrj 
317*38fd1498Szrj   /* opt_pass methods: */
318*38fd1498Szrj   virtual bool gate (function *);
319*38fd1498Szrj 
execute(function *)320*38fd1498Szrj   virtual unsigned int execute (function *) { return process_hsa_functions (); }
321*38fd1498Szrj 
322*38fd1498Szrj }; // class pass_ipa_reference
323*38fd1498Szrj 
324*38fd1498Szrj bool
gate(function *)325*38fd1498Szrj pass_ipa_hsa::gate (function *)
326*38fd1498Szrj {
327*38fd1498Szrj   return hsa_gen_requested_p ();
328*38fd1498Szrj }
329*38fd1498Szrj 
330*38fd1498Szrj } // anon namespace
331*38fd1498Szrj 
332*38fd1498Szrj ipa_opt_pass_d *
make_pass_ipa_hsa(gcc::context * ctxt)333*38fd1498Szrj make_pass_ipa_hsa (gcc::context *ctxt)
334*38fd1498Szrj {
335*38fd1498Szrj   return new pass_ipa_hsa (ctxt);
336*38fd1498Szrj }
337