/* Interprocedural HSA pass: creation of HSA clones of functions.
2*38fd1498Szrj Copyright (C) 2015-2018 Free Software Foundation, Inc.
3*38fd1498Szrj Contributed by Martin Liska <mliska@suse.cz>
4*38fd1498Szrj
5*38fd1498Szrj This file is part of GCC.
6*38fd1498Szrj
7*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
8*38fd1498Szrj the terms of the GNU General Public License as published by the Free
9*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
10*38fd1498Szrj version.
11*38fd1498Szrj
12*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
14*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15*38fd1498Szrj for more details.
16*38fd1498Szrj
17*38fd1498Szrj You should have received a copy of the GNU General Public License
18*38fd1498Szrj along with GCC; see the file COPYING3. If not see
19*38fd1498Szrj <http://www.gnu.org/licenses/>. */
20*38fd1498Szrj
21*38fd1498Szrj /* Interprocedural HSA pass is responsible for creation of HSA clones.
22*38fd1498Szrj For all these HSA clones, we emit HSAIL instructions and pass processing
23*38fd1498Szrj is terminated. */
24*38fd1498Szrj
25*38fd1498Szrj #include "config.h"
26*38fd1498Szrj #include "system.h"
27*38fd1498Szrj #include "coretypes.h"
28*38fd1498Szrj #include "tm.h"
29*38fd1498Szrj #include "is-a.h"
30*38fd1498Szrj #include "hash-set.h"
31*38fd1498Szrj #include "vec.h"
32*38fd1498Szrj #include "tree.h"
33*38fd1498Szrj #include "tree-pass.h"
34*38fd1498Szrj #include "function.h"
35*38fd1498Szrj #include "basic-block.h"
36*38fd1498Szrj #include "gimple.h"
37*38fd1498Szrj #include "dumpfile.h"
38*38fd1498Szrj #include "gimple-pretty-print.h"
39*38fd1498Szrj #include "tree-streamer.h"
40*38fd1498Szrj #include "stringpool.h"
41*38fd1498Szrj #include "cgraph.h"
42*38fd1498Szrj #include "print-tree.h"
43*38fd1498Szrj #include "symbol-summary.h"
44*38fd1498Szrj #include "hsa-common.h"
45*38fd1498Szrj
46*38fd1498Szrj namespace {
47*38fd1498Szrj
48*38fd1498Szrj /* If NODE is not versionable, warn about not emiting HSAIL and return false.
49*38fd1498Szrj Otherwise return true. */
50*38fd1498Szrj
51*38fd1498Szrj static bool
check_warn_node_versionable(cgraph_node * node)52*38fd1498Szrj check_warn_node_versionable (cgraph_node *node)
53*38fd1498Szrj {
54*38fd1498Szrj if (!node->local.versionable)
55*38fd1498Szrj {
56*38fd1498Szrj warning_at (EXPR_LOCATION (node->decl), OPT_Whsa,
57*38fd1498Szrj "could not emit HSAIL for function %s: function cannot be "
58*38fd1498Szrj "cloned", node->name ());
59*38fd1498Szrj return false;
60*38fd1498Szrj }
61*38fd1498Szrj return true;
62*38fd1498Szrj }
63*38fd1498Szrj
64*38fd1498Szrj /* The function creates HSA clones for all functions that were either
65*38fd1498Szrj marked as HSA kernels or are callable HSA functions. Apart from that,
66*38fd1498Szrj we redirect all edges that come from an HSA clone and end in another
67*38fd1498Szrj HSA clone to connect these two functions. */
68*38fd1498Szrj
69*38fd1498Szrj static unsigned int
process_hsa_functions(void)70*38fd1498Szrj process_hsa_functions (void)
71*38fd1498Szrj {
72*38fd1498Szrj struct cgraph_node *node;
73*38fd1498Szrj
74*38fd1498Szrj if (hsa_summaries == NULL)
75*38fd1498Szrj hsa_summaries = new hsa_summary_t (symtab);
76*38fd1498Szrj
77*38fd1498Szrj FOR_EACH_DEFINED_FUNCTION (node)
78*38fd1498Szrj {
79*38fd1498Szrj hsa_function_summary *s = hsa_summaries->get (node);
80*38fd1498Szrj
81*38fd1498Szrj /* A linked function is skipped. */
82*38fd1498Szrj if (s->m_bound_function != NULL)
83*38fd1498Szrj continue;
84*38fd1498Szrj
85*38fd1498Szrj if (s->m_kind != HSA_NONE)
86*38fd1498Szrj {
87*38fd1498Szrj if (!check_warn_node_versionable (node))
88*38fd1498Szrj continue;
89*38fd1498Szrj cgraph_node *clone
90*38fd1498Szrj = node->create_virtual_clone (vec <cgraph_edge *> (),
91*38fd1498Szrj NULL, NULL, "hsa");
92*38fd1498Szrj TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
93*38fd1498Szrj clone->externally_visible = node->externally_visible;
94*38fd1498Szrj
95*38fd1498Szrj clone->force_output = true;
96*38fd1498Szrj hsa_summaries->link_functions (clone, node, s->m_kind, false);
97*38fd1498Szrj
98*38fd1498Szrj if (dump_file)
99*38fd1498Szrj fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n",
100*38fd1498Szrj clone->name (),
101*38fd1498Szrj s->m_kind == HSA_KERNEL ? "kernel" : "function");
102*38fd1498Szrj }
103*38fd1498Szrj else if (hsa_callable_function_p (node->decl)
104*38fd1498Szrj /* At this point, this is enough to identify clones for
105*38fd1498Szrj parallel, which for HSA would need to be kernels anyway. */
106*38fd1498Szrj && !DECL_ARTIFICIAL (node->decl))
107*38fd1498Szrj {
108*38fd1498Szrj if (!check_warn_node_versionable (node))
109*38fd1498Szrj continue;
110*38fd1498Szrj cgraph_node *clone
111*38fd1498Szrj = node->create_virtual_clone (vec <cgraph_edge *> (),
112*38fd1498Szrj NULL, NULL, "hsa");
113*38fd1498Szrj TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
114*38fd1498Szrj clone->externally_visible = node->externally_visible;
115*38fd1498Szrj
116*38fd1498Szrj if (!cgraph_local_p (node))
117*38fd1498Szrj clone->force_output = true;
118*38fd1498Szrj hsa_summaries->link_functions (clone, node, HSA_FUNCTION, false);
119*38fd1498Szrj
120*38fd1498Szrj if (dump_file)
121*38fd1498Szrj fprintf (dump_file, "Created a new HSA function clone: %s\n",
122*38fd1498Szrj clone->name ());
123*38fd1498Szrj }
124*38fd1498Szrj }
125*38fd1498Szrj
126*38fd1498Szrj /* Redirect all edges that are between HSA clones. */
127*38fd1498Szrj FOR_EACH_DEFINED_FUNCTION (node)
128*38fd1498Szrj {
129*38fd1498Szrj cgraph_edge *e = node->callees;
130*38fd1498Szrj
131*38fd1498Szrj while (e)
132*38fd1498Szrj {
133*38fd1498Szrj hsa_function_summary *src = hsa_summaries->get (node);
134*38fd1498Szrj if (src->m_kind != HSA_NONE && src->m_gpu_implementation_p)
135*38fd1498Szrj {
136*38fd1498Szrj hsa_function_summary *dst = hsa_summaries->get (e->callee);
137*38fd1498Szrj if (dst->m_kind != HSA_NONE && !dst->m_gpu_implementation_p)
138*38fd1498Szrj {
139*38fd1498Szrj e->redirect_callee (dst->m_bound_function);
140*38fd1498Szrj if (dump_file)
141*38fd1498Szrj fprintf (dump_file,
142*38fd1498Szrj "Redirecting edge to HSA function: %s->%s\n",
143*38fd1498Szrj xstrdup_for_dump (e->caller->name ()),
144*38fd1498Szrj xstrdup_for_dump (e->callee->name ()));
145*38fd1498Szrj }
146*38fd1498Szrj }
147*38fd1498Szrj
148*38fd1498Szrj e = e->next_callee;
149*38fd1498Szrj }
150*38fd1498Szrj }
151*38fd1498Szrj
152*38fd1498Szrj return 0;
153*38fd1498Szrj }
154*38fd1498Szrj
155*38fd1498Szrj /* Iterate all HSA functions and stream out HSA function summary. */
156*38fd1498Szrj
157*38fd1498Szrj static void
ipa_hsa_write_summary(void)158*38fd1498Szrj ipa_hsa_write_summary (void)
159*38fd1498Szrj {
160*38fd1498Szrj struct bitpack_d bp;
161*38fd1498Szrj struct cgraph_node *node;
162*38fd1498Szrj struct output_block *ob;
163*38fd1498Szrj unsigned int count = 0;
164*38fd1498Szrj lto_symtab_encoder_iterator lsei;
165*38fd1498Szrj lto_symtab_encoder_t encoder;
166*38fd1498Szrj
167*38fd1498Szrj if (!hsa_summaries)
168*38fd1498Szrj return;
169*38fd1498Szrj
170*38fd1498Szrj ob = create_output_block (LTO_section_ipa_hsa);
171*38fd1498Szrj encoder = ob->decl_state->symtab_node_encoder;
172*38fd1498Szrj ob->symbol = NULL;
173*38fd1498Szrj for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
174*38fd1498Szrj lsei_next_function_in_partition (&lsei))
175*38fd1498Szrj {
176*38fd1498Szrj node = lsei_cgraph_node (lsei);
177*38fd1498Szrj hsa_function_summary *s = hsa_summaries->get (node);
178*38fd1498Szrj
179*38fd1498Szrj if (s->m_kind != HSA_NONE)
180*38fd1498Szrj count++;
181*38fd1498Szrj }
182*38fd1498Szrj
183*38fd1498Szrj streamer_write_uhwi (ob, count);
184*38fd1498Szrj
185*38fd1498Szrj /* Process all of the functions. */
186*38fd1498Szrj for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
187*38fd1498Szrj lsei_next_function_in_partition (&lsei))
188*38fd1498Szrj {
189*38fd1498Szrj node = lsei_cgraph_node (lsei);
190*38fd1498Szrj hsa_function_summary *s = hsa_summaries->get (node);
191*38fd1498Szrj
192*38fd1498Szrj if (s->m_kind != HSA_NONE)
193*38fd1498Szrj {
194*38fd1498Szrj encoder = ob->decl_state->symtab_node_encoder;
195*38fd1498Szrj int node_ref = lto_symtab_encoder_encode (encoder, node);
196*38fd1498Szrj streamer_write_uhwi (ob, node_ref);
197*38fd1498Szrj
198*38fd1498Szrj bp = bitpack_create (ob->main_stream);
199*38fd1498Szrj bp_pack_value (&bp, s->m_kind, 2);
200*38fd1498Szrj bp_pack_value (&bp, s->m_gpu_implementation_p, 1);
201*38fd1498Szrj bp_pack_value (&bp, s->m_bound_function != NULL, 1);
202*38fd1498Szrj streamer_write_bitpack (&bp);
203*38fd1498Szrj if (s->m_bound_function)
204*38fd1498Szrj stream_write_tree (ob, s->m_bound_function->decl, true);
205*38fd1498Szrj }
206*38fd1498Szrj }
207*38fd1498Szrj
208*38fd1498Szrj streamer_write_char_stream (ob->main_stream, 0);
209*38fd1498Szrj produce_asm (ob, NULL);
210*38fd1498Szrj destroy_output_block (ob);
211*38fd1498Szrj }
212*38fd1498Szrj
213*38fd1498Szrj /* Read section in file FILE_DATA of length LEN with data DATA. */
214*38fd1498Szrj
215*38fd1498Szrj static void
ipa_hsa_read_section(struct lto_file_decl_data * file_data,const char * data,size_t len)216*38fd1498Szrj ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data,
217*38fd1498Szrj size_t len)
218*38fd1498Szrj {
219*38fd1498Szrj const struct lto_function_header *header
220*38fd1498Szrj = (const struct lto_function_header *) data;
221*38fd1498Szrj const int cfg_offset = sizeof (struct lto_function_header);
222*38fd1498Szrj const int main_offset = cfg_offset + header->cfg_size;
223*38fd1498Szrj const int string_offset = main_offset + header->main_size;
224*38fd1498Szrj struct data_in *data_in;
225*38fd1498Szrj unsigned int i;
226*38fd1498Szrj unsigned int count;
227*38fd1498Szrj
228*38fd1498Szrj lto_input_block ib_main ((const char *) data + main_offset,
229*38fd1498Szrj header->main_size, file_data->mode_table);
230*38fd1498Szrj
231*38fd1498Szrj data_in
232*38fd1498Szrj = lto_data_in_create (file_data, (const char *) data + string_offset,
233*38fd1498Szrj header->string_size, vNULL);
234*38fd1498Szrj count = streamer_read_uhwi (&ib_main);
235*38fd1498Szrj
236*38fd1498Szrj for (i = 0; i < count; i++)
237*38fd1498Szrj {
238*38fd1498Szrj unsigned int index;
239*38fd1498Szrj struct cgraph_node *node;
240*38fd1498Szrj lto_symtab_encoder_t encoder;
241*38fd1498Szrj
242*38fd1498Szrj index = streamer_read_uhwi (&ib_main);
243*38fd1498Szrj encoder = file_data->symtab_node_encoder;
244*38fd1498Szrj node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
245*38fd1498Szrj index));
246*38fd1498Szrj gcc_assert (node->definition);
247*38fd1498Szrj hsa_function_summary *s = hsa_summaries->get (node);
248*38fd1498Szrj
249*38fd1498Szrj struct bitpack_d bp = streamer_read_bitpack (&ib_main);
250*38fd1498Szrj s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2);
251*38fd1498Szrj s->m_gpu_implementation_p = bp_unpack_value (&bp, 1);
252*38fd1498Szrj bool has_tree = bp_unpack_value (&bp, 1);
253*38fd1498Szrj
254*38fd1498Szrj if (has_tree)
255*38fd1498Szrj {
256*38fd1498Szrj tree decl = stream_read_tree (&ib_main, data_in);
257*38fd1498Szrj s->m_bound_function = cgraph_node::get_create (decl);
258*38fd1498Szrj }
259*38fd1498Szrj }
260*38fd1498Szrj lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data,
261*38fd1498Szrj len);
262*38fd1498Szrj lto_data_in_delete (data_in);
263*38fd1498Szrj }
264*38fd1498Szrj
265*38fd1498Szrj /* Load streamed HSA functions summary and assign the summary to a function. */
266*38fd1498Szrj
267*38fd1498Szrj static void
ipa_hsa_read_summary(void)268*38fd1498Szrj ipa_hsa_read_summary (void)
269*38fd1498Szrj {
270*38fd1498Szrj struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
271*38fd1498Szrj struct lto_file_decl_data *file_data;
272*38fd1498Szrj unsigned int j = 0;
273*38fd1498Szrj
274*38fd1498Szrj if (hsa_summaries == NULL)
275*38fd1498Szrj hsa_summaries = new hsa_summary_t (symtab);
276*38fd1498Szrj
277*38fd1498Szrj while ((file_data = file_data_vec[j++]))
278*38fd1498Szrj {
279*38fd1498Szrj size_t len;
280*38fd1498Szrj const char *data = lto_get_section_data (file_data, LTO_section_ipa_hsa,
281*38fd1498Szrj NULL, &len);
282*38fd1498Szrj
283*38fd1498Szrj if (data)
284*38fd1498Szrj ipa_hsa_read_section (file_data, data, len);
285*38fd1498Szrj }
286*38fd1498Szrj }
287*38fd1498Szrj
288*38fd1498Szrj const pass_data pass_data_ipa_hsa =
289*38fd1498Szrj {
290*38fd1498Szrj IPA_PASS, /* type */
291*38fd1498Szrj "hsa", /* name */
292*38fd1498Szrj OPTGROUP_OMP, /* optinfo_flags */
293*38fd1498Szrj TV_IPA_HSA, /* tv_id */
294*38fd1498Szrj 0, /* properties_required */
295*38fd1498Szrj 0, /* properties_provided */
296*38fd1498Szrj 0, /* properties_destroyed */
297*38fd1498Szrj 0, /* todo_flags_start */
298*38fd1498Szrj TODO_dump_symtab, /* todo_flags_finish */
299*38fd1498Szrj };
300*38fd1498Szrj
301*38fd1498Szrj class pass_ipa_hsa : public ipa_opt_pass_d
302*38fd1498Szrj {
303*38fd1498Szrj public:
pass_ipa_hsa(gcc::context * ctxt)304*38fd1498Szrj pass_ipa_hsa (gcc::context *ctxt)
305*38fd1498Szrj : ipa_opt_pass_d (pass_data_ipa_hsa, ctxt,
306*38fd1498Szrj NULL, /* generate_summary */
307*38fd1498Szrj ipa_hsa_write_summary, /* write_summary */
308*38fd1498Szrj ipa_hsa_read_summary, /* read_summary */
309*38fd1498Szrj ipa_hsa_write_summary, /* write_optimization_summary */
310*38fd1498Szrj ipa_hsa_read_summary, /* read_optimization_summary */
311*38fd1498Szrj NULL, /* stmt_fixup */
312*38fd1498Szrj 0, /* function_transform_todo_flags_start */
313*38fd1498Szrj NULL, /* function_transform */
314*38fd1498Szrj NULL) /* variable_transform */
315*38fd1498Szrj {}
316*38fd1498Szrj
317*38fd1498Szrj /* opt_pass methods: */
318*38fd1498Szrj virtual bool gate (function *);
319*38fd1498Szrj
execute(function *)320*38fd1498Szrj virtual unsigned int execute (function *) { return process_hsa_functions (); }
321*38fd1498Szrj
322*38fd1498Szrj }; // class pass_ipa_reference
323*38fd1498Szrj
324*38fd1498Szrj bool
gate(function *)325*38fd1498Szrj pass_ipa_hsa::gate (function *)
326*38fd1498Szrj {
327*38fd1498Szrj return hsa_gen_requested_p ();
328*38fd1498Szrj }
329*38fd1498Szrj
330*38fd1498Szrj } // anon namespace
331*38fd1498Szrj
332*38fd1498Szrj ipa_opt_pass_d *
make_pass_ipa_hsa(gcc::context * ctxt)333*38fd1498Szrj make_pass_ipa_hsa (gcc::context *ctxt)
334*38fd1498Szrj {
335*38fd1498Szrj return new pass_ipa_hsa (ctxt);
336*38fd1498Szrj }
337