1 /* Lowering and expansion of OpenMP directives for HSA GPU agents.
2 
3    Copyright (C) 2013-2019 Free Software Foundation, Inc.
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "cgraph.h"
30 #include "pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimple-walk.h"
35 #include "tree-inline.h"
36 #include "langhooks.h"
37 #include "omp-general.h"
38 #include "omp-low.h"
39 #include "omp-grid.h"
40 #include "gimple-pretty-print.h"
41 
/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */
44 
45 tree
46 omp_grid_lastprivate_predicate (struct omp_for_data *fd)
47 {
48   /* When dealing with a gridified loop, we need to check up to three collapsed
49      iteration variables but they are not actually captured in this fd.
50      Fortunately, we can easily rely on HSA builtins to get this
51      information.  */
52 
53   tree id, size;
54   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
55       && gimple_omp_for_grid_intra_group (fd->for_stmt))
56     {
57       id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
58       size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
59     }
60   else
61     {
62       id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
63       size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
64     }
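  /* A sketch of the predicate built below: for each collapsed dimension D we
     emit roughly

       id (D) + 1 == size (D)

     and AND the comparisons together, so the predicate is true only for the
     last work-item in every dimension.  */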
65   tree cond = NULL;
66   for (int dim = 0; dim < fd->collapse; dim++)
67     {
68       tree dim_tree = build_int_cstu (unsigned_type_node, dim);
69       tree u1 = build_int_cstu (unsigned_type_node, 1);
70       tree c2
71 	= build2 (EQ_EXPR, boolean_type_node,
72 		  build2 (PLUS_EXPR, unsigned_type_node,
73 			  build_call_expr (id, 1, dim_tree), u1),
74 		  build_call_expr (size, 1, dim_tree));
75       if (cond)
76 	cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
77       else
78 	cond = c2;
79     }
80   return cond;
81 }
82 
/* Structure describing the basic properties of the loop we are analyzing,
   both when deciding whether it can be gridified and once it is being
   gridified.  */
85 
86 struct grid_prop
87 {
88   /* True when we are doing tiling gridification, i.e. when there is a distinct
89      distribute loop over groups and a loop construct over work-items.  False
90      when distribute and parallel for loops form a combined construct.  */
91   bool tiling;
92   /* Location of the target construct for optimization information
93      messages.  */
94   dump_user_location_t target_loc;
  /* The value of the collapse clause of the involved loops.  It must be the
     same for all of them for gridification to take place.  */
97   size_t collapse;
  /* Group sizes, if requested by the user, or NULL if not requested.  */
99   tree group_sizes[3];
100 };
101 
102 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
103   "gridified HSA kernel because "
104 
/* Return true if STMT is an assignment of a register-type value into a local
106    VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
107    any of the trees specifying group sizes there.  */
108 
109 static bool
110 grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
111 {
112   gassign *assign = dyn_cast <gassign *> (stmt);
113   if (!assign)
114     return false;
115   if (gimple_clobber_p (assign))
116     return true;
117   tree lhs = gimple_assign_lhs (assign);
118   if (!VAR_P (lhs)
119       || !is_gimple_reg_type (TREE_TYPE (lhs))
120       || is_global_var (lhs))
121     return false;
122   if (grid)
123     for (unsigned i = 0; i < grid->collapse; i++)
124       if (lhs == grid->group_sizes[i])
125 	return false;
126   return true;
127 }
128 
129 /* Return true if all statements in SEQ are assignments to local register-type
130    variables that do not hold group size information.  */
131 
132 static bool
133 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
134 {
135   if (!seq)
136     return true;
137 
138   gimple_stmt_iterator gsi;
139   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
140     if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
141       return false;
142   return true;
143 }
144 
145 /* Scan statements in SEQ and call itself recursively on any bind.  GRID
146    describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during the whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and a single OMP statement are encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  NAME is the
   name of the enclosing construct, used in dump messages.  */
151 
152 static bool
153 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
154 					  const char *name, gimple **ret)
155 {
156   gimple_stmt_iterator gsi;
157   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
158     {
159       gimple *stmt = gsi_stmt (gsi);
160 
161       if (grid_safe_assignment_p (stmt, grid))
162 	continue;
163       if (gbind *bind = dyn_cast <gbind *> (stmt))
164 	{
165 	  gimple_seq bind_body = gimple_bind_body (bind);
166 	  if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
167 							 ret))
168 	      return false;
169 	}
170       else if (is_gimple_omp (stmt))
171 	{
172 	  if (*ret)
173 	    {
174 	      if (dump_enabled_p ())
175 		{
176 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
177 				   GRID_MISSED_MSG_PREFIX "%s construct "
178 				   "contains multiple OpenMP constructs\n",
179 				   name);
180 		  dump_printf_loc (MSG_NOTE, *ret,
181 				   "The first OpenMP construct within "
182 				   "a parallel\n");
183 		  dump_printf_loc (MSG_NOTE, stmt,
184 				   "The second OpenMP construct within "
185 				   "a parallel\n");
186 		}
187 	      return false;
188 	    }
189 	  *ret = stmt;
190 	}
191       else
192 	{
193 	  if (dump_enabled_p ())
194 	    {
195 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
196 			       GRID_MISSED_MSG_PREFIX "%s construct contains "
197 			       "a complex statement\n", name);
198 	      dump_printf_loc (MSG_NOTE, stmt,
199 			       "This statement cannot be analyzed for "
200 			       "gridification\n");
201 	    }
202 	  return false;
203 	}
204     }
205   return true;
206 }
207 
208 /* Scan statements in SEQ and make sure that it and any binds in it contain
209    only assignments to local register-type variables (that do not overwrite
210    group size information) and one OMP construct.  If so, return that
211    construct, otherwise return NULL.  GRID describes hitherto discovered
212    properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and the function fails, use NAME to dump a note with the
214    reason for failure.  */
215 
216 static gimple *
217 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
218 					const char *name)
219 {
220   if (!seq)
221     {
222       if (dump_enabled_p ())
223 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
224 			 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
225 			 name);
226       return NULL;
227     }
228 
229   gimple *ret = NULL;
230   if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
231     {
232       if (!ret && dump_enabled_p ())
233 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
234 			 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
235 			 " any other OpenMP construct\n", name);
236       return ret;
237     }
238   else
239     return NULL;
240 }
241 
/* Walker function looking for statements that there is no point in gridifying
   (and for noreturn function calls, which we cannot handle).  Return non-NULL
   if such a statement is found.  */
245 
246 static tree
247 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
248 				   bool *handled_ops_p,
249 				   struct walk_stmt_info *wi)
250 {
251   *handled_ops_p = false;
252   gimple *stmt = gsi_stmt (*gsi);
253   switch (gimple_code (stmt))
254     {
255     case GIMPLE_CALL:
256       if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
257 	{
258 	  *handled_ops_p = true;
259 	  wi->info = stmt;
260 	  return error_mark_node;
261 	}
262       break;
263 
264     /* We may reduce the following list if we find a way to implement the
       clauses, but for now there is no point in trying further.  */
266     case GIMPLE_OMP_CRITICAL:
267     case GIMPLE_OMP_TASKGROUP:
268     case GIMPLE_OMP_TASK:
269     case GIMPLE_OMP_SECTION:
270     case GIMPLE_OMP_SECTIONS:
271     case GIMPLE_OMP_SECTIONS_SWITCH:
272     case GIMPLE_OMP_TARGET:
273     case GIMPLE_OMP_ORDERED:
274       *handled_ops_p = true;
275       wi->info = stmt;
276       return error_mark_node;
277     default:
278       break;
279     }
280   return NULL;
281 }
282 
/* Examine the clauses of omp parallel statement PAR and if any of them
   prevents gridification, issue a missed-optimization diagnostic and return
   false, otherwise return true.  TLOC is the location of the target construct,
   used for optimization information messages.  */
287 
288 static bool
289 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
290 {
291   tree clauses = gimple_omp_parallel_clauses (par);
292   while (clauses)
293     {
294       switch (OMP_CLAUSE_CODE (clauses))
295 	{
296 	case OMP_CLAUSE_NUM_THREADS:
297 	  if (dump_enabled_p ())
298 	    {
299 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
			       GRID_MISSED_MSG_PREFIX "there is "
301 			       "a num_threads clause of the parallel "
302 			       "construct\n");
303 	      dump_printf_loc (MSG_NOTE, par,
304 			       "Parallel construct has a num_threads clause\n");
305 	    }
306 	  return false;
307 
308 	case OMP_CLAUSE_REDUCTION:
309 	  if (dump_enabled_p ())
310 	    {
311 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
312 			       GRID_MISSED_MSG_PREFIX "a reduction clause "
			       "is present\n");
314 	      dump_printf_loc (MSG_NOTE, par,
315 			       "Parallel construct has a reduction clause\n");
316 	    }
317 	  return false;
318 
319 	default:
320 	  break;
321 	}
322       clauses = OMP_CLAUSE_CHAIN (clauses);
323     }
324   return true;
325 }
326 
327 /* Examine clauses and the body of omp loop statement GFOR and if something
   prevents gridification, issue a missed-optimization diagnostic and return
329    false, otherwise return true.  GRID describes hitherto discovered properties
330    of the loop that is evaluated for possible gridification.  */
331 
332 static bool
333 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
334 {
335   if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
336 						 grid))
337     {
338       if (dump_enabled_p ())
339 	{
340 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
341 			   GRID_MISSED_MSG_PREFIX "the inner loop "
			   "bounds computation contains a complex "
343 			   "statement\n");
344 	  dump_printf_loc (MSG_NOTE, gfor,
345 			   "Loop construct cannot be analyzed for "
346 			   "gridification\n");
347 	}
348       return false;
349     }
350 
351   tree clauses = gimple_omp_for_clauses (gfor);
352   while (clauses)
353     {
354       switch (OMP_CLAUSE_CODE (clauses))
355 	{
356 	case OMP_CLAUSE_SCHEDULE:
357 	  if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
358 	    {
359 	      if (dump_enabled_p ())
360 		{
361 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
362 				   GRID_MISSED_MSG_PREFIX "the inner loop "
363 				   "has a non-automatic schedule clause\n");
364 		  dump_printf_loc (MSG_NOTE, gfor,
				   "Loop construct has a non-automatic "
366 				   "schedule clause\n");
367 		}
368 	      return false;
369 	    }
370 	  break;
371 
372 	case OMP_CLAUSE_REDUCTION:
373 	  if (dump_enabled_p ())
374 	    {
375 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
376 			       GRID_MISSED_MSG_PREFIX "a reduction "
			       "clause is present\n");
378 	      dump_printf_loc (MSG_NOTE, gfor,
			       "Loop construct has a reduction "
380 			       "clause\n");
381 	    }
382 	  return false;
383 
384 	default:
385 	  break;
386 	}
387       clauses = OMP_CLAUSE_CHAIN (clauses);
388     }
389   struct walk_stmt_info wi;
390   memset (&wi, 0, sizeof (wi));
391   if (walk_gimple_seq (gimple_omp_body (gfor),
392 		       grid_find_ungridifiable_statement,
393 		       NULL, &wi))
394     {
395       gimple *bad = (gimple *) wi.info;
396       if (dump_enabled_p ())
397 	{
398 	  if (is_gimple_call (bad))
399 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
			     GRID_MISSED_MSG_PREFIX "the inner loop contains "
			     "a call to a noreturn function\n");
402 	  else
403 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
404 			     GRID_MISSED_MSG_PREFIX "the inner loop contains "
405 			     "statement %s which cannot be transformed\n",
406 			     gimple_code_name[(int) gimple_code (bad)]);
407 	  dump_printf_loc (MSG_NOTE, bad,
408 			   "This statement cannot be analyzed for "
409 			   "gridification\n");
410 	}
411       return false;
412     }
413   return true;
414 }
415 
/* Given a distribute omp construct represented by DIST, which in the original
   source forms a combined construct with a looping construct, return true if it
418    can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
419    describes hitherto discovered properties of the loop that is evaluated for
420    possible gridification.  */
421 
422 static bool
423 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
424 {
425   dump_user_location_t tloc = grid->target_loc;
426   gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
427 							 grid, "distribute");
428   gomp_parallel *par;
429   if (!stmt
430       || !(par = dyn_cast <gomp_parallel *> (stmt))
431       || !grid_parallel_clauses_gridifiable (par, tloc))
432     return false;
433 
434   stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
435 						 "parallel");
436   gomp_for *gfor;
437   if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
438     return false;
439 
440   if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
441     {
442       if (dump_enabled_p ())
443 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
444 			 GRID_MISSED_MSG_PREFIX "the inner loop is not "
445 			 "a simple for loop\n");
446       return false;
447     }
448   gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
449 
450   if (!grid_inner_loop_gridifiable_p (gfor, grid))
451     return false;
452 
453   return true;
454 }
455 
456 /* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a combined construct.  GRID describes hitherto discovered
459    properties of the loop that is evaluated for possible gridification.  */
460 
461 static bool
462 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
463 {
464   if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
465     {
466       if (dump_enabled_p ())
467 	{
468 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
469 			   GRID_MISSED_MSG_PREFIX "an inner loop is not "
470 			   "a simple for loop\n");
471 	  dump_printf_loc (MSG_NOTE, gfor,
472 			   "This statement is not a simple for loop\n");
473 	}
474       return false;
475     }
476 
477   if (!grid_inner_loop_gridifiable_p (gfor, grid))
478     return false;
479 
480   if (gimple_omp_for_collapse (gfor) != grid->collapse)
481     {
482       if (dump_enabled_p ())
483 	{
484 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
485 			   GRID_MISSED_MSG_PREFIX "an inner loop does not "
			   "use the same collapse clause\n");
487 	  dump_printf_loc (MSG_NOTE, gfor,
488 			   "Loop construct uses a different collapse clause\n");
489 	}
490       return false;
491     }
492 
493   struct omp_for_data fd;
494   struct omp_for_data_loop *loops
495     = (struct omp_for_data_loop *)alloca (grid->collapse
496 					  * sizeof (struct omp_for_data_loop));
497   omp_extract_for_data (gfor, &fd, loops);
498   for (unsigned i = 0; i < grid->collapse; i++)
499     {
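      /* Compute the iteration count of this dimension of the inner loop,
	 roughly (n2 - n1 + step +- 1) / step, and require that it equals the
	 step of the corresponding distribute loop recorded in
	 grid->group_sizes, i.e. that the inner loop iterates exactly over one
	 tile.  */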
500       tree itype, type = TREE_TYPE (fd.loops[i].v);
501       if (POINTER_TYPE_P (type))
502 	itype = signed_type_for (type);
503       else
504 	itype = type;
505 
506       tree n1 = fold_convert (itype, fd.loops[i].n1);
507       tree n2 = fold_convert (itype, fd.loops[i].n2);
508       tree t = build_int_cst (itype,
509 			      (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
510       t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
511       t = fold_build2 (PLUS_EXPR, itype, t, n2);
512       t = fold_build2 (MINUS_EXPR, itype, t, n1);
513       if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
514 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
515 			 fold_build1 (NEGATE_EXPR, itype, t),
516 			 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
517       else
518 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
519 
520       if (!operand_equal_p (grid->group_sizes[i], t, 0))
521 	{
522 	  if (dump_enabled_p ())
523 	    {
524 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
525 			       GRID_MISSED_MSG_PREFIX "the distribute and "
526 			       "an internal loop do not agree on tile size\n");
527 	      dump_printf_loc (MSG_NOTE, gfor,
528 			       "Loop construct does not seem to loop over "
529 			       "a tile size\n");
530 	    }
531 	  return false;
532 	}
533     }
534   return true;
535 }
536 
537 /* Facing a call to FNDECL in the body of a distribute construct, return true
538    if we can handle it or false if it precludes gridification.  */
539 
540 static bool
541 grid_call_permissible_in_distribute_p (tree fndecl)
542 {
543   if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
544     return true;
545 
546   const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
547   if (strstr (name, "omp_") != name)
548     return false;
549 
550   if ((strcmp (name, "omp_get_thread_num") == 0)
551       || (strcmp (name, "omp_get_num_threads") == 0)
552       || (strcmp (name, "omp_get_num_teams") == 0)
553       || (strcmp (name, "omp_get_team_num") == 0)
554       || (strcmp (name, "omp_get_level") == 0)
555       || (strcmp (name, "omp_get_active_level") == 0)
556       || (strcmp (name, "omp_in_parallel") == 0))
557     return true;
558 
559   return false;
560 }
561 
562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
563    of a distribute construct that is pointed at by GSI, modify it as necessary
564    for gridification.  If the statement itself got removed, return true.  */
565 
566 static bool
567 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
568 {
569   gimple *stmt = gsi_stmt (*gsi);
570   tree fndecl = gimple_call_fndecl (stmt);
571   gcc_checking_assert (stmt);
572   if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
573     return false;
574 
575   const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
576   if ((strcmp (name, "omp_get_thread_num") == 0)
577       || (strcmp (name, "omp_get_level") == 0)
578       || (strcmp (name, "omp_get_active_level") == 0)
579       || (strcmp (name, "omp_in_parallel") == 0))
580     {
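      /* In the distribute part of a gridified kernel these queries are known
	 to evaluate to zero (or false), so replace the call with a store of a
	 zero constant and remove it.  */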
581       tree lhs = gimple_call_lhs (stmt);
582       if (lhs)
583 	{
584 	  gassign *assign
585 	    = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
586 	  gsi_insert_before (gsi, assign, GSI_SAME_STMT);
587 	}
588       gsi_remove (gsi, true);
589       return true;
590     }
591 
  /* The rest of the omp functions can stay as they are, the HSA back-end will
     handle them correctly.  */
594   gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
595 		       || (strcmp (name, "omp_get_num_teams") == 0)
596 		       || (strcmp (name, "omp_get_team_num") == 0));
597   return false;
598 }
599 
600 /* Given a sequence of statements within a distribute omp construct or a
601    parallel construct, which in the original source does not form a compound
602    construct with a looping construct, return true if it does not prevent us
603    from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
604    describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */
608 
609 static bool
610 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
611 				  bool in_parallel)
612 {
613   gimple_stmt_iterator gsi;
614   for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
615     {
616       gimple *stmt = gsi_stmt (gsi);
617 
618       if (grid_safe_assignment_p (stmt, grid)
619 	  || gimple_code (stmt) == GIMPLE_GOTO
620 	  || gimple_code (stmt) == GIMPLE_LABEL
621 	  || gimple_code (stmt) == GIMPLE_COND)
622 	continue;
623       else if (gbind *bind = dyn_cast <gbind *> (stmt))
624 	{
625 	  if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
626 						 grid, in_parallel))
627 	    return false;
628 	  continue;
629 	}
630       else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
631 	{
632 	  if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
633 	    {
634 	      if (dump_enabled_p ())
635 		{
636 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
637 				   GRID_MISSED_MSG_PREFIX "the distribute "
638 				   "construct contains a try..catch region\n");
639 		  dump_printf_loc (MSG_NOTE, try_stmt,
640 				   "This statement cannot be analyzed for "
641 				   "tiled gridification\n");
642 		}
643 	      return false;
644 	    }
645 	  if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
646 						 grid, in_parallel))
647 	    return false;
648 	  if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
649 						 grid, in_parallel))
650 	    return false;
651 	  continue;
652 	}
653       else if (is_gimple_call (stmt))
654 	{
655 	  tree fndecl = gimple_call_fndecl (stmt);
656 	  if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
657 	    continue;
658 
659 	  if (dump_enabled_p ())
660 	    {
661 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
662 			       GRID_MISSED_MSG_PREFIX "the distribute "
663 			       "construct contains a call\n");
664 	      dump_printf_loc (MSG_NOTE, stmt,
665 			       "This statement cannot be analyzed for "
666 			       "tiled gridification\n");
667 	    }
668 	  return false;
669 	}
670       else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
671 	{
672 	  if (in_parallel)
673 	    {
674 	      if (dump_enabled_p ())
675 		{
676 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
677 				   GRID_MISSED_MSG_PREFIX "a parallel "
678 				   "construct contains another parallel "
679 				   "construct\n");
680 		  dump_printf_loc (MSG_NOTE, stmt,
681 				   "This parallel construct is nested in "
682 				   "another one\n");
683 		}
684 	      return false;
685 	    }
686 	  if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
687 	      || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
688 						    grid, true))
689 	    return false;
690 	}
691       else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
692 	{
693 	  if (!in_parallel)
694 	    {
695 	      if (dump_enabled_p ())
696 		{
697 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
698 				   GRID_MISSED_MSG_PREFIX "a loop "
699 				   "construct is not nested within a parallel "
700 				   "construct\n");
701 		  dump_printf_loc (MSG_NOTE, stmt,
702 				   "This loop construct is not nested in "
703 				   "a parallel construct\n");
704 		}
705 	      return false;
706 	    }
707 	  if (!grid_gfor_follows_tiling_pattern (gfor, grid))
708 	    return false;
709 	}
710       else
711 	{
712 	  if (dump_enabled_p ())
713 	    {
714 	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
715 			       GRID_MISSED_MSG_PREFIX "the distribute "
716 			       "construct contains a complex statement\n");
717 	      dump_printf_loc (MSG_NOTE, stmt,
718 			       "This statement cannot be analyzed for "
719 			       "tiled gridification\n");
720 	    }
721 	  return false;
722 	}
723     }
  return true;
725 }
726 
727 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
728    return true, otherwise return false.  In the case of success, also fill in
729    GRID with information describing the kernel grid.  */
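/* The simple (combined) gridifiable pattern corresponds to source code along
   the lines of

     #pragma omp target
     #pragma omp teams
     #pragma omp distribute parallel for
     for (...)
       ...

   while in the tiling pattern the distribute construct and the parallel for
   construct are separate, with the inner loop iterating over a single tile.  */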
730 
731 static bool
732 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
733 {
734   if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
735     return false;
736 
737   dump_user_location_t tloc = target;
738   grid->target_loc = tloc;
739   gimple *stmt
740     = grid_find_single_omp_among_assignments (gimple_omp_body (target),
741 					      grid, "target");
742   if (!stmt)
743     return false;
744   gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
745   tree group_size = NULL;
746   if (!teams)
747     {
748       if (dump_enabled_p ())
749 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
750 			 GRID_MISSED_MSG_PREFIX "it does not have a sole "
751 			 "teams construct in it.\n");
752       return false;
753     }
754 
755   tree clauses = gimple_omp_teams_clauses (teams);
756   while (clauses)
757     {
758       switch (OMP_CLAUSE_CODE (clauses))
759 	{
760 	case OMP_CLAUSE_NUM_TEAMS:
761 	  if (dump_enabled_p ())
762 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
763 			     GRID_MISSED_MSG_PREFIX "the teams construct "
			     "contains a num_teams clause\n");
765 	  return false;
766 
767 	case OMP_CLAUSE_REDUCTION:
768 	  if (dump_enabled_p ())
769 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
770 			     GRID_MISSED_MSG_PREFIX "a reduction "
			     "clause is present\n");
772 	  return false;
773 
774 	case OMP_CLAUSE_THREAD_LIMIT:
775 	  if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
776 	    group_size = OMP_CLAUSE_OPERAND (clauses, 0);
777 	  break;
778 
779 	default:
780 	  break;
781 	}
782       clauses = OMP_CLAUSE_CHAIN (clauses);
783     }
784 
785   stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
786 						 "teams");
787   if (!stmt)
788     return false;
789   gomp_for *dist = dyn_cast <gomp_for *> (stmt);
790   if (!dist)
791     {
792       if (dump_enabled_p ())
793 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
794 			 GRID_MISSED_MSG_PREFIX "the teams construct does not "
795 			 "have a single distribute construct in it.\n");
796       return false;
797     }
798 
799   gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
800 
801   grid->collapse = gimple_omp_for_collapse (dist);
802   if (grid->collapse > 3)
803     {
804       if (dump_enabled_p ())
805 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
806 			 GRID_MISSED_MSG_PREFIX "the distribute construct "
			 "contains a collapse clause with a parameter greater "
808 			 "than 3\n");
809       return false;
810     }
811 
812   struct omp_for_data fd;
813   struct omp_for_data_loop *dist_loops
814     = (struct omp_for_data_loop *)alloca (grid->collapse
815 					  * sizeof (struct omp_for_data_loop));
816   omp_extract_for_data (dist, &fd, dist_loops);
817   if (fd.chunk_size)
818     {
819       if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
820 	{
821 	  if (dump_enabled_p ())
822 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
823 			     GRID_MISSED_MSG_PREFIX "the teams "
824 			     "thread limit is different from distribute "
825 			     "schedule chunk\n");
826 	  return false;
827 	}
828       group_size = fd.chunk_size;
829     }
830   if (group_size && grid->collapse > 1)
831     {
832       if (dump_enabled_p ())
833 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
834 			 GRID_MISSED_MSG_PREFIX "group size cannot be "
835 			 "set using thread_limit or schedule clauses "
836 			 "when also using a collapse clause greater than 1\n");
837       return false;
838     }
839 
840   if (gimple_omp_for_combined_p (dist))
841     {
842       grid->tiling = false;
843       grid->group_sizes[0] = group_size;
844       for (unsigned i = 1; i < grid->collapse; i++)
845 	grid->group_sizes[i] = NULL;
846       return grid_dist_follows_simple_pattern (dist, grid);
847     }
848   else
849     {
850       grid->tiling = true;
851       if (group_size)
852 	{
853 	  if (dump_enabled_p ())
854 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
855 			     GRID_MISSED_MSG_PREFIX "group size cannot be set "
856 			     "using thread_limit or schedule clauses when "
857 			     "distribute and loop constructs do not form "
858 			     "one combined construct\n");
859 	  return false;
860 	}
861       for (unsigned i = 0; i < grid->collapse; i++)
862 	{
863 	  if (fd.loops[i].cond_code == GT_EXPR)
864 	    grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
865 						TREE_TYPE (fd.loops[i].step),
866 						fd.loops[i].step);
867 	  else
868 	    grid->group_sizes[i] = fd.loops[i].step;
869 	}
870       return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
871 					       false);
872     }
873 }
874 
875 /* Operand walker, used to remap pre-body declarations according to a hash map
876    provided in DATA.  */
877 
878 static tree
879 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
880 {
881   tree t = *tp;
882 
883   if (DECL_P (t) || TYPE_P (t))
884     *walk_subtrees = 0;
885   else
886     *walk_subtrees = 1;
887 
888   if (VAR_P (t))
889     {
890       struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
891       hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
892       tree *repl = declmap->get (t);
893       if (repl)
894 	*tp = *repl;
895     }
896   return NULL_TREE;
897 }
898 
/* Identifiers of segments into which a particular variable should be placed
900    when gridifying.  */
901 
902 enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
903 		       GRID_SEGMENT_GLOBAL};
904 
/* Mark VAR so that it is eventually placed into SEGMENT.  */
908 
909 static void
910 grid_mark_variable_segment (tree var, enum grid_var_segment segment)
911 {
  /* Putting non-addressable variables into a segment would require that we
     re-gimplify all their uses.  Fortunately, we do not have to do this
     because if they are not addressable, it means they are not used in atomic
     or parallel statements and so relaxed GPU consistency rules mean we can
     just keep them private.  */
917   if (!TREE_ADDRESSABLE (var))
918     return;
919 
920   switch (segment)
921     {
922     case GRID_SEGMENT_GROUP:
923       DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
924 					 NULL, DECL_ATTRIBUTES (var));
925       break;
926     case GRID_SEGMENT_GLOBAL:
927       DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
928 					 NULL, DECL_ATTRIBUTES (var));
929       break;
930     default:
931       gcc_unreachable ();
932     }
933 
934   if (!TREE_STATIC (var))
935     {
936       TREE_STATIC (var) = 1;
937       const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
938       SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
939       varpool_node::finalize_decl (var);
940     }
941 
942 }
943 
944 /* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting the mapping of operands in WI and
946    remapping operands as necessary.  Add any new temporaries to TGT_BIND.
947    Return the first statement that does not conform to grid_safe_assignment_p
948    or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
949    variables in traversed bind statements so that they are put into the
950    appropriate segment.  */
951 
952 static gimple *
953 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
954 				     gbind *tgt_bind,
955 				     enum grid_var_segment var_segment,
956 				     struct walk_stmt_info *wi)
957 {
958   hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
959   gimple_stmt_iterator gsi;
960   for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
961     {
962       gimple *stmt = gsi_stmt (gsi);
963       if (gbind *bind = dyn_cast <gbind *> (stmt))
964 	{
965 	  gimple *r = grid_copy_leading_local_assignments
966 	    (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
967 
968 	  if (var_segment != GRID_SEGMENT_PRIVATE)
969 	    for (tree var = gimple_bind_vars (bind);
970 		 var;
971 		 var = DECL_CHAIN (var))
972 	      grid_mark_variable_segment (var, var_segment);
973 	  if (r)
974 	    return r;
975 	  else
976 	    continue;
977 	}
978       if (!grid_safe_assignment_p (stmt, NULL))
979 	return stmt;
980       tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
981       tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
982 				 TREE_TYPE (lhs));
983       DECL_CONTEXT (repl) = current_function_decl;
984       gimple_bind_append_vars (tgt_bind, repl);
985 
986       declmap->put (lhs, repl);
987       gassign *copy = as_a <gassign *> (gimple_copy (stmt));
988       walk_gimple_op (copy, grid_remap_prebody_decls, wi);
989       gsi_insert_before (dst, copy, GSI_SAME_STMT);
990     }
991   return NULL;
992 }
993 
994 /* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */
996 
997 static tree
998 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
999 			struct walk_stmt_info *)
1000 {
1001   *handled_ops_p = false;
1002   gimple *stmt = gsi_stmt (*gsi);
1003   if (gimple_code (stmt) == GIMPLE_OMP_FOR
1004       && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
1005   {
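    /* Forcing a safelen of one effectively serializes any simd construct
       inside the gridified kernel body; each work-item then executes its
       simd iterations sequentially.  */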
1006     gomp_for *loop = as_a <gomp_for *> (stmt);
1007     tree clauses = gimple_omp_for_clauses (loop);
1008     tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
1009     if (cl)
1010       OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
1011     else
1012       {
1013 	tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
1014 	OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
1015 	OMP_CLAUSE_CHAIN (c) = clauses;
1016 	gimple_omp_for_set_clauses (loop, c);
1017       }
1018   }
1019   return NULL_TREE;
1020 }
1021 
1022 /* Given a PARLOOP that is a normal for looping construct but also a part of a
1023    combined construct with a simd loop, eliminate the simd loop.  */
1024 
1025 static void
1026 grid_eliminate_combined_simd_part (gomp_for *parloop)
1027 {
1028   struct walk_stmt_info wi;
1029 
1030   memset (&wi, 0, sizeof (wi));
1031   wi.val_only = true;
1032   enum gf_mask msk = GF_OMP_FOR_SIMD;
1033   wi.info = (void *) &msk;
1034   walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
1035   gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD is the only statement in the parallel loop.  */
1037   gcc_assert (stmt
1038 	      && gimple_code (stmt) == GIMPLE_OMP_FOR
1039 	      && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
1040 	      && gimple_omp_for_combined_into_p (stmt)
1041 	      && !gimple_omp_for_combined_p (stmt));
1042   gomp_for *simd = as_a <gomp_for *> (stmt);
1043 
1044   /* Copy over the iteration properties because the body refers to the index in
     the bottom-most loop.  */
1046   unsigned i, collapse = gimple_omp_for_collapse (parloop);
1047   gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
1048   for (i = 0; i < collapse; i++)
1049     {
1050       gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
1051       gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
1052       gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
1053       gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
1054     }
1055 
  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
1057   while (*tgt)
1058     tgt = &OMP_CLAUSE_CHAIN (*tgt);
1059 
1060   /* Copy over all clauses, except for linear clauses, which are turned into
1061      private clauses, and all other simd-specific clauses, which are
1062      ignored.  */
1063   tree *pc = gimple_omp_for_clauses_ptr (simd);
1064   while (*pc)
1065     {
1066       tree c = *pc;
      switch (OMP_CLAUSE_CODE (c))
1068 	{
1069 	case OMP_CLAUSE_LINEAR:
1070 	  {
1071 	    tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
1072 	    OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
1073 	    OMP_CLAUSE_CHAIN (priv) = NULL;
1074 	    *tgt = priv;
1075 	    tgt = &OMP_CLAUSE_CHAIN (priv);
1076 	    pc = &OMP_CLAUSE_CHAIN (c);
1077 	    break;
1078 	  }
1079 
1080 	case OMP_CLAUSE_SAFELEN:
1081 	case OMP_CLAUSE_SIMDLEN:
1082 	case OMP_CLAUSE_ALIGNED:
1083 	  pc = &OMP_CLAUSE_CHAIN (c);
1084 	  break;
1085 
1086 	default:
1087 	  *pc = OMP_CLAUSE_CHAIN (c);
1088 	  OMP_CLAUSE_CHAIN (c) = NULL;
1089 	  *tgt = c;
1090 	  tgt = &OMP_CLAUSE_CHAIN (c);
1091 	  break;
1092 	}
1093     }
1094 
1095   /* Finally, throw away the simd and mark the parallel loop as not
1096      combined.  */
1097   gimple_omp_set_body (parloop, gimple_omp_body (simd));
1098   gimple_omp_for_set_combined_p (parloop, false);
1099 }
1100 
/* Statement walker function marking all loops as grid loops representing
   threads of a particular thread group and handling their lastprivate
   clauses.  */
1103 
1104 static tree
1105 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
1106 			struct walk_stmt_info *wi_in)
1107 {
1108   *handled_ops_p = false;
1109   if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
1110     {
1111       *handled_ops_p = true;
1112       gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
1113       gimple_omp_for_set_grid_intra_group (loop, true);
1114       if (gimple_omp_for_combined_p (loop))
1115 	grid_eliminate_combined_simd_part (loop);
1116 
1117       struct walk_stmt_info body_wi;
1118       memset (&body_wi, 0, sizeof (body_wi));
1119       walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
1120 			   grid_process_grid_body, NULL, &body_wi);
1121 
1122       gbind *bind = (gbind *) wi_in->info;
1123       tree c;
1124       for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
1125 	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
1126 	  {
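	    /* Communicate the lastprivate value through a fresh copy GV
	       placed in the group segment: the lastprivate sequence of the
	       clause stores into GV, and after the loop the original variable
	       OV is restored from GV.  */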
1127 	    push_gimplify_context ();
1128 	    tree ov = OMP_CLAUSE_DECL (c);
1129 	    tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
1130 				    TREE_TYPE (ov));
1131 
1132 	    grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
1133 	    DECL_CONTEXT (gv) = current_function_decl;
1134 	    gimple_bind_append_vars (bind, gv);
1135 	    tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
1136 	    gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
1137 	    x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
1138 	    gimple_seq l = NULL;
1139 	    gimplify_and_add (x, &l);
1140 	    gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
1141 	    pop_gimplify_context (bind);
1142 	  }
1143     }
1144   return NULL_TREE;
1145 }
1146 
1147 /* Statement walker function marking all parallels as grid_phony and loops as
1148    grid ones representing threads of a particular thread group.  */
1149 
1150 static tree
1151 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
1152 				      bool *handled_ops_p,
1153 				      struct walk_stmt_info *wi_in)
1154 {
1155   *handled_ops_p = false;
1156   wi_in->removed_stmt = false;
1157   gimple *stmt = gsi_stmt (*gsi);
1158   if (gbind *bind = dyn_cast <gbind *> (stmt))
1159     {
1160       for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
1161 	grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
1162     }
1163   else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
1164     {
1165       *handled_ops_p = true;
1166       gimple_omp_parallel_set_grid_phony (parallel, true);
1167 
1168       gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
1169       gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
1170       gimple_seq s = NULL;
1171       gimple_seq_add_stmt (&s, new_bind);
1172       gimple_omp_set_body (parallel, s);
1173 
1174       struct walk_stmt_info wi_par;
1175       memset (&wi_par, 0, sizeof (wi_par));
1176       wi_par.info = new_bind;
1177       walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
1178 			   grid_mark_tiling_loops, NULL, &wi_par);
1179     }
1180   else if (is_a <gcall *> (stmt))
1181     wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
1182   return NULL_TREE;
1183 }
1184 
/* Given a freshly copied top level kernel sequence SEQ, identify the
   individual OMP components, mark them as parts of the kernel, copy the
   assignments leading to them to just before DST, remapping them using WI and
   adding new temporaries to TGT_BIND, and return the loop that will be used
   for kernel dispatch.  */
1189 
1190 static gomp_for *
1191 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
1192 			       gimple_stmt_iterator *dst,
1193 			       gbind *tgt_bind, struct walk_stmt_info *wi)
1194 {
1195   gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
1196 						      GRID_SEGMENT_GLOBAL, wi);
1197   gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
1198   gcc_assert (teams);
1199   gimple_omp_teams_set_grid_phony (teams, true);
1200   stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
1201 					      tgt_bind, GRID_SEGMENT_GLOBAL,
1202 					      wi);
1203   gcc_checking_assert (stmt);
1204   gomp_for *dist = dyn_cast <gomp_for *> (stmt);
1205   gcc_assert (dist);
1206   gimple_seq prebody = gimple_omp_for_pre_body (dist);
1207   if (prebody)
1208     grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1209 					 GRID_SEGMENT_GROUP, wi);
1210 
1211   if (grid->tiling)
1212     {
1213       gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
1214       gimple_omp_for_set_grid_group_iter (dist, true);
1215 
1216       struct walk_stmt_info wi_tiled;
1217       memset (&wi_tiled, 0, sizeof (wi_tiled));
1218       walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
1219 			   grid_mark_tiling_parallels_and_loops, NULL,
1220 			   &wi_tiled);
1221       return dist;
1222     }
1223   else
1224     {
1225       gimple_omp_for_set_grid_phony (dist, true);
1226       stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
1227 						  tgt_bind,
1228 						  GRID_SEGMENT_PRIVATE, wi);
1229       gcc_checking_assert (stmt);
1230       gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
1231       gimple_omp_parallel_set_grid_phony (parallel, true);
1232       stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
1233 						  dst, tgt_bind,
1234 						  GRID_SEGMENT_PRIVATE, wi);
1235       gomp_for *inner_loop = as_a <gomp_for *> (stmt);
1236       gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
1237       prebody = gimple_omp_for_pre_body (inner_loop);
1238       if (prebody)
1239 	grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1240 					     GRID_SEGMENT_PRIVATE, wi);
1241 
1242       if (gimple_omp_for_combined_p (inner_loop))
1243 	grid_eliminate_combined_simd_part (inner_loop);
1244       struct walk_stmt_info body_wi;
1245       memset (&body_wi, 0, sizeof (body_wi));
1246       walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
1247 			   grid_process_grid_body, NULL, &body_wi);
1248 
1249       return inner_loop;
1250     }
1251 }
1252 
1253 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
1254    create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
1255    is the bind into which temporaries inserted before TARGET should be
1256    added.  */
1257 
1258 static void
1259 grid_attempt_target_gridification (gomp_target *target,
1260 				   gimple_stmt_iterator *gsi,
1261 				   gbind *tgt_bind)
1262 {
1263   /* removed group_size */
1264   grid_prop grid = {};
1265   if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
1266     return;
1267 
1268   location_t loc = gimple_location (target);
1269   if (dump_enabled_p ())
1270     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
1271 		     "Target construct will be turned into a gridified HSA "
1272 		     "kernel\n");
1273 
1274   /* Copy target body to a GPUKERNEL construct:  */
1275   gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
1276     (gimple_omp_body (target));
1277 
1278   hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
1279   struct walk_stmt_info wi;
1280   memset (&wi, 0, sizeof (struct walk_stmt_info));
1281   wi.info = declmap;
1282 
1283   /* Copy assignments in between OMP statements before target, mark OMP
1284      statements within copy appropriately.  */
1285   gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
1286 							tgt_bind, &wi);
1287 
1288   gbind *old_bind
1289     = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
1290   gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
1291   tree new_block = gimple_bind_block (new_bind);
1292   tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
1293   BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
1294   BLOCK_SUBBLOCKS (enc_block) = new_block;
1295   BLOCK_SUPERCONTEXT (new_block) = enc_block;
1296   gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
1297   gimple_seq_add_stmt
1298     (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
1299      gpukernel);
1300 
1301   for (size_t i = 0; i < grid.collapse; i++)
1302     walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
1303   push_gimplify_context ();
1304   for (size_t i = 0; i < grid.collapse; i++)
1305     {
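      /* Compute GS, the number of work-items in dimension I, as the iteration
	 count of INNER_LOOP; in the tiling case INNER_LOOP is the distribute
	 loop, so the count of its tiles is multiplied by its step to cover the
	 whole iteration space.  WS is the requested work-group size or zero if
	 none was given.  */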
1306       tree index_var = gimple_omp_for_index (inner_loop, i);
1307       tree itype, type = TREE_TYPE (index_var);
1308       if (POINTER_TYPE_P (type))
1309 	itype = signed_type_for (type);
1310       else
1311 	itype = type;
1312 
1313       enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
1314       tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
1315       walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
1316       tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
1317       walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
1318       tree step
1319 	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
1320       omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
1321       n1 = fold_convert (itype, n1);
1322       n2 = fold_convert (itype, n2);
1323 
1324       tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
1325 
1326       tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
1327       t = fold_build2 (PLUS_EXPR, itype, step, t);
1328       t = fold_build2 (PLUS_EXPR, itype, t, n2);
1329       t = fold_build2 (MINUS_EXPR, itype, t, n1);
1330       if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
1331 	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1332 			 fold_build1 (NEGATE_EXPR, itype, t),
1333 			 fold_build1 (NEGATE_EXPR, itype, step));
1334       else
1335 	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
1336       t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
1337       if (grid.tiling)
1338 	{
1339 	  if (cond_code == GT_EXPR)
1340 	    step = fold_build1 (NEGATE_EXPR, itype, step);
1341 	  t = fold_build2 (MULT_EXPR, itype, t, step);
1342 	}
1343 
1344       tree gs = fold_convert (uint32_type_node, t);
1345       gimple_seq tmpseq = NULL;
1346       gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1347       if (!gimple_seq_empty_p (tmpseq))
1348 	gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1349 
1350       tree ws;
1351       if (grid.group_sizes[i])
1352 	{
1353 	  ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
1354 	  tmpseq = NULL;
1355 	  gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1356 	  if (!gimple_seq_empty_p (tmpseq))
1357 	    gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1358 	}
1359       else
1360 	ws = build_zero_cst (uint32_type_node);
1361 
1362       tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
1363       OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
1364       OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
1365       OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
1366       OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
1367       gimple_omp_target_set_clauses (target, c);
1368     }
1369   pop_gimplify_context (tgt_bind);
1370   delete declmap;
1371   return;
1372 }
1373 
1374 /* Walker function doing all the work for create_target_kernels.  */
1375 
1376 static tree
1377 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
1378 				   bool *handled_ops_p,
1379 				   struct walk_stmt_info *incoming)
1380 {
1381   *handled_ops_p = false;
1382 
1383   gimple *stmt = gsi_stmt (*gsi);
1384   gomp_target *target = dyn_cast <gomp_target *> (stmt);
1385   if (target)
1386     {
1387       gbind *tgt_bind = (gbind *) incoming->info;
1388       gcc_checking_assert (tgt_bind);
1389       grid_attempt_target_gridification (target, gsi, tgt_bind);
1390       return NULL_TREE;
1391     }
1392   gbind *bind = dyn_cast <gbind *> (stmt);
1393   if (bind)
1394     {
1395       *handled_ops_p = true;
1396       struct walk_stmt_info wi;
1397       memset (&wi, 0, sizeof (wi));
1398       wi.info = bind;
1399       walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
1400 			   grid_gridify_all_targets_stmt, NULL, &wi);
1401     }
1402   return NULL_TREE;
1403 }
1404 
1405 /* Attempt to gridify all target constructs in BODY_P.  All such targets will
1406    have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
1408    grid_body will be marked with phony flags or kernel kinds.  Moreover, some
1409    re-structuring is often needed, such as copying pre-bodies before the target
1410    construct so that kernel grid sizes can be computed.  */
1411 
1412 void
1413 omp_grid_gridify_all_targets (gimple_seq *body_p)
1414 {
1415   struct walk_stmt_info wi;
1416   memset (&wi, 0, sizeof (wi));
1417   walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
1418 }
1419