/* Lowering and expansion of OpenMP directives for HSA GPU agents.

   Copyright (C) 2013-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "tree-inline.h"
#include "langhooks.h"
#include "omp-general.h"
#include "omp-low.h"
#include "omp-grid.h"
#include "gimple-pretty-print.h"

/* Return the lastprivate predicate for a given gridified loop described by
   FD.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information.  */

  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
        = build2 (EQ_EXPR, boolean_type_node,
                  build2 (PLUS_EXPR, unsigned_type_node,
                          build_call_expr (id, 1, dim_tree), u1),
                  build_call_expr (size, 1, dim_tree));
      if (cond)
        cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
        cond = c2;
    }
  return cond;
}
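
/* For illustration, for an intra-group loop with collapse (2) the predicate
   built above is, as a sketch,

     (hsa_workitemid (0) + 1 == hsa_currentworkgroupsize (0))
     && (hsa_workitemid (1) + 1 == hsa_currentworkgroupsize (1))

   i.e. it holds only for the work-item with the highest ID in every
   dimension, which executes the logically last iteration.  */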

/* Structure describing the basic properties of the loop we are analyzing
   for possible gridification, and of the loop once it is gridified.  */

struct grid_prop
{
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user, or NULL if not requested.  */
  tree group_sizes[3];
};

#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
  "gridified HSA kernel because "

/* Return true if STMT is an assignment of a register-type into a local
   VAR_DECL.  If GRID is non-NULL, the assignment additionally must not be to
   any of the trees specifying group sizes there.  */

static bool
grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
{
  gassign *assign = dyn_cast <gassign *> (stmt);
  if (!assign)
    return false;
  if (gimple_clobber_p (assign))
    return true;
  tree lhs = gimple_assign_lhs (assign);
  if (!VAR_P (lhs)
      || !is_gimple_reg_type (TREE_TYPE (lhs))
      || is_global_var (lhs))
    return false;
  if (grid)
    for (unsigned i = 0; i < grid->collapse; i++)
      if (lhs == grid->group_sizes[i])
        return false;
  return true;
}
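
/* For example, a statement like "i = n * 4;" assigning to a local
   register-type variable is safe, whereas a store to a global variable, to
   memory, or to one of the trees recorded in grid->group_sizes is not.  */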

/* Return true if all statements in SEQ are assignments to local register-type
   variables that do not hold group size information.  */

static bool
grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
{
  if (!seq)
    return true;

  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
      return false;
  return true;
}

/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during the whole search only assignments to
   register-type local variables (that do not overwrite group size
   information) and a single OMP statement are encountered, return true,
   otherwise return false.  RET is where we store any OMP statement
   encountered.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
                                          const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid))
        continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple_seq bind_body = gimple_bind_body (bind);
          if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
                                                         ret))
            return false;
        }
      else if (is_gimple_omp (stmt))
        {
          if (*ret)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "%s construct "
                                   "contains multiple OpenMP constructs\n",
                                   name);
                  dump_printf_loc (MSG_NOTE, gimple_location (*ret),
                                   "The first OpenMP construct within "
                                   "a parallel\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                                   "The second OpenMP construct within "
                                   "a parallel\n");
                }
              return false;
            }
          *ret = stmt;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "%s construct contains "
                               "a complex statement\n", name);
              dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                               "This statement cannot be analyzed for "
                               "gridification\n");
            }
          return false;
        }
    }
  return true;
}

/* Scan statements in SEQ and make sure that it and any binds in it contain
   only assignments to local register-type variables (that do not overwrite
   group size information) and one OMP construct.  If so, return that
   construct, otherwise return NULL.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  If
   dumping is enabled and the function fails, use NAME to dump a note with the
   reason for failure.  */

static gimple *
grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
                                        const char *name)
{
  if (!seq)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
                         name);
      return NULL;
    }

  gimple *ret = NULL;
  if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
    {
      if (!ret && dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                         GRID_MISSED_MSG_PREFIX "%s construct does not contain"
                         " any other OpenMP construct\n", name);
      return ret;
    }
  else
    return NULL;
}
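
/* For example, a "target" body of the shape

     D.2345 = n;
     #pragma omp teams ...

   yields the teams statement, whereas a body containing two OMP constructs,
   or any complex statement between them, yields NULL.  */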

/* Walker function looking for statements that there is no point in gridifying
   (and for noreturn function calls, which we cannot handle).  Return non-NULL
   if such a statement is found.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
                                   bool *handled_ops_p,
                                   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
        {
          *handled_ops_p = true;
          wi->info = stmt;
          return error_mark_node;
        }
      break;

    /* We may reduce the following list if we find a way to implement the
       clauses, but for now there is no point in trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}

/* Examine clauses of omp parallel statement PAR and if any prevents
   gridification, issue a missed-optimization diagnostic and return false,
   otherwise return true.  TLOC is the location of the enclosing target
   construct, used for diagnostics.  */

static bool
grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc)
{
  tree clauses = gimple_omp_parallel_clauses (par);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_THREADS:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "there is "
                               "a num_threads clause of the parallel "
                               "construct\n");
              dump_printf_loc (MSG_NOTE, gimple_location (par),
                               "Parallel construct has a num_threads clause\n");
            }
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                               GRID_MISSED_MSG_PREFIX "a reduction clause "
                               "is present\n");
              dump_printf_loc (MSG_NOTE, gimple_location (par),
                               "Parallel construct has a reduction clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  return true;
}

/* Examine clauses and the body of omp loop statement GFOR and if something
   prevents gridification, issue a missed-optimization diagnostic and return
   false, otherwise return true.  GRID describes hitherto discovered properties
   of the loop that is evaluated for possible gridification.  */

static bool
grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
{
  if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
                                                 grid))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "the inner loop "
                           "bounds computation contains a complex "
                           "statement\n");
          dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                           "Loop construct cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }

  tree clauses = gimple_omp_for_clauses (gfor);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_SCHEDULE:
          if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the inner loop "
                                   "has a non-automatic schedule clause\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                                   "Loop construct has a non-automatic "
                                   "schedule clause\n");
                }
              return false;
            }
          break;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "a reduction "
                               "clause is present\n");
              dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                               "Loop construct has a reduction "
                               "clause\n");
            }
          return false;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  if (walk_gimple_seq (gimple_omp_body (gfor),
                       grid_find_ungridifiable_statement,
                       NULL, &wi))
    {
      gimple *bad = (gimple *) wi.info;
      if (dump_enabled_p ())
        {
          if (is_gimple_call (bad))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "a call to a noreturn function\n");
          else
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                             GRID_MISSED_MSG_PREFIX "the inner loop contains "
                             "statement %s which cannot be transformed\n",
                             gimple_code_name[(int) gimple_code (bad)]);
          dump_printf_loc (MSG_NOTE, gimple_location (bad),
                           "This statement cannot be analyzed for "
                           "gridification\n");
        }
      return false;
    }
  return true;
}

/* Given a distribute omp construct represented by DIST, which in the original
   source forms a compound construct with a looping construct, return true if
   it can be turned into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  */

static bool
grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
{
  location_t tloc = grid->target_loc;
  gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
                                                         grid, "distribute");
  gomp_parallel *par;
  if (!stmt
      || !(par = dyn_cast <gomp_parallel *> (stmt))
      || !grid_parallel_clauses_gridifiable (par, tloc))
    return false;

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
                                                 "parallel");
  gomp_for *gfor;
  if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
    return false;

  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the inner loop is not "
                         "a simple for loop\n");
      return false;
    }
  gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  return true;
}
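
/* In source terms, the simple pattern checked above corresponds to a combined
   construct such as the following sketch (clauses omitted):

     #pragma omp distribute parallel for
     for (i = 0; i < n; i++)
       ...

   which gimplification has already split into nested distribute, parallel
   and for statements.  */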

/* Given an omp loop statement GFOR, return true if it can participate in
   tiling gridification, i.e. in one where the distribute and parallel for
   loops do not form a compound statement.  GRID describes hitherto discovered
   properties of the loop that is evaluated for possible gridification.  */

static bool
grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
{
  if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop is not "
                           "a simple for loop\n");
          dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                           "This statement is not a simple for loop\n");
        }
      return false;
    }

  if (!grid_inner_loop_gridifiable_p (gfor, grid))
    return false;

  if (gimple_omp_for_collapse (gfor) != grid->collapse)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                           GRID_MISSED_MSG_PREFIX "an inner loop does not "
                           "use the same collapse clause\n");
          dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                           "Loop construct uses a different collapse clause\n");
        }
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
                                           * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (gfor, &fd, loops);
  for (unsigned i = 0; i < grid->collapse; i++)
    {
      tree itype, type = TREE_TYPE (fd.loops[i].v);
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      tree n1 = fold_convert (itype, fd.loops[i].n1);
      tree n2 = fold_convert (itype, fd.loops[i].n2);
      tree t = build_int_cst (itype,
                              (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);

      if (!operand_equal_p (grid->group_sizes[i], t, 0))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute and "
                               "an internal loop do not agree on tile size\n");
              dump_printf_loc (MSG_NOTE, gimple_location (gfor),
                               "Loop construct does not seem to loop over "
                               "a tile size\n");
            }
          return false;
        }
    }
  return true;
}
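
/* The value T computed above is the canonical OMP iteration count of loop I,

     t = (n2 - n1 + step + (cond_code == LT_EXPR ? -1 : 1)) / step

   (with the unsigned GT_EXPR case negated first), and it must be identical
   to the distribute step recorded in grid->group_sizes[i] for the loop to be
   considered as iterating over exactly one tile.  */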

/* Facing a call to FNDECL in the body of a distribute construct, return true
   if we can handle it or false if it precludes gridification.  */

static bool
grid_call_permissible_in_distribute_p (tree fndecl)
{
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return true;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if (strstr (name, "omp_") != name)
    return false;

  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_num_threads") == 0)
      || (strcmp (name, "omp_get_num_teams") == 0)
      || (strcmp (name, "omp_get_team_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    return true;

  return false;
}

/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
   of a distribute construct that is pointed at by GSI, modify it as necessary
   for gridification.  If the statement itself got removed, return true.  */

static bool
grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  gcc_checking_assert (stmt);
  tree fndecl = gimple_call_fndecl (stmt);
  if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  if ((strcmp (name, "omp_get_thread_num") == 0)
      || (strcmp (name, "omp_get_level") == 0)
      || (strcmp (name, "omp_get_active_level") == 0)
      || (strcmp (name, "omp_in_parallel") == 0))
    {
      tree lhs = gimple_call_lhs (stmt);
      if (lhs)
        {
          gassign *assign
            = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
          gsi_insert_before (gsi, assign, GSI_SAME_STMT);
        }
      gsi_remove (gsi, true);
      return true;
    }

  /* The rest of the omp functions can stay as they are, the HSA back end will
     handle them correctly.  */
  gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
                       || (strcmp (name, "omp_get_num_teams") == 0)
                       || (strcmp (name, "omp_get_team_num") == 0));
  return false;
}
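
/* For example, a statement "n = omp_get_thread_num ();" encountered directly
   in a distribute body is removed and replaced with "n = 0;", which matches
   the value the OpenMP runtime would return outside of a parallel region.  */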

/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
                                  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (grid_safe_assignment_p (stmt, grid)
          || gimple_code (stmt) == GIMPLE_GOTO
          || gimple_code (stmt) == GIMPLE_LABEL
          || gimple_code (stmt) == GIMPLE_COND)
        continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
        {
          if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the distribute "
                                   "construct contains a try..catch region\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (try_stmt),
                                   "This statement cannot be analyzed for "
                                   "tiled gridification\n");
                }
              return false;
            }
          if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
                                                 grid, in_parallel))
            return false;
          if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (is_gimple_call (stmt))
        {
          tree fndecl = gimple_call_fndecl (stmt);
          if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
            continue;

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a call\n");
              dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
        {
          if (in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a parallel "
                                   "construct contains another parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                                   "This parallel construct is nested in "
                                   "another one\n");
                }
              return false;
            }
          if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
              || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
                                                    grid, true))
            return false;
        }
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
        {
          if (!in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a loop "
                                   "construct is not nested within a parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                                   "This loop construct is not nested in "
                                   "a parallel construct\n");
                }
              return false;
            }
          if (!grid_gfor_follows_tiling_pattern (gfor, grid))
            return false;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a complex statement\n");
              dump_printf_loc (MSG_NOTE, gimple_location (stmt),
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
    }
  return true;
}

/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */
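
/* For reference, the two gridifiable shapes recognized below are, as a
   sketch with most clauses omitted:

     #pragma omp target
     #pragma omp teams
     #pragma omp distribute parallel for        <- simple (combined) pattern
     for (...)
       ...

   and

     #pragma omp target
     #pragma omp teams
     #pragma omp distribute                     <- tiling pattern
     for (...)
       {
         #pragma omp parallel
         #pragma omp for
         for (...)
           ...
       }  */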

static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  location_t tloc = gimple_location (target);
  grid->target_loc = tloc;
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
                                              grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "it does not have a sole teams "
                         "construct in it.\n");
      return false;
    }

  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_TEAMS:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams construct "
                             "contains a num_teams clause\n");
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "a reduction "
                             "clause is present\n");
          return false;

        case OMP_CLAUSE_THREAD_LIMIT:
          if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
            group_size = OMP_CLAUSE_OPERAND (clauses, 0);
          break;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
                                                 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the teams construct does not "
                         "have a single distribute construct in it.\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the distribute construct "
                         "contains collapse clause with parameter greater "
                         "than 3\n");
      return false;
    }

  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *) alloca (grid->collapse
                                           * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams "
                             "thread limit is different from distribute "
                             "schedule chunk\n");
          return false;
        }
      group_size = fd.chunk_size;
    }
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "group size cannot be "
                         "set using thread_limit or schedule clauses "
                         "when also using a collapse clause greater than 1\n");
      return false;
    }

  if (gimple_omp_for_combined_p (dist))
    {
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
        grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      grid->tiling = true;
      if (group_size)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "group size cannot be set "
                             "using thread_limit or schedule clauses when "
                             "distribute and loop constructs do not form "
                             "one combined construct\n");
          return false;
        }
      for (unsigned i = 0; i < grid->collapse; i++)
        {
          if (fd.loops[i].cond_code == GT_EXPR)
            grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
                                                TREE_TYPE (fd.loops[i].step),
                                                fd.loops[i].step);
          else
            grid->group_sizes[i] = fd.loops[i].step;
        }
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
                                               false);
    }
}

/* Operand walker, used to remap pre-body declarations according to a hash map
   provided in DATA.  */

static tree
grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
{
  tree t = *tp;

  if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;
  else
    *walk_subtrees = 1;

  if (VAR_P (t))
    {
      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
      tree *repl = declmap->get (t);
      if (repl)
        *tp = *repl;
    }
  return NULL_TREE;
}

/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
                       GRID_SEGMENT_GLOBAL};

/* Mark VAR so that it is eventually placed into SEGMENT.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making variables non-addressable would require that we re-gimplify all
     their uses.  Fortunately, we do not have to do this because if they are
     not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep them
     private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
                                         NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      gcc_unreachable ();
    }

  if (!TREE_STATIC (var))
    {
      TREE_STATIC (var) = 1;
      varpool_node::finalize_decl (var);
    }
}
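
/* For instance, marking an addressable variable with GRID_SEGMENT_GROUP
   attaches the "hsa_group_segment" attribute to it and makes it static, the
   effect being roughly as if the user had declared it along the lines of

     static int scratch __attribute__ ((hsa_group_segment));

   so that the HSA back end places it into group memory.  */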

/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting mapping of operands in WI and
   remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
                                     gbind *tgt_bind,
                                     enum grid_var_segment var_segment,
                                     struct walk_stmt_info *wi)
{
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          gimple *r = grid_copy_leading_local_assignments
            (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

          if (var_segment != GRID_SEGMENT_PRIVATE)
            for (tree var = gimple_bind_vars (bind);
                 var;
                 var = DECL_CHAIN (var))
              grid_mark_variable_segment (var, var_segment);
          if (r)
            return r;
          else
            continue;
        }
      if (!grid_safe_assignment_p (stmt, NULL))
        return stmt;
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
                                 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}

/* Statement walker function to make adjustments to statements within the
   gridified kernel copy.  */

static tree
grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gimple_code (stmt) == GIMPLE_OMP_FOR
      && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
    {
      gomp_for *loop = as_a <gomp_for *> (stmt);
      tree clauses = gimple_omp_for_clauses (loop);
      tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
      if (cl)
        OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
      else
        {
          tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
          OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
          OMP_CLAUSE_CHAIN (c) = clauses;
          gimple_omp_for_set_clauses (loop, c);
        }
    }
  return NULL_TREE;
}
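
/* For example, an inner "#pragma omp simd safelen(16)" loop in the kernel
   copy has its safelen expression overwritten with 1 (and a safelen (1)
   clause is added if none was present), so the loop is effectively treated
   as not vectorizable across iterations.  */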

/* Given a PARLOOP that is a normal for looping construct but also a part of
   a combined construct with a simd loop, eliminate the simd loop.  */

static void
grid_eliminate_combined_simd_part (gomp_for *parloop)
{
  struct walk_stmt_info wi;

  memset (&wi, 0, sizeof (wi));
  wi.val_only = true;
  enum gf_mask msk = GF_OMP_FOR_SIMD;
  wi.info = (void *) &msk;
  walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
  gimple *stmt = (gimple *) wi.info;
  /* We expect that the SIMD is the only statement in the parallel loop.  */
  gcc_assert (stmt
              && gimple_code (stmt) == GIMPLE_OMP_FOR
              && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
              && gimple_omp_for_combined_into_p (stmt)
              && !gimple_omp_for_combined_p (stmt));
  gomp_for *simd = as_a <gomp_for *> (stmt);

  /* Copy over the iteration properties because the body refers to the index
     in the bottom-most loop.  */
  unsigned i, collapse = gimple_omp_for_collapse (parloop);
  gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
  for (i = 0; i < collapse; i++)
    {
      gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
      gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
      gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
      gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
    }

  tree *tgt = gimple_omp_for_clauses_ptr (parloop);
  while (*tgt)
    tgt = &OMP_CLAUSE_CHAIN (*tgt);

  /* Copy over all clauses, except for linear clauses, which are turned into
     private clauses, and all other simd-specific clauses, which are
     ignored.  */
  tree *pc = gimple_omp_for_clauses_ptr (simd);
  while (*pc)
    {
      tree c = *pc;
      switch (TREE_CODE (c))
        {
        case OMP_CLAUSE_LINEAR:
          {
            tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
            OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
            OMP_CLAUSE_CHAIN (priv) = NULL;
            *tgt = priv;
            tgt = &OMP_CLAUSE_CHAIN (priv);
            pc = &OMP_CLAUSE_CHAIN (c);
            break;
          }

        case OMP_CLAUSE_SAFELEN:
        case OMP_CLAUSE_SIMDLEN:
        case OMP_CLAUSE_ALIGNED:
          pc = &OMP_CLAUSE_CHAIN (c);
          break;

        default:
          *pc = OMP_CLAUSE_CHAIN (c);
          OMP_CLAUSE_CHAIN (c) = NULL;
          *tgt = c;
          tgt = &OMP_CLAUSE_CHAIN (c);
          break;
        }
    }

  /* Finally, throw away the simd and mark the parallel loop as not
     combined.  */
  gimple_omp_set_body (parloop, gimple_omp_body (simd));
  gimple_omp_for_set_combined_p (parloop, false);
}
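
/* For example, given a combined "#pragma omp parallel for simd linear(p)"
   loop, the simd part is dissolved: the parallel loop takes over the simd
   loop's indices, bounds and body, the linear(p) clause becomes private(p),
   and safelen, simdlen and aligned clauses are dropped.  */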

/* Statement walker function marking all loops as grid ones representing
   threads of a particular thread group, and arranging for lastprivate
   variables to be copied through group-segment temporaries.  */

static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
                        struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
        grid_eliminate_combined_simd_part (loop);

      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
                           grid_process_grid_body, NULL, &body_wi);

      gbind *bind = (gbind *) wi_in->info;
      tree c;
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
        if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
          {
            push_gimplify_context ();
            tree ov = OMP_CLAUSE_DECL (c);
            tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
                                     TREE_TYPE (ov));

            grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
            DECL_CONTEXT (gv) = current_function_decl;
            gimple_bind_append_vars (bind, gv);
            tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
            gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
            x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
            gimple_seq l = NULL;
            gimplify_and_add (x, &l);
            gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
            pop_gimplify_context (bind);
          }
    }
  return NULL_TREE;
}

/* Statement walker function marking all parallels as grid_phony and loops as
   grid ones representing threads of a particular thread group.  */

static tree
grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
                                      bool *handled_ops_p,
                                      struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  wi_in->removed_stmt = false;
  gimple *stmt = gsi_stmt (*gsi);
  if (gbind *bind = dyn_cast <gbind *> (stmt))
    {
      for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
        grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
    }
  else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
    {
      *handled_ops_p = true;
      gimple_omp_parallel_set_grid_phony (parallel, true);

      gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
      gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
      gimple_seq s = NULL;
      gimple_seq_add_stmt (&s, new_bind);
      gimple_omp_set_body (parallel, s);

      struct walk_stmt_info wi_par;
      memset (&wi_par, 0, sizeof (wi_par));
      wi_par.info = new_bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
                           grid_mark_tiling_loops, NULL, &wi_par);
    }
  else if (is_a <gcall *> (stmt))
    wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
  return NULL_TREE;
}

/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of kernel, copy assignments leading to them
   just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
                               gimple_stmt_iterator *dst,
                               gbind *tgt_bind, struct walk_stmt_info *wi)
{
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
                                                      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
                                              tgt_bind, GRID_SEGMENT_GLOBAL,
                                              wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                         GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
                           grid_mark_tiling_parallels_and_loops, NULL,
                           &wi_tiled);
      return dist;
    }
  else
    {
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
                                                  tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
                                                  dst, tgt_bind,
                                                  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
        grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
                                             GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
        grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
                           grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}

/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, and
   TGT_BIND is the bind into which temporaries inserted before TARGET
   should be added.  */
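
/* For illustration, one shape that follows the gridifiable pattern (a
   hedged example; grid_target_follows_gridifiable_pattern defines the
   precise set of accepted forms):

     #pragma omp target teams
     #pragma omp distribute parallel for
     for (int i = 0; i < n; i++)
       a[i] = b[i] + c[i];
   */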

static void
grid_attempt_target_gridification (gomp_target *target,
                                   gimple_stmt_iterator *gsi,
                                   gbind *tgt_bind)
{
  /* removed group_size */
  grid_prop grid;
  memset (&grid, 0, sizeof (grid));
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
                     "Target construct will be turned into a gridified HSA "
                     "kernel\n");

  /* Copy the target body to a GPUKERNEL construct.  */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy the assignments in between OMP statements to just before the
     target, and mark the OMP statements within the copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
                                                        tgt_bind, &wi);

  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);
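
  /* For illustration (a hedged sketch, not an actual GIMPLE dump), the
     target statement now has the shape

       target
         bind
           <original body>
           gimple_omp_grid_body   <- the kernel_seq copy
     */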

  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
      if (POINTER_TYPE_P (type))
        itype = signed_type_for (type);
      else
        itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      omp_adjust_for_condition (loc, &cond_code, &n2);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
      tree step
        = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));

      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
        t = fold_build2 (TRUNC_DIV_EXPR, itype,
                         fold_build1 (NEGATE_EXPR, itype, t),
                         fold_build1 (NEGATE_EXPR, itype, step));
      else
        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
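      /* A worked example, for illustration only: with n1 = 0, n2 = 10,
         step = 3 and cond_code LT_EXPR, the computation above yields
         t = (3 + -1 + 10 - 0) / 3 = 4, matching the four iterations
         i = 0, 3, 6 and 9; the COND_EXPR clamps T to zero when the loop
         would not run at all.  */
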
      if (grid.tiling)
        {
          if (cond_code == GT_EXPR)
            step = fold_build1 (NEGATE_EXPR, itype, step);
          t = fold_build2 (MULT_EXPR, itype, t, step);
        }
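
      /* Hedged note: when tiling, T counted distribute iterations, i.e.
         groups, so multiplying by STEP (the tile size) presumably rescales
         it back to the original iteration space, making the grid size below
         a count of individual work items rather than of groups.  */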

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
        gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      tree ws;
      if (grid.group_sizes[i])
        {
          ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
          tmpseq = NULL;
          gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
          if (!gimple_seq_empty_p (tmpseq))
            gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
        }
      else
        ws = build_zero_cst (uint32_type_node);

      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
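
  /* At this point TARGET carries one _GRIDDIM_ clause per collapsed
     dimension, each recording the dimension number, the computed grid size
     and the requested work-group size (zero when none was specified).  */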
  pop_gimplify_context (tgt_bind);
  delete declmap;
}

/* Walker function doing all the work for omp_grid_gridify_all_targets.  */

static tree
grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
                               bool *handled_ops_p,
                               struct walk_stmt_info *incoming)
{
  *handled_ops_p = false;

  gimple *stmt = gsi_stmt (*gsi);
  gomp_target *target = dyn_cast <gomp_target *> (stmt);
  if (target)
    {
      gbind *tgt_bind = (gbind *) incoming->info;
      gcc_checking_assert (tgt_bind);
      grid_attempt_target_gridification (target, gsi, tgt_bind);
      return NULL_TREE;
    }
  gbind *bind = dyn_cast <gbind *> (stmt);
  if (bind)
    {
      *handled_ops_p = true;
      struct walk_stmt_info wi;
      memset (&wi, 0, sizeof (wi));
      wi.info = bind;
      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
                           grid_gridify_all_targets_stmt, NULL, &wi);
    }
  return NULL_TREE;
}

/* Attempt to gridify all target constructs in BODY_P.  All such targets will
   have their bodies duplicated, with the new copy being put into a
   gimple_omp_grid_body statement.  All kernel-related constructs within the
   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
   re-structuring is often needed, such as copying pre-bodies before the
   target construct so that kernel grid sizes can be computed.  */

void
omp_grid_gridify_all_targets (gimple_seq *body_p)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
}
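
/* A hedged sketch of the expected call site, assuming the usual GCC 8
   arrangement in which omp-low.c's execute_lower_omp invokes gridification
   whenever HSA code generation is requested (illustrative, not copied
   verbatim from omp-low.c):

     if (hsa_gen_requested_p ())
       omp_grid_gridify_all_targets (&body);
   */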