xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/hsa-regalloc.c (revision 8feb0f0b7eaff0608f8350bbfa3098827b4bb91b)
11debfc3dSmrg /* HSAIL IL Register allocation and out-of-SSA.
2*8feb0f0bSmrg    Copyright (C) 2013-2020 Free Software Foundation, Inc.
31debfc3dSmrg    Contributed by Michael Matz <matz@suse.de>
41debfc3dSmrg 
51debfc3dSmrg This file is part of GCC.
61debfc3dSmrg 
71debfc3dSmrg GCC is free software; you can redistribute it and/or modify
81debfc3dSmrg it under the terms of the GNU General Public License as published by
91debfc3dSmrg the Free Software Foundation; either version 3, or (at your option)
101debfc3dSmrg any later version.
111debfc3dSmrg 
121debfc3dSmrg GCC is distributed in the hope that it will be useful,
131debfc3dSmrg but WITHOUT ANY WARRANTY; without even the implied warranty of
141debfc3dSmrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
151debfc3dSmrg GNU General Public License for more details.
161debfc3dSmrg 
171debfc3dSmrg You should have received a copy of the GNU General Public License
181debfc3dSmrg along with GCC; see the file COPYING3.  If not see
191debfc3dSmrg <http://www.gnu.org/licenses/>.  */
201debfc3dSmrg 
211debfc3dSmrg #include "config.h"
221debfc3dSmrg #include "system.h"
231debfc3dSmrg #include "coretypes.h"
241debfc3dSmrg #include "tm.h"
251debfc3dSmrg #include "is-a.h"
261debfc3dSmrg #include "vec.h"
271debfc3dSmrg #include "tree.h"
281debfc3dSmrg #include "dominance.h"
29a2dc1f3fSmrg #include "basic-block.h"
301debfc3dSmrg #include "function.h"
31c0a68be4Smrg #include "cfganal.h"
32c0a68be4Smrg #include "cfg.h"
331debfc3dSmrg #include "bitmap.h"
341debfc3dSmrg #include "dumpfile.h"
351debfc3dSmrg #include "cgraph.h"
361debfc3dSmrg #include "print-tree.h"
371debfc3dSmrg #include "cfghooks.h"
38*8feb0f0bSmrg #include "alloc-pool.h"
391debfc3dSmrg #include "symbol-summary.h"
401debfc3dSmrg #include "hsa-common.h"
411debfc3dSmrg 
421debfc3dSmrg 
431debfc3dSmrg /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa.  */
441debfc3dSmrg 
451debfc3dSmrg static void
naive_process_phi(hsa_insn_phi * phi,const vec<edge> & predecessors)46a2dc1f3fSmrg naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
471debfc3dSmrg {
481debfc3dSmrg   unsigned count = phi->operand_count ();
491debfc3dSmrg   for (unsigned i = 0; i < count; i++)
501debfc3dSmrg     {
511debfc3dSmrg       gcc_checking_assert (phi->get_op (i));
521debfc3dSmrg       hsa_op_base *op = phi->get_op (i);
531debfc3dSmrg       hsa_bb *hbb;
541debfc3dSmrg       edge e;
551debfc3dSmrg 
561debfc3dSmrg       if (!op)
571debfc3dSmrg 	break;
581debfc3dSmrg 
59a2dc1f3fSmrg       e = predecessors[i];
601debfc3dSmrg       if (single_succ_p (e->src))
611debfc3dSmrg 	hbb = hsa_bb_for_bb (e->src);
621debfc3dSmrg       else
631debfc3dSmrg 	{
641debfc3dSmrg 	  basic_block old_dest = e->dest;
651debfc3dSmrg 	  hbb = hsa_init_new_bb (split_edge (e));
661debfc3dSmrg 
671debfc3dSmrg 	  /* If switch insn used this edge, fix jump table.  */
681debfc3dSmrg 	  hsa_bb *source = hsa_bb_for_bb (e->src);
691debfc3dSmrg 	  hsa_insn_sbr *sbr;
701debfc3dSmrg 	  if (source->m_last_insn
711debfc3dSmrg 	      && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
721debfc3dSmrg 	    sbr->replace_all_labels (old_dest, hbb->m_bb);
731debfc3dSmrg 	}
741debfc3dSmrg 
751debfc3dSmrg       hsa_build_append_simple_mov (phi->m_dest, op, hbb);
761debfc3dSmrg     }
771debfc3dSmrg }
781debfc3dSmrg 
791debfc3dSmrg /* Naive out-of SSA.  */
801debfc3dSmrg 
811debfc3dSmrg static void
naive_outof_ssa(void)821debfc3dSmrg naive_outof_ssa (void)
831debfc3dSmrg {
841debfc3dSmrg   basic_block bb;
851debfc3dSmrg 
861debfc3dSmrg   hsa_cfun->m_in_ssa = false;
871debfc3dSmrg 
881debfc3dSmrg   FOR_ALL_BB_FN (bb, cfun)
891debfc3dSmrg   {
901debfc3dSmrg     hsa_bb *hbb = hsa_bb_for_bb (bb);
911debfc3dSmrg     hsa_insn_phi *phi;
921debfc3dSmrg 
93a2dc1f3fSmrg     /* naive_process_phi can call split_edge on an incoming edge which order if
94a2dc1f3fSmrg        the incoming edges to the basic block and thus make it inconsistent with
95a2dc1f3fSmrg        the ordering of PHI arguments, so we collect them in advance.  */
96a2dc1f3fSmrg     auto_vec<edge, 8> predecessors;
97a2dc1f3fSmrg     unsigned pred_count = EDGE_COUNT (bb->preds);
98a2dc1f3fSmrg     for (unsigned i = 0; i < pred_count; i++)
99a2dc1f3fSmrg       predecessors.safe_push (EDGE_PRED (bb, i));
100a2dc1f3fSmrg 
1011debfc3dSmrg     for (phi = hbb->m_first_phi;
1021debfc3dSmrg 	 phi;
1031debfc3dSmrg 	 phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
104a2dc1f3fSmrg       naive_process_phi (phi, predecessors);
1051debfc3dSmrg 
1061debfc3dSmrg     /* Zap PHI nodes, they will be deallocated when everything else will.  */
1071debfc3dSmrg     hbb->m_first_phi = NULL;
1081debfc3dSmrg     hbb->m_last_phi = NULL;
1091debfc3dSmrg   }
1101debfc3dSmrg }
1111debfc3dSmrg 
1121debfc3dSmrg /* Return register class number for the given HSA TYPE.  0 means the 'c' one
1131debfc3dSmrg    bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
1141debfc3dSmrg    and 3 for 'q' 128 bit class.  */
1151debfc3dSmrg 
1161debfc3dSmrg static int
m_reg_class_for_type(BrigType16_t type)1171debfc3dSmrg m_reg_class_for_type (BrigType16_t type)
1181debfc3dSmrg {
1191debfc3dSmrg   switch (type)
1201debfc3dSmrg     {
1211debfc3dSmrg     case BRIG_TYPE_B1:
1221debfc3dSmrg       return 0;
1231debfc3dSmrg 
1241debfc3dSmrg     case BRIG_TYPE_U8:
1251debfc3dSmrg     case BRIG_TYPE_U16:
1261debfc3dSmrg     case BRIG_TYPE_U32:
1271debfc3dSmrg     case BRIG_TYPE_S8:
1281debfc3dSmrg     case BRIG_TYPE_S16:
1291debfc3dSmrg     case BRIG_TYPE_S32:
1301debfc3dSmrg     case BRIG_TYPE_F16:
1311debfc3dSmrg     case BRIG_TYPE_F32:
1321debfc3dSmrg     case BRIG_TYPE_B8:
1331debfc3dSmrg     case BRIG_TYPE_B16:
1341debfc3dSmrg     case BRIG_TYPE_B32:
1351debfc3dSmrg     case BRIG_TYPE_U8X4:
1361debfc3dSmrg     case BRIG_TYPE_S8X4:
1371debfc3dSmrg     case BRIG_TYPE_U16X2:
1381debfc3dSmrg     case BRIG_TYPE_S16X2:
1391debfc3dSmrg     case BRIG_TYPE_F16X2:
1401debfc3dSmrg       return 1;
1411debfc3dSmrg 
1421debfc3dSmrg     case BRIG_TYPE_U64:
1431debfc3dSmrg     case BRIG_TYPE_S64:
1441debfc3dSmrg     case BRIG_TYPE_F64:
1451debfc3dSmrg     case BRIG_TYPE_B64:
1461debfc3dSmrg     case BRIG_TYPE_U8X8:
1471debfc3dSmrg     case BRIG_TYPE_S8X8:
1481debfc3dSmrg     case BRIG_TYPE_U16X4:
1491debfc3dSmrg     case BRIG_TYPE_S16X4:
1501debfc3dSmrg     case BRIG_TYPE_F16X4:
1511debfc3dSmrg     case BRIG_TYPE_U32X2:
1521debfc3dSmrg     case BRIG_TYPE_S32X2:
1531debfc3dSmrg     case BRIG_TYPE_F32X2:
1541debfc3dSmrg       return 2;
1551debfc3dSmrg 
1561debfc3dSmrg     case BRIG_TYPE_B128:
1571debfc3dSmrg     case BRIG_TYPE_U8X16:
1581debfc3dSmrg     case BRIG_TYPE_S8X16:
1591debfc3dSmrg     case BRIG_TYPE_U16X8:
1601debfc3dSmrg     case BRIG_TYPE_S16X8:
1611debfc3dSmrg     case BRIG_TYPE_F16X8:
1621debfc3dSmrg     case BRIG_TYPE_U32X4:
1631debfc3dSmrg     case BRIG_TYPE_U64X2:
1641debfc3dSmrg     case BRIG_TYPE_S32X4:
1651debfc3dSmrg     case BRIG_TYPE_S64X2:
1661debfc3dSmrg     case BRIG_TYPE_F32X4:
1671debfc3dSmrg     case BRIG_TYPE_F64X2:
1681debfc3dSmrg       return 3;
1691debfc3dSmrg 
1701debfc3dSmrg     default:
1711debfc3dSmrg       gcc_unreachable ();
1721debfc3dSmrg     }
1731debfc3dSmrg }
1741debfc3dSmrg 
1751debfc3dSmrg /* If the Ith operands of INSN is or contains a register (in an address),
1761debfc3dSmrg    return the address of that register operand.  If not return NULL.  */
1771debfc3dSmrg 
1781debfc3dSmrg static hsa_op_reg **
insn_reg_addr(hsa_insn_basic * insn,int i)1791debfc3dSmrg insn_reg_addr (hsa_insn_basic *insn, int i)
1801debfc3dSmrg {
1811debfc3dSmrg   hsa_op_base *op = insn->get_op (i);
1821debfc3dSmrg   if (!op)
1831debfc3dSmrg     return NULL;
1841debfc3dSmrg   hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
1851debfc3dSmrg   if (reg)
1861debfc3dSmrg     return (hsa_op_reg **) insn->get_op_addr (i);
1871debfc3dSmrg   hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
1881debfc3dSmrg   if (addr && addr->m_reg)
1891debfc3dSmrg     return &addr->m_reg;
1901debfc3dSmrg   return NULL;
1911debfc3dSmrg }
1921debfc3dSmrg 
1931debfc3dSmrg struct m_reg_class_desc
1941debfc3dSmrg {
1951debfc3dSmrg   unsigned next_avail, max_num;
1961debfc3dSmrg   unsigned used_num, max_used;
1971debfc3dSmrg   uint64_t used[2];
1981debfc3dSmrg   char cl_char;
1991debfc3dSmrg };
2001debfc3dSmrg 
2011debfc3dSmrg /* Rewrite the instructions in BB to observe spilled live ranges.
2021debfc3dSmrg    CLASSES is the global register class state.  */
2031debfc3dSmrg 
2041debfc3dSmrg static void
rewrite_code_bb(basic_block bb,struct m_reg_class_desc * classes)2051debfc3dSmrg rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
2061debfc3dSmrg {
2071debfc3dSmrg   hsa_bb *hbb = hsa_bb_for_bb (bb);
2081debfc3dSmrg   hsa_insn_basic *insn, *next_insn;
2091debfc3dSmrg 
2101debfc3dSmrg   for (insn = hbb->m_first_insn; insn; insn = next_insn)
2111debfc3dSmrg     {
2121debfc3dSmrg       next_insn = insn->m_next;
2131debfc3dSmrg       unsigned count = insn->operand_count ();
2141debfc3dSmrg       for (unsigned i = 0; i < count; i++)
2151debfc3dSmrg 	{
2161debfc3dSmrg 	  gcc_checking_assert (insn->get_op (i));
2171debfc3dSmrg 	  hsa_op_reg **regaddr = insn_reg_addr (insn, i);
2181debfc3dSmrg 
2191debfc3dSmrg 	  if (regaddr)
2201debfc3dSmrg 	    {
2211debfc3dSmrg 	      hsa_op_reg *reg = *regaddr;
2221debfc3dSmrg 	      if (reg->m_reg_class)
2231debfc3dSmrg 		continue;
2241debfc3dSmrg 	      gcc_assert (reg->m_spill_sym);
2251debfc3dSmrg 
2261debfc3dSmrg 	      int cl = m_reg_class_for_type (reg->m_type);
2271debfc3dSmrg 	      hsa_op_reg *tmp, *tmp2;
2281debfc3dSmrg 	      if (insn->op_output_p (i))
2291debfc3dSmrg 		tmp = hsa_spill_out (insn, reg, &tmp2);
2301debfc3dSmrg 	      else
2311debfc3dSmrg 		tmp = hsa_spill_in (insn, reg, &tmp2);
2321debfc3dSmrg 
2331debfc3dSmrg 	      *regaddr = tmp;
2341debfc3dSmrg 
2351debfc3dSmrg 	      tmp->m_reg_class = classes[cl].cl_char;
2361debfc3dSmrg 	      tmp->m_hard_num = (char) (classes[cl].max_num + i);
2371debfc3dSmrg 	      if (tmp2)
2381debfc3dSmrg 		{
2391debfc3dSmrg 		  gcc_assert (cl == 0);
2401debfc3dSmrg 		  tmp2->m_reg_class = classes[1].cl_char;
2411debfc3dSmrg 		  tmp2->m_hard_num = (char) (classes[1].max_num + i);
2421debfc3dSmrg 		}
2431debfc3dSmrg 	    }
2441debfc3dSmrg 	}
2451debfc3dSmrg     }
2461debfc3dSmrg }
2471debfc3dSmrg 
2481debfc3dSmrg /* Dump current function to dump file F, with info specific
2491debfc3dSmrg    to register allocation.  */
2501debfc3dSmrg 
2511debfc3dSmrg void
dump_hsa_cfun_regalloc(FILE * f)2521debfc3dSmrg dump_hsa_cfun_regalloc (FILE *f)
2531debfc3dSmrg {
2541debfc3dSmrg   basic_block bb;
2551debfc3dSmrg 
2561debfc3dSmrg   fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
2571debfc3dSmrg 
2581debfc3dSmrg   FOR_ALL_BB_FN (bb, cfun)
2591debfc3dSmrg   {
260*8feb0f0bSmrg     hsa_bb *hbb = (class hsa_bb *) bb->aux;
2611debfc3dSmrg     bitmap_print (dump_file, hbb->m_livein, "m_livein  ", "\n");
2621debfc3dSmrg     dump_hsa_bb (f, hbb);
2631debfc3dSmrg     bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
2641debfc3dSmrg   }
2651debfc3dSmrg }
2661debfc3dSmrg 
2671debfc3dSmrg /* Given the global register allocation state CLASSES and a
2681debfc3dSmrg    register REG, try to give it a hardware register.  If successful,
2691debfc3dSmrg    store that hardreg in REG and return it, otherwise return -1.
2701debfc3dSmrg    Also changes CLASSES to accommodate for the allocated register.  */
2711debfc3dSmrg 
2721debfc3dSmrg static int
try_alloc_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)2731debfc3dSmrg try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
2741debfc3dSmrg {
2751debfc3dSmrg   int cl = m_reg_class_for_type (reg->m_type);
2761debfc3dSmrg   int ret = -1;
2771debfc3dSmrg   if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
2781debfc3dSmrg       >= 128 - 5)
2791debfc3dSmrg     return -1;
2801debfc3dSmrg   if (classes[cl].used_num < classes[cl].max_num)
2811debfc3dSmrg     {
2821debfc3dSmrg       unsigned int i;
2831debfc3dSmrg       classes[cl].used_num++;
2841debfc3dSmrg       if (classes[cl].used_num > classes[cl].max_used)
2851debfc3dSmrg 	classes[cl].max_used = classes[cl].used_num;
2861debfc3dSmrg       for (i = 0; i < classes[cl].used_num; i++)
2871debfc3dSmrg 	if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
2881debfc3dSmrg 	  break;
2891debfc3dSmrg       ret = i;
2901debfc3dSmrg       classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
2911debfc3dSmrg       reg->m_reg_class = classes[cl].cl_char;
2921debfc3dSmrg       reg->m_hard_num = i;
2931debfc3dSmrg     }
2941debfc3dSmrg   return ret;
2951debfc3dSmrg }
2961debfc3dSmrg 
2971debfc3dSmrg /* Free up hardregs used by REG, into allocation state CLASSES.  */
2981debfc3dSmrg 
2991debfc3dSmrg static void
free_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)3001debfc3dSmrg free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
3011debfc3dSmrg {
3021debfc3dSmrg   int cl = m_reg_class_for_type (reg->m_type);
3031debfc3dSmrg   int ret = reg->m_hard_num;
3041debfc3dSmrg   gcc_assert (reg->m_reg_class == classes[cl].cl_char);
3051debfc3dSmrg   classes[cl].used_num--;
3061debfc3dSmrg   classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
3071debfc3dSmrg }
3081debfc3dSmrg 
3091debfc3dSmrg /* Note that the live range for REG ends at least at END.  */
3101debfc3dSmrg 
3111debfc3dSmrg static void
note_lr_end(hsa_op_reg * reg,int end)3121debfc3dSmrg note_lr_end (hsa_op_reg *reg, int end)
3131debfc3dSmrg {
3141debfc3dSmrg   if (reg->m_lr_end < end)
3151debfc3dSmrg     reg->m_lr_end = end;
3161debfc3dSmrg }
3171debfc3dSmrg 
3181debfc3dSmrg /* Note that the live range for REG starts at least at BEGIN.  */
3191debfc3dSmrg 
3201debfc3dSmrg static void
note_lr_begin(hsa_op_reg * reg,int begin)3211debfc3dSmrg note_lr_begin (hsa_op_reg *reg, int begin)
3221debfc3dSmrg {
3231debfc3dSmrg   if (reg->m_lr_begin > begin)
3241debfc3dSmrg     reg->m_lr_begin = begin;
3251debfc3dSmrg }
3261debfc3dSmrg 
3271debfc3dSmrg /* Given two registers A and B, return -1, 0 or 1 if A's live range
3281debfc3dSmrg    starts before, at or after B's live range.  */
3291debfc3dSmrg 
3301debfc3dSmrg static int
cmp_begin(const void * a,const void * b)3311debfc3dSmrg cmp_begin (const void *a, const void *b)
3321debfc3dSmrg {
3331debfc3dSmrg   const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
3341debfc3dSmrg   const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
3351debfc3dSmrg   int ret;
3361debfc3dSmrg   if (rega == regb)
3371debfc3dSmrg     return 0;
3381debfc3dSmrg   ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
3391debfc3dSmrg   if (ret)
3401debfc3dSmrg     return ret;
3411debfc3dSmrg   return ((*rega)->m_order - (*regb)->m_order);
3421debfc3dSmrg }
3431debfc3dSmrg 
3441debfc3dSmrg /* Given two registers REGA and REGB, return true if REGA's
3451debfc3dSmrg    live range ends after REGB's.  This results in a sorting order
3461debfc3dSmrg    with earlier end points at the end.  */
3471debfc3dSmrg 
3481debfc3dSmrg static bool
cmp_end(hsa_op_reg * const & rega,hsa_op_reg * const & regb)3491debfc3dSmrg cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
3501debfc3dSmrg {
3511debfc3dSmrg   int ret;
3521debfc3dSmrg   if (rega == regb)
3531debfc3dSmrg     return false;
3541debfc3dSmrg   ret = (regb)->m_lr_end - (rega)->m_lr_end;
3551debfc3dSmrg   if (ret)
3561debfc3dSmrg     return ret < 0;
3571debfc3dSmrg   return (((regb)->m_order - (rega)->m_order)) < 0;
3581debfc3dSmrg }
3591debfc3dSmrg 
3601debfc3dSmrg /* Expire all old intervals in ACTIVE (a per-regclass vector),
3611debfc3dSmrg    that is, those that end before the interval REG starts.  Give
3621debfc3dSmrg    back resources freed so into the state CLASSES.  */
3631debfc3dSmrg 
3641debfc3dSmrg static void
expire_old_intervals(hsa_op_reg * reg,vec<hsa_op_reg * > * active,struct m_reg_class_desc * classes)3651debfc3dSmrg expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
3661debfc3dSmrg 		      struct m_reg_class_desc *classes)
3671debfc3dSmrg {
3681debfc3dSmrg   for (int i = 0; i < 4; i++)
3691debfc3dSmrg     while (!active[i].is_empty ())
3701debfc3dSmrg       {
3711debfc3dSmrg 	hsa_op_reg *a = active[i].pop ();
3721debfc3dSmrg 	if (a->m_lr_end > reg->m_lr_begin)
3731debfc3dSmrg 	  {
3741debfc3dSmrg 	    active[i].quick_push (a);
3751debfc3dSmrg 	    break;
3761debfc3dSmrg 	  }
3771debfc3dSmrg 	free_reg (classes, a);
3781debfc3dSmrg       }
3791debfc3dSmrg }
3801debfc3dSmrg 
3811debfc3dSmrg /* The interval REG didn't get a hardreg.  Spill it or one of those
3821debfc3dSmrg    from ACTIVE (if the latter, then REG will become allocated to the
3831debfc3dSmrg    hardreg that formerly was used by it).  */
3841debfc3dSmrg 
3851debfc3dSmrg static void
spill_at_interval(hsa_op_reg * reg,vec<hsa_op_reg * > * active)3861debfc3dSmrg spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
3871debfc3dSmrg {
3881debfc3dSmrg   int cl = m_reg_class_for_type (reg->m_type);
3891debfc3dSmrg   gcc_assert (!active[cl].is_empty ());
3901debfc3dSmrg   hsa_op_reg *cand = active[cl][0];
3911debfc3dSmrg   if (cand->m_lr_end > reg->m_lr_end)
3921debfc3dSmrg     {
3931debfc3dSmrg       reg->m_reg_class = cand->m_reg_class;
3941debfc3dSmrg       reg->m_hard_num = cand->m_hard_num;
3951debfc3dSmrg       active[cl].ordered_remove (0);
3961debfc3dSmrg       unsigned place = active[cl].lower_bound (reg, cmp_end);
3971debfc3dSmrg       active[cl].quick_insert (place, reg);
3981debfc3dSmrg     }
3991debfc3dSmrg   else
4001debfc3dSmrg     cand = reg;
4011debfc3dSmrg 
4021debfc3dSmrg   gcc_assert (!cand->m_spill_sym);
4031debfc3dSmrg   BrigType16_t type = cand->m_type;
4041debfc3dSmrg   if (type == BRIG_TYPE_B1)
4051debfc3dSmrg     type = BRIG_TYPE_U8;
4061debfc3dSmrg   cand->m_reg_class = 0;
4071debfc3dSmrg   cand->m_spill_sym = hsa_get_spill_symbol (type);
4081debfc3dSmrg   cand->m_spill_sym->m_name_number = cand->m_order;
4091debfc3dSmrg }
4101debfc3dSmrg 
4111debfc3dSmrg /* Given the global register state CLASSES allocate all HSA virtual
4121debfc3dSmrg    registers either to hardregs or to a spill symbol.  */
4131debfc3dSmrg 
4141debfc3dSmrg static void
linear_scan_regalloc(struct m_reg_class_desc * classes)4151debfc3dSmrg linear_scan_regalloc (struct m_reg_class_desc *classes)
4161debfc3dSmrg {
4171debfc3dSmrg   /* Compute liveness.  */
4181debfc3dSmrg   bool changed;
4191debfc3dSmrg   int i, n;
4201debfc3dSmrg   int insn_order;
4211debfc3dSmrg   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
4221debfc3dSmrg   bitmap work = BITMAP_ALLOC (NULL);
4231debfc3dSmrg   vec<hsa_op_reg*> ind2reg = vNULL;
4241debfc3dSmrg   vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
4251debfc3dSmrg   hsa_insn_basic *m_last_insn;
4261debfc3dSmrg 
4271debfc3dSmrg   /* We will need the reverse post order for linearization,
4281debfc3dSmrg      and the post order for liveness analysis, which is the same
4291debfc3dSmrg      backward.  */
4301debfc3dSmrg   n = pre_and_rev_post_order_compute (NULL, bbs, true);
4311debfc3dSmrg   ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);
4321debfc3dSmrg 
4331debfc3dSmrg   /* Give all instructions a linearized number, at the same time
4341debfc3dSmrg      build a mapping from register index to register.  */
4351debfc3dSmrg   insn_order = 1;
4361debfc3dSmrg   for (i = 0; i < n; i++)
4371debfc3dSmrg     {
4381debfc3dSmrg       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
4391debfc3dSmrg       hsa_bb *hbb = hsa_bb_for_bb (bb);
4401debfc3dSmrg       hsa_insn_basic *insn;
4411debfc3dSmrg       for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
4421debfc3dSmrg 	{
4431debfc3dSmrg 	  unsigned opi;
4441debfc3dSmrg 	  insn->m_number = insn_order++;
4451debfc3dSmrg 	  for (opi = 0; opi < insn->operand_count (); opi++)
4461debfc3dSmrg 	    {
4471debfc3dSmrg 	      gcc_checking_assert (insn->get_op (opi));
4481debfc3dSmrg 	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
4491debfc3dSmrg 	      if (regaddr)
4501debfc3dSmrg 		ind2reg[(*regaddr)->m_order] = *regaddr;
4511debfc3dSmrg 	    }
4521debfc3dSmrg 	}
4531debfc3dSmrg     }
4541debfc3dSmrg 
4551debfc3dSmrg   /* Initialize all live ranges to [after-end, 0).  */
4561debfc3dSmrg   for (i = 0; i < hsa_cfun->m_reg_count; i++)
4571debfc3dSmrg     if (ind2reg[i])
4581debfc3dSmrg       ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;
4591debfc3dSmrg 
4601debfc3dSmrg   /* Classic liveness analysis, as long as something changes:
4611debfc3dSmrg        m_liveout is union (m_livein of successors)
4621debfc3dSmrg        m_livein is m_liveout minus defs plus uses.  */
4631debfc3dSmrg   do
4641debfc3dSmrg     {
4651debfc3dSmrg       changed = false;
4661debfc3dSmrg       for (i = n - 1; i >= 0; i--)
4671debfc3dSmrg 	{
4681debfc3dSmrg 	  edge e;
4691debfc3dSmrg 	  edge_iterator ei;
4701debfc3dSmrg 	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
4711debfc3dSmrg 	  hsa_bb *hbb = hsa_bb_for_bb (bb);
4721debfc3dSmrg 
4731debfc3dSmrg 	  /* Union of successors m_livein (or empty if none).  */
4741debfc3dSmrg 	  bool first = true;
4751debfc3dSmrg 	  FOR_EACH_EDGE (e, ei, bb->succs)
4761debfc3dSmrg 	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
4771debfc3dSmrg 	      {
4781debfc3dSmrg 		hsa_bb *succ = hsa_bb_for_bb (e->dest);
4791debfc3dSmrg 		if (first)
4801debfc3dSmrg 		  {
4811debfc3dSmrg 		    bitmap_copy (work, succ->m_livein);
4821debfc3dSmrg 		    first = false;
4831debfc3dSmrg 		  }
4841debfc3dSmrg 		else
4851debfc3dSmrg 		  bitmap_ior_into (work, succ->m_livein);
4861debfc3dSmrg 	      }
4871debfc3dSmrg 	  if (first)
4881debfc3dSmrg 	    bitmap_clear (work);
4891debfc3dSmrg 
4901debfc3dSmrg 	  bitmap_copy (hbb->m_liveout, work);
4911debfc3dSmrg 
4921debfc3dSmrg 	  /* Remove defs, include uses in a backward insn walk.  */
4931debfc3dSmrg 	  hsa_insn_basic *insn;
4941debfc3dSmrg 	  for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
4951debfc3dSmrg 	    {
4961debfc3dSmrg 	      unsigned opi;
4971debfc3dSmrg 	      unsigned ndefs = insn->input_count ();
4981debfc3dSmrg 	      for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
4991debfc3dSmrg 		{
5001debfc3dSmrg 		  gcc_checking_assert (insn->get_op (opi));
5011debfc3dSmrg 		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
5021debfc3dSmrg 		  if (regaddr)
5031debfc3dSmrg 		    bitmap_clear_bit (work, (*regaddr)->m_order);
5041debfc3dSmrg 		}
5051debfc3dSmrg 	      for (; opi < insn->operand_count (); opi++)
5061debfc3dSmrg 		{
5071debfc3dSmrg 		  gcc_checking_assert (insn->get_op (opi));
5081debfc3dSmrg 		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
5091debfc3dSmrg 		  if (regaddr)
5101debfc3dSmrg 		    bitmap_set_bit (work, (*regaddr)->m_order);
5111debfc3dSmrg 		}
5121debfc3dSmrg 	    }
5131debfc3dSmrg 
5141debfc3dSmrg 	  /* Note if that changed something.  */
5151debfc3dSmrg 	  if (bitmap_ior_into (hbb->m_livein, work))
5161debfc3dSmrg 	    changed = true;
5171debfc3dSmrg 	}
5181debfc3dSmrg     }
5191debfc3dSmrg   while (changed);
5201debfc3dSmrg 
5211debfc3dSmrg   /* Make one pass through all instructions in linear order,
5221debfc3dSmrg      noting and merging possible live range start and end points.  */
5231debfc3dSmrg   m_last_insn = NULL;
5241debfc3dSmrg   for (i = n - 1; i >= 0; i--)
5251debfc3dSmrg     {
5261debfc3dSmrg       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
5271debfc3dSmrg       hsa_bb *hbb = hsa_bb_for_bb (bb);
5281debfc3dSmrg       hsa_insn_basic *insn;
5291debfc3dSmrg       int after_end_number;
5301debfc3dSmrg       unsigned bit;
5311debfc3dSmrg       bitmap_iterator bi;
5321debfc3dSmrg 
5331debfc3dSmrg       if (m_last_insn)
5341debfc3dSmrg 	after_end_number = m_last_insn->m_number;
5351debfc3dSmrg       else
5361debfc3dSmrg 	after_end_number = insn_order;
5371debfc3dSmrg       /* Everything live-out in this BB has at least an end point
5381debfc3dSmrg 	 after us.  */
5391debfc3dSmrg       EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
5401debfc3dSmrg 	note_lr_end (ind2reg[bit], after_end_number);
5411debfc3dSmrg 
5421debfc3dSmrg       for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
5431debfc3dSmrg 	{
5441debfc3dSmrg 	  unsigned opi;
5451debfc3dSmrg 	  unsigned ndefs = insn->input_count ();
5461debfc3dSmrg 	  for (opi = 0; opi < insn->operand_count (); opi++)
5471debfc3dSmrg 	    {
5481debfc3dSmrg 	      gcc_checking_assert (insn->get_op (opi));
5491debfc3dSmrg 	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
5501debfc3dSmrg 	      if (regaddr)
5511debfc3dSmrg 		{
5521debfc3dSmrg 		  hsa_op_reg *reg = *regaddr;
5531debfc3dSmrg 		  if (opi < ndefs)
5541debfc3dSmrg 		    note_lr_begin (reg, insn->m_number);
5551debfc3dSmrg 		  else
5561debfc3dSmrg 		    note_lr_end (reg, insn->m_number);
5571debfc3dSmrg 		}
5581debfc3dSmrg 	    }
5591debfc3dSmrg 	}
5601debfc3dSmrg 
5611debfc3dSmrg       /* Everything live-in in this BB has a start point before
5621debfc3dSmrg 	 our first insn.  */
5631debfc3dSmrg       int before_start_number;
5641debfc3dSmrg       if (hbb->m_first_insn)
5651debfc3dSmrg 	before_start_number = hbb->m_first_insn->m_number;
5661debfc3dSmrg       else
5671debfc3dSmrg 	before_start_number = after_end_number;
5681debfc3dSmrg       before_start_number--;
5691debfc3dSmrg       EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
5701debfc3dSmrg 	note_lr_begin (ind2reg[bit], before_start_number);
5711debfc3dSmrg 
5721debfc3dSmrg       if (hbb->m_first_insn)
5731debfc3dSmrg 	m_last_insn = hbb->m_first_insn;
5741debfc3dSmrg     }
5751debfc3dSmrg 
5761debfc3dSmrg   for (i = 0; i < hsa_cfun->m_reg_count; i++)
5771debfc3dSmrg     if (ind2reg[i])
5781debfc3dSmrg       {
5791debfc3dSmrg 	/* All regs that have still their start at after all code actually
5801debfc3dSmrg 	   are defined at the start of the routine (prologue).  */
5811debfc3dSmrg 	if (ind2reg[i]->m_lr_begin == insn_order)
5821debfc3dSmrg 	  ind2reg[i]->m_lr_begin = 0;
5831debfc3dSmrg 	/* All regs that have no use but a def will have lr_end == 0,
5841debfc3dSmrg 	   they are actually live from def until after the insn they are
5851debfc3dSmrg 	   defined in.  */
5861debfc3dSmrg 	if (ind2reg[i]->m_lr_end == 0)
5871debfc3dSmrg 	  ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
5881debfc3dSmrg       }
5891debfc3dSmrg 
5901debfc3dSmrg   /* Sort all intervals by increasing start point.  */
5911debfc3dSmrg   gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);
5921debfc3dSmrg 
5931debfc3dSmrg   if (flag_checking)
5941debfc3dSmrg     for (unsigned i = 0; i < ind2reg.length (); i++)
5951debfc3dSmrg       gcc_assert (ind2reg[i]);
5961debfc3dSmrg 
5971debfc3dSmrg   ind2reg.qsort (cmp_begin);
5981debfc3dSmrg   for (i = 0; i < 4; i++)
5991debfc3dSmrg     active[i].reserve_exact (hsa_cfun->m_reg_count);
6001debfc3dSmrg 
6011debfc3dSmrg   /* Now comes the linear scan allocation.  */
6021debfc3dSmrg   for (i = 0; i < hsa_cfun->m_reg_count; i++)
6031debfc3dSmrg     {
6041debfc3dSmrg       hsa_op_reg *reg = ind2reg[i];
6051debfc3dSmrg       if (!reg)
6061debfc3dSmrg 	continue;
6071debfc3dSmrg       expire_old_intervals (reg, active, classes);
6081debfc3dSmrg       int cl = m_reg_class_for_type (reg->m_type);
6091debfc3dSmrg       if (try_alloc_reg (classes, reg) >= 0)
6101debfc3dSmrg 	{
6111debfc3dSmrg 	  unsigned place = active[cl].lower_bound (reg, cmp_end);
6121debfc3dSmrg 	  active[cl].quick_insert (place, reg);
6131debfc3dSmrg 	}
6141debfc3dSmrg       else
6151debfc3dSmrg 	spill_at_interval (reg, active);
6161debfc3dSmrg 
6171debfc3dSmrg       /* Some interesting dumping as we go.  */
6181debfc3dSmrg       if (dump_file && (dump_flags & TDF_DETAILS))
6191debfc3dSmrg 	{
6201debfc3dSmrg 	  fprintf (dump_file, "  reg%d: [%5d, %5d)->",
6211debfc3dSmrg 		   reg->m_order, reg->m_lr_begin, reg->m_lr_end);
6221debfc3dSmrg 	  if (reg->m_reg_class)
6231debfc3dSmrg 	    fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
6241debfc3dSmrg 	  else
6251debfc3dSmrg 	    fprintf (dump_file, "[%%__%s_%i]",
6261debfc3dSmrg 		     hsa_seg_name (reg->m_spill_sym->m_segment),
6271debfc3dSmrg 		     reg->m_spill_sym->m_name_number);
6281debfc3dSmrg 	  for (int cl = 0; cl < 4; cl++)
6291debfc3dSmrg 	    {
6301debfc3dSmrg 	      bool first = true;
6311debfc3dSmrg 	      hsa_op_reg *r;
6321debfc3dSmrg 	      fprintf (dump_file, " {");
6331debfc3dSmrg 	      for (int j = 0; active[cl].iterate (j, &r); j++)
6341debfc3dSmrg 		if (first)
6351debfc3dSmrg 		  {
6361debfc3dSmrg 		    fprintf (dump_file, "%d", r->m_order);
6371debfc3dSmrg 		    first = false;
6381debfc3dSmrg 		  }
6391debfc3dSmrg 		else
6401debfc3dSmrg 		  fprintf (dump_file, ", %d", r->m_order);
6411debfc3dSmrg 	      fprintf (dump_file, "}");
6421debfc3dSmrg 	    }
6431debfc3dSmrg 	  fprintf (dump_file, "\n");
6441debfc3dSmrg 	}
6451debfc3dSmrg     }
6461debfc3dSmrg 
6471debfc3dSmrg   BITMAP_FREE (work);
6481debfc3dSmrg   free (bbs);
6491debfc3dSmrg 
6501debfc3dSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
6511debfc3dSmrg     {
6521debfc3dSmrg       fprintf (dump_file, "------- After liveness: -------\n");
6531debfc3dSmrg       dump_hsa_cfun_regalloc (dump_file);
6541debfc3dSmrg       fprintf (dump_file, "  ----- Intervals:\n");
6551debfc3dSmrg       for (i = 0; i < hsa_cfun->m_reg_count; i++)
6561debfc3dSmrg 	{
6571debfc3dSmrg 	  hsa_op_reg *reg = ind2reg[i];
6581debfc3dSmrg 	  if (!reg)
6591debfc3dSmrg 	    continue;
6601debfc3dSmrg 	  fprintf (dump_file, "  reg%d: [%5d, %5d)->", reg->m_order,
6611debfc3dSmrg 		   reg->m_lr_begin, reg->m_lr_end);
6621debfc3dSmrg 	  if (reg->m_reg_class)
6631debfc3dSmrg 	    fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
6641debfc3dSmrg 	  else
6651debfc3dSmrg 	    fprintf (dump_file, "[%%__%s_%i]\n",
6661debfc3dSmrg 		     hsa_seg_name (reg->m_spill_sym->m_segment),
6671debfc3dSmrg 		     reg->m_spill_sym->m_name_number);
6681debfc3dSmrg 	}
6691debfc3dSmrg     }
6701debfc3dSmrg 
6711debfc3dSmrg   for (i = 0; i < 4; i++)
6721debfc3dSmrg     active[i].release ();
6731debfc3dSmrg   ind2reg.release ();
6741debfc3dSmrg }
6751debfc3dSmrg 
6761debfc3dSmrg /* Entry point for register allocation.  */
6771debfc3dSmrg 
6781debfc3dSmrg static void
regalloc(void)6791debfc3dSmrg regalloc (void)
6801debfc3dSmrg {
6811debfc3dSmrg   basic_block bb;
6821debfc3dSmrg   m_reg_class_desc classes[4];
6831debfc3dSmrg 
6841debfc3dSmrg   /* If there are no registers used in the function, exit right away.  */
6851debfc3dSmrg   if (hsa_cfun->m_reg_count == 0)
6861debfc3dSmrg     return;
6871debfc3dSmrg 
6881debfc3dSmrg   memset (classes, 0, sizeof (classes));
6891debfc3dSmrg   classes[0].next_avail = 0;
6901debfc3dSmrg   classes[0].max_num = 7;
6911debfc3dSmrg   classes[0].cl_char = 'c';
6921debfc3dSmrg   classes[1].cl_char = 's';
6931debfc3dSmrg   classes[2].cl_char = 'd';
6941debfc3dSmrg   classes[3].cl_char = 'q';
6951debfc3dSmrg 
6961debfc3dSmrg   for (int i = 1; i < 4; i++)
6971debfc3dSmrg     {
6981debfc3dSmrg       classes[i].next_avail = 0;
6991debfc3dSmrg       classes[i].max_num = 20;
7001debfc3dSmrg     }
7011debfc3dSmrg 
7021debfc3dSmrg   linear_scan_regalloc (classes);
7031debfc3dSmrg 
7041debfc3dSmrg   FOR_ALL_BB_FN (bb, cfun)
7051debfc3dSmrg     rewrite_code_bb (bb, classes);
7061debfc3dSmrg }
7071debfc3dSmrg 
7081debfc3dSmrg /* Out of SSA and register allocation on HSAIL IL.  */
7091debfc3dSmrg 
7101debfc3dSmrg void
hsa_regalloc(void)7111debfc3dSmrg hsa_regalloc (void)
7121debfc3dSmrg {
7131debfc3dSmrg   hsa_cfun->update_dominance ();
7141debfc3dSmrg   naive_outof_ssa ();
7151debfc3dSmrg 
7161debfc3dSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
7171debfc3dSmrg     {
7181debfc3dSmrg       fprintf (dump_file, "------- After out-of-SSA: -------\n");
7191debfc3dSmrg       dump_hsa_cfun (dump_file);
7201debfc3dSmrg     }
7211debfc3dSmrg 
7221debfc3dSmrg   regalloc ();
7231debfc3dSmrg 
7241debfc3dSmrg   if (dump_file && (dump_flags & TDF_DETAILS))
7251debfc3dSmrg     {
7261debfc3dSmrg       fprintf (dump_file, "------- After register allocation: -------\n");
7271debfc3dSmrg       dump_hsa_cfun (dump_file);
7281debfc3dSmrg     }
7291debfc3dSmrg }
730