11debfc3dSmrg /* HSAIL IL Register allocation and out-of-SSA.
2*8feb0f0bSmrg Copyright (C) 2013-2020 Free Software Foundation, Inc.
31debfc3dSmrg Contributed by Michael Matz <matz@suse.de>
41debfc3dSmrg
51debfc3dSmrg This file is part of GCC.
61debfc3dSmrg
71debfc3dSmrg GCC is free software; you can redistribute it and/or modify
81debfc3dSmrg it under the terms of the GNU General Public License as published by
91debfc3dSmrg the Free Software Foundation; either version 3, or (at your option)
101debfc3dSmrg any later version.
111debfc3dSmrg
121debfc3dSmrg GCC is distributed in the hope that it will be useful,
131debfc3dSmrg but WITHOUT ANY WARRANTY; without even the implied warranty of
141debfc3dSmrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
151debfc3dSmrg GNU General Public License for more details.
161debfc3dSmrg
171debfc3dSmrg You should have received a copy of the GNU General Public License
181debfc3dSmrg along with GCC; see the file COPYING3. If not see
191debfc3dSmrg <http://www.gnu.org/licenses/>. */
201debfc3dSmrg
211debfc3dSmrg #include "config.h"
221debfc3dSmrg #include "system.h"
231debfc3dSmrg #include "coretypes.h"
241debfc3dSmrg #include "tm.h"
251debfc3dSmrg #include "is-a.h"
261debfc3dSmrg #include "vec.h"
271debfc3dSmrg #include "tree.h"
281debfc3dSmrg #include "dominance.h"
29a2dc1f3fSmrg #include "basic-block.h"
301debfc3dSmrg #include "function.h"
31c0a68be4Smrg #include "cfganal.h"
32c0a68be4Smrg #include "cfg.h"
331debfc3dSmrg #include "bitmap.h"
341debfc3dSmrg #include "dumpfile.h"
351debfc3dSmrg #include "cgraph.h"
361debfc3dSmrg #include "print-tree.h"
371debfc3dSmrg #include "cfghooks.h"
38*8feb0f0bSmrg #include "alloc-pool.h"
391debfc3dSmrg #include "symbol-summary.h"
401debfc3dSmrg #include "hsa-common.h"
411debfc3dSmrg
421debfc3dSmrg
431debfc3dSmrg /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */
441debfc3dSmrg
451debfc3dSmrg static void
naive_process_phi(hsa_insn_phi * phi,const vec<edge> & predecessors)46a2dc1f3fSmrg naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
471debfc3dSmrg {
481debfc3dSmrg unsigned count = phi->operand_count ();
491debfc3dSmrg for (unsigned i = 0; i < count; i++)
501debfc3dSmrg {
511debfc3dSmrg gcc_checking_assert (phi->get_op (i));
521debfc3dSmrg hsa_op_base *op = phi->get_op (i);
531debfc3dSmrg hsa_bb *hbb;
541debfc3dSmrg edge e;
551debfc3dSmrg
561debfc3dSmrg if (!op)
571debfc3dSmrg break;
581debfc3dSmrg
59a2dc1f3fSmrg e = predecessors[i];
601debfc3dSmrg if (single_succ_p (e->src))
611debfc3dSmrg hbb = hsa_bb_for_bb (e->src);
621debfc3dSmrg else
631debfc3dSmrg {
641debfc3dSmrg basic_block old_dest = e->dest;
651debfc3dSmrg hbb = hsa_init_new_bb (split_edge (e));
661debfc3dSmrg
671debfc3dSmrg /* If switch insn used this edge, fix jump table. */
681debfc3dSmrg hsa_bb *source = hsa_bb_for_bb (e->src);
691debfc3dSmrg hsa_insn_sbr *sbr;
701debfc3dSmrg if (source->m_last_insn
711debfc3dSmrg && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
721debfc3dSmrg sbr->replace_all_labels (old_dest, hbb->m_bb);
731debfc3dSmrg }
741debfc3dSmrg
751debfc3dSmrg hsa_build_append_simple_mov (phi->m_dest, op, hbb);
761debfc3dSmrg }
771debfc3dSmrg }
781debfc3dSmrg
791debfc3dSmrg /* Naive out-of SSA. */
801debfc3dSmrg
811debfc3dSmrg static void
naive_outof_ssa(void)821debfc3dSmrg naive_outof_ssa (void)
831debfc3dSmrg {
841debfc3dSmrg basic_block bb;
851debfc3dSmrg
861debfc3dSmrg hsa_cfun->m_in_ssa = false;
871debfc3dSmrg
881debfc3dSmrg FOR_ALL_BB_FN (bb, cfun)
891debfc3dSmrg {
901debfc3dSmrg hsa_bb *hbb = hsa_bb_for_bb (bb);
911debfc3dSmrg hsa_insn_phi *phi;
921debfc3dSmrg
93a2dc1f3fSmrg /* naive_process_phi can call split_edge on an incoming edge which order if
94a2dc1f3fSmrg the incoming edges to the basic block and thus make it inconsistent with
95a2dc1f3fSmrg the ordering of PHI arguments, so we collect them in advance. */
96a2dc1f3fSmrg auto_vec<edge, 8> predecessors;
97a2dc1f3fSmrg unsigned pred_count = EDGE_COUNT (bb->preds);
98a2dc1f3fSmrg for (unsigned i = 0; i < pred_count; i++)
99a2dc1f3fSmrg predecessors.safe_push (EDGE_PRED (bb, i));
100a2dc1f3fSmrg
1011debfc3dSmrg for (phi = hbb->m_first_phi;
1021debfc3dSmrg phi;
1031debfc3dSmrg phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
104a2dc1f3fSmrg naive_process_phi (phi, predecessors);
1051debfc3dSmrg
1061debfc3dSmrg /* Zap PHI nodes, they will be deallocated when everything else will. */
1071debfc3dSmrg hbb->m_first_phi = NULL;
1081debfc3dSmrg hbb->m_last_phi = NULL;
1091debfc3dSmrg }
1101debfc3dSmrg }
1111debfc3dSmrg
1121debfc3dSmrg /* Return register class number for the given HSA TYPE. 0 means the 'c' one
1131debfc3dSmrg bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
1141debfc3dSmrg and 3 for 'q' 128 bit class. */
1151debfc3dSmrg
1161debfc3dSmrg static int
m_reg_class_for_type(BrigType16_t type)1171debfc3dSmrg m_reg_class_for_type (BrigType16_t type)
1181debfc3dSmrg {
1191debfc3dSmrg switch (type)
1201debfc3dSmrg {
1211debfc3dSmrg case BRIG_TYPE_B1:
1221debfc3dSmrg return 0;
1231debfc3dSmrg
1241debfc3dSmrg case BRIG_TYPE_U8:
1251debfc3dSmrg case BRIG_TYPE_U16:
1261debfc3dSmrg case BRIG_TYPE_U32:
1271debfc3dSmrg case BRIG_TYPE_S8:
1281debfc3dSmrg case BRIG_TYPE_S16:
1291debfc3dSmrg case BRIG_TYPE_S32:
1301debfc3dSmrg case BRIG_TYPE_F16:
1311debfc3dSmrg case BRIG_TYPE_F32:
1321debfc3dSmrg case BRIG_TYPE_B8:
1331debfc3dSmrg case BRIG_TYPE_B16:
1341debfc3dSmrg case BRIG_TYPE_B32:
1351debfc3dSmrg case BRIG_TYPE_U8X4:
1361debfc3dSmrg case BRIG_TYPE_S8X4:
1371debfc3dSmrg case BRIG_TYPE_U16X2:
1381debfc3dSmrg case BRIG_TYPE_S16X2:
1391debfc3dSmrg case BRIG_TYPE_F16X2:
1401debfc3dSmrg return 1;
1411debfc3dSmrg
1421debfc3dSmrg case BRIG_TYPE_U64:
1431debfc3dSmrg case BRIG_TYPE_S64:
1441debfc3dSmrg case BRIG_TYPE_F64:
1451debfc3dSmrg case BRIG_TYPE_B64:
1461debfc3dSmrg case BRIG_TYPE_U8X8:
1471debfc3dSmrg case BRIG_TYPE_S8X8:
1481debfc3dSmrg case BRIG_TYPE_U16X4:
1491debfc3dSmrg case BRIG_TYPE_S16X4:
1501debfc3dSmrg case BRIG_TYPE_F16X4:
1511debfc3dSmrg case BRIG_TYPE_U32X2:
1521debfc3dSmrg case BRIG_TYPE_S32X2:
1531debfc3dSmrg case BRIG_TYPE_F32X2:
1541debfc3dSmrg return 2;
1551debfc3dSmrg
1561debfc3dSmrg case BRIG_TYPE_B128:
1571debfc3dSmrg case BRIG_TYPE_U8X16:
1581debfc3dSmrg case BRIG_TYPE_S8X16:
1591debfc3dSmrg case BRIG_TYPE_U16X8:
1601debfc3dSmrg case BRIG_TYPE_S16X8:
1611debfc3dSmrg case BRIG_TYPE_F16X8:
1621debfc3dSmrg case BRIG_TYPE_U32X4:
1631debfc3dSmrg case BRIG_TYPE_U64X2:
1641debfc3dSmrg case BRIG_TYPE_S32X4:
1651debfc3dSmrg case BRIG_TYPE_S64X2:
1661debfc3dSmrg case BRIG_TYPE_F32X4:
1671debfc3dSmrg case BRIG_TYPE_F64X2:
1681debfc3dSmrg return 3;
1691debfc3dSmrg
1701debfc3dSmrg default:
1711debfc3dSmrg gcc_unreachable ();
1721debfc3dSmrg }
1731debfc3dSmrg }
1741debfc3dSmrg
1751debfc3dSmrg /* If the Ith operands of INSN is or contains a register (in an address),
1761debfc3dSmrg return the address of that register operand. If not return NULL. */
1771debfc3dSmrg
1781debfc3dSmrg static hsa_op_reg **
insn_reg_addr(hsa_insn_basic * insn,int i)1791debfc3dSmrg insn_reg_addr (hsa_insn_basic *insn, int i)
1801debfc3dSmrg {
1811debfc3dSmrg hsa_op_base *op = insn->get_op (i);
1821debfc3dSmrg if (!op)
1831debfc3dSmrg return NULL;
1841debfc3dSmrg hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
1851debfc3dSmrg if (reg)
1861debfc3dSmrg return (hsa_op_reg **) insn->get_op_addr (i);
1871debfc3dSmrg hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
1881debfc3dSmrg if (addr && addr->m_reg)
1891debfc3dSmrg return &addr->m_reg;
1901debfc3dSmrg return NULL;
1911debfc3dSmrg }
1921debfc3dSmrg
1931debfc3dSmrg struct m_reg_class_desc
1941debfc3dSmrg {
1951debfc3dSmrg unsigned next_avail, max_num;
1961debfc3dSmrg unsigned used_num, max_used;
1971debfc3dSmrg uint64_t used[2];
1981debfc3dSmrg char cl_char;
1991debfc3dSmrg };
2001debfc3dSmrg
2011debfc3dSmrg /* Rewrite the instructions in BB to observe spilled live ranges.
2021debfc3dSmrg CLASSES is the global register class state. */
2031debfc3dSmrg
2041debfc3dSmrg static void
rewrite_code_bb(basic_block bb,struct m_reg_class_desc * classes)2051debfc3dSmrg rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
2061debfc3dSmrg {
2071debfc3dSmrg hsa_bb *hbb = hsa_bb_for_bb (bb);
2081debfc3dSmrg hsa_insn_basic *insn, *next_insn;
2091debfc3dSmrg
2101debfc3dSmrg for (insn = hbb->m_first_insn; insn; insn = next_insn)
2111debfc3dSmrg {
2121debfc3dSmrg next_insn = insn->m_next;
2131debfc3dSmrg unsigned count = insn->operand_count ();
2141debfc3dSmrg for (unsigned i = 0; i < count; i++)
2151debfc3dSmrg {
2161debfc3dSmrg gcc_checking_assert (insn->get_op (i));
2171debfc3dSmrg hsa_op_reg **regaddr = insn_reg_addr (insn, i);
2181debfc3dSmrg
2191debfc3dSmrg if (regaddr)
2201debfc3dSmrg {
2211debfc3dSmrg hsa_op_reg *reg = *regaddr;
2221debfc3dSmrg if (reg->m_reg_class)
2231debfc3dSmrg continue;
2241debfc3dSmrg gcc_assert (reg->m_spill_sym);
2251debfc3dSmrg
2261debfc3dSmrg int cl = m_reg_class_for_type (reg->m_type);
2271debfc3dSmrg hsa_op_reg *tmp, *tmp2;
2281debfc3dSmrg if (insn->op_output_p (i))
2291debfc3dSmrg tmp = hsa_spill_out (insn, reg, &tmp2);
2301debfc3dSmrg else
2311debfc3dSmrg tmp = hsa_spill_in (insn, reg, &tmp2);
2321debfc3dSmrg
2331debfc3dSmrg *regaddr = tmp;
2341debfc3dSmrg
2351debfc3dSmrg tmp->m_reg_class = classes[cl].cl_char;
2361debfc3dSmrg tmp->m_hard_num = (char) (classes[cl].max_num + i);
2371debfc3dSmrg if (tmp2)
2381debfc3dSmrg {
2391debfc3dSmrg gcc_assert (cl == 0);
2401debfc3dSmrg tmp2->m_reg_class = classes[1].cl_char;
2411debfc3dSmrg tmp2->m_hard_num = (char) (classes[1].max_num + i);
2421debfc3dSmrg }
2431debfc3dSmrg }
2441debfc3dSmrg }
2451debfc3dSmrg }
2461debfc3dSmrg }
2471debfc3dSmrg
2481debfc3dSmrg /* Dump current function to dump file F, with info specific
2491debfc3dSmrg to register allocation. */
2501debfc3dSmrg
2511debfc3dSmrg void
dump_hsa_cfun_regalloc(FILE * f)2521debfc3dSmrg dump_hsa_cfun_regalloc (FILE *f)
2531debfc3dSmrg {
2541debfc3dSmrg basic_block bb;
2551debfc3dSmrg
2561debfc3dSmrg fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
2571debfc3dSmrg
2581debfc3dSmrg FOR_ALL_BB_FN (bb, cfun)
2591debfc3dSmrg {
260*8feb0f0bSmrg hsa_bb *hbb = (class hsa_bb *) bb->aux;
2611debfc3dSmrg bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n");
2621debfc3dSmrg dump_hsa_bb (f, hbb);
2631debfc3dSmrg bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
2641debfc3dSmrg }
2651debfc3dSmrg }
2661debfc3dSmrg
2671debfc3dSmrg /* Given the global register allocation state CLASSES and a
2681debfc3dSmrg register REG, try to give it a hardware register. If successful,
2691debfc3dSmrg store that hardreg in REG and return it, otherwise return -1.
2701debfc3dSmrg Also changes CLASSES to accommodate for the allocated register. */
2711debfc3dSmrg
2721debfc3dSmrg static int
try_alloc_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)2731debfc3dSmrg try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
2741debfc3dSmrg {
2751debfc3dSmrg int cl = m_reg_class_for_type (reg->m_type);
2761debfc3dSmrg int ret = -1;
2771debfc3dSmrg if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
2781debfc3dSmrg >= 128 - 5)
2791debfc3dSmrg return -1;
2801debfc3dSmrg if (classes[cl].used_num < classes[cl].max_num)
2811debfc3dSmrg {
2821debfc3dSmrg unsigned int i;
2831debfc3dSmrg classes[cl].used_num++;
2841debfc3dSmrg if (classes[cl].used_num > classes[cl].max_used)
2851debfc3dSmrg classes[cl].max_used = classes[cl].used_num;
2861debfc3dSmrg for (i = 0; i < classes[cl].used_num; i++)
2871debfc3dSmrg if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
2881debfc3dSmrg break;
2891debfc3dSmrg ret = i;
2901debfc3dSmrg classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
2911debfc3dSmrg reg->m_reg_class = classes[cl].cl_char;
2921debfc3dSmrg reg->m_hard_num = i;
2931debfc3dSmrg }
2941debfc3dSmrg return ret;
2951debfc3dSmrg }
2961debfc3dSmrg
2971debfc3dSmrg /* Free up hardregs used by REG, into allocation state CLASSES. */
2981debfc3dSmrg
2991debfc3dSmrg static void
free_reg(struct m_reg_class_desc * classes,hsa_op_reg * reg)3001debfc3dSmrg free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
3011debfc3dSmrg {
3021debfc3dSmrg int cl = m_reg_class_for_type (reg->m_type);
3031debfc3dSmrg int ret = reg->m_hard_num;
3041debfc3dSmrg gcc_assert (reg->m_reg_class == classes[cl].cl_char);
3051debfc3dSmrg classes[cl].used_num--;
3061debfc3dSmrg classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
3071debfc3dSmrg }
3081debfc3dSmrg
3091debfc3dSmrg /* Note that the live range for REG ends at least at END. */
3101debfc3dSmrg
3111debfc3dSmrg static void
note_lr_end(hsa_op_reg * reg,int end)3121debfc3dSmrg note_lr_end (hsa_op_reg *reg, int end)
3131debfc3dSmrg {
3141debfc3dSmrg if (reg->m_lr_end < end)
3151debfc3dSmrg reg->m_lr_end = end;
3161debfc3dSmrg }
3171debfc3dSmrg
3181debfc3dSmrg /* Note that the live range for REG starts at least at BEGIN. */
3191debfc3dSmrg
3201debfc3dSmrg static void
note_lr_begin(hsa_op_reg * reg,int begin)3211debfc3dSmrg note_lr_begin (hsa_op_reg *reg, int begin)
3221debfc3dSmrg {
3231debfc3dSmrg if (reg->m_lr_begin > begin)
3241debfc3dSmrg reg->m_lr_begin = begin;
3251debfc3dSmrg }
3261debfc3dSmrg
3271debfc3dSmrg /* Given two registers A and B, return -1, 0 or 1 if A's live range
3281debfc3dSmrg starts before, at or after B's live range. */
3291debfc3dSmrg
3301debfc3dSmrg static int
cmp_begin(const void * a,const void * b)3311debfc3dSmrg cmp_begin (const void *a, const void *b)
3321debfc3dSmrg {
3331debfc3dSmrg const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
3341debfc3dSmrg const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
3351debfc3dSmrg int ret;
3361debfc3dSmrg if (rega == regb)
3371debfc3dSmrg return 0;
3381debfc3dSmrg ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
3391debfc3dSmrg if (ret)
3401debfc3dSmrg return ret;
3411debfc3dSmrg return ((*rega)->m_order - (*regb)->m_order);
3421debfc3dSmrg }
3431debfc3dSmrg
3441debfc3dSmrg /* Given two registers REGA and REGB, return true if REGA's
3451debfc3dSmrg live range ends after REGB's. This results in a sorting order
3461debfc3dSmrg with earlier end points at the end. */
3471debfc3dSmrg
3481debfc3dSmrg static bool
cmp_end(hsa_op_reg * const & rega,hsa_op_reg * const & regb)3491debfc3dSmrg cmp_end (hsa_op_reg * const ®a, hsa_op_reg * const ®b)
3501debfc3dSmrg {
3511debfc3dSmrg int ret;
3521debfc3dSmrg if (rega == regb)
3531debfc3dSmrg return false;
3541debfc3dSmrg ret = (regb)->m_lr_end - (rega)->m_lr_end;
3551debfc3dSmrg if (ret)
3561debfc3dSmrg return ret < 0;
3571debfc3dSmrg return (((regb)->m_order - (rega)->m_order)) < 0;
3581debfc3dSmrg }
3591debfc3dSmrg
3601debfc3dSmrg /* Expire all old intervals in ACTIVE (a per-regclass vector),
3611debfc3dSmrg that is, those that end before the interval REG starts. Give
3621debfc3dSmrg back resources freed so into the state CLASSES. */
3631debfc3dSmrg
3641debfc3dSmrg static void
expire_old_intervals(hsa_op_reg * reg,vec<hsa_op_reg * > * active,struct m_reg_class_desc * classes)3651debfc3dSmrg expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
3661debfc3dSmrg struct m_reg_class_desc *classes)
3671debfc3dSmrg {
3681debfc3dSmrg for (int i = 0; i < 4; i++)
3691debfc3dSmrg while (!active[i].is_empty ())
3701debfc3dSmrg {
3711debfc3dSmrg hsa_op_reg *a = active[i].pop ();
3721debfc3dSmrg if (a->m_lr_end > reg->m_lr_begin)
3731debfc3dSmrg {
3741debfc3dSmrg active[i].quick_push (a);
3751debfc3dSmrg break;
3761debfc3dSmrg }
3771debfc3dSmrg free_reg (classes, a);
3781debfc3dSmrg }
3791debfc3dSmrg }
3801debfc3dSmrg
3811debfc3dSmrg /* The interval REG didn't get a hardreg. Spill it or one of those
3821debfc3dSmrg from ACTIVE (if the latter, then REG will become allocated to the
3831debfc3dSmrg hardreg that formerly was used by it). */
3841debfc3dSmrg
3851debfc3dSmrg static void
spill_at_interval(hsa_op_reg * reg,vec<hsa_op_reg * > * active)3861debfc3dSmrg spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
3871debfc3dSmrg {
3881debfc3dSmrg int cl = m_reg_class_for_type (reg->m_type);
3891debfc3dSmrg gcc_assert (!active[cl].is_empty ());
3901debfc3dSmrg hsa_op_reg *cand = active[cl][0];
3911debfc3dSmrg if (cand->m_lr_end > reg->m_lr_end)
3921debfc3dSmrg {
3931debfc3dSmrg reg->m_reg_class = cand->m_reg_class;
3941debfc3dSmrg reg->m_hard_num = cand->m_hard_num;
3951debfc3dSmrg active[cl].ordered_remove (0);
3961debfc3dSmrg unsigned place = active[cl].lower_bound (reg, cmp_end);
3971debfc3dSmrg active[cl].quick_insert (place, reg);
3981debfc3dSmrg }
3991debfc3dSmrg else
4001debfc3dSmrg cand = reg;
4011debfc3dSmrg
4021debfc3dSmrg gcc_assert (!cand->m_spill_sym);
4031debfc3dSmrg BrigType16_t type = cand->m_type;
4041debfc3dSmrg if (type == BRIG_TYPE_B1)
4051debfc3dSmrg type = BRIG_TYPE_U8;
4061debfc3dSmrg cand->m_reg_class = 0;
4071debfc3dSmrg cand->m_spill_sym = hsa_get_spill_symbol (type);
4081debfc3dSmrg cand->m_spill_sym->m_name_number = cand->m_order;
4091debfc3dSmrg }
4101debfc3dSmrg
4111debfc3dSmrg /* Given the global register state CLASSES allocate all HSA virtual
4121debfc3dSmrg registers either to hardregs or to a spill symbol. */
4131debfc3dSmrg
4141debfc3dSmrg static void
linear_scan_regalloc(struct m_reg_class_desc * classes)4151debfc3dSmrg linear_scan_regalloc (struct m_reg_class_desc *classes)
4161debfc3dSmrg {
4171debfc3dSmrg /* Compute liveness. */
4181debfc3dSmrg bool changed;
4191debfc3dSmrg int i, n;
4201debfc3dSmrg int insn_order;
4211debfc3dSmrg int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
4221debfc3dSmrg bitmap work = BITMAP_ALLOC (NULL);
4231debfc3dSmrg vec<hsa_op_reg*> ind2reg = vNULL;
4241debfc3dSmrg vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
4251debfc3dSmrg hsa_insn_basic *m_last_insn;
4261debfc3dSmrg
4271debfc3dSmrg /* We will need the reverse post order for linearization,
4281debfc3dSmrg and the post order for liveness analysis, which is the same
4291debfc3dSmrg backward. */
4301debfc3dSmrg n = pre_and_rev_post_order_compute (NULL, bbs, true);
4311debfc3dSmrg ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);
4321debfc3dSmrg
4331debfc3dSmrg /* Give all instructions a linearized number, at the same time
4341debfc3dSmrg build a mapping from register index to register. */
4351debfc3dSmrg insn_order = 1;
4361debfc3dSmrg for (i = 0; i < n; i++)
4371debfc3dSmrg {
4381debfc3dSmrg basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
4391debfc3dSmrg hsa_bb *hbb = hsa_bb_for_bb (bb);
4401debfc3dSmrg hsa_insn_basic *insn;
4411debfc3dSmrg for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
4421debfc3dSmrg {
4431debfc3dSmrg unsigned opi;
4441debfc3dSmrg insn->m_number = insn_order++;
4451debfc3dSmrg for (opi = 0; opi < insn->operand_count (); opi++)
4461debfc3dSmrg {
4471debfc3dSmrg gcc_checking_assert (insn->get_op (opi));
4481debfc3dSmrg hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
4491debfc3dSmrg if (regaddr)
4501debfc3dSmrg ind2reg[(*regaddr)->m_order] = *regaddr;
4511debfc3dSmrg }
4521debfc3dSmrg }
4531debfc3dSmrg }
4541debfc3dSmrg
4551debfc3dSmrg /* Initialize all live ranges to [after-end, 0). */
4561debfc3dSmrg for (i = 0; i < hsa_cfun->m_reg_count; i++)
4571debfc3dSmrg if (ind2reg[i])
4581debfc3dSmrg ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;
4591debfc3dSmrg
4601debfc3dSmrg /* Classic liveness analysis, as long as something changes:
4611debfc3dSmrg m_liveout is union (m_livein of successors)
4621debfc3dSmrg m_livein is m_liveout minus defs plus uses. */
4631debfc3dSmrg do
4641debfc3dSmrg {
4651debfc3dSmrg changed = false;
4661debfc3dSmrg for (i = n - 1; i >= 0; i--)
4671debfc3dSmrg {
4681debfc3dSmrg edge e;
4691debfc3dSmrg edge_iterator ei;
4701debfc3dSmrg basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
4711debfc3dSmrg hsa_bb *hbb = hsa_bb_for_bb (bb);
4721debfc3dSmrg
4731debfc3dSmrg /* Union of successors m_livein (or empty if none). */
4741debfc3dSmrg bool first = true;
4751debfc3dSmrg FOR_EACH_EDGE (e, ei, bb->succs)
4761debfc3dSmrg if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
4771debfc3dSmrg {
4781debfc3dSmrg hsa_bb *succ = hsa_bb_for_bb (e->dest);
4791debfc3dSmrg if (first)
4801debfc3dSmrg {
4811debfc3dSmrg bitmap_copy (work, succ->m_livein);
4821debfc3dSmrg first = false;
4831debfc3dSmrg }
4841debfc3dSmrg else
4851debfc3dSmrg bitmap_ior_into (work, succ->m_livein);
4861debfc3dSmrg }
4871debfc3dSmrg if (first)
4881debfc3dSmrg bitmap_clear (work);
4891debfc3dSmrg
4901debfc3dSmrg bitmap_copy (hbb->m_liveout, work);
4911debfc3dSmrg
4921debfc3dSmrg /* Remove defs, include uses in a backward insn walk. */
4931debfc3dSmrg hsa_insn_basic *insn;
4941debfc3dSmrg for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
4951debfc3dSmrg {
4961debfc3dSmrg unsigned opi;
4971debfc3dSmrg unsigned ndefs = insn->input_count ();
4981debfc3dSmrg for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
4991debfc3dSmrg {
5001debfc3dSmrg gcc_checking_assert (insn->get_op (opi));
5011debfc3dSmrg hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
5021debfc3dSmrg if (regaddr)
5031debfc3dSmrg bitmap_clear_bit (work, (*regaddr)->m_order);
5041debfc3dSmrg }
5051debfc3dSmrg for (; opi < insn->operand_count (); opi++)
5061debfc3dSmrg {
5071debfc3dSmrg gcc_checking_assert (insn->get_op (opi));
5081debfc3dSmrg hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
5091debfc3dSmrg if (regaddr)
5101debfc3dSmrg bitmap_set_bit (work, (*regaddr)->m_order);
5111debfc3dSmrg }
5121debfc3dSmrg }
5131debfc3dSmrg
5141debfc3dSmrg /* Note if that changed something. */
5151debfc3dSmrg if (bitmap_ior_into (hbb->m_livein, work))
5161debfc3dSmrg changed = true;
5171debfc3dSmrg }
5181debfc3dSmrg }
5191debfc3dSmrg while (changed);
5201debfc3dSmrg
5211debfc3dSmrg /* Make one pass through all instructions in linear order,
5221debfc3dSmrg noting and merging possible live range start and end points. */
5231debfc3dSmrg m_last_insn = NULL;
5241debfc3dSmrg for (i = n - 1; i >= 0; i--)
5251debfc3dSmrg {
5261debfc3dSmrg basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
5271debfc3dSmrg hsa_bb *hbb = hsa_bb_for_bb (bb);
5281debfc3dSmrg hsa_insn_basic *insn;
5291debfc3dSmrg int after_end_number;
5301debfc3dSmrg unsigned bit;
5311debfc3dSmrg bitmap_iterator bi;
5321debfc3dSmrg
5331debfc3dSmrg if (m_last_insn)
5341debfc3dSmrg after_end_number = m_last_insn->m_number;
5351debfc3dSmrg else
5361debfc3dSmrg after_end_number = insn_order;
5371debfc3dSmrg /* Everything live-out in this BB has at least an end point
5381debfc3dSmrg after us. */
5391debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
5401debfc3dSmrg note_lr_end (ind2reg[bit], after_end_number);
5411debfc3dSmrg
5421debfc3dSmrg for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
5431debfc3dSmrg {
5441debfc3dSmrg unsigned opi;
5451debfc3dSmrg unsigned ndefs = insn->input_count ();
5461debfc3dSmrg for (opi = 0; opi < insn->operand_count (); opi++)
5471debfc3dSmrg {
5481debfc3dSmrg gcc_checking_assert (insn->get_op (opi));
5491debfc3dSmrg hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
5501debfc3dSmrg if (regaddr)
5511debfc3dSmrg {
5521debfc3dSmrg hsa_op_reg *reg = *regaddr;
5531debfc3dSmrg if (opi < ndefs)
5541debfc3dSmrg note_lr_begin (reg, insn->m_number);
5551debfc3dSmrg else
5561debfc3dSmrg note_lr_end (reg, insn->m_number);
5571debfc3dSmrg }
5581debfc3dSmrg }
5591debfc3dSmrg }
5601debfc3dSmrg
5611debfc3dSmrg /* Everything live-in in this BB has a start point before
5621debfc3dSmrg our first insn. */
5631debfc3dSmrg int before_start_number;
5641debfc3dSmrg if (hbb->m_first_insn)
5651debfc3dSmrg before_start_number = hbb->m_first_insn->m_number;
5661debfc3dSmrg else
5671debfc3dSmrg before_start_number = after_end_number;
5681debfc3dSmrg before_start_number--;
5691debfc3dSmrg EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
5701debfc3dSmrg note_lr_begin (ind2reg[bit], before_start_number);
5711debfc3dSmrg
5721debfc3dSmrg if (hbb->m_first_insn)
5731debfc3dSmrg m_last_insn = hbb->m_first_insn;
5741debfc3dSmrg }
5751debfc3dSmrg
5761debfc3dSmrg for (i = 0; i < hsa_cfun->m_reg_count; i++)
5771debfc3dSmrg if (ind2reg[i])
5781debfc3dSmrg {
5791debfc3dSmrg /* All regs that have still their start at after all code actually
5801debfc3dSmrg are defined at the start of the routine (prologue). */
5811debfc3dSmrg if (ind2reg[i]->m_lr_begin == insn_order)
5821debfc3dSmrg ind2reg[i]->m_lr_begin = 0;
5831debfc3dSmrg /* All regs that have no use but a def will have lr_end == 0,
5841debfc3dSmrg they are actually live from def until after the insn they are
5851debfc3dSmrg defined in. */
5861debfc3dSmrg if (ind2reg[i]->m_lr_end == 0)
5871debfc3dSmrg ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
5881debfc3dSmrg }
5891debfc3dSmrg
5901debfc3dSmrg /* Sort all intervals by increasing start point. */
5911debfc3dSmrg gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);
5921debfc3dSmrg
5931debfc3dSmrg if (flag_checking)
5941debfc3dSmrg for (unsigned i = 0; i < ind2reg.length (); i++)
5951debfc3dSmrg gcc_assert (ind2reg[i]);
5961debfc3dSmrg
5971debfc3dSmrg ind2reg.qsort (cmp_begin);
5981debfc3dSmrg for (i = 0; i < 4; i++)
5991debfc3dSmrg active[i].reserve_exact (hsa_cfun->m_reg_count);
6001debfc3dSmrg
6011debfc3dSmrg /* Now comes the linear scan allocation. */
6021debfc3dSmrg for (i = 0; i < hsa_cfun->m_reg_count; i++)
6031debfc3dSmrg {
6041debfc3dSmrg hsa_op_reg *reg = ind2reg[i];
6051debfc3dSmrg if (!reg)
6061debfc3dSmrg continue;
6071debfc3dSmrg expire_old_intervals (reg, active, classes);
6081debfc3dSmrg int cl = m_reg_class_for_type (reg->m_type);
6091debfc3dSmrg if (try_alloc_reg (classes, reg) >= 0)
6101debfc3dSmrg {
6111debfc3dSmrg unsigned place = active[cl].lower_bound (reg, cmp_end);
6121debfc3dSmrg active[cl].quick_insert (place, reg);
6131debfc3dSmrg }
6141debfc3dSmrg else
6151debfc3dSmrg spill_at_interval (reg, active);
6161debfc3dSmrg
6171debfc3dSmrg /* Some interesting dumping as we go. */
6181debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
6191debfc3dSmrg {
6201debfc3dSmrg fprintf (dump_file, " reg%d: [%5d, %5d)->",
6211debfc3dSmrg reg->m_order, reg->m_lr_begin, reg->m_lr_end);
6221debfc3dSmrg if (reg->m_reg_class)
6231debfc3dSmrg fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
6241debfc3dSmrg else
6251debfc3dSmrg fprintf (dump_file, "[%%__%s_%i]",
6261debfc3dSmrg hsa_seg_name (reg->m_spill_sym->m_segment),
6271debfc3dSmrg reg->m_spill_sym->m_name_number);
6281debfc3dSmrg for (int cl = 0; cl < 4; cl++)
6291debfc3dSmrg {
6301debfc3dSmrg bool first = true;
6311debfc3dSmrg hsa_op_reg *r;
6321debfc3dSmrg fprintf (dump_file, " {");
6331debfc3dSmrg for (int j = 0; active[cl].iterate (j, &r); j++)
6341debfc3dSmrg if (first)
6351debfc3dSmrg {
6361debfc3dSmrg fprintf (dump_file, "%d", r->m_order);
6371debfc3dSmrg first = false;
6381debfc3dSmrg }
6391debfc3dSmrg else
6401debfc3dSmrg fprintf (dump_file, ", %d", r->m_order);
6411debfc3dSmrg fprintf (dump_file, "}");
6421debfc3dSmrg }
6431debfc3dSmrg fprintf (dump_file, "\n");
6441debfc3dSmrg }
6451debfc3dSmrg }
6461debfc3dSmrg
6471debfc3dSmrg BITMAP_FREE (work);
6481debfc3dSmrg free (bbs);
6491debfc3dSmrg
6501debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
6511debfc3dSmrg {
6521debfc3dSmrg fprintf (dump_file, "------- After liveness: -------\n");
6531debfc3dSmrg dump_hsa_cfun_regalloc (dump_file);
6541debfc3dSmrg fprintf (dump_file, " ----- Intervals:\n");
6551debfc3dSmrg for (i = 0; i < hsa_cfun->m_reg_count; i++)
6561debfc3dSmrg {
6571debfc3dSmrg hsa_op_reg *reg = ind2reg[i];
6581debfc3dSmrg if (!reg)
6591debfc3dSmrg continue;
6601debfc3dSmrg fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order,
6611debfc3dSmrg reg->m_lr_begin, reg->m_lr_end);
6621debfc3dSmrg if (reg->m_reg_class)
6631debfc3dSmrg fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
6641debfc3dSmrg else
6651debfc3dSmrg fprintf (dump_file, "[%%__%s_%i]\n",
6661debfc3dSmrg hsa_seg_name (reg->m_spill_sym->m_segment),
6671debfc3dSmrg reg->m_spill_sym->m_name_number);
6681debfc3dSmrg }
6691debfc3dSmrg }
6701debfc3dSmrg
6711debfc3dSmrg for (i = 0; i < 4; i++)
6721debfc3dSmrg active[i].release ();
6731debfc3dSmrg ind2reg.release ();
6741debfc3dSmrg }
6751debfc3dSmrg
6761debfc3dSmrg /* Entry point for register allocation. */
6771debfc3dSmrg
6781debfc3dSmrg static void
regalloc(void)6791debfc3dSmrg regalloc (void)
6801debfc3dSmrg {
6811debfc3dSmrg basic_block bb;
6821debfc3dSmrg m_reg_class_desc classes[4];
6831debfc3dSmrg
6841debfc3dSmrg /* If there are no registers used in the function, exit right away. */
6851debfc3dSmrg if (hsa_cfun->m_reg_count == 0)
6861debfc3dSmrg return;
6871debfc3dSmrg
6881debfc3dSmrg memset (classes, 0, sizeof (classes));
6891debfc3dSmrg classes[0].next_avail = 0;
6901debfc3dSmrg classes[0].max_num = 7;
6911debfc3dSmrg classes[0].cl_char = 'c';
6921debfc3dSmrg classes[1].cl_char = 's';
6931debfc3dSmrg classes[2].cl_char = 'd';
6941debfc3dSmrg classes[3].cl_char = 'q';
6951debfc3dSmrg
6961debfc3dSmrg for (int i = 1; i < 4; i++)
6971debfc3dSmrg {
6981debfc3dSmrg classes[i].next_avail = 0;
6991debfc3dSmrg classes[i].max_num = 20;
7001debfc3dSmrg }
7011debfc3dSmrg
7021debfc3dSmrg linear_scan_regalloc (classes);
7031debfc3dSmrg
7041debfc3dSmrg FOR_ALL_BB_FN (bb, cfun)
7051debfc3dSmrg rewrite_code_bb (bb, classes);
7061debfc3dSmrg }
7071debfc3dSmrg
7081debfc3dSmrg /* Out of SSA and register allocation on HSAIL IL. */
7091debfc3dSmrg
7101debfc3dSmrg void
hsa_regalloc(void)7111debfc3dSmrg hsa_regalloc (void)
7121debfc3dSmrg {
7131debfc3dSmrg hsa_cfun->update_dominance ();
7141debfc3dSmrg naive_outof_ssa ();
7151debfc3dSmrg
7161debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
7171debfc3dSmrg {
7181debfc3dSmrg fprintf (dump_file, "------- After out-of-SSA: -------\n");
7191debfc3dSmrg dump_hsa_cfun (dump_file);
7201debfc3dSmrg }
7211debfc3dSmrg
7221debfc3dSmrg regalloc ();
7231debfc3dSmrg
7241debfc3dSmrg if (dump_file && (dump_flags & TDF_DETAILS))
7251debfc3dSmrg {
7261debfc3dSmrg fprintf (dump_file, "------- After register allocation: -------\n");
7271debfc3dSmrg dump_hsa_cfun (dump_file);
7281debfc3dSmrg }
7291debfc3dSmrg }
730