1*e4b17023SJohn Marino /* Vectorizer Specific Loop Manipulations
2*e4b17023SJohn Marino Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012
3*e4b17023SJohn Marino Free Software Foundation, Inc.
4*e4b17023SJohn Marino Contributed by Dorit Naishlos <dorit@il.ibm.com>
5*e4b17023SJohn Marino and Ira Rosen <irar@il.ibm.com>
6*e4b17023SJohn Marino
7*e4b17023SJohn Marino This file is part of GCC.
8*e4b17023SJohn Marino
9*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
10*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
11*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
12*e4b17023SJohn Marino version.
13*e4b17023SJohn Marino
14*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
16*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17*e4b17023SJohn Marino for more details.
18*e4b17023SJohn Marino
19*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
20*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
21*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
22*e4b17023SJohn Marino
23*e4b17023SJohn Marino #include "config.h"
24*e4b17023SJohn Marino #include "system.h"
25*e4b17023SJohn Marino #include "coretypes.h"
26*e4b17023SJohn Marino #include "tm.h"
27*e4b17023SJohn Marino #include "ggc.h"
28*e4b17023SJohn Marino #include "tree.h"
29*e4b17023SJohn Marino #include "basic-block.h"
30*e4b17023SJohn Marino #include "tree-pretty-print.h"
31*e4b17023SJohn Marino #include "gimple-pretty-print.h"
32*e4b17023SJohn Marino #include "tree-flow.h"
33*e4b17023SJohn Marino #include "tree-dump.h"
34*e4b17023SJohn Marino #include "cfgloop.h"
35*e4b17023SJohn Marino #include "cfglayout.h"
36*e4b17023SJohn Marino #include "diagnostic-core.h"
37*e4b17023SJohn Marino #include "tree-scalar-evolution.h"
38*e4b17023SJohn Marino #include "tree-vectorizer.h"
39*e4b17023SJohn Marino #include "langhooks.h"
40*e4b17023SJohn Marino
41*e4b17023SJohn Marino /*************************************************************************
42*e4b17023SJohn Marino Simple Loop Peeling Utilities
43*e4b17023SJohn Marino
44*e4b17023SJohn Marino Utilities to support loop peeling for vectorization purposes.
45*e4b17023SJohn Marino *************************************************************************/
46*e4b17023SJohn Marino
47*e4b17023SJohn Marino
48*e4b17023SJohn Marino /* Renames the use *OP_P. */
49*e4b17023SJohn Marino
50*e4b17023SJohn Marino static void
rename_use_op(use_operand_p op_p)51*e4b17023SJohn Marino rename_use_op (use_operand_p op_p)
52*e4b17023SJohn Marino {
53*e4b17023SJohn Marino tree new_name;
54*e4b17023SJohn Marino
55*e4b17023SJohn Marino if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
56*e4b17023SJohn Marino return;
57*e4b17023SJohn Marino
58*e4b17023SJohn Marino new_name = get_current_def (USE_FROM_PTR (op_p));
59*e4b17023SJohn Marino
60*e4b17023SJohn Marino /* Something defined outside of the loop. */
61*e4b17023SJohn Marino if (!new_name)
62*e4b17023SJohn Marino return;
63*e4b17023SJohn Marino
64*e4b17023SJohn Marino /* An ordinary ssa name defined in the loop. */
65*e4b17023SJohn Marino
66*e4b17023SJohn Marino SET_USE (op_p, new_name);
67*e4b17023SJohn Marino }
68*e4b17023SJohn Marino
69*e4b17023SJohn Marino
70*e4b17023SJohn Marino /* Renames the variables in basic block BB. */
71*e4b17023SJohn Marino
72*e4b17023SJohn Marino void
rename_variables_in_bb(basic_block bb)73*e4b17023SJohn Marino rename_variables_in_bb (basic_block bb)
74*e4b17023SJohn Marino {
75*e4b17023SJohn Marino gimple_stmt_iterator gsi;
76*e4b17023SJohn Marino gimple stmt;
77*e4b17023SJohn Marino use_operand_p use_p;
78*e4b17023SJohn Marino ssa_op_iter iter;
79*e4b17023SJohn Marino edge e;
80*e4b17023SJohn Marino edge_iterator ei;
81*e4b17023SJohn Marino struct loop *loop = bb->loop_father;
82*e4b17023SJohn Marino
83*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
84*e4b17023SJohn Marino {
85*e4b17023SJohn Marino stmt = gsi_stmt (gsi);
86*e4b17023SJohn Marino FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
87*e4b17023SJohn Marino rename_use_op (use_p);
88*e4b17023SJohn Marino }
89*e4b17023SJohn Marino
90*e4b17023SJohn Marino FOR_EACH_EDGE (e, ei, bb->succs)
91*e4b17023SJohn Marino {
92*e4b17023SJohn Marino if (!flow_bb_inside_loop_p (loop, e->dest))
93*e4b17023SJohn Marino continue;
94*e4b17023SJohn Marino for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
95*e4b17023SJohn Marino rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
96*e4b17023SJohn Marino }
97*e4b17023SJohn Marino }
98*e4b17023SJohn Marino
99*e4b17023SJohn Marino
100*e4b17023SJohn Marino /* Renames variables in new generated LOOP. */
101*e4b17023SJohn Marino
102*e4b17023SJohn Marino void
rename_variables_in_loop(struct loop * loop)103*e4b17023SJohn Marino rename_variables_in_loop (struct loop *loop)
104*e4b17023SJohn Marino {
105*e4b17023SJohn Marino unsigned i;
106*e4b17023SJohn Marino basic_block *bbs;
107*e4b17023SJohn Marino
108*e4b17023SJohn Marino bbs = get_loop_body (loop);
109*e4b17023SJohn Marino
110*e4b17023SJohn Marino for (i = 0; i < loop->num_nodes; i++)
111*e4b17023SJohn Marino rename_variables_in_bb (bbs[i]);
112*e4b17023SJohn Marino
113*e4b17023SJohn Marino free (bbs);
114*e4b17023SJohn Marino }
115*e4b17023SJohn Marino
116*e4b17023SJohn Marino typedef struct
117*e4b17023SJohn Marino {
118*e4b17023SJohn Marino tree from, to;
119*e4b17023SJohn Marino basic_block bb;
120*e4b17023SJohn Marino } adjust_info;
121*e4b17023SJohn Marino
122*e4b17023SJohn Marino DEF_VEC_O(adjust_info);
123*e4b17023SJohn Marino DEF_VEC_ALLOC_O_STACK(adjust_info);
124*e4b17023SJohn Marino #define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
125*e4b17023SJohn Marino
126*e4b17023SJohn Marino /* A stack of values to be adjusted in debug stmts. We have to
127*e4b17023SJohn Marino process them LIFO, so that the closest substitution applies. If we
128*e4b17023SJohn Marino processed them FIFO, without the stack, we might substitute uses
129*e4b17023SJohn Marino with a PHI DEF that would soon become non-dominant, and when we got
130*e4b17023SJohn Marino to the suitable one, it wouldn't have anything to substitute any
131*e4b17023SJohn Marino more. */
VEC(adjust_info,stack)132*e4b17023SJohn Marino static VEC(adjust_info, stack) *adjust_vec;
133*e4b17023SJohn Marino
134*e4b17023SJohn Marino /* Adjust any debug stmts that referenced AI->from values to use the
135*e4b17023SJohn Marino loop-closed AI->to, if the references are dominated by AI->bb and
136*e4b17023SJohn Marino not by the definition of AI->from. */
137*e4b17023SJohn Marino
138*e4b17023SJohn Marino static void
139*e4b17023SJohn Marino adjust_debug_stmts_now (adjust_info *ai)
140*e4b17023SJohn Marino {
141*e4b17023SJohn Marino basic_block bbphi = ai->bb;
142*e4b17023SJohn Marino tree orig_def = ai->from;
143*e4b17023SJohn Marino tree new_def = ai->to;
144*e4b17023SJohn Marino imm_use_iterator imm_iter;
145*e4b17023SJohn Marino gimple stmt;
146*e4b17023SJohn Marino basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
147*e4b17023SJohn Marino
148*e4b17023SJohn Marino gcc_assert (dom_info_available_p (CDI_DOMINATORS));
149*e4b17023SJohn Marino
150*e4b17023SJohn Marino /* Adjust any debug stmts that held onto non-loop-closed
151*e4b17023SJohn Marino references. */
152*e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
153*e4b17023SJohn Marino {
154*e4b17023SJohn Marino use_operand_p use_p;
155*e4b17023SJohn Marino basic_block bbuse;
156*e4b17023SJohn Marino
157*e4b17023SJohn Marino if (!is_gimple_debug (stmt))
158*e4b17023SJohn Marino continue;
159*e4b17023SJohn Marino
160*e4b17023SJohn Marino gcc_assert (gimple_debug_bind_p (stmt));
161*e4b17023SJohn Marino
162*e4b17023SJohn Marino bbuse = gimple_bb (stmt);
163*e4b17023SJohn Marino
164*e4b17023SJohn Marino if ((bbuse == bbphi
165*e4b17023SJohn Marino || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
166*e4b17023SJohn Marino && !(bbuse == bbdef
167*e4b17023SJohn Marino || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
168*e4b17023SJohn Marino {
169*e4b17023SJohn Marino if (new_def)
170*e4b17023SJohn Marino FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
171*e4b17023SJohn Marino SET_USE (use_p, new_def);
172*e4b17023SJohn Marino else
173*e4b17023SJohn Marino {
174*e4b17023SJohn Marino gimple_debug_bind_reset_value (stmt);
175*e4b17023SJohn Marino update_stmt (stmt);
176*e4b17023SJohn Marino }
177*e4b17023SJohn Marino }
178*e4b17023SJohn Marino }
179*e4b17023SJohn Marino }
180*e4b17023SJohn Marino
181*e4b17023SJohn Marino /* Adjust debug stmts as scheduled before. */
182*e4b17023SJohn Marino
183*e4b17023SJohn Marino static void
adjust_vec_debug_stmts(void)184*e4b17023SJohn Marino adjust_vec_debug_stmts (void)
185*e4b17023SJohn Marino {
186*e4b17023SJohn Marino if (!MAY_HAVE_DEBUG_STMTS)
187*e4b17023SJohn Marino return;
188*e4b17023SJohn Marino
189*e4b17023SJohn Marino gcc_assert (adjust_vec);
190*e4b17023SJohn Marino
191*e4b17023SJohn Marino while (!VEC_empty (adjust_info, adjust_vec))
192*e4b17023SJohn Marino {
193*e4b17023SJohn Marino adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
194*e4b17023SJohn Marino VEC_pop (adjust_info, adjust_vec);
195*e4b17023SJohn Marino }
196*e4b17023SJohn Marino
197*e4b17023SJohn Marino VEC_free (adjust_info, stack, adjust_vec);
198*e4b17023SJohn Marino }
199*e4b17023SJohn Marino
200*e4b17023SJohn Marino /* Adjust any debug stmts that referenced FROM values to use the
201*e4b17023SJohn Marino loop-closed TO, if the references are dominated by BB and not by
202*e4b17023SJohn Marino the definition of FROM. If adjust_vec is non-NULL, adjustments
203*e4b17023SJohn Marino will be postponed until adjust_vec_debug_stmts is called. */
204*e4b17023SJohn Marino
205*e4b17023SJohn Marino static void
adjust_debug_stmts(tree from,tree to,basic_block bb)206*e4b17023SJohn Marino adjust_debug_stmts (tree from, tree to, basic_block bb)
207*e4b17023SJohn Marino {
208*e4b17023SJohn Marino adjust_info ai;
209*e4b17023SJohn Marino
210*e4b17023SJohn Marino if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
211*e4b17023SJohn Marino && SSA_NAME_VAR (from) != gimple_vop (cfun))
212*e4b17023SJohn Marino {
213*e4b17023SJohn Marino ai.from = from;
214*e4b17023SJohn Marino ai.to = to;
215*e4b17023SJohn Marino ai.bb = bb;
216*e4b17023SJohn Marino
217*e4b17023SJohn Marino if (adjust_vec)
218*e4b17023SJohn Marino VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
219*e4b17023SJohn Marino else
220*e4b17023SJohn Marino adjust_debug_stmts_now (&ai);
221*e4b17023SJohn Marino }
222*e4b17023SJohn Marino }
223*e4b17023SJohn Marino
224*e4b17023SJohn Marino /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
225*e4b17023SJohn Marino to adjust any debug stmts that referenced the old phi arg,
226*e4b17023SJohn Marino presumably non-loop-closed references left over from other
227*e4b17023SJohn Marino transformations. */
228*e4b17023SJohn Marino
229*e4b17023SJohn Marino static void
adjust_phi_and_debug_stmts(gimple update_phi,edge e,tree new_def)230*e4b17023SJohn Marino adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
231*e4b17023SJohn Marino {
232*e4b17023SJohn Marino tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
233*e4b17023SJohn Marino
234*e4b17023SJohn Marino SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
235*e4b17023SJohn Marino
236*e4b17023SJohn Marino if (MAY_HAVE_DEBUG_STMTS)
237*e4b17023SJohn Marino adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
238*e4b17023SJohn Marino gimple_bb (update_phi));
239*e4b17023SJohn Marino }
240*e4b17023SJohn Marino
241*e4b17023SJohn Marino
242*e4b17023SJohn Marino /* Update the PHI nodes of NEW_LOOP.
243*e4b17023SJohn Marino
244*e4b17023SJohn Marino NEW_LOOP is a duplicate of ORIG_LOOP.
245*e4b17023SJohn Marino AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
246*e4b17023SJohn Marino AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
247*e4b17023SJohn Marino executes before it. */
248*e4b17023SJohn Marino
249*e4b17023SJohn Marino static void
slpeel_update_phis_for_duplicate_loop(struct loop * orig_loop,struct loop * new_loop,bool after)250*e4b17023SJohn Marino slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
251*e4b17023SJohn Marino struct loop *new_loop, bool after)
252*e4b17023SJohn Marino {
253*e4b17023SJohn Marino tree new_ssa_name;
254*e4b17023SJohn Marino gimple phi_new, phi_orig;
255*e4b17023SJohn Marino tree def;
256*e4b17023SJohn Marino edge orig_loop_latch = loop_latch_edge (orig_loop);
257*e4b17023SJohn Marino edge orig_entry_e = loop_preheader_edge (orig_loop);
258*e4b17023SJohn Marino edge new_loop_exit_e = single_exit (new_loop);
259*e4b17023SJohn Marino edge new_loop_entry_e = loop_preheader_edge (new_loop);
260*e4b17023SJohn Marino edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
261*e4b17023SJohn Marino gimple_stmt_iterator gsi_new, gsi_orig;
262*e4b17023SJohn Marino
263*e4b17023SJohn Marino /*
264*e4b17023SJohn Marino step 1. For each loop-header-phi:
265*e4b17023SJohn Marino Add the first phi argument for the phi in NEW_LOOP
266*e4b17023SJohn Marino (the one associated with the entry of NEW_LOOP)
267*e4b17023SJohn Marino
268*e4b17023SJohn Marino step 2. For each loop-header-phi:
269*e4b17023SJohn Marino Add the second phi argument for the phi in NEW_LOOP
270*e4b17023SJohn Marino (the one associated with the latch of NEW_LOOP)
271*e4b17023SJohn Marino
272*e4b17023SJohn Marino step 3. Update the phis in the successor block of NEW_LOOP.
273*e4b17023SJohn Marino
274*e4b17023SJohn Marino case 1: NEW_LOOP was placed before ORIG_LOOP:
275*e4b17023SJohn Marino The successor block of NEW_LOOP is the header of ORIG_LOOP.
276*e4b17023SJohn Marino Updating the phis in the successor block can therefore be done
277*e4b17023SJohn Marino along with the scanning of the loop header phis, because the
278*e4b17023SJohn Marino header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
279*e4b17023SJohn Marino phi nodes, organized in the same order.
280*e4b17023SJohn Marino
281*e4b17023SJohn Marino case 2: NEW_LOOP was placed after ORIG_LOOP:
282*e4b17023SJohn Marino The successor block of NEW_LOOP is the original exit block of
283*e4b17023SJohn Marino ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
284*e4b17023SJohn Marino We postpone updating these phis to a later stage (when
285*e4b17023SJohn Marino loop guards are added).
286*e4b17023SJohn Marino */
287*e4b17023SJohn Marino
288*e4b17023SJohn Marino
289*e4b17023SJohn Marino /* Scan the phis in the headers of the old and new loops
290*e4b17023SJohn Marino (they are organized in exactly the same order). */
291*e4b17023SJohn Marino
292*e4b17023SJohn Marino for (gsi_new = gsi_start_phis (new_loop->header),
293*e4b17023SJohn Marino gsi_orig = gsi_start_phis (orig_loop->header);
294*e4b17023SJohn Marino !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
295*e4b17023SJohn Marino gsi_next (&gsi_new), gsi_next (&gsi_orig))
296*e4b17023SJohn Marino {
297*e4b17023SJohn Marino source_location locus;
298*e4b17023SJohn Marino phi_new = gsi_stmt (gsi_new);
299*e4b17023SJohn Marino phi_orig = gsi_stmt (gsi_orig);
300*e4b17023SJohn Marino
301*e4b17023SJohn Marino /* step 1. */
302*e4b17023SJohn Marino def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
303*e4b17023SJohn Marino locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
304*e4b17023SJohn Marino add_phi_arg (phi_new, def, new_loop_entry_e, locus);
305*e4b17023SJohn Marino
306*e4b17023SJohn Marino /* step 2. */
307*e4b17023SJohn Marino def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
308*e4b17023SJohn Marino locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
309*e4b17023SJohn Marino if (TREE_CODE (def) != SSA_NAME)
310*e4b17023SJohn Marino continue;
311*e4b17023SJohn Marino
312*e4b17023SJohn Marino new_ssa_name = get_current_def (def);
313*e4b17023SJohn Marino if (!new_ssa_name)
314*e4b17023SJohn Marino {
315*e4b17023SJohn Marino /* This only happens if there are no definitions
316*e4b17023SJohn Marino inside the loop. use the phi_result in this case. */
317*e4b17023SJohn Marino new_ssa_name = PHI_RESULT (phi_new);
318*e4b17023SJohn Marino }
319*e4b17023SJohn Marino
320*e4b17023SJohn Marino /* An ordinary ssa name defined in the loop. */
321*e4b17023SJohn Marino add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
322*e4b17023SJohn Marino
323*e4b17023SJohn Marino /* Drop any debug references outside the loop, if they would
324*e4b17023SJohn Marino become ill-formed SSA. */
325*e4b17023SJohn Marino adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
326*e4b17023SJohn Marino
327*e4b17023SJohn Marino /* step 3 (case 1). */
328*e4b17023SJohn Marino if (!after)
329*e4b17023SJohn Marino {
330*e4b17023SJohn Marino gcc_assert (new_loop_exit_e == orig_entry_e);
331*e4b17023SJohn Marino adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
332*e4b17023SJohn Marino }
333*e4b17023SJohn Marino }
334*e4b17023SJohn Marino }
335*e4b17023SJohn Marino
336*e4b17023SJohn Marino
337*e4b17023SJohn Marino /* Update PHI nodes for a guard of the LOOP.
338*e4b17023SJohn Marino
339*e4b17023SJohn Marino Input:
340*e4b17023SJohn Marino - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
341*e4b17023SJohn Marino controls whether LOOP is to be executed. GUARD_EDGE is the edge that
342*e4b17023SJohn Marino originates from the guard-bb, skips LOOP and reaches the (unique) exit
343*e4b17023SJohn Marino bb of LOOP. This loop-exit-bb is an empty bb with one successor.
344*e4b17023SJohn Marino We denote this bb NEW_MERGE_BB because before the guard code was added
345*e4b17023SJohn Marino it had a single predecessor (the LOOP header), and now it became a merge
346*e4b17023SJohn Marino point of two paths - the path that ends with the LOOP exit-edge, and
347*e4b17023SJohn Marino the path that ends with GUARD_EDGE.
348*e4b17023SJohn Marino - NEW_EXIT_BB: New basic block that is added by this function between LOOP
349*e4b17023SJohn Marino and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
350*e4b17023SJohn Marino
351*e4b17023SJohn Marino ===> The CFG before the guard-code was added:
352*e4b17023SJohn Marino LOOP_header_bb:
353*e4b17023SJohn Marino loop_body
354*e4b17023SJohn Marino if (exit_loop) goto update_bb
355*e4b17023SJohn Marino else goto LOOP_header_bb
356*e4b17023SJohn Marino update_bb:
357*e4b17023SJohn Marino
358*e4b17023SJohn Marino ==> The CFG after the guard-code was added:
359*e4b17023SJohn Marino guard_bb:
360*e4b17023SJohn Marino if (LOOP_guard_condition) goto new_merge_bb
361*e4b17023SJohn Marino else goto LOOP_header_bb
362*e4b17023SJohn Marino LOOP_header_bb:
363*e4b17023SJohn Marino loop_body
364*e4b17023SJohn Marino if (exit_loop_condition) goto new_merge_bb
365*e4b17023SJohn Marino else goto LOOP_header_bb
366*e4b17023SJohn Marino new_merge_bb:
367*e4b17023SJohn Marino goto update_bb
368*e4b17023SJohn Marino update_bb:
369*e4b17023SJohn Marino
370*e4b17023SJohn Marino ==> The CFG after this function:
371*e4b17023SJohn Marino guard_bb:
372*e4b17023SJohn Marino if (LOOP_guard_condition) goto new_merge_bb
373*e4b17023SJohn Marino else goto LOOP_header_bb
374*e4b17023SJohn Marino LOOP_header_bb:
375*e4b17023SJohn Marino loop_body
376*e4b17023SJohn Marino if (exit_loop_condition) goto new_exit_bb
377*e4b17023SJohn Marino else goto LOOP_header_bb
378*e4b17023SJohn Marino new_exit_bb:
379*e4b17023SJohn Marino new_merge_bb:
380*e4b17023SJohn Marino goto update_bb
381*e4b17023SJohn Marino update_bb:
382*e4b17023SJohn Marino
383*e4b17023SJohn Marino This function:
384*e4b17023SJohn Marino 1. creates and updates the relevant phi nodes to account for the new
385*e4b17023SJohn Marino incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
386*e4b17023SJohn Marino 1.1. Create phi nodes at NEW_MERGE_BB.
387*e4b17023SJohn Marino 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
388*e4b17023SJohn Marino UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
389*e4b17023SJohn Marino 2. preserves loop-closed-ssa-form by creating the required phi nodes
390*e4b17023SJohn Marino at the exit of LOOP (i.e, in NEW_EXIT_BB).
391*e4b17023SJohn Marino
392*e4b17023SJohn Marino There are two flavors to this function:
393*e4b17023SJohn Marino
394*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard1:
395*e4b17023SJohn Marino Here the guard controls whether we enter or skip LOOP, where LOOP is a
396*e4b17023SJohn Marino prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
397*e4b17023SJohn Marino for variables that have phis in the loop header.
398*e4b17023SJohn Marino
399*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard2:
400*e4b17023SJohn Marino Here the guard controls whether we enter or skip LOOP, where LOOP is an
401*e4b17023SJohn Marino epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
402*e4b17023SJohn Marino for variables that have phis in the loop exit.
403*e4b17023SJohn Marino
404*e4b17023SJohn Marino I.E., the overall structure is:
405*e4b17023SJohn Marino
406*e4b17023SJohn Marino loop1_preheader_bb:
407*e4b17023SJohn Marino guard1 (goto loop1/merge1_bb)
408*e4b17023SJohn Marino loop1
409*e4b17023SJohn Marino loop1_exit_bb:
410*e4b17023SJohn Marino guard2 (goto merge1_bb/merge2_bb)
411*e4b17023SJohn Marino merge1_bb
412*e4b17023SJohn Marino loop2
413*e4b17023SJohn Marino loop2_exit_bb
414*e4b17023SJohn Marino merge2_bb
415*e4b17023SJohn Marino next_bb
416*e4b17023SJohn Marino
417*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
418*e4b17023SJohn Marino loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
419*e4b17023SJohn Marino that have phis in loop1->header).
420*e4b17023SJohn Marino
421*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
422*e4b17023SJohn Marino loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
423*e4b17023SJohn Marino that have phis in next_bb). It also adds some of these phis to
424*e4b17023SJohn Marino loop1_exit_bb.
425*e4b17023SJohn Marino
426*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard1 is always called before
427*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard2. They are both needed in order
428*e4b17023SJohn Marino to create correct data-flow and loop-closed-ssa-form.
429*e4b17023SJohn Marino
430*e4b17023SJohn Marino Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
431*e4b17023SJohn Marino that change between iterations of a loop (and therefore have a phi-node
432*e4b17023SJohn Marino at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
433*e4b17023SJohn Marino phis for variables that are used out of the loop (and therefore have
434*e4b17023SJohn Marino loop-closed exit phis). Some variables may be both updated between
435*e4b17023SJohn Marino iterations and used after the loop. This is why in loop1_exit_bb we
436*e4b17023SJohn Marino may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
437*e4b17023SJohn Marino and exit phis (created by slpeel_update_phi_nodes_for_guard2).
438*e4b17023SJohn Marino
439*e4b17023SJohn Marino - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
440*e4b17023SJohn Marino an original loop. i.e., we have:
441*e4b17023SJohn Marino
442*e4b17023SJohn Marino orig_loop
443*e4b17023SJohn Marino guard_bb (goto LOOP/new_merge)
444*e4b17023SJohn Marino new_loop <-- LOOP
445*e4b17023SJohn Marino new_exit
446*e4b17023SJohn Marino new_merge
447*e4b17023SJohn Marino next_bb
448*e4b17023SJohn Marino
449*e4b17023SJohn Marino If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
450*e4b17023SJohn Marino have:
451*e4b17023SJohn Marino
452*e4b17023SJohn Marino new_loop
453*e4b17023SJohn Marino guard_bb (goto LOOP/new_merge)
454*e4b17023SJohn Marino orig_loop <-- LOOP
455*e4b17023SJohn Marino new_exit
456*e4b17023SJohn Marino new_merge
457*e4b17023SJohn Marino next_bb
458*e4b17023SJohn Marino
459*e4b17023SJohn Marino The SSA names defined in the original loop have a current
460*e4b17023SJohn Marino reaching definition that that records the corresponding new
461*e4b17023SJohn Marino ssa-name used in the new duplicated loop copy.
462*e4b17023SJohn Marino */
463*e4b17023SJohn Marino
464*e4b17023SJohn Marino /* Function slpeel_update_phi_nodes_for_guard1
465*e4b17023SJohn Marino
466*e4b17023SJohn Marino Input:
467*e4b17023SJohn Marino - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
468*e4b17023SJohn Marino - DEFS - a bitmap of ssa names to mark new names for which we recorded
469*e4b17023SJohn Marino information.
470*e4b17023SJohn Marino
471*e4b17023SJohn Marino In the context of the overall structure, we have:
472*e4b17023SJohn Marino
473*e4b17023SJohn Marino loop1_preheader_bb:
474*e4b17023SJohn Marino guard1 (goto loop1/merge1_bb)
475*e4b17023SJohn Marino LOOP-> loop1
476*e4b17023SJohn Marino loop1_exit_bb:
477*e4b17023SJohn Marino guard2 (goto merge1_bb/merge2_bb)
478*e4b17023SJohn Marino merge1_bb
479*e4b17023SJohn Marino loop2
480*e4b17023SJohn Marino loop2_exit_bb
481*e4b17023SJohn Marino merge2_bb
482*e4b17023SJohn Marino next_bb
483*e4b17023SJohn Marino
484*e4b17023SJohn Marino For each name updated between loop iterations (i.e - for each name that has
485*e4b17023SJohn Marino an entry (loop-header) phi in LOOP) we create a new phi in:
486*e4b17023SJohn Marino 1. merge1_bb (to account for the edge from guard1)
487*e4b17023SJohn Marino 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
488*e4b17023SJohn Marino */
489*e4b17023SJohn Marino
490*e4b17023SJohn Marino static void
slpeel_update_phi_nodes_for_guard1(edge guard_edge,struct loop * loop,bool is_new_loop,basic_block * new_exit_bb,bitmap * defs)491*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
492*e4b17023SJohn Marino bool is_new_loop, basic_block *new_exit_bb,
493*e4b17023SJohn Marino bitmap *defs)
494*e4b17023SJohn Marino {
495*e4b17023SJohn Marino gimple orig_phi, new_phi;
496*e4b17023SJohn Marino gimple update_phi, update_phi2;
497*e4b17023SJohn Marino tree guard_arg, loop_arg;
498*e4b17023SJohn Marino basic_block new_merge_bb = guard_edge->dest;
499*e4b17023SJohn Marino edge e = EDGE_SUCC (new_merge_bb, 0);
500*e4b17023SJohn Marino basic_block update_bb = e->dest;
501*e4b17023SJohn Marino basic_block orig_bb = loop->header;
502*e4b17023SJohn Marino edge new_exit_e;
503*e4b17023SJohn Marino tree current_new_name;
504*e4b17023SJohn Marino gimple_stmt_iterator gsi_orig, gsi_update;
505*e4b17023SJohn Marino
506*e4b17023SJohn Marino /* Create new bb between loop and new_merge_bb. */
507*e4b17023SJohn Marino *new_exit_bb = split_edge (single_exit (loop));
508*e4b17023SJohn Marino
509*e4b17023SJohn Marino new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
510*e4b17023SJohn Marino
511*e4b17023SJohn Marino for (gsi_orig = gsi_start_phis (orig_bb),
512*e4b17023SJohn Marino gsi_update = gsi_start_phis (update_bb);
513*e4b17023SJohn Marino !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
514*e4b17023SJohn Marino gsi_next (&gsi_orig), gsi_next (&gsi_update))
515*e4b17023SJohn Marino {
516*e4b17023SJohn Marino source_location loop_locus, guard_locus;
517*e4b17023SJohn Marino orig_phi = gsi_stmt (gsi_orig);
518*e4b17023SJohn Marino update_phi = gsi_stmt (gsi_update);
519*e4b17023SJohn Marino
520*e4b17023SJohn Marino /** 1. Handle new-merge-point phis **/
521*e4b17023SJohn Marino
522*e4b17023SJohn Marino /* 1.1. Generate new phi node in NEW_MERGE_BB: */
523*e4b17023SJohn Marino new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
524*e4b17023SJohn Marino new_merge_bb);
525*e4b17023SJohn Marino
526*e4b17023SJohn Marino /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
527*e4b17023SJohn Marino of LOOP. Set the two phi args in NEW_PHI for these edges: */
528*e4b17023SJohn Marino loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
529*e4b17023SJohn Marino loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
530*e4b17023SJohn Marino EDGE_SUCC (loop->latch,
531*e4b17023SJohn Marino 0));
532*e4b17023SJohn Marino guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
533*e4b17023SJohn Marino guard_locus
534*e4b17023SJohn Marino = gimple_phi_arg_location_from_edge (orig_phi,
535*e4b17023SJohn Marino loop_preheader_edge (loop));
536*e4b17023SJohn Marino
537*e4b17023SJohn Marino add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
538*e4b17023SJohn Marino add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
539*e4b17023SJohn Marino
540*e4b17023SJohn Marino /* 1.3. Update phi in successor block. */
541*e4b17023SJohn Marino gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
542*e4b17023SJohn Marino || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
543*e4b17023SJohn Marino adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
544*e4b17023SJohn Marino update_phi2 = new_phi;
545*e4b17023SJohn Marino
546*e4b17023SJohn Marino
547*e4b17023SJohn Marino /** 2. Handle loop-closed-ssa-form phis **/
548*e4b17023SJohn Marino
549*e4b17023SJohn Marino if (!is_gimple_reg (PHI_RESULT (orig_phi)))
550*e4b17023SJohn Marino continue;
551*e4b17023SJohn Marino
552*e4b17023SJohn Marino /* 2.1. Generate new phi node in NEW_EXIT_BB: */
553*e4b17023SJohn Marino new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
554*e4b17023SJohn Marino *new_exit_bb);
555*e4b17023SJohn Marino
556*e4b17023SJohn Marino /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
557*e4b17023SJohn Marino add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
558*e4b17023SJohn Marino
559*e4b17023SJohn Marino /* 2.3. Update phi in successor of NEW_EXIT_BB: */
560*e4b17023SJohn Marino gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
561*e4b17023SJohn Marino adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
562*e4b17023SJohn Marino PHI_RESULT (new_phi));
563*e4b17023SJohn Marino
564*e4b17023SJohn Marino /* 2.4. Record the newly created name with set_current_def.
565*e4b17023SJohn Marino We want to find a name such that
566*e4b17023SJohn Marino name = get_current_def (orig_loop_name)
567*e4b17023SJohn Marino and to set its current definition as follows:
568*e4b17023SJohn Marino set_current_def (name, new_phi_name)
569*e4b17023SJohn Marino
570*e4b17023SJohn Marino If LOOP is a new loop then loop_arg is already the name we're
571*e4b17023SJohn Marino looking for. If LOOP is the original loop, then loop_arg is
572*e4b17023SJohn Marino the orig_loop_name and the relevant name is recorded in its
573*e4b17023SJohn Marino current reaching definition. */
574*e4b17023SJohn Marino if (is_new_loop)
575*e4b17023SJohn Marino current_new_name = loop_arg;
576*e4b17023SJohn Marino else
577*e4b17023SJohn Marino {
578*e4b17023SJohn Marino current_new_name = get_current_def (loop_arg);
579*e4b17023SJohn Marino /* current_def is not available only if the variable does not
580*e4b17023SJohn Marino change inside the loop, in which case we also don't care
581*e4b17023SJohn Marino about recording a current_def for it because we won't be
582*e4b17023SJohn Marino trying to create loop-exit-phis for it. */
583*e4b17023SJohn Marino if (!current_new_name)
584*e4b17023SJohn Marino continue;
585*e4b17023SJohn Marino }
586*e4b17023SJohn Marino gcc_assert (get_current_def (current_new_name) == NULL_TREE);
587*e4b17023SJohn Marino
588*e4b17023SJohn Marino set_current_def (current_new_name, PHI_RESULT (new_phi));
589*e4b17023SJohn Marino bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
590*e4b17023SJohn Marino }
591*e4b17023SJohn Marino }
592*e4b17023SJohn Marino
593*e4b17023SJohn Marino
594*e4b17023SJohn Marino /* Function slpeel_update_phi_nodes_for_guard2
595*e4b17023SJohn Marino
596*e4b17023SJohn Marino Input:
597*e4b17023SJohn Marino - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
598*e4b17023SJohn Marino
599*e4b17023SJohn Marino In the context of the overall structure, we have:
600*e4b17023SJohn Marino
601*e4b17023SJohn Marino loop1_preheader_bb:
602*e4b17023SJohn Marino guard1 (goto loop1/merge1_bb)
603*e4b17023SJohn Marino loop1
604*e4b17023SJohn Marino loop1_exit_bb:
605*e4b17023SJohn Marino guard2 (goto merge1_bb/merge2_bb)
606*e4b17023SJohn Marino merge1_bb
607*e4b17023SJohn Marino LOOP-> loop2
608*e4b17023SJohn Marino loop2_exit_bb
609*e4b17023SJohn Marino merge2_bb
610*e4b17023SJohn Marino next_bb
611*e4b17023SJohn Marino
612*e4b17023SJohn Marino For each name used out side the loop (i.e - for each name that has an exit
613*e4b17023SJohn Marino phi in next_bb) we create a new phi in:
614*e4b17023SJohn Marino 1. merge2_bb (to account for the edge from guard_bb)
615*e4b17023SJohn Marino 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
616*e4b17023SJohn Marino 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
617*e4b17023SJohn Marino if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
618*e4b17023SJohn Marino */
619*e4b17023SJohn Marino
620*e4b17023SJohn Marino static void
slpeel_update_phi_nodes_for_guard2(edge guard_edge,struct loop * loop,bool is_new_loop,basic_block * new_exit_bb)621*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
622*e4b17023SJohn Marino bool is_new_loop, basic_block *new_exit_bb)
623*e4b17023SJohn Marino {
624*e4b17023SJohn Marino gimple orig_phi, new_phi;
625*e4b17023SJohn Marino gimple update_phi, update_phi2;
626*e4b17023SJohn Marino tree guard_arg, loop_arg;
627*e4b17023SJohn Marino basic_block new_merge_bb = guard_edge->dest;
628*e4b17023SJohn Marino edge e = EDGE_SUCC (new_merge_bb, 0);
629*e4b17023SJohn Marino basic_block update_bb = e->dest;
630*e4b17023SJohn Marino edge new_exit_e;
631*e4b17023SJohn Marino tree orig_def, orig_def_new_name;
632*e4b17023SJohn Marino tree new_name, new_name2;
633*e4b17023SJohn Marino tree arg;
634*e4b17023SJohn Marino gimple_stmt_iterator gsi;
635*e4b17023SJohn Marino
636*e4b17023SJohn Marino /* Create new bb between loop and new_merge_bb. */
637*e4b17023SJohn Marino *new_exit_bb = split_edge (single_exit (loop));
638*e4b17023SJohn Marino
639*e4b17023SJohn Marino new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
640*e4b17023SJohn Marino
641*e4b17023SJohn Marino for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
642*e4b17023SJohn Marino {
643*e4b17023SJohn Marino update_phi = gsi_stmt (gsi);
644*e4b17023SJohn Marino orig_phi = update_phi;
645*e4b17023SJohn Marino orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
646*e4b17023SJohn Marino /* This loop-closed-phi actually doesn't represent a use
647*e4b17023SJohn Marino out of the loop - the phi arg is a constant. */
648*e4b17023SJohn Marino if (TREE_CODE (orig_def) != SSA_NAME)
649*e4b17023SJohn Marino continue;
650*e4b17023SJohn Marino orig_def_new_name = get_current_def (orig_def);
651*e4b17023SJohn Marino arg = NULL_TREE;
652*e4b17023SJohn Marino
653*e4b17023SJohn Marino /** 1. Handle new-merge-point phis **/
654*e4b17023SJohn Marino
655*e4b17023SJohn Marino /* 1.1. Generate new phi node in NEW_MERGE_BB: */
656*e4b17023SJohn Marino new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
657*e4b17023SJohn Marino new_merge_bb);
658*e4b17023SJohn Marino
659*e4b17023SJohn Marino /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
660*e4b17023SJohn Marino of LOOP. Set the two PHI args in NEW_PHI for these edges: */
661*e4b17023SJohn Marino new_name = orig_def;
662*e4b17023SJohn Marino new_name2 = NULL_TREE;
663*e4b17023SJohn Marino if (orig_def_new_name)
664*e4b17023SJohn Marino {
665*e4b17023SJohn Marino new_name = orig_def_new_name;
666*e4b17023SJohn Marino /* Some variables have both loop-entry-phis and loop-exit-phis.
667*e4b17023SJohn Marino Such variables were given yet newer names by phis placed in
668*e4b17023SJohn Marino guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
669*e4b17023SJohn Marino new_name2 = get_current_def (get_current_def (orig_name)). */
670*e4b17023SJohn Marino new_name2 = get_current_def (new_name);
671*e4b17023SJohn Marino }
672*e4b17023SJohn Marino
673*e4b17023SJohn Marino if (is_new_loop)
674*e4b17023SJohn Marino {
675*e4b17023SJohn Marino guard_arg = orig_def;
676*e4b17023SJohn Marino loop_arg = new_name;
677*e4b17023SJohn Marino }
678*e4b17023SJohn Marino else
679*e4b17023SJohn Marino {
680*e4b17023SJohn Marino guard_arg = new_name;
681*e4b17023SJohn Marino loop_arg = orig_def;
682*e4b17023SJohn Marino }
683*e4b17023SJohn Marino if (new_name2)
684*e4b17023SJohn Marino guard_arg = new_name2;
685*e4b17023SJohn Marino
686*e4b17023SJohn Marino add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
687*e4b17023SJohn Marino add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
688*e4b17023SJohn Marino
689*e4b17023SJohn Marino /* 1.3. Update phi in successor block. */
690*e4b17023SJohn Marino gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
691*e4b17023SJohn Marino adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
692*e4b17023SJohn Marino update_phi2 = new_phi;
693*e4b17023SJohn Marino
694*e4b17023SJohn Marino
695*e4b17023SJohn Marino /** 2. Handle loop-closed-ssa-form phis **/
696*e4b17023SJohn Marino
697*e4b17023SJohn Marino /* 2.1. Generate new phi node in NEW_EXIT_BB: */
698*e4b17023SJohn Marino new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
699*e4b17023SJohn Marino *new_exit_bb);
700*e4b17023SJohn Marino
701*e4b17023SJohn Marino /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
702*e4b17023SJohn Marino add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
703*e4b17023SJohn Marino
704*e4b17023SJohn Marino /* 2.3. Update phi in successor of NEW_EXIT_BB: */
705*e4b17023SJohn Marino gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
706*e4b17023SJohn Marino adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
707*e4b17023SJohn Marino PHI_RESULT (new_phi));
708*e4b17023SJohn Marino
709*e4b17023SJohn Marino
710*e4b17023SJohn Marino /** 3. Handle loop-closed-ssa-form phis for first loop **/
711*e4b17023SJohn Marino
712*e4b17023SJohn Marino /* 3.1. Find the relevant names that need an exit-phi in
713*e4b17023SJohn Marino GUARD_BB, i.e. names for which
714*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard1 had not already created a
715*e4b17023SJohn Marino phi node. This is the case for names that are used outside
716*e4b17023SJohn Marino the loop (and therefore need an exit phi) but are not updated
717*e4b17023SJohn Marino across loop iterations (and therefore don't have a
718*e4b17023SJohn Marino loop-header-phi).
719*e4b17023SJohn Marino
720*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard1 is responsible for
721*e4b17023SJohn Marino creating loop-exit phis in GUARD_BB for names that have a
722*e4b17023SJohn Marino loop-header-phi. When such a phi is created we also record
723*e4b17023SJohn Marino the new name in its current definition. If this new name
724*e4b17023SJohn Marino exists, then guard_arg was set to this new name (see 1.2
725*e4b17023SJohn Marino above). Therefore, if guard_arg is not this new name, this
726*e4b17023SJohn Marino is an indication that an exit-phi in GUARD_BB was not yet
727*e4b17023SJohn Marino created, so we take care of it here. */
728*e4b17023SJohn Marino if (guard_arg == new_name2)
729*e4b17023SJohn Marino continue;
730*e4b17023SJohn Marino arg = guard_arg;
731*e4b17023SJohn Marino
732*e4b17023SJohn Marino /* 3.2. Generate new phi node in GUARD_BB: */
733*e4b17023SJohn Marino new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
734*e4b17023SJohn Marino guard_edge->src);
735*e4b17023SJohn Marino
736*e4b17023SJohn Marino /* 3.3. GUARD_BB has one incoming edge: */
737*e4b17023SJohn Marino gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
738*e4b17023SJohn Marino add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
739*e4b17023SJohn Marino UNKNOWN_LOCATION);
740*e4b17023SJohn Marino
741*e4b17023SJohn Marino /* 3.4. Update phi in successor of GUARD_BB: */
742*e4b17023SJohn Marino gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
743*e4b17023SJohn Marino == guard_arg);
744*e4b17023SJohn Marino adjust_phi_and_debug_stmts (update_phi2, guard_edge,
745*e4b17023SJohn Marino PHI_RESULT (new_phi));
746*e4b17023SJohn Marino }
747*e4b17023SJohn Marino }
748*e4b17023SJohn Marino
749*e4b17023SJohn Marino
750*e4b17023SJohn Marino /* Make the LOOP iterate NITERS times. This is done by adding a new IV
751*e4b17023SJohn Marino that starts at zero, increases by one and its limit is NITERS.
752*e4b17023SJohn Marino
753*e4b17023SJohn Marino Assumption: the exit-condition of LOOP is the last stmt in the loop. */
754*e4b17023SJohn Marino
755*e4b17023SJohn Marino void
slpeel_make_loop_iterate_ntimes(struct loop * loop,tree niters)756*e4b17023SJohn Marino slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
757*e4b17023SJohn Marino {
758*e4b17023SJohn Marino tree indx_before_incr, indx_after_incr;
759*e4b17023SJohn Marino gimple cond_stmt;
760*e4b17023SJohn Marino gimple orig_cond;
761*e4b17023SJohn Marino edge exit_edge = single_exit (loop);
762*e4b17023SJohn Marino gimple_stmt_iterator loop_cond_gsi;
763*e4b17023SJohn Marino gimple_stmt_iterator incr_gsi;
764*e4b17023SJohn Marino bool insert_after;
765*e4b17023SJohn Marino tree init = build_int_cst (TREE_TYPE (niters), 0);
766*e4b17023SJohn Marino tree step = build_int_cst (TREE_TYPE (niters), 1);
767*e4b17023SJohn Marino LOC loop_loc;
768*e4b17023SJohn Marino enum tree_code code;
769*e4b17023SJohn Marino
770*e4b17023SJohn Marino orig_cond = get_loop_exit_condition (loop);
771*e4b17023SJohn Marino gcc_assert (orig_cond);
772*e4b17023SJohn Marino loop_cond_gsi = gsi_for_stmt (orig_cond);
773*e4b17023SJohn Marino
774*e4b17023SJohn Marino standard_iv_increment_position (loop, &incr_gsi, &insert_after);
775*e4b17023SJohn Marino create_iv (init, step, NULL_TREE, loop,
776*e4b17023SJohn Marino &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
777*e4b17023SJohn Marino
778*e4b17023SJohn Marino indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
779*e4b17023SJohn Marino true, NULL_TREE, true,
780*e4b17023SJohn Marino GSI_SAME_STMT);
781*e4b17023SJohn Marino niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
782*e4b17023SJohn Marino true, GSI_SAME_STMT);
783*e4b17023SJohn Marino
784*e4b17023SJohn Marino code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
785*e4b17023SJohn Marino cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
786*e4b17023SJohn Marino NULL_TREE);
787*e4b17023SJohn Marino
788*e4b17023SJohn Marino gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
789*e4b17023SJohn Marino
790*e4b17023SJohn Marino /* Remove old loop exit test: */
791*e4b17023SJohn Marino gsi_remove (&loop_cond_gsi, true);
792*e4b17023SJohn Marino
793*e4b17023SJohn Marino loop_loc = find_loop_location (loop);
794*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
795*e4b17023SJohn Marino {
796*e4b17023SJohn Marino if (loop_loc != UNKNOWN_LOC)
797*e4b17023SJohn Marino fprintf (dump_file, "\nloop at %s:%d: ",
798*e4b17023SJohn Marino LOC_FILE (loop_loc), LOC_LINE (loop_loc));
799*e4b17023SJohn Marino print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
800*e4b17023SJohn Marino }
801*e4b17023SJohn Marino
802*e4b17023SJohn Marino loop->nb_iterations = niters;
803*e4b17023SJohn Marino }
804*e4b17023SJohn Marino
805*e4b17023SJohn Marino
806*e4b17023SJohn Marino /* Given LOOP this function generates a new copy of it and puts it
807*e4b17023SJohn Marino on E which is either the entry or exit of LOOP. */
808*e4b17023SJohn Marino
809*e4b17023SJohn Marino struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg(struct loop * loop,edge e)810*e4b17023SJohn Marino slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
811*e4b17023SJohn Marino {
812*e4b17023SJohn Marino struct loop *new_loop;
813*e4b17023SJohn Marino basic_block *new_bbs, *bbs;
814*e4b17023SJohn Marino bool at_exit;
815*e4b17023SJohn Marino bool was_imm_dom;
816*e4b17023SJohn Marino basic_block exit_dest;
817*e4b17023SJohn Marino gimple phi;
818*e4b17023SJohn Marino tree phi_arg;
819*e4b17023SJohn Marino edge exit, new_exit;
820*e4b17023SJohn Marino gimple_stmt_iterator gsi;
821*e4b17023SJohn Marino
822*e4b17023SJohn Marino at_exit = (e == single_exit (loop));
823*e4b17023SJohn Marino if (!at_exit && e != loop_preheader_edge (loop))
824*e4b17023SJohn Marino return NULL;
825*e4b17023SJohn Marino
826*e4b17023SJohn Marino bbs = get_loop_body (loop);
827*e4b17023SJohn Marino
828*e4b17023SJohn Marino /* Check whether duplication is possible. */
829*e4b17023SJohn Marino if (!can_copy_bbs_p (bbs, loop->num_nodes))
830*e4b17023SJohn Marino {
831*e4b17023SJohn Marino free (bbs);
832*e4b17023SJohn Marino return NULL;
833*e4b17023SJohn Marino }
834*e4b17023SJohn Marino
835*e4b17023SJohn Marino /* Generate new loop structure. */
836*e4b17023SJohn Marino new_loop = duplicate_loop (loop, loop_outer (loop));
837*e4b17023SJohn Marino if (!new_loop)
838*e4b17023SJohn Marino {
839*e4b17023SJohn Marino free (bbs);
840*e4b17023SJohn Marino return NULL;
841*e4b17023SJohn Marino }
842*e4b17023SJohn Marino
843*e4b17023SJohn Marino exit_dest = single_exit (loop)->dest;
844*e4b17023SJohn Marino was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
845*e4b17023SJohn Marino exit_dest) == loop->header ?
846*e4b17023SJohn Marino true : false);
847*e4b17023SJohn Marino
848*e4b17023SJohn Marino new_bbs = XNEWVEC (basic_block, loop->num_nodes);
849*e4b17023SJohn Marino
850*e4b17023SJohn Marino exit = single_exit (loop);
851*e4b17023SJohn Marino copy_bbs (bbs, loop->num_nodes, new_bbs,
852*e4b17023SJohn Marino &exit, 1, &new_exit, NULL,
853*e4b17023SJohn Marino e->src);
854*e4b17023SJohn Marino
855*e4b17023SJohn Marino /* Duplicating phi args at exit bbs as coming
856*e4b17023SJohn Marino also from exit of duplicated loop. */
857*e4b17023SJohn Marino for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
858*e4b17023SJohn Marino {
859*e4b17023SJohn Marino phi = gsi_stmt (gsi);
860*e4b17023SJohn Marino phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
861*e4b17023SJohn Marino if (phi_arg)
862*e4b17023SJohn Marino {
863*e4b17023SJohn Marino edge new_loop_exit_edge;
864*e4b17023SJohn Marino source_location locus;
865*e4b17023SJohn Marino
866*e4b17023SJohn Marino locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
867*e4b17023SJohn Marino if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
868*e4b17023SJohn Marino new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
869*e4b17023SJohn Marino else
870*e4b17023SJohn Marino new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
871*e4b17023SJohn Marino
872*e4b17023SJohn Marino add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
873*e4b17023SJohn Marino }
874*e4b17023SJohn Marino }
875*e4b17023SJohn Marino
876*e4b17023SJohn Marino if (at_exit) /* Add the loop copy at exit. */
877*e4b17023SJohn Marino {
878*e4b17023SJohn Marino redirect_edge_and_branch_force (e, new_loop->header);
879*e4b17023SJohn Marino PENDING_STMT (e) = NULL;
880*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
881*e4b17023SJohn Marino if (was_imm_dom)
882*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
883*e4b17023SJohn Marino }
884*e4b17023SJohn Marino else /* Add the copy at entry. */
885*e4b17023SJohn Marino {
886*e4b17023SJohn Marino edge new_exit_e;
887*e4b17023SJohn Marino edge entry_e = loop_preheader_edge (loop);
888*e4b17023SJohn Marino basic_block preheader = entry_e->src;
889*e4b17023SJohn Marino
890*e4b17023SJohn Marino if (!flow_bb_inside_loop_p (new_loop,
891*e4b17023SJohn Marino EDGE_SUCC (new_loop->header, 0)->dest))
892*e4b17023SJohn Marino new_exit_e = EDGE_SUCC (new_loop->header, 0);
893*e4b17023SJohn Marino else
894*e4b17023SJohn Marino new_exit_e = EDGE_SUCC (new_loop->header, 1);
895*e4b17023SJohn Marino
896*e4b17023SJohn Marino redirect_edge_and_branch_force (new_exit_e, loop->header);
897*e4b17023SJohn Marino PENDING_STMT (new_exit_e) = NULL;
898*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, loop->header,
899*e4b17023SJohn Marino new_exit_e->src);
900*e4b17023SJohn Marino
901*e4b17023SJohn Marino /* We have to add phi args to the loop->header here as coming
902*e4b17023SJohn Marino from new_exit_e edge. */
903*e4b17023SJohn Marino for (gsi = gsi_start_phis (loop->header);
904*e4b17023SJohn Marino !gsi_end_p (gsi);
905*e4b17023SJohn Marino gsi_next (&gsi))
906*e4b17023SJohn Marino {
907*e4b17023SJohn Marino phi = gsi_stmt (gsi);
908*e4b17023SJohn Marino phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
909*e4b17023SJohn Marino if (phi_arg)
910*e4b17023SJohn Marino add_phi_arg (phi, phi_arg, new_exit_e,
911*e4b17023SJohn Marino gimple_phi_arg_location_from_edge (phi, entry_e));
912*e4b17023SJohn Marino }
913*e4b17023SJohn Marino
914*e4b17023SJohn Marino redirect_edge_and_branch_force (entry_e, new_loop->header);
915*e4b17023SJohn Marino PENDING_STMT (entry_e) = NULL;
916*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
917*e4b17023SJohn Marino }
918*e4b17023SJohn Marino
919*e4b17023SJohn Marino free (new_bbs);
920*e4b17023SJohn Marino free (bbs);
921*e4b17023SJohn Marino
922*e4b17023SJohn Marino return new_loop;
923*e4b17023SJohn Marino }
924*e4b17023SJohn Marino
925*e4b17023SJohn Marino
926*e4b17023SJohn Marino /* Given the condition statement COND, put it as the last statement
927*e4b17023SJohn Marino of GUARD_BB; EXIT_BB is the basic block to skip the loop;
928*e4b17023SJohn Marino Assumes that this is the single exit of the guarded loop.
929*e4b17023SJohn Marino Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
930*e4b17023SJohn Marino
931*e4b17023SJohn Marino static edge
slpeel_add_loop_guard(basic_block guard_bb,tree cond,gimple_seq cond_expr_stmt_list,basic_block exit_bb,basic_block dom_bb)932*e4b17023SJohn Marino slpeel_add_loop_guard (basic_block guard_bb, tree cond,
933*e4b17023SJohn Marino gimple_seq cond_expr_stmt_list,
934*e4b17023SJohn Marino basic_block exit_bb, basic_block dom_bb)
935*e4b17023SJohn Marino {
936*e4b17023SJohn Marino gimple_stmt_iterator gsi;
937*e4b17023SJohn Marino edge new_e, enter_e;
938*e4b17023SJohn Marino gimple cond_stmt;
939*e4b17023SJohn Marino gimple_seq gimplify_stmt_list = NULL;
940*e4b17023SJohn Marino
941*e4b17023SJohn Marino enter_e = EDGE_SUCC (guard_bb, 0);
942*e4b17023SJohn Marino enter_e->flags &= ~EDGE_FALLTHRU;
943*e4b17023SJohn Marino enter_e->flags |= EDGE_FALSE_VALUE;
944*e4b17023SJohn Marino gsi = gsi_last_bb (guard_bb);
945*e4b17023SJohn Marino
946*e4b17023SJohn Marino cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
947*e4b17023SJohn Marino if (gimplify_stmt_list)
948*e4b17023SJohn Marino gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
949*e4b17023SJohn Marino cond_stmt = gimple_build_cond (NE_EXPR,
950*e4b17023SJohn Marino cond, build_int_cst (TREE_TYPE (cond), 0),
951*e4b17023SJohn Marino NULL_TREE, NULL_TREE);
952*e4b17023SJohn Marino if (cond_expr_stmt_list)
953*e4b17023SJohn Marino gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
954*e4b17023SJohn Marino
955*e4b17023SJohn Marino gsi = gsi_last_bb (guard_bb);
956*e4b17023SJohn Marino gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
957*e4b17023SJohn Marino
958*e4b17023SJohn Marino /* Add new edge to connect guard block to the merge/loop-exit block. */
959*e4b17023SJohn Marino new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
960*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
961*e4b17023SJohn Marino return new_e;
962*e4b17023SJohn Marino }
963*e4b17023SJohn Marino
964*e4b17023SJohn Marino
965*e4b17023SJohn Marino /* This function verifies that the following restrictions apply to LOOP:
966*e4b17023SJohn Marino (1) it is innermost
967*e4b17023SJohn Marino (2) it consists of exactly 2 basic blocks - header, and an empty latch.
968*e4b17023SJohn Marino (3) it is single entry, single exit
969*e4b17023SJohn Marino (4) its exit condition is the last stmt in the header
970*e4b17023SJohn Marino (5) E is the entry/exit edge of LOOP.
971*e4b17023SJohn Marino */
972*e4b17023SJohn Marino
973*e4b17023SJohn Marino bool
slpeel_can_duplicate_loop_p(const struct loop * loop,const_edge e)974*e4b17023SJohn Marino slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
975*e4b17023SJohn Marino {
976*e4b17023SJohn Marino edge exit_e = single_exit (loop);
977*e4b17023SJohn Marino edge entry_e = loop_preheader_edge (loop);
978*e4b17023SJohn Marino gimple orig_cond = get_loop_exit_condition (loop);
979*e4b17023SJohn Marino gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
980*e4b17023SJohn Marino
981*e4b17023SJohn Marino if (need_ssa_update_p (cfun))
982*e4b17023SJohn Marino return false;
983*e4b17023SJohn Marino
984*e4b17023SJohn Marino if (loop->inner
985*e4b17023SJohn Marino /* All loops have an outer scope; the only case loop->outer is NULL is for
986*e4b17023SJohn Marino the function itself. */
987*e4b17023SJohn Marino || !loop_outer (loop)
988*e4b17023SJohn Marino || loop->num_nodes != 2
989*e4b17023SJohn Marino || !empty_block_p (loop->latch)
990*e4b17023SJohn Marino || !single_exit (loop)
991*e4b17023SJohn Marino /* Verify that new loop exit condition can be trivially modified. */
992*e4b17023SJohn Marino || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
993*e4b17023SJohn Marino || (e != exit_e && e != entry_e))
994*e4b17023SJohn Marino return false;
995*e4b17023SJohn Marino
996*e4b17023SJohn Marino return true;
997*e4b17023SJohn Marino }
998*e4b17023SJohn Marino
999*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
1000*e4b17023SJohn Marino static void
slpeel_verify_cfg_after_peeling(struct loop * first_loop,struct loop * second_loop)1001*e4b17023SJohn Marino slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1002*e4b17023SJohn Marino struct loop *second_loop)
1003*e4b17023SJohn Marino {
1004*e4b17023SJohn Marino basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1005*e4b17023SJohn Marino basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1006*e4b17023SJohn Marino basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1007*e4b17023SJohn Marino
1008*e4b17023SJohn Marino /* A guard that controls whether the second_loop is to be executed or skipped
1009*e4b17023SJohn Marino is placed in first_loop->exit. first_loop->exit therefore has two
1010*e4b17023SJohn Marino successors - one is the preheader of second_loop, and the other is a bb
1011*e4b17023SJohn Marino after second_loop.
1012*e4b17023SJohn Marino */
1013*e4b17023SJohn Marino gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1014*e4b17023SJohn Marino
1015*e4b17023SJohn Marino /* 1. Verify that one of the successors of first_loop->exit is the preheader
1016*e4b17023SJohn Marino of second_loop. */
1017*e4b17023SJohn Marino
1018*e4b17023SJohn Marino /* The preheader of new_loop is expected to have two predecessors:
1019*e4b17023SJohn Marino first_loop->exit and the block that precedes first_loop. */
1020*e4b17023SJohn Marino
1021*e4b17023SJohn Marino gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1022*e4b17023SJohn Marino && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1023*e4b17023SJohn Marino && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1024*e4b17023SJohn Marino || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1025*e4b17023SJohn Marino && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1026*e4b17023SJohn Marino
1027*e4b17023SJohn Marino /* Verify that the other successor of first_loop->exit is after the
1028*e4b17023SJohn Marino second_loop. */
1029*e4b17023SJohn Marino /* TODO */
1030*e4b17023SJohn Marino }
1031*e4b17023SJohn Marino #endif
1032*e4b17023SJohn Marino
1033*e4b17023SJohn Marino /* If the run time cost model check determines that vectorization is
1034*e4b17023SJohn Marino not profitable and hence scalar loop should be generated then set
1035*e4b17023SJohn Marino FIRST_NITERS to prologue peeled iterations. This will allow all the
1036*e4b17023SJohn Marino iterations to be executed in the prologue peeled scalar loop. */
1037*e4b17023SJohn Marino
1038*e4b17023SJohn Marino static void
set_prologue_iterations(basic_block bb_before_first_loop,tree * first_niters,struct loop * loop,unsigned int th)1039*e4b17023SJohn Marino set_prologue_iterations (basic_block bb_before_first_loop,
1040*e4b17023SJohn Marino tree *first_niters,
1041*e4b17023SJohn Marino struct loop *loop,
1042*e4b17023SJohn Marino unsigned int th)
1043*e4b17023SJohn Marino {
1044*e4b17023SJohn Marino edge e;
1045*e4b17023SJohn Marino basic_block cond_bb, then_bb;
1046*e4b17023SJohn Marino tree var, prologue_after_cost_adjust_name;
1047*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1048*e4b17023SJohn Marino gimple newphi;
1049*e4b17023SJohn Marino edge e_true, e_false, e_fallthru;
1050*e4b17023SJohn Marino gimple cond_stmt;
1051*e4b17023SJohn Marino gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1052*e4b17023SJohn Marino tree cost_pre_condition = NULL_TREE;
1053*e4b17023SJohn Marino tree scalar_loop_iters =
1054*e4b17023SJohn Marino unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1055*e4b17023SJohn Marino
1056*e4b17023SJohn Marino e = single_pred_edge (bb_before_first_loop);
1057*e4b17023SJohn Marino cond_bb = split_edge(e);
1058*e4b17023SJohn Marino
1059*e4b17023SJohn Marino e = single_pred_edge (bb_before_first_loop);
1060*e4b17023SJohn Marino then_bb = split_edge(e);
1061*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1062*e4b17023SJohn Marino
1063*e4b17023SJohn Marino e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1064*e4b17023SJohn Marino EDGE_FALSE_VALUE);
1065*e4b17023SJohn Marino set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1066*e4b17023SJohn Marino
1067*e4b17023SJohn Marino e_true = EDGE_PRED (then_bb, 0);
1068*e4b17023SJohn Marino e_true->flags &= ~EDGE_FALLTHRU;
1069*e4b17023SJohn Marino e_true->flags |= EDGE_TRUE_VALUE;
1070*e4b17023SJohn Marino
1071*e4b17023SJohn Marino e_fallthru = EDGE_SUCC (then_bb, 0);
1072*e4b17023SJohn Marino
1073*e4b17023SJohn Marino cost_pre_condition =
1074*e4b17023SJohn Marino fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1075*e4b17023SJohn Marino build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1076*e4b17023SJohn Marino cost_pre_condition =
1077*e4b17023SJohn Marino force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1078*e4b17023SJohn Marino true, NULL_TREE);
1079*e4b17023SJohn Marino cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1080*e4b17023SJohn Marino build_int_cst (TREE_TYPE (cost_pre_condition),
1081*e4b17023SJohn Marino 0), NULL_TREE, NULL_TREE);
1082*e4b17023SJohn Marino
1083*e4b17023SJohn Marino gsi = gsi_last_bb (cond_bb);
1084*e4b17023SJohn Marino if (gimplify_stmt_list)
1085*e4b17023SJohn Marino gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1086*e4b17023SJohn Marino
1087*e4b17023SJohn Marino gsi = gsi_last_bb (cond_bb);
1088*e4b17023SJohn Marino gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1089*e4b17023SJohn Marino
1090*e4b17023SJohn Marino var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1091*e4b17023SJohn Marino "prologue_after_cost_adjust");
1092*e4b17023SJohn Marino add_referenced_var (var);
1093*e4b17023SJohn Marino prologue_after_cost_adjust_name =
1094*e4b17023SJohn Marino force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1095*e4b17023SJohn Marino
1096*e4b17023SJohn Marino gsi = gsi_last_bb (then_bb);
1097*e4b17023SJohn Marino if (stmts)
1098*e4b17023SJohn Marino gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1099*e4b17023SJohn Marino
1100*e4b17023SJohn Marino newphi = create_phi_node (var, bb_before_first_loop);
1101*e4b17023SJohn Marino add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1102*e4b17023SJohn Marino UNKNOWN_LOCATION);
1103*e4b17023SJohn Marino add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
1104*e4b17023SJohn Marino
1105*e4b17023SJohn Marino *first_niters = PHI_RESULT (newphi);
1106*e4b17023SJohn Marino }
1107*e4b17023SJohn Marino
1108*e4b17023SJohn Marino /* Function slpeel_tree_peel_loop_to_edge.
1109*e4b17023SJohn Marino
1110*e4b17023SJohn Marino Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1111*e4b17023SJohn Marino that is placed on the entry (exit) edge E of LOOP. After this transformation
1112*e4b17023SJohn Marino we have two loops one after the other - first-loop iterates FIRST_NITERS
1113*e4b17023SJohn Marino times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1114*e4b17023SJohn Marino If the cost model indicates that it is profitable to emit a scalar
1115*e4b17023SJohn Marino loop instead of the vector one, then the prolog (epilog) loop will iterate
1116*e4b17023SJohn Marino for the entire unchanged scalar iterations of the loop.
1117*e4b17023SJohn Marino
1118*e4b17023SJohn Marino Input:
1119*e4b17023SJohn Marino - LOOP: the loop to be peeled.
1120*e4b17023SJohn Marino - E: the exit or entry edge of LOOP.
1121*e4b17023SJohn Marino If it is the entry edge, we peel the first iterations of LOOP. In this
1122*e4b17023SJohn Marino case first-loop is LOOP, and second-loop is the newly created loop.
1123*e4b17023SJohn Marino If it is the exit edge, we peel the last iterations of LOOP. In this
1124*e4b17023SJohn Marino case, first-loop is the newly created loop, and second-loop is LOOP.
1125*e4b17023SJohn Marino - NITERS: the number of iterations that LOOP iterates.
1126*e4b17023SJohn Marino - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1127*e4b17023SJohn Marino - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1128*e4b17023SJohn Marino for updating the loop bound of the first-loop to FIRST_NITERS. If it
1129*e4b17023SJohn Marino is false, the caller of this function may want to take care of this
1130*e4b17023SJohn Marino (this can be useful if we don't want new stmts added to first-loop).
1131*e4b17023SJohn Marino - TH: cost model profitability threshold of iterations for vectorization.
1132*e4b17023SJohn Marino - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1133*e4b17023SJohn Marino during versioning and hence needs to occur during
1134*e4b17023SJohn Marino prologue generation or whether cost model check
1135*e4b17023SJohn Marino has not occurred during prologue generation and hence
1136*e4b17023SJohn Marino needs to occur during epilogue generation.
1137*e4b17023SJohn Marino
1138*e4b17023SJohn Marino
1139*e4b17023SJohn Marino Output:
1140*e4b17023SJohn Marino The function returns a pointer to the new loop-copy, or NULL if it failed
1141*e4b17023SJohn Marino to perform the transformation.
1142*e4b17023SJohn Marino
1143*e4b17023SJohn Marino The function generates two if-then-else guards: one before the first loop,
1144*e4b17023SJohn Marino and the other before the second loop:
1145*e4b17023SJohn Marino The first guard is:
1146*e4b17023SJohn Marino if (FIRST_NITERS == 0) then skip the first loop,
1147*e4b17023SJohn Marino and go directly to the second loop.
1148*e4b17023SJohn Marino The second guard is:
1149*e4b17023SJohn Marino if (FIRST_NITERS == NITERS) then skip the second loop.
1150*e4b17023SJohn Marino
1151*e4b17023SJohn Marino If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1152*e4b17023SJohn Marino then the generated condition is combined with COND_EXPR and the
1153*e4b17023SJohn Marino statements in COND_EXPR_STMT_LIST are emitted together with it.
1154*e4b17023SJohn Marino
1155*e4b17023SJohn Marino FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1156*e4b17023SJohn Marino FORNOW the resulting code will not be in loop-closed-ssa form.
1157*e4b17023SJohn Marino */
1158*e4b17023SJohn Marino
1159*e4b17023SJohn Marino static struct loop*
slpeel_tree_peel_loop_to_edge(struct loop * loop,edge e,tree * first_niters,tree niters,bool update_first_loop_count,unsigned int th,bool check_profitability,tree cond_expr,gimple_seq cond_expr_stmt_list)1160*e4b17023SJohn Marino slpeel_tree_peel_loop_to_edge (struct loop *loop,
1161*e4b17023SJohn Marino edge e, tree *first_niters,
1162*e4b17023SJohn Marino tree niters, bool update_first_loop_count,
1163*e4b17023SJohn Marino unsigned int th, bool check_profitability,
1164*e4b17023SJohn Marino tree cond_expr, gimple_seq cond_expr_stmt_list)
1165*e4b17023SJohn Marino {
1166*e4b17023SJohn Marino struct loop *new_loop = NULL, *first_loop, *second_loop;
1167*e4b17023SJohn Marino edge skip_e;
1168*e4b17023SJohn Marino tree pre_condition = NULL_TREE;
1169*e4b17023SJohn Marino bitmap definitions;
1170*e4b17023SJohn Marino basic_block bb_before_second_loop, bb_after_second_loop;
1171*e4b17023SJohn Marino basic_block bb_before_first_loop;
1172*e4b17023SJohn Marino basic_block bb_between_loops;
1173*e4b17023SJohn Marino basic_block new_exit_bb;
1174*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1175*e4b17023SJohn Marino edge exit_e = single_exit (loop);
1176*e4b17023SJohn Marino LOC loop_loc;
1177*e4b17023SJohn Marino tree cost_pre_condition = NULL_TREE;
1178*e4b17023SJohn Marino
1179*e4b17023SJohn Marino if (!slpeel_can_duplicate_loop_p (loop, e))
1180*e4b17023SJohn Marino return NULL;
1181*e4b17023SJohn Marino
1182*e4b17023SJohn Marino /* We have to initialize cfg_hooks. Then, when calling
1183*e4b17023SJohn Marino cfg_hooks->split_edge, the function tree_split_edge
1184*e4b17023SJohn Marino is actually called and, when calling cfg_hooks->duplicate_block,
1185*e4b17023SJohn Marino the function tree_duplicate_bb is called. */
1186*e4b17023SJohn Marino gimple_register_cfg_hooks ();
1187*e4b17023SJohn Marino
1188*e4b17023SJohn Marino /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1189*e4b17023SJohn Marino in the exit bb and rename all the uses after the loop. This simplifies
1190*e4b17023SJohn Marino the *guard[12] routines, which assume loop closed SSA form for all PHIs
1191*e4b17023SJohn Marino (but normally loop closed SSA form doesn't require virtual PHIs to be
1192*e4b17023SJohn Marino in the same form). Doing this early simplifies the checking what
1193*e4b17023SJohn Marino uses should be renamed. */
1194*e4b17023SJohn Marino for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1195*e4b17023SJohn Marino if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
1196*e4b17023SJohn Marino {
1197*e4b17023SJohn Marino gimple phi = gsi_stmt (gsi);
1198*e4b17023SJohn Marino for (gsi = gsi_start_phis (exit_e->dest);
1199*e4b17023SJohn Marino !gsi_end_p (gsi); gsi_next (&gsi))
1200*e4b17023SJohn Marino if (!is_gimple_reg (gimple_phi_result (gsi_stmt (gsi))))
1201*e4b17023SJohn Marino break;
1202*e4b17023SJohn Marino if (gsi_end_p (gsi))
1203*e4b17023SJohn Marino {
1204*e4b17023SJohn Marino gimple new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
1205*e4b17023SJohn Marino exit_e->dest);
1206*e4b17023SJohn Marino tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1207*e4b17023SJohn Marino imm_use_iterator imm_iter;
1208*e4b17023SJohn Marino gimple stmt;
1209*e4b17023SJohn Marino tree new_vop = make_ssa_name (SSA_NAME_VAR (PHI_RESULT (phi)),
1210*e4b17023SJohn Marino new_phi);
1211*e4b17023SJohn Marino use_operand_p use_p;
1212*e4b17023SJohn Marino
1213*e4b17023SJohn Marino add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
1214*e4b17023SJohn Marino gimple_phi_set_result (new_phi, new_vop);
1215*e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1216*e4b17023SJohn Marino if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1217*e4b17023SJohn Marino FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1218*e4b17023SJohn Marino SET_USE (use_p, new_vop);
1219*e4b17023SJohn Marino }
1220*e4b17023SJohn Marino break;
1221*e4b17023SJohn Marino }
1222*e4b17023SJohn Marino
1223*e4b17023SJohn Marino /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1224*e4b17023SJohn Marino Resulting CFG would be:
1225*e4b17023SJohn Marino
1226*e4b17023SJohn Marino first_loop:
1227*e4b17023SJohn Marino do {
1228*e4b17023SJohn Marino } while ...
1229*e4b17023SJohn Marino
1230*e4b17023SJohn Marino second_loop:
1231*e4b17023SJohn Marino do {
1232*e4b17023SJohn Marino } while ...
1233*e4b17023SJohn Marino
1234*e4b17023SJohn Marino orig_exit_bb:
1235*e4b17023SJohn Marino */
1236*e4b17023SJohn Marino
1237*e4b17023SJohn Marino if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1238*e4b17023SJohn Marino {
1239*e4b17023SJohn Marino loop_loc = find_loop_location (loop);
1240*e4b17023SJohn Marino if (dump_file && (dump_flags & TDF_DETAILS))
1241*e4b17023SJohn Marino {
1242*e4b17023SJohn Marino if (loop_loc != UNKNOWN_LOC)
1243*e4b17023SJohn Marino fprintf (dump_file, "\n%s:%d: note: ",
1244*e4b17023SJohn Marino LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1245*e4b17023SJohn Marino fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1246*e4b17023SJohn Marino }
1247*e4b17023SJohn Marino return NULL;
1248*e4b17023SJohn Marino }
1249*e4b17023SJohn Marino
1250*e4b17023SJohn Marino if (MAY_HAVE_DEBUG_STMTS)
1251*e4b17023SJohn Marino {
1252*e4b17023SJohn Marino gcc_assert (!adjust_vec);
1253*e4b17023SJohn Marino adjust_vec = VEC_alloc (adjust_info, stack, 32);
1254*e4b17023SJohn Marino }
1255*e4b17023SJohn Marino
1256*e4b17023SJohn Marino if (e == exit_e)
1257*e4b17023SJohn Marino {
1258*e4b17023SJohn Marino /* NEW_LOOP was placed after LOOP. */
1259*e4b17023SJohn Marino first_loop = loop;
1260*e4b17023SJohn Marino second_loop = new_loop;
1261*e4b17023SJohn Marino }
1262*e4b17023SJohn Marino else
1263*e4b17023SJohn Marino {
1264*e4b17023SJohn Marino /* NEW_LOOP was placed before LOOP. */
1265*e4b17023SJohn Marino first_loop = new_loop;
1266*e4b17023SJohn Marino second_loop = loop;
1267*e4b17023SJohn Marino }
1268*e4b17023SJohn Marino
1269*e4b17023SJohn Marino definitions = ssa_names_to_replace ();
1270*e4b17023SJohn Marino slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1271*e4b17023SJohn Marino rename_variables_in_loop (new_loop);
1272*e4b17023SJohn Marino
1273*e4b17023SJohn Marino
1274*e4b17023SJohn Marino /* 2. Add the guard code in one of the following ways:
1275*e4b17023SJohn Marino
1276*e4b17023SJohn Marino 2.a Add the guard that controls whether the first loop is executed.
1277*e4b17023SJohn Marino This occurs when this function is invoked for prologue or epilogue
1278*e4b17023SJohn Marino generation and when the cost model check can be done at compile time.
1279*e4b17023SJohn Marino
1280*e4b17023SJohn Marino Resulting CFG would be:
1281*e4b17023SJohn Marino
1282*e4b17023SJohn Marino bb_before_first_loop:
1283*e4b17023SJohn Marino if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1284*e4b17023SJohn Marino GOTO first-loop
1285*e4b17023SJohn Marino
1286*e4b17023SJohn Marino first_loop:
1287*e4b17023SJohn Marino do {
1288*e4b17023SJohn Marino } while ...
1289*e4b17023SJohn Marino
1290*e4b17023SJohn Marino bb_before_second_loop:
1291*e4b17023SJohn Marino
1292*e4b17023SJohn Marino second_loop:
1293*e4b17023SJohn Marino do {
1294*e4b17023SJohn Marino } while ...
1295*e4b17023SJohn Marino
1296*e4b17023SJohn Marino orig_exit_bb:
1297*e4b17023SJohn Marino
1298*e4b17023SJohn Marino 2.b Add the cost model check that allows the prologue
1299*e4b17023SJohn Marino to iterate for the entire unchanged scalar
1300*e4b17023SJohn Marino iterations of the loop in the event that the cost
1301*e4b17023SJohn Marino model indicates that the scalar loop is more
1302*e4b17023SJohn Marino profitable than the vector one. This occurs when
1303*e4b17023SJohn Marino this function is invoked for prologue generation
1304*e4b17023SJohn Marino and the cost model check needs to be done at run
1305*e4b17023SJohn Marino time.
1306*e4b17023SJohn Marino
1307*e4b17023SJohn Marino Resulting CFG after prologue peeling would be:
1308*e4b17023SJohn Marino
1309*e4b17023SJohn Marino if (scalar_loop_iterations <= th)
1310*e4b17023SJohn Marino FIRST_NITERS = scalar_loop_iterations
1311*e4b17023SJohn Marino
1312*e4b17023SJohn Marino bb_before_first_loop:
1313*e4b17023SJohn Marino if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1314*e4b17023SJohn Marino GOTO first-loop
1315*e4b17023SJohn Marino
1316*e4b17023SJohn Marino first_loop:
1317*e4b17023SJohn Marino do {
1318*e4b17023SJohn Marino } while ...
1319*e4b17023SJohn Marino
1320*e4b17023SJohn Marino bb_before_second_loop:
1321*e4b17023SJohn Marino
1322*e4b17023SJohn Marino second_loop:
1323*e4b17023SJohn Marino do {
1324*e4b17023SJohn Marino } while ...
1325*e4b17023SJohn Marino
1326*e4b17023SJohn Marino orig_exit_bb:
1327*e4b17023SJohn Marino
1328*e4b17023SJohn Marino 2.c Add the cost model check that allows the epilogue
1329*e4b17023SJohn Marino to iterate for the entire unchanged scalar
1330*e4b17023SJohn Marino iterations of the loop in the event that the cost
1331*e4b17023SJohn Marino model indicates that the scalar loop is more
1332*e4b17023SJohn Marino profitable than the vector one. This occurs when
1333*e4b17023SJohn Marino this function is invoked for epilogue generation
1334*e4b17023SJohn Marino and the cost model check needs to be done at run
1335*e4b17023SJohn Marino time. This check is combined with any pre-existing
1336*e4b17023SJohn Marino check in COND_EXPR to avoid versioning.
1337*e4b17023SJohn Marino
1338*e4b17023SJohn Marino Resulting CFG after prologue peeling would be:
1339*e4b17023SJohn Marino
1340*e4b17023SJohn Marino bb_before_first_loop:
1341*e4b17023SJohn Marino if ((scalar_loop_iterations <= th)
1342*e4b17023SJohn Marino ||
1343*e4b17023SJohn Marino FIRST_NITERS == 0) GOTO bb_before_second_loop
1344*e4b17023SJohn Marino GOTO first-loop
1345*e4b17023SJohn Marino
1346*e4b17023SJohn Marino first_loop:
1347*e4b17023SJohn Marino do {
1348*e4b17023SJohn Marino } while ...
1349*e4b17023SJohn Marino
1350*e4b17023SJohn Marino bb_before_second_loop:
1351*e4b17023SJohn Marino
1352*e4b17023SJohn Marino second_loop:
1353*e4b17023SJohn Marino do {
1354*e4b17023SJohn Marino } while ...
1355*e4b17023SJohn Marino
1356*e4b17023SJohn Marino orig_exit_bb:
1357*e4b17023SJohn Marino */
1358*e4b17023SJohn Marino
1359*e4b17023SJohn Marino bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1360*e4b17023SJohn Marino bb_before_second_loop = split_edge (single_exit (first_loop));
1361*e4b17023SJohn Marino
1362*e4b17023SJohn Marino /* Epilogue peeling. */
1363*e4b17023SJohn Marino if (!update_first_loop_count)
1364*e4b17023SJohn Marino {
1365*e4b17023SJohn Marino pre_condition =
1366*e4b17023SJohn Marino fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1367*e4b17023SJohn Marino build_int_cst (TREE_TYPE (*first_niters), 0));
1368*e4b17023SJohn Marino if (check_profitability)
1369*e4b17023SJohn Marino {
1370*e4b17023SJohn Marino tree scalar_loop_iters
1371*e4b17023SJohn Marino = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1372*e4b17023SJohn Marino (loop_vec_info_for_loop (loop)));
1373*e4b17023SJohn Marino cost_pre_condition =
1374*e4b17023SJohn Marino fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1375*e4b17023SJohn Marino build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1376*e4b17023SJohn Marino
1377*e4b17023SJohn Marino pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1378*e4b17023SJohn Marino cost_pre_condition, pre_condition);
1379*e4b17023SJohn Marino }
1380*e4b17023SJohn Marino if (cond_expr)
1381*e4b17023SJohn Marino {
1382*e4b17023SJohn Marino pre_condition =
1383*e4b17023SJohn Marino fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1384*e4b17023SJohn Marino pre_condition,
1385*e4b17023SJohn Marino fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1386*e4b17023SJohn Marino cond_expr));
1387*e4b17023SJohn Marino }
1388*e4b17023SJohn Marino }
1389*e4b17023SJohn Marino
1390*e4b17023SJohn Marino /* Prologue peeling. */
1391*e4b17023SJohn Marino else
1392*e4b17023SJohn Marino {
1393*e4b17023SJohn Marino if (check_profitability)
1394*e4b17023SJohn Marino set_prologue_iterations (bb_before_first_loop, first_niters,
1395*e4b17023SJohn Marino loop, th);
1396*e4b17023SJohn Marino
1397*e4b17023SJohn Marino pre_condition =
1398*e4b17023SJohn Marino fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1399*e4b17023SJohn Marino build_int_cst (TREE_TYPE (*first_niters), 0));
1400*e4b17023SJohn Marino }
1401*e4b17023SJohn Marino
1402*e4b17023SJohn Marino skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1403*e4b17023SJohn Marino cond_expr_stmt_list,
1404*e4b17023SJohn Marino bb_before_second_loop, bb_before_first_loop);
1405*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1406*e4b17023SJohn Marino first_loop == new_loop,
1407*e4b17023SJohn Marino &new_exit_bb, &definitions);
1408*e4b17023SJohn Marino
1409*e4b17023SJohn Marino
1410*e4b17023SJohn Marino /* 3. Add the guard that controls whether the second loop is executed.
1411*e4b17023SJohn Marino Resulting CFG would be:
1412*e4b17023SJohn Marino
1413*e4b17023SJohn Marino bb_before_first_loop:
1414*e4b17023SJohn Marino if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1415*e4b17023SJohn Marino GOTO first-loop
1416*e4b17023SJohn Marino
1417*e4b17023SJohn Marino first_loop:
1418*e4b17023SJohn Marino do {
1419*e4b17023SJohn Marino } while ...
1420*e4b17023SJohn Marino
1421*e4b17023SJohn Marino bb_between_loops:
1422*e4b17023SJohn Marino if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1423*e4b17023SJohn Marino GOTO bb_before_second_loop
1424*e4b17023SJohn Marino
1425*e4b17023SJohn Marino bb_before_second_loop:
1426*e4b17023SJohn Marino
1427*e4b17023SJohn Marino second_loop:
1428*e4b17023SJohn Marino do {
1429*e4b17023SJohn Marino } while ...
1430*e4b17023SJohn Marino
1431*e4b17023SJohn Marino bb_after_second_loop:
1432*e4b17023SJohn Marino
1433*e4b17023SJohn Marino orig_exit_bb:
1434*e4b17023SJohn Marino */
1435*e4b17023SJohn Marino
1436*e4b17023SJohn Marino bb_between_loops = new_exit_bb;
1437*e4b17023SJohn Marino bb_after_second_loop = split_edge (single_exit (second_loop));
1438*e4b17023SJohn Marino
1439*e4b17023SJohn Marino pre_condition =
1440*e4b17023SJohn Marino fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
1441*e4b17023SJohn Marino skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1442*e4b17023SJohn Marino bb_after_second_loop, bb_before_first_loop);
1443*e4b17023SJohn Marino slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1444*e4b17023SJohn Marino second_loop == new_loop, &new_exit_bb);
1445*e4b17023SJohn Marino
1446*e4b17023SJohn Marino /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1447*e4b17023SJohn Marino */
1448*e4b17023SJohn Marino if (update_first_loop_count)
1449*e4b17023SJohn Marino slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
1450*e4b17023SJohn Marino
1451*e4b17023SJohn Marino BITMAP_FREE (definitions);
1452*e4b17023SJohn Marino delete_update_ssa ();
1453*e4b17023SJohn Marino
1454*e4b17023SJohn Marino adjust_vec_debug_stmts ();
1455*e4b17023SJohn Marino
1456*e4b17023SJohn Marino return new_loop;
1457*e4b17023SJohn Marino }
1458*e4b17023SJohn Marino
1459*e4b17023SJohn Marino /* Function vect_get_loop_location.
1460*e4b17023SJohn Marino
1461*e4b17023SJohn Marino Extract the location of the loop in the source code.
1462*e4b17023SJohn Marino If the loop is not well formed for vectorization, an estimated
1463*e4b17023SJohn Marino location is calculated.
1464*e4b17023SJohn Marino Return the loop location if succeed and NULL if not. */
1465*e4b17023SJohn Marino
1466*e4b17023SJohn Marino LOC
find_loop_location(struct loop * loop)1467*e4b17023SJohn Marino find_loop_location (struct loop *loop)
1468*e4b17023SJohn Marino {
1469*e4b17023SJohn Marino gimple stmt = NULL;
1470*e4b17023SJohn Marino basic_block bb;
1471*e4b17023SJohn Marino gimple_stmt_iterator si;
1472*e4b17023SJohn Marino
1473*e4b17023SJohn Marino if (!loop)
1474*e4b17023SJohn Marino return UNKNOWN_LOC;
1475*e4b17023SJohn Marino
1476*e4b17023SJohn Marino stmt = get_loop_exit_condition (loop);
1477*e4b17023SJohn Marino
1478*e4b17023SJohn Marino if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1479*e4b17023SJohn Marino return gimple_location (stmt);
1480*e4b17023SJohn Marino
1481*e4b17023SJohn Marino /* If we got here the loop is probably not "well formed",
1482*e4b17023SJohn Marino try to estimate the loop location */
1483*e4b17023SJohn Marino
1484*e4b17023SJohn Marino if (!loop->header)
1485*e4b17023SJohn Marino return UNKNOWN_LOC;
1486*e4b17023SJohn Marino
1487*e4b17023SJohn Marino bb = loop->header;
1488*e4b17023SJohn Marino
1489*e4b17023SJohn Marino for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1490*e4b17023SJohn Marino {
1491*e4b17023SJohn Marino stmt = gsi_stmt (si);
1492*e4b17023SJohn Marino if (gimple_location (stmt) != UNKNOWN_LOC)
1493*e4b17023SJohn Marino return gimple_location (stmt);
1494*e4b17023SJohn Marino }
1495*e4b17023SJohn Marino
1496*e4b17023SJohn Marino return UNKNOWN_LOC;
1497*e4b17023SJohn Marino }
1498*e4b17023SJohn Marino
1499*e4b17023SJohn Marino
1500*e4b17023SJohn Marino /* This function builds ni_name = number of iterations loop executes
1501*e4b17023SJohn Marino on the loop preheader. If SEQ is given the stmt is instead emitted
1502*e4b17023SJohn Marino there. */
1503*e4b17023SJohn Marino
1504*e4b17023SJohn Marino static tree
vect_build_loop_niters(loop_vec_info loop_vinfo,gimple_seq seq)1505*e4b17023SJohn Marino vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1506*e4b17023SJohn Marino {
1507*e4b17023SJohn Marino tree ni_name, var;
1508*e4b17023SJohn Marino gimple_seq stmts = NULL;
1509*e4b17023SJohn Marino edge pe;
1510*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1511*e4b17023SJohn Marino tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1512*e4b17023SJohn Marino
1513*e4b17023SJohn Marino var = create_tmp_var (TREE_TYPE (ni), "niters");
1514*e4b17023SJohn Marino add_referenced_var (var);
1515*e4b17023SJohn Marino ni_name = force_gimple_operand (ni, &stmts, false, var);
1516*e4b17023SJohn Marino
1517*e4b17023SJohn Marino pe = loop_preheader_edge (loop);
1518*e4b17023SJohn Marino if (stmts)
1519*e4b17023SJohn Marino {
1520*e4b17023SJohn Marino if (seq)
1521*e4b17023SJohn Marino gimple_seq_add_seq (&seq, stmts);
1522*e4b17023SJohn Marino else
1523*e4b17023SJohn Marino {
1524*e4b17023SJohn Marino basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1525*e4b17023SJohn Marino gcc_assert (!new_bb);
1526*e4b17023SJohn Marino }
1527*e4b17023SJohn Marino }
1528*e4b17023SJohn Marino
1529*e4b17023SJohn Marino return ni_name;
1530*e4b17023SJohn Marino }
1531*e4b17023SJohn Marino
1532*e4b17023SJohn Marino
1533*e4b17023SJohn Marino /* This function generates the following statements:
1534*e4b17023SJohn Marino
1535*e4b17023SJohn Marino ni_name = number of iterations loop executes
1536*e4b17023SJohn Marino ratio = ni_name / vf
1537*e4b17023SJohn Marino ratio_mult_vf_name = ratio * vf
1538*e4b17023SJohn Marino
1539*e4b17023SJohn Marino and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1540*e4b17023SJohn Marino if that is non-NULL. */
1541*e4b17023SJohn Marino
1542*e4b17023SJohn Marino static void
vect_generate_tmps_on_preheader(loop_vec_info loop_vinfo,tree * ni_name_ptr,tree * ratio_mult_vf_name_ptr,tree * ratio_name_ptr,gimple_seq cond_expr_stmt_list)1543*e4b17023SJohn Marino vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1544*e4b17023SJohn Marino tree *ni_name_ptr,
1545*e4b17023SJohn Marino tree *ratio_mult_vf_name_ptr,
1546*e4b17023SJohn Marino tree *ratio_name_ptr,
1547*e4b17023SJohn Marino gimple_seq cond_expr_stmt_list)
1548*e4b17023SJohn Marino {
1549*e4b17023SJohn Marino
1550*e4b17023SJohn Marino edge pe;
1551*e4b17023SJohn Marino basic_block new_bb;
1552*e4b17023SJohn Marino gimple_seq stmts;
1553*e4b17023SJohn Marino tree ni_name, ni_minus_gap_name;
1554*e4b17023SJohn Marino tree var;
1555*e4b17023SJohn Marino tree ratio_name;
1556*e4b17023SJohn Marino tree ratio_mult_vf_name;
1557*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1558*e4b17023SJohn Marino tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1559*e4b17023SJohn Marino int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1560*e4b17023SJohn Marino tree log_vf;
1561*e4b17023SJohn Marino
1562*e4b17023SJohn Marino pe = loop_preheader_edge (loop);
1563*e4b17023SJohn Marino
1564*e4b17023SJohn Marino /* Generate temporary variable that contains
1565*e4b17023SJohn Marino number of iterations loop executes. */
1566*e4b17023SJohn Marino
1567*e4b17023SJohn Marino ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1568*e4b17023SJohn Marino log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1569*e4b17023SJohn Marino
1570*e4b17023SJohn Marino /* If epilogue loop is required because of data accesses with gaps, we
1571*e4b17023SJohn Marino subtract one iteration from the total number of iterations here for
1572*e4b17023SJohn Marino correct calculation of RATIO. */
1573*e4b17023SJohn Marino if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1574*e4b17023SJohn Marino {
1575*e4b17023SJohn Marino ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1576*e4b17023SJohn Marino ni_name,
1577*e4b17023SJohn Marino build_one_cst (TREE_TYPE (ni_name)));
1578*e4b17023SJohn Marino if (!is_gimple_val (ni_minus_gap_name))
1579*e4b17023SJohn Marino {
1580*e4b17023SJohn Marino var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1581*e4b17023SJohn Marino add_referenced_var (var);
1582*e4b17023SJohn Marino
1583*e4b17023SJohn Marino stmts = NULL;
1584*e4b17023SJohn Marino ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1585*e4b17023SJohn Marino true, var);
1586*e4b17023SJohn Marino if (cond_expr_stmt_list)
1587*e4b17023SJohn Marino gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1588*e4b17023SJohn Marino else
1589*e4b17023SJohn Marino {
1590*e4b17023SJohn Marino pe = loop_preheader_edge (loop);
1591*e4b17023SJohn Marino new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1592*e4b17023SJohn Marino gcc_assert (!new_bb);
1593*e4b17023SJohn Marino }
1594*e4b17023SJohn Marino }
1595*e4b17023SJohn Marino }
1596*e4b17023SJohn Marino else
1597*e4b17023SJohn Marino ni_minus_gap_name = ni_name;
1598*e4b17023SJohn Marino
1599*e4b17023SJohn Marino /* Create: ratio = ni >> log2(vf) */
1600*e4b17023SJohn Marino
1601*e4b17023SJohn Marino ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1602*e4b17023SJohn Marino ni_minus_gap_name, log_vf);
1603*e4b17023SJohn Marino if (!is_gimple_val (ratio_name))
1604*e4b17023SJohn Marino {
1605*e4b17023SJohn Marino var = create_tmp_var (TREE_TYPE (ni), "bnd");
1606*e4b17023SJohn Marino add_referenced_var (var);
1607*e4b17023SJohn Marino
1608*e4b17023SJohn Marino stmts = NULL;
1609*e4b17023SJohn Marino ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1610*e4b17023SJohn Marino if (cond_expr_stmt_list)
1611*e4b17023SJohn Marino gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1612*e4b17023SJohn Marino else
1613*e4b17023SJohn Marino {
1614*e4b17023SJohn Marino pe = loop_preheader_edge (loop);
1615*e4b17023SJohn Marino new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1616*e4b17023SJohn Marino gcc_assert (!new_bb);
1617*e4b17023SJohn Marino }
1618*e4b17023SJohn Marino }
1619*e4b17023SJohn Marino
1620*e4b17023SJohn Marino /* Create: ratio_mult_vf = ratio << log2 (vf). */
1621*e4b17023SJohn Marino
1622*e4b17023SJohn Marino ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1623*e4b17023SJohn Marino ratio_name, log_vf);
1624*e4b17023SJohn Marino if (!is_gimple_val (ratio_mult_vf_name))
1625*e4b17023SJohn Marino {
1626*e4b17023SJohn Marino var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1627*e4b17023SJohn Marino add_referenced_var (var);
1628*e4b17023SJohn Marino
1629*e4b17023SJohn Marino stmts = NULL;
1630*e4b17023SJohn Marino ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1631*e4b17023SJohn Marino true, var);
1632*e4b17023SJohn Marino if (cond_expr_stmt_list)
1633*e4b17023SJohn Marino gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1634*e4b17023SJohn Marino else
1635*e4b17023SJohn Marino {
1636*e4b17023SJohn Marino pe = loop_preheader_edge (loop);
1637*e4b17023SJohn Marino new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1638*e4b17023SJohn Marino gcc_assert (!new_bb);
1639*e4b17023SJohn Marino }
1640*e4b17023SJohn Marino }
1641*e4b17023SJohn Marino
1642*e4b17023SJohn Marino *ni_name_ptr = ni_name;
1643*e4b17023SJohn Marino *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1644*e4b17023SJohn Marino *ratio_name_ptr = ratio_name;
1645*e4b17023SJohn Marino
1646*e4b17023SJohn Marino return;
1647*e4b17023SJohn Marino }
1648*e4b17023SJohn Marino
1649*e4b17023SJohn Marino /* Function vect_can_advance_ivs_p
1650*e4b17023SJohn Marino
1651*e4b17023SJohn Marino In case the number of iterations that LOOP iterates is unknown at compile
1652*e4b17023SJohn Marino time, an epilog loop will be generated, and the loop induction variables
1653*e4b17023SJohn Marino (IVs) will be "advanced" to the value they are supposed to take just before
1654*e4b17023SJohn Marino the epilog loop. Here we check that the access function of the loop IVs
1655*e4b17023SJohn Marino and the expression that represents the loop bound are simple enough.
1656*e4b17023SJohn Marino These restrictions will be relaxed in the future. */
1657*e4b17023SJohn Marino
1658*e4b17023SJohn Marino bool
vect_can_advance_ivs_p(loop_vec_info loop_vinfo)1659*e4b17023SJohn Marino vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1660*e4b17023SJohn Marino {
1661*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1662*e4b17023SJohn Marino basic_block bb = loop->header;
1663*e4b17023SJohn Marino gimple phi;
1664*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1665*e4b17023SJohn Marino
1666*e4b17023SJohn Marino /* Analyze phi functions of the loop header. */
1667*e4b17023SJohn Marino
1668*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1669*e4b17023SJohn Marino fprintf (vect_dump, "vect_can_advance_ivs_p:");
1670*e4b17023SJohn Marino
1671*e4b17023SJohn Marino for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1672*e4b17023SJohn Marino {
1673*e4b17023SJohn Marino tree access_fn = NULL;
1674*e4b17023SJohn Marino tree evolution_part;
1675*e4b17023SJohn Marino
1676*e4b17023SJohn Marino phi = gsi_stmt (gsi);
1677*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1678*e4b17023SJohn Marino {
1679*e4b17023SJohn Marino fprintf (vect_dump, "Analyze phi: ");
1680*e4b17023SJohn Marino print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1681*e4b17023SJohn Marino }
1682*e4b17023SJohn Marino
1683*e4b17023SJohn Marino /* Skip virtual phi's. The data dependences that are associated with
1684*e4b17023SJohn Marino virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1685*e4b17023SJohn Marino
1686*e4b17023SJohn Marino if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1687*e4b17023SJohn Marino {
1688*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1689*e4b17023SJohn Marino fprintf (vect_dump, "virtual phi. skip.");
1690*e4b17023SJohn Marino continue;
1691*e4b17023SJohn Marino }
1692*e4b17023SJohn Marino
1693*e4b17023SJohn Marino /* Skip reduction phis. */
1694*e4b17023SJohn Marino
1695*e4b17023SJohn Marino if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1696*e4b17023SJohn Marino {
1697*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1698*e4b17023SJohn Marino fprintf (vect_dump, "reduc phi. skip.");
1699*e4b17023SJohn Marino continue;
1700*e4b17023SJohn Marino }
1701*e4b17023SJohn Marino
1702*e4b17023SJohn Marino /* Analyze the evolution function. */
1703*e4b17023SJohn Marino
1704*e4b17023SJohn Marino access_fn = instantiate_parameters
1705*e4b17023SJohn Marino (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1706*e4b17023SJohn Marino
1707*e4b17023SJohn Marino if (!access_fn)
1708*e4b17023SJohn Marino {
1709*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1710*e4b17023SJohn Marino fprintf (vect_dump, "No Access function.");
1711*e4b17023SJohn Marino return false;
1712*e4b17023SJohn Marino }
1713*e4b17023SJohn Marino
1714*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1715*e4b17023SJohn Marino {
1716*e4b17023SJohn Marino fprintf (vect_dump, "Access function of PHI: ");
1717*e4b17023SJohn Marino print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1718*e4b17023SJohn Marino }
1719*e4b17023SJohn Marino
1720*e4b17023SJohn Marino evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1721*e4b17023SJohn Marino
1722*e4b17023SJohn Marino if (evolution_part == NULL_TREE)
1723*e4b17023SJohn Marino {
1724*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1725*e4b17023SJohn Marino fprintf (vect_dump, "No evolution.");
1726*e4b17023SJohn Marino return false;
1727*e4b17023SJohn Marino }
1728*e4b17023SJohn Marino
1729*e4b17023SJohn Marino /* FORNOW: We do not transform initial conditions of IVs
1730*e4b17023SJohn Marino which evolution functions are a polynomial of degree >= 2. */
1731*e4b17023SJohn Marino
1732*e4b17023SJohn Marino if (tree_is_chrec (evolution_part))
1733*e4b17023SJohn Marino return false;
1734*e4b17023SJohn Marino }
1735*e4b17023SJohn Marino
1736*e4b17023SJohn Marino return true;
1737*e4b17023SJohn Marino }
1738*e4b17023SJohn Marino
1739*e4b17023SJohn Marino
1740*e4b17023SJohn Marino /* Function vect_update_ivs_after_vectorizer.
1741*e4b17023SJohn Marino
1742*e4b17023SJohn Marino "Advance" the induction variables of LOOP to the value they should take
1743*e4b17023SJohn Marino after the execution of LOOP. This is currently necessary because the
1744*e4b17023SJohn Marino vectorizer does not handle induction variables that are used after the
1745*e4b17023SJohn Marino loop. Such a situation occurs when the last iterations of LOOP are
1746*e4b17023SJohn Marino peeled, because:
1747*e4b17023SJohn Marino 1. We introduced new uses after LOOP for IVs that were not originally used
1748*e4b17023SJohn Marino after LOOP: the IVs of LOOP are now used by an epilog loop.
1749*e4b17023SJohn Marino 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1750*e4b17023SJohn Marino times, whereas the loop IVs should be bumped N times.
1751*e4b17023SJohn Marino
1752*e4b17023SJohn Marino Input:
1753*e4b17023SJohn Marino - LOOP - a loop that is going to be vectorized. The last few iterations
1754*e4b17023SJohn Marino of LOOP were peeled.
1755*e4b17023SJohn Marino - NITERS - the number of iterations that LOOP executes (before it is
1756*e4b17023SJohn Marino vectorized). i.e, the number of times the ivs should be bumped.
1757*e4b17023SJohn Marino - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1758*e4b17023SJohn Marino coming out from LOOP on which there are uses of the LOOP ivs
1759*e4b17023SJohn Marino (this is the path from LOOP->exit to epilog_loop->preheader).
1760*e4b17023SJohn Marino
1761*e4b17023SJohn Marino The new definitions of the ivs are placed in LOOP->exit.
1762*e4b17023SJohn Marino The phi args associated with the edge UPDATE_E in the bb
1763*e4b17023SJohn Marino UPDATE_E->dest are updated accordingly.
1764*e4b17023SJohn Marino
1765*e4b17023SJohn Marino Assumption 1: Like the rest of the vectorizer, this function assumes
1766*e4b17023SJohn Marino a single loop exit that has a single predecessor.
1767*e4b17023SJohn Marino
1768*e4b17023SJohn Marino Assumption 2: The phi nodes in the LOOP header and in update_bb are
1769*e4b17023SJohn Marino organized in the same order.
1770*e4b17023SJohn Marino
1771*e4b17023SJohn Marino Assumption 3: The access function of the ivs is simple enough (see
1772*e4b17023SJohn Marino vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1773*e4b17023SJohn Marino
1774*e4b17023SJohn Marino Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1775*e4b17023SJohn Marino coming out of LOOP on which the ivs of LOOP are used (this is the path
1776*e4b17023SJohn Marino that leads to the epilog loop; other paths skip the epilog loop). This
1777*e4b17023SJohn Marino path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1778*e4b17023SJohn Marino needs to have its phis updated.
1779*e4b17023SJohn Marino */
1780*e4b17023SJohn Marino
1781*e4b17023SJohn Marino static void
vect_update_ivs_after_vectorizer(loop_vec_info loop_vinfo,tree niters,edge update_e)1782*e4b17023SJohn Marino vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1783*e4b17023SJohn Marino edge update_e)
1784*e4b17023SJohn Marino {
1785*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1786*e4b17023SJohn Marino basic_block exit_bb = single_exit (loop)->dest;
1787*e4b17023SJohn Marino gimple phi, phi1;
1788*e4b17023SJohn Marino gimple_stmt_iterator gsi, gsi1;
1789*e4b17023SJohn Marino basic_block update_bb = update_e->dest;
1790*e4b17023SJohn Marino
1791*e4b17023SJohn Marino /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1792*e4b17023SJohn Marino
1793*e4b17023SJohn Marino /* Make sure there exists a single-predecessor exit bb: */
1794*e4b17023SJohn Marino gcc_assert (single_pred_p (exit_bb));
1795*e4b17023SJohn Marino
1796*e4b17023SJohn Marino for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1797*e4b17023SJohn Marino !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1798*e4b17023SJohn Marino gsi_next (&gsi), gsi_next (&gsi1))
1799*e4b17023SJohn Marino {
1800*e4b17023SJohn Marino tree init_expr;
1801*e4b17023SJohn Marino tree step_expr, off;
1802*e4b17023SJohn Marino tree type;
1803*e4b17023SJohn Marino tree var, ni, ni_name;
1804*e4b17023SJohn Marino gimple_stmt_iterator last_gsi;
1805*e4b17023SJohn Marino stmt_vec_info stmt_info;
1806*e4b17023SJohn Marino
1807*e4b17023SJohn Marino phi = gsi_stmt (gsi);
1808*e4b17023SJohn Marino phi1 = gsi_stmt (gsi1);
1809*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1810*e4b17023SJohn Marino {
1811*e4b17023SJohn Marino fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1812*e4b17023SJohn Marino print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1813*e4b17023SJohn Marino }
1814*e4b17023SJohn Marino
1815*e4b17023SJohn Marino /* Skip virtual phi's. */
1816*e4b17023SJohn Marino if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1817*e4b17023SJohn Marino {
1818*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1819*e4b17023SJohn Marino fprintf (vect_dump, "virtual phi. skip.");
1820*e4b17023SJohn Marino continue;
1821*e4b17023SJohn Marino }
1822*e4b17023SJohn Marino
1823*e4b17023SJohn Marino /* Skip reduction phis. */
1824*e4b17023SJohn Marino stmt_info = vinfo_for_stmt (phi);
1825*e4b17023SJohn Marino if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
1826*e4b17023SJohn Marino {
1827*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1828*e4b17023SJohn Marino fprintf (vect_dump, "reduc phi. skip.");
1829*e4b17023SJohn Marino continue;
1830*e4b17023SJohn Marino }
1831*e4b17023SJohn Marino
1832*e4b17023SJohn Marino type = TREE_TYPE (gimple_phi_result (phi));
1833*e4b17023SJohn Marino step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1834*e4b17023SJohn Marino step_expr = unshare_expr (step_expr);
1835*e4b17023SJohn Marino
1836*e4b17023SJohn Marino /* FORNOW: We do not support IVs whose evolution function is a polynomial
1837*e4b17023SJohn Marino of degree >= 2 or exponential. */
1838*e4b17023SJohn Marino gcc_assert (!tree_is_chrec (step_expr));
1839*e4b17023SJohn Marino
1840*e4b17023SJohn Marino init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1841*e4b17023SJohn Marino
1842*e4b17023SJohn Marino off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1843*e4b17023SJohn Marino fold_convert (TREE_TYPE (step_expr), niters),
1844*e4b17023SJohn Marino step_expr);
1845*e4b17023SJohn Marino if (POINTER_TYPE_P (type))
1846*e4b17023SJohn Marino ni = fold_build_pointer_plus (init_expr, off);
1847*e4b17023SJohn Marino else
1848*e4b17023SJohn Marino ni = fold_build2 (PLUS_EXPR, type,
1849*e4b17023SJohn Marino init_expr, fold_convert (type, off));
1850*e4b17023SJohn Marino
1851*e4b17023SJohn Marino var = create_tmp_var (type, "tmp");
1852*e4b17023SJohn Marino add_referenced_var (var);
1853*e4b17023SJohn Marino
1854*e4b17023SJohn Marino last_gsi = gsi_last_bb (exit_bb);
1855*e4b17023SJohn Marino ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1856*e4b17023SJohn Marino true, GSI_SAME_STMT);
1857*e4b17023SJohn Marino
1858*e4b17023SJohn Marino /* Fix phi expressions in the successor bb. */
1859*e4b17023SJohn Marino adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1860*e4b17023SJohn Marino }
1861*e4b17023SJohn Marino }
1862*e4b17023SJohn Marino
1863*e4b17023SJohn Marino /* Return the more conservative threshold between the
1864*e4b17023SJohn Marino min_profitable_iters returned by the cost model and the user
1865*e4b17023SJohn Marino specified threshold, if provided. */
1866*e4b17023SJohn Marino
1867*e4b17023SJohn Marino static unsigned int
conservative_cost_threshold(loop_vec_info loop_vinfo,int min_profitable_iters)1868*e4b17023SJohn Marino conservative_cost_threshold (loop_vec_info loop_vinfo,
1869*e4b17023SJohn Marino int min_profitable_iters)
1870*e4b17023SJohn Marino {
1871*e4b17023SJohn Marino unsigned int th;
1872*e4b17023SJohn Marino int min_scalar_loop_bound;
1873*e4b17023SJohn Marino
1874*e4b17023SJohn Marino min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1875*e4b17023SJohn Marino * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1876*e4b17023SJohn Marino
1877*e4b17023SJohn Marino /* Use the cost model only if it is more conservative than user specified
1878*e4b17023SJohn Marino threshold. */
1879*e4b17023SJohn Marino th = (unsigned) min_scalar_loop_bound;
1880*e4b17023SJohn Marino if (min_profitable_iters
1881*e4b17023SJohn Marino && (!min_scalar_loop_bound
1882*e4b17023SJohn Marino || min_profitable_iters > min_scalar_loop_bound))
1883*e4b17023SJohn Marino th = (unsigned) min_profitable_iters;
1884*e4b17023SJohn Marino
1885*e4b17023SJohn Marino if (th && vect_print_dump_info (REPORT_COST))
1886*e4b17023SJohn Marino fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1887*e4b17023SJohn Marino
1888*e4b17023SJohn Marino return th;
1889*e4b17023SJohn Marino }
1890*e4b17023SJohn Marino
1891*e4b17023SJohn Marino /* Function vect_do_peeling_for_loop_bound
1892*e4b17023SJohn Marino
1893*e4b17023SJohn Marino Peel the last iterations of the loop represented by LOOP_VINFO.
1894*e4b17023SJohn Marino The peeled iterations form a new epilog loop. Given that the loop now
1895*e4b17023SJohn Marino iterates NITERS times, the new epilog loop iterates
1896*e4b17023SJohn Marino NITERS % VECTORIZATION_FACTOR times.
1897*e4b17023SJohn Marino
1898*e4b17023SJohn Marino The original loop will later be made to iterate
1899*e4b17023SJohn Marino NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1900*e4b17023SJohn Marino
1901*e4b17023SJohn Marino COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1902*e4b17023SJohn Marino test. */
1903*e4b17023SJohn Marino
1904*e4b17023SJohn Marino void
vect_do_peeling_for_loop_bound(loop_vec_info loop_vinfo,tree * ratio,tree cond_expr,gimple_seq cond_expr_stmt_list)1905*e4b17023SJohn Marino vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1906*e4b17023SJohn Marino tree cond_expr, gimple_seq cond_expr_stmt_list)
1907*e4b17023SJohn Marino {
1908*e4b17023SJohn Marino tree ni_name, ratio_mult_vf_name;
1909*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1910*e4b17023SJohn Marino struct loop *new_loop;
1911*e4b17023SJohn Marino edge update_e;
1912*e4b17023SJohn Marino basic_block preheader;
1913*e4b17023SJohn Marino int loop_num;
1914*e4b17023SJohn Marino bool check_profitability = false;
1915*e4b17023SJohn Marino unsigned int th = 0;
1916*e4b17023SJohn Marino int min_profitable_iters;
1917*e4b17023SJohn Marino
1918*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
1919*e4b17023SJohn Marino fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1920*e4b17023SJohn Marino
1921*e4b17023SJohn Marino initialize_original_copy_tables ();
1922*e4b17023SJohn Marino
1923*e4b17023SJohn Marino /* Generate the following variables on the preheader of original loop:
1924*e4b17023SJohn Marino
1925*e4b17023SJohn Marino ni_name = number of iteration the original loop executes
1926*e4b17023SJohn Marino ratio = ni_name / vf
1927*e4b17023SJohn Marino ratio_mult_vf_name = ratio * vf */
1928*e4b17023SJohn Marino vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1929*e4b17023SJohn Marino &ratio_mult_vf_name, ratio,
1930*e4b17023SJohn Marino cond_expr_stmt_list);
1931*e4b17023SJohn Marino
1932*e4b17023SJohn Marino loop_num = loop->num;
1933*e4b17023SJohn Marino
1934*e4b17023SJohn Marino /* If cost model check not done during versioning and
1935*e4b17023SJohn Marino peeling for alignment. */
1936*e4b17023SJohn Marino if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1937*e4b17023SJohn Marino && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1938*e4b17023SJohn Marino && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1939*e4b17023SJohn Marino && !cond_expr)
1940*e4b17023SJohn Marino {
1941*e4b17023SJohn Marino check_profitability = true;
1942*e4b17023SJohn Marino
1943*e4b17023SJohn Marino /* Get profitability threshold for vectorized loop. */
1944*e4b17023SJohn Marino min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1945*e4b17023SJohn Marino
1946*e4b17023SJohn Marino th = conservative_cost_threshold (loop_vinfo,
1947*e4b17023SJohn Marino min_profitable_iters);
1948*e4b17023SJohn Marino }
1949*e4b17023SJohn Marino
1950*e4b17023SJohn Marino new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1951*e4b17023SJohn Marino &ratio_mult_vf_name, ni_name, false,
1952*e4b17023SJohn Marino th, check_profitability,
1953*e4b17023SJohn Marino cond_expr, cond_expr_stmt_list);
1954*e4b17023SJohn Marino gcc_assert (new_loop);
1955*e4b17023SJohn Marino gcc_assert (loop_num == loop->num);
1956*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
1957*e4b17023SJohn Marino slpeel_verify_cfg_after_peeling (loop, new_loop);
1958*e4b17023SJohn Marino #endif
1959*e4b17023SJohn Marino
1960*e4b17023SJohn Marino /* A guard that controls whether the new_loop is to be executed or skipped
1961*e4b17023SJohn Marino is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1962*e4b17023SJohn Marino is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1963*e4b17023SJohn Marino is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1964*e4b17023SJohn Marino is on the path where the LOOP IVs are used and need to be updated. */
1965*e4b17023SJohn Marino
1966*e4b17023SJohn Marino preheader = loop_preheader_edge (new_loop)->src;
1967*e4b17023SJohn Marino if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1968*e4b17023SJohn Marino update_e = EDGE_PRED (preheader, 0);
1969*e4b17023SJohn Marino else
1970*e4b17023SJohn Marino update_e = EDGE_PRED (preheader, 1);
1971*e4b17023SJohn Marino
1972*e4b17023SJohn Marino /* Update IVs of original loop as if they were advanced
1973*e4b17023SJohn Marino by ratio_mult_vf_name steps. */
1974*e4b17023SJohn Marino vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1975*e4b17023SJohn Marino
1976*e4b17023SJohn Marino /* After peeling we have to reset scalar evolution analyzer. */
1977*e4b17023SJohn Marino scev_reset ();
1978*e4b17023SJohn Marino
1979*e4b17023SJohn Marino free_original_copy_tables ();
1980*e4b17023SJohn Marino }
1981*e4b17023SJohn Marino
1982*e4b17023SJohn Marino
1983*e4b17023SJohn Marino /* Function vect_gen_niters_for_prolog_loop
1984*e4b17023SJohn Marino
1985*e4b17023SJohn Marino Set the number of iterations for the loop represented by LOOP_VINFO
1986*e4b17023SJohn Marino to the minimum between LOOP_NITERS (the original iteration count of the loop)
1987*e4b17023SJohn Marino and the misalignment of DR - the data reference recorded in
1988*e4b17023SJohn Marino LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1989*e4b17023SJohn Marino this loop, the data reference DR will refer to an aligned location.
1990*e4b17023SJohn Marino
1991*e4b17023SJohn Marino The following computation is generated:
1992*e4b17023SJohn Marino
1993*e4b17023SJohn Marino If the misalignment of DR is known at compile time:
1994*e4b17023SJohn Marino addr_mis = int mis = DR_MISALIGNMENT (dr);
1995*e4b17023SJohn Marino Else, compute address misalignment in bytes:
1996*e4b17023SJohn Marino addr_mis = addr & (vectype_align - 1)
1997*e4b17023SJohn Marino
1998*e4b17023SJohn Marino prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1999*e4b17023SJohn Marino
2000*e4b17023SJohn Marino (elem_size = element type size; an element is the scalar element whose type
2001*e4b17023SJohn Marino is the inner type of the vectype)
2002*e4b17023SJohn Marino
2003*e4b17023SJohn Marino When the step of the data-ref in the loop is not 1 (as in interleaved data
2004*e4b17023SJohn Marino and SLP), the number of iterations of the prolog must be divided by the step
2005*e4b17023SJohn Marino (which is equal to the size of interleaved group).
2006*e4b17023SJohn Marino
2007*e4b17023SJohn Marino The above formulas assume that VF == number of elements in the vector. This
2008*e4b17023SJohn Marino may not hold when there are multiple-types in the loop.
2009*e4b17023SJohn Marino In this case, for some data-references in the loop the VF does not represent
2010*e4b17023SJohn Marino the number of elements that fit in the vector. Therefore, instead of VF we
2011*e4b17023SJohn Marino use TYPE_VECTOR_SUBPARTS. */
2012*e4b17023SJohn Marino
2013*e4b17023SJohn Marino static tree
vect_gen_niters_for_prolog_loop(loop_vec_info loop_vinfo,tree loop_niters)2014*e4b17023SJohn Marino vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2015*e4b17023SJohn Marino {
2016*e4b17023SJohn Marino struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2017*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2018*e4b17023SJohn Marino tree var;
2019*e4b17023SJohn Marino gimple_seq stmts;
2020*e4b17023SJohn Marino tree iters, iters_name;
2021*e4b17023SJohn Marino edge pe;
2022*e4b17023SJohn Marino basic_block new_bb;
2023*e4b17023SJohn Marino gimple dr_stmt = DR_STMT (dr);
2024*e4b17023SJohn Marino stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2025*e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2026*e4b17023SJohn Marino int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2027*e4b17023SJohn Marino tree niters_type = TREE_TYPE (loop_niters);
2028*e4b17023SJohn Marino int nelements = TYPE_VECTOR_SUBPARTS (vectype);
2029*e4b17023SJohn Marino
2030*e4b17023SJohn Marino pe = loop_preheader_edge (loop);
2031*e4b17023SJohn Marino
2032*e4b17023SJohn Marino if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2033*e4b17023SJohn Marino {
2034*e4b17023SJohn Marino int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2035*e4b17023SJohn Marino
2036*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2037*e4b17023SJohn Marino fprintf (vect_dump, "known peeling = %d.", npeel);
2038*e4b17023SJohn Marino
2039*e4b17023SJohn Marino iters = build_int_cst (niters_type, npeel);
2040*e4b17023SJohn Marino }
2041*e4b17023SJohn Marino else
2042*e4b17023SJohn Marino {
2043*e4b17023SJohn Marino gimple_seq new_stmts = NULL;
2044*e4b17023SJohn Marino bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
2045*e4b17023SJohn Marino tree offset = negative
2046*e4b17023SJohn Marino ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2047*e4b17023SJohn Marino tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
2048*e4b17023SJohn Marino &new_stmts, offset, loop);
2049*e4b17023SJohn Marino tree ptr_type = TREE_TYPE (start_addr);
2050*e4b17023SJohn Marino tree size = TYPE_SIZE (ptr_type);
2051*e4b17023SJohn Marino tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2052*e4b17023SJohn Marino tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
2053*e4b17023SJohn Marino HOST_WIDE_INT elem_size =
2054*e4b17023SJohn Marino int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
2055*e4b17023SJohn Marino tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
2056*e4b17023SJohn Marino tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2057*e4b17023SJohn Marino tree nelements_tree = build_int_cst (type, nelements);
2058*e4b17023SJohn Marino tree byte_misalign;
2059*e4b17023SJohn Marino tree elem_misalign;
2060*e4b17023SJohn Marino
2061*e4b17023SJohn Marino new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2062*e4b17023SJohn Marino gcc_assert (!new_bb);
2063*e4b17023SJohn Marino
2064*e4b17023SJohn Marino /* Create: byte_misalign = addr & (vectype_align - 1) */
2065*e4b17023SJohn Marino byte_misalign =
2066*e4b17023SJohn Marino fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
2067*e4b17023SJohn Marino vectype_align_minus_1);
2068*e4b17023SJohn Marino
2069*e4b17023SJohn Marino /* Create: elem_misalign = byte_misalign / element_size */
2070*e4b17023SJohn Marino elem_misalign =
2071*e4b17023SJohn Marino fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2072*e4b17023SJohn Marino
2073*e4b17023SJohn Marino /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
2074*e4b17023SJohn Marino if (negative)
2075*e4b17023SJohn Marino iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
2076*e4b17023SJohn Marino else
2077*e4b17023SJohn Marino iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2078*e4b17023SJohn Marino iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2079*e4b17023SJohn Marino iters = fold_convert (niters_type, iters);
2080*e4b17023SJohn Marino }
2081*e4b17023SJohn Marino
2082*e4b17023SJohn Marino /* Create: prolog_loop_niters = min (iters, loop_niters) */
2083*e4b17023SJohn Marino /* If the loop bound is known at compile time we already verified that it is
2084*e4b17023SJohn Marino greater than vf; since the misalignment ('iters') is at most vf, there's
2085*e4b17023SJohn Marino no need to generate the MIN_EXPR in this case. */
2086*e4b17023SJohn Marino if (TREE_CODE (loop_niters) != INTEGER_CST)
2087*e4b17023SJohn Marino iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2088*e4b17023SJohn Marino
2089*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2090*e4b17023SJohn Marino {
2091*e4b17023SJohn Marino fprintf (vect_dump, "niters for prolog loop: ");
2092*e4b17023SJohn Marino print_generic_expr (vect_dump, iters, TDF_SLIM);
2093*e4b17023SJohn Marino }
2094*e4b17023SJohn Marino
2095*e4b17023SJohn Marino var = create_tmp_var (niters_type, "prolog_loop_niters");
2096*e4b17023SJohn Marino add_referenced_var (var);
2097*e4b17023SJohn Marino stmts = NULL;
2098*e4b17023SJohn Marino iters_name = force_gimple_operand (iters, &stmts, false, var);
2099*e4b17023SJohn Marino
2100*e4b17023SJohn Marino /* Insert stmt on loop preheader edge. */
2101*e4b17023SJohn Marino if (stmts)
2102*e4b17023SJohn Marino {
2103*e4b17023SJohn Marino basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2104*e4b17023SJohn Marino gcc_assert (!new_bb);
2105*e4b17023SJohn Marino }
2106*e4b17023SJohn Marino
2107*e4b17023SJohn Marino return iters_name;
2108*e4b17023SJohn Marino }
2109*e4b17023SJohn Marino
2110*e4b17023SJohn Marino
2111*e4b17023SJohn Marino /* Function vect_update_init_of_dr
2112*e4b17023SJohn Marino
2113*e4b17023SJohn Marino NITERS iterations were peeled from LOOP. DR represents a data reference
2114*e4b17023SJohn Marino in LOOP. This function updates the information recorded in DR to
2115*e4b17023SJohn Marino account for the fact that the first NITERS iterations had already been
2116*e4b17023SJohn Marino executed. Specifically, it updates the OFFSET field of DR. */
2117*e4b17023SJohn Marino
2118*e4b17023SJohn Marino static void
vect_update_init_of_dr(struct data_reference * dr,tree niters)2119*e4b17023SJohn Marino vect_update_init_of_dr (struct data_reference *dr, tree niters)
2120*e4b17023SJohn Marino {
2121*e4b17023SJohn Marino tree offset = DR_OFFSET (dr);
2122*e4b17023SJohn Marino
2123*e4b17023SJohn Marino niters = fold_build2 (MULT_EXPR, sizetype,
2124*e4b17023SJohn Marino fold_convert (sizetype, niters),
2125*e4b17023SJohn Marino fold_convert (sizetype, DR_STEP (dr)));
2126*e4b17023SJohn Marino offset = fold_build2 (PLUS_EXPR, sizetype,
2127*e4b17023SJohn Marino fold_convert (sizetype, offset), niters);
2128*e4b17023SJohn Marino DR_OFFSET (dr) = offset;
2129*e4b17023SJohn Marino }
2130*e4b17023SJohn Marino
2131*e4b17023SJohn Marino
2132*e4b17023SJohn Marino /* Function vect_update_inits_of_drs
2133*e4b17023SJohn Marino
2134*e4b17023SJohn Marino NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2135*e4b17023SJohn Marino This function updates the information recorded for the data references in
2136*e4b17023SJohn Marino the loop to account for the fact that the first NITERS iterations had
2137*e4b17023SJohn Marino already been executed. Specifically, it updates the initial_condition of
2138*e4b17023SJohn Marino the access_function of all the data_references in the loop. */
2139*e4b17023SJohn Marino
2140*e4b17023SJohn Marino static void
vect_update_inits_of_drs(loop_vec_info loop_vinfo,tree niters)2141*e4b17023SJohn Marino vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2142*e4b17023SJohn Marino {
2143*e4b17023SJohn Marino unsigned int i;
2144*e4b17023SJohn Marino VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2145*e4b17023SJohn Marino struct data_reference *dr;
2146*e4b17023SJohn Marino
2147*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2148*e4b17023SJohn Marino fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2149*e4b17023SJohn Marino
2150*e4b17023SJohn Marino FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2151*e4b17023SJohn Marino vect_update_init_of_dr (dr, niters);
2152*e4b17023SJohn Marino }
2153*e4b17023SJohn Marino
2154*e4b17023SJohn Marino
2155*e4b17023SJohn Marino /* Function vect_do_peeling_for_alignment
2156*e4b17023SJohn Marino
2157*e4b17023SJohn Marino Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2158*e4b17023SJohn Marino 'niters' is set to the misalignment of one of the data references in the
2159*e4b17023SJohn Marino loop, thereby forcing it to refer to an aligned location at the beginning
2160*e4b17023SJohn Marino of the execution of this loop. The data reference for which we are
2161*e4b17023SJohn Marino peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2162*e4b17023SJohn Marino
2163*e4b17023SJohn Marino void
vect_do_peeling_for_alignment(loop_vec_info loop_vinfo)2164*e4b17023SJohn Marino vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2165*e4b17023SJohn Marino {
2166*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2167*e4b17023SJohn Marino tree niters_of_prolog_loop, ni_name;
2168*e4b17023SJohn Marino tree n_iters;
2169*e4b17023SJohn Marino tree wide_prolog_niters;
2170*e4b17023SJohn Marino struct loop *new_loop;
2171*e4b17023SJohn Marino unsigned int th = 0;
2172*e4b17023SJohn Marino int min_profitable_iters;
2173*e4b17023SJohn Marino
2174*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS))
2175*e4b17023SJohn Marino fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2176*e4b17023SJohn Marino
2177*e4b17023SJohn Marino initialize_original_copy_tables ();
2178*e4b17023SJohn Marino
2179*e4b17023SJohn Marino ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2180*e4b17023SJohn Marino niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
2181*e4b17023SJohn Marino ni_name);
2182*e4b17023SJohn Marino
2183*e4b17023SJohn Marino /* Get profitability threshold for vectorized loop. */
2184*e4b17023SJohn Marino min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2185*e4b17023SJohn Marino th = conservative_cost_threshold (loop_vinfo,
2186*e4b17023SJohn Marino min_profitable_iters);
2187*e4b17023SJohn Marino
2188*e4b17023SJohn Marino /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2189*e4b17023SJohn Marino new_loop =
2190*e4b17023SJohn Marino slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2191*e4b17023SJohn Marino &niters_of_prolog_loop, ni_name, true,
2192*e4b17023SJohn Marino th, true, NULL_TREE, NULL);
2193*e4b17023SJohn Marino
2194*e4b17023SJohn Marino gcc_assert (new_loop);
2195*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
2196*e4b17023SJohn Marino slpeel_verify_cfg_after_peeling (new_loop, loop);
2197*e4b17023SJohn Marino #endif
2198*e4b17023SJohn Marino
2199*e4b17023SJohn Marino /* Update number of times loop executes. */
2200*e4b17023SJohn Marino n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2201*e4b17023SJohn Marino LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2202*e4b17023SJohn Marino TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2203*e4b17023SJohn Marino
2204*e4b17023SJohn Marino if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2205*e4b17023SJohn Marino wide_prolog_niters = niters_of_prolog_loop;
2206*e4b17023SJohn Marino else
2207*e4b17023SJohn Marino {
2208*e4b17023SJohn Marino gimple_seq seq = NULL;
2209*e4b17023SJohn Marino edge pe = loop_preheader_edge (loop);
2210*e4b17023SJohn Marino tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2211*e4b17023SJohn Marino tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2212*e4b17023SJohn Marino add_referenced_var (var);
2213*e4b17023SJohn Marino wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2214*e4b17023SJohn Marino var);
2215*e4b17023SJohn Marino if (seq)
2216*e4b17023SJohn Marino {
2217*e4b17023SJohn Marino /* Insert stmt on loop preheader edge. */
2218*e4b17023SJohn Marino basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2219*e4b17023SJohn Marino gcc_assert (!new_bb);
2220*e4b17023SJohn Marino }
2221*e4b17023SJohn Marino }
2222*e4b17023SJohn Marino
2223*e4b17023SJohn Marino /* Update the init conditions of the access functions of all data refs. */
2224*e4b17023SJohn Marino vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2225*e4b17023SJohn Marino
2226*e4b17023SJohn Marino /* After peeling we have to reset scalar evolution analyzer. */
2227*e4b17023SJohn Marino scev_reset ();
2228*e4b17023SJohn Marino
2229*e4b17023SJohn Marino free_original_copy_tables ();
2230*e4b17023SJohn Marino }
2231*e4b17023SJohn Marino
2232*e4b17023SJohn Marino
2233*e4b17023SJohn Marino /* Function vect_create_cond_for_align_checks.
2234*e4b17023SJohn Marino
2235*e4b17023SJohn Marino Create a conditional expression that represents the alignment checks for
2236*e4b17023SJohn Marino all of data references (array element references) whose alignment must be
2237*e4b17023SJohn Marino checked at runtime.
2238*e4b17023SJohn Marino
2239*e4b17023SJohn Marino Input:
2240*e4b17023SJohn Marino COND_EXPR - input conditional expression. New conditions will be chained
2241*e4b17023SJohn Marino with logical AND operation.
2242*e4b17023SJohn Marino LOOP_VINFO - two fields of the loop information are used.
2243*e4b17023SJohn Marino LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2244*e4b17023SJohn Marino LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2245*e4b17023SJohn Marino
2246*e4b17023SJohn Marino Output:
2247*e4b17023SJohn Marino COND_EXPR_STMT_LIST - statements needed to construct the conditional
2248*e4b17023SJohn Marino expression.
2249*e4b17023SJohn Marino The returned value is the conditional expression to be used in the if
2250*e4b17023SJohn Marino statement that controls which version of the loop gets executed at runtime.
2251*e4b17023SJohn Marino
2252*e4b17023SJohn Marino The algorithm makes two assumptions:
2253*e4b17023SJohn Marino 1) The number of bytes "n" in a vector is a power of 2.
2254*e4b17023SJohn Marino 2) An address "a" is aligned if a%n is zero and that this
2255*e4b17023SJohn Marino test can be done as a&(n-1) == 0. For example, for 16
2256*e4b17023SJohn Marino byte vectors the test is a&0xf == 0. */
2257*e4b17023SJohn Marino
2258*e4b17023SJohn Marino static void
vect_create_cond_for_align_checks(loop_vec_info loop_vinfo,tree * cond_expr,gimple_seq * cond_expr_stmt_list)2259*e4b17023SJohn Marino vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2260*e4b17023SJohn Marino tree *cond_expr,
2261*e4b17023SJohn Marino gimple_seq *cond_expr_stmt_list)
2262*e4b17023SJohn Marino {
2263*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2264*e4b17023SJohn Marino VEC(gimple,heap) *may_misalign_stmts
2265*e4b17023SJohn Marino = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2266*e4b17023SJohn Marino gimple ref_stmt;
2267*e4b17023SJohn Marino int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2268*e4b17023SJohn Marino tree mask_cst;
2269*e4b17023SJohn Marino unsigned int i;
2270*e4b17023SJohn Marino tree psize;
2271*e4b17023SJohn Marino tree int_ptrsize_type;
2272*e4b17023SJohn Marino char tmp_name[20];
2273*e4b17023SJohn Marino tree or_tmp_name = NULL_TREE;
2274*e4b17023SJohn Marino tree and_tmp, and_tmp_name;
2275*e4b17023SJohn Marino gimple and_stmt;
2276*e4b17023SJohn Marino tree ptrsize_zero;
2277*e4b17023SJohn Marino tree part_cond_expr;
2278*e4b17023SJohn Marino
2279*e4b17023SJohn Marino /* Check that mask is one less than a power of 2, i.e., mask is
2280*e4b17023SJohn Marino all zeros followed by all ones. */
2281*e4b17023SJohn Marino gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2282*e4b17023SJohn Marino
2283*e4b17023SJohn Marino /* CHECKME: what is the best integer or unsigned type to use to hold a
2284*e4b17023SJohn Marino cast from a pointer value? */
2285*e4b17023SJohn Marino psize = TYPE_SIZE (ptr_type_node);
2286*e4b17023SJohn Marino int_ptrsize_type
2287*e4b17023SJohn Marino = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2288*e4b17023SJohn Marino
2289*e4b17023SJohn Marino /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2290*e4b17023SJohn Marino of the first vector of the i'th data reference. */
2291*e4b17023SJohn Marino
2292*e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, ref_stmt)
2293*e4b17023SJohn Marino {
2294*e4b17023SJohn Marino gimple_seq new_stmt_list = NULL;
2295*e4b17023SJohn Marino tree addr_base;
2296*e4b17023SJohn Marino tree addr_tmp, addr_tmp_name;
2297*e4b17023SJohn Marino tree or_tmp, new_or_tmp_name;
2298*e4b17023SJohn Marino gimple addr_stmt, or_stmt;
2299*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2300*e4b17023SJohn Marino tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2301*e4b17023SJohn Marino bool negative = tree_int_cst_compare
2302*e4b17023SJohn Marino (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2303*e4b17023SJohn Marino tree offset = negative
2304*e4b17023SJohn Marino ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
2305*e4b17023SJohn Marino
2306*e4b17023SJohn Marino /* create: addr_tmp = (int)(address_of_first_vector) */
2307*e4b17023SJohn Marino addr_base =
2308*e4b17023SJohn Marino vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2309*e4b17023SJohn Marino offset, loop);
2310*e4b17023SJohn Marino if (new_stmt_list != NULL)
2311*e4b17023SJohn Marino gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2312*e4b17023SJohn Marino
2313*e4b17023SJohn Marino sprintf (tmp_name, "%s%d", "addr2int", i);
2314*e4b17023SJohn Marino addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2315*e4b17023SJohn Marino add_referenced_var (addr_tmp);
2316*e4b17023SJohn Marino addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2317*e4b17023SJohn Marino addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2318*e4b17023SJohn Marino addr_base, NULL_TREE);
2319*e4b17023SJohn Marino SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2320*e4b17023SJohn Marino gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2321*e4b17023SJohn Marino
2322*e4b17023SJohn Marino /* The addresses are OR together. */
2323*e4b17023SJohn Marino
2324*e4b17023SJohn Marino if (or_tmp_name != NULL_TREE)
2325*e4b17023SJohn Marino {
2326*e4b17023SJohn Marino /* create: or_tmp = or_tmp | addr_tmp */
2327*e4b17023SJohn Marino sprintf (tmp_name, "%s%d", "orptrs", i);
2328*e4b17023SJohn Marino or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2329*e4b17023SJohn Marino add_referenced_var (or_tmp);
2330*e4b17023SJohn Marino new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2331*e4b17023SJohn Marino or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2332*e4b17023SJohn Marino new_or_tmp_name,
2333*e4b17023SJohn Marino or_tmp_name, addr_tmp_name);
2334*e4b17023SJohn Marino SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2335*e4b17023SJohn Marino gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2336*e4b17023SJohn Marino or_tmp_name = new_or_tmp_name;
2337*e4b17023SJohn Marino }
2338*e4b17023SJohn Marino else
2339*e4b17023SJohn Marino or_tmp_name = addr_tmp_name;
2340*e4b17023SJohn Marino
2341*e4b17023SJohn Marino } /* end for i */
2342*e4b17023SJohn Marino
2343*e4b17023SJohn Marino mask_cst = build_int_cst (int_ptrsize_type, mask);
2344*e4b17023SJohn Marino
2345*e4b17023SJohn Marino /* create: and_tmp = or_tmp & mask */
2346*e4b17023SJohn Marino and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2347*e4b17023SJohn Marino add_referenced_var (and_tmp);
2348*e4b17023SJohn Marino and_tmp_name = make_ssa_name (and_tmp, NULL);
2349*e4b17023SJohn Marino
2350*e4b17023SJohn Marino and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2351*e4b17023SJohn Marino or_tmp_name, mask_cst);
2352*e4b17023SJohn Marino SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2353*e4b17023SJohn Marino gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2354*e4b17023SJohn Marino
2355*e4b17023SJohn Marino /* Make and_tmp the left operand of the conditional test against zero.
2356*e4b17023SJohn Marino if and_tmp has a nonzero bit then some address is unaligned. */
2357*e4b17023SJohn Marino ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2358*e4b17023SJohn Marino part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2359*e4b17023SJohn Marino and_tmp_name, ptrsize_zero);
2360*e4b17023SJohn Marino if (*cond_expr)
2361*e4b17023SJohn Marino *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2362*e4b17023SJohn Marino *cond_expr, part_cond_expr);
2363*e4b17023SJohn Marino else
2364*e4b17023SJohn Marino *cond_expr = part_cond_expr;
2365*e4b17023SJohn Marino }
2366*e4b17023SJohn Marino
2367*e4b17023SJohn Marino
2368*e4b17023SJohn Marino /* Function vect_vfa_segment_size.
2369*e4b17023SJohn Marino
2370*e4b17023SJohn Marino Create an expression that computes the size of segment
2371*e4b17023SJohn Marino that will be accessed for a data reference. The functions takes into
2372*e4b17023SJohn Marino account that realignment loads may access one more vector.
2373*e4b17023SJohn Marino
2374*e4b17023SJohn Marino Input:
2375*e4b17023SJohn Marino DR: The data reference.
2376*e4b17023SJohn Marino LENGTH_FACTOR: segment length to consider.
2377*e4b17023SJohn Marino
2378*e4b17023SJohn Marino Return an expression whose value is the size of segment which will be
2379*e4b17023SJohn Marino accessed by DR. */
2380*e4b17023SJohn Marino
2381*e4b17023SJohn Marino static tree
vect_vfa_segment_size(struct data_reference * dr,tree length_factor)2382*e4b17023SJohn Marino vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2383*e4b17023SJohn Marino {
2384*e4b17023SJohn Marino tree segment_length;
2385*e4b17023SJohn Marino
2386*e4b17023SJohn Marino if (!compare_tree_int (DR_STEP (dr), 0))
2387*e4b17023SJohn Marino segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2388*e4b17023SJohn Marino else
2389*e4b17023SJohn Marino segment_length = size_binop (MULT_EXPR,
2390*e4b17023SJohn Marino fold_convert (sizetype, DR_STEP (dr)),
2391*e4b17023SJohn Marino fold_convert (sizetype, length_factor));
2392*e4b17023SJohn Marino
2393*e4b17023SJohn Marino if (vect_supportable_dr_alignment (dr, false)
2394*e4b17023SJohn Marino == dr_explicit_realign_optimized)
2395*e4b17023SJohn Marino {
2396*e4b17023SJohn Marino tree vector_size = TYPE_SIZE_UNIT
2397*e4b17023SJohn Marino (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2398*e4b17023SJohn Marino
2399*e4b17023SJohn Marino segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2400*e4b17023SJohn Marino }
2401*e4b17023SJohn Marino return segment_length;
2402*e4b17023SJohn Marino }
2403*e4b17023SJohn Marino
2404*e4b17023SJohn Marino
2405*e4b17023SJohn Marino /* Function vect_create_cond_for_alias_checks.
2406*e4b17023SJohn Marino
2407*e4b17023SJohn Marino Create a conditional expression that represents the run-time checks for
2408*e4b17023SJohn Marino overlapping of address ranges represented by a list of data references
2409*e4b17023SJohn Marino relations passed as input.
2410*e4b17023SJohn Marino
2411*e4b17023SJohn Marino Input:
2412*e4b17023SJohn Marino COND_EXPR - input conditional expression. New conditions will be chained
2413*e4b17023SJohn Marino with logical AND operation.
2414*e4b17023SJohn Marino LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2415*e4b17023SJohn Marino to be checked.
2416*e4b17023SJohn Marino
2417*e4b17023SJohn Marino Output:
2418*e4b17023SJohn Marino COND_EXPR - conditional expression.
2419*e4b17023SJohn Marino COND_EXPR_STMT_LIST - statements needed to construct the conditional
2420*e4b17023SJohn Marino expression.
2421*e4b17023SJohn Marino
2422*e4b17023SJohn Marino
2423*e4b17023SJohn Marino The returned value is the conditional expression to be used in the if
2424*e4b17023SJohn Marino statement that controls which version of the loop gets executed at runtime.
2425*e4b17023SJohn Marino */
2426*e4b17023SJohn Marino
2427*e4b17023SJohn Marino static void
vect_create_cond_for_alias_checks(loop_vec_info loop_vinfo,tree * cond_expr,gimple_seq * cond_expr_stmt_list)2428*e4b17023SJohn Marino vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2429*e4b17023SJohn Marino tree * cond_expr,
2430*e4b17023SJohn Marino gimple_seq * cond_expr_stmt_list)
2431*e4b17023SJohn Marino {
2432*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2433*e4b17023SJohn Marino VEC (ddr_p, heap) * may_alias_ddrs =
2434*e4b17023SJohn Marino LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2435*e4b17023SJohn Marino int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2436*e4b17023SJohn Marino tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2437*e4b17023SJohn Marino
2438*e4b17023SJohn Marino ddr_p ddr;
2439*e4b17023SJohn Marino unsigned int i;
2440*e4b17023SJohn Marino tree part_cond_expr, length_factor;
2441*e4b17023SJohn Marino
2442*e4b17023SJohn Marino /* Create expression
2443*e4b17023SJohn Marino ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2444*e4b17023SJohn Marino || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2445*e4b17023SJohn Marino &&
2446*e4b17023SJohn Marino ...
2447*e4b17023SJohn Marino &&
2448*e4b17023SJohn Marino ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2449*e4b17023SJohn Marino || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
2450*e4b17023SJohn Marino
2451*e4b17023SJohn Marino if (VEC_empty (ddr_p, may_alias_ddrs))
2452*e4b17023SJohn Marino return;
2453*e4b17023SJohn Marino
2454*e4b17023SJohn Marino FOR_EACH_VEC_ELT (ddr_p, may_alias_ddrs, i, ddr)
2455*e4b17023SJohn Marino {
2456*e4b17023SJohn Marino struct data_reference *dr_a, *dr_b;
2457*e4b17023SJohn Marino gimple dr_group_first_a, dr_group_first_b;
2458*e4b17023SJohn Marino tree addr_base_a, addr_base_b;
2459*e4b17023SJohn Marino tree segment_length_a, segment_length_b;
2460*e4b17023SJohn Marino gimple stmt_a, stmt_b;
2461*e4b17023SJohn Marino tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2462*e4b17023SJohn Marino
2463*e4b17023SJohn Marino dr_a = DDR_A (ddr);
2464*e4b17023SJohn Marino stmt_a = DR_STMT (DDR_A (ddr));
2465*e4b17023SJohn Marino dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2466*e4b17023SJohn Marino if (dr_group_first_a)
2467*e4b17023SJohn Marino {
2468*e4b17023SJohn Marino stmt_a = dr_group_first_a;
2469*e4b17023SJohn Marino dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2470*e4b17023SJohn Marino }
2471*e4b17023SJohn Marino
2472*e4b17023SJohn Marino dr_b = DDR_B (ddr);
2473*e4b17023SJohn Marino stmt_b = DR_STMT (DDR_B (ddr));
2474*e4b17023SJohn Marino dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2475*e4b17023SJohn Marino if (dr_group_first_b)
2476*e4b17023SJohn Marino {
2477*e4b17023SJohn Marino stmt_b = dr_group_first_b;
2478*e4b17023SJohn Marino dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2479*e4b17023SJohn Marino }
2480*e4b17023SJohn Marino
2481*e4b17023SJohn Marino addr_base_a =
2482*e4b17023SJohn Marino vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2483*e4b17023SJohn Marino NULL_TREE, loop);
2484*e4b17023SJohn Marino addr_base_b =
2485*e4b17023SJohn Marino vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2486*e4b17023SJohn Marino NULL_TREE, loop);
2487*e4b17023SJohn Marino
2488*e4b17023SJohn Marino if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2489*e4b17023SJohn Marino length_factor = scalar_loop_iters;
2490*e4b17023SJohn Marino else
2491*e4b17023SJohn Marino length_factor = size_int (vect_factor);
2492*e4b17023SJohn Marino segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2493*e4b17023SJohn Marino segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2494*e4b17023SJohn Marino
2495*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DR_DETAILS))
2496*e4b17023SJohn Marino {
2497*e4b17023SJohn Marino fprintf (vect_dump,
2498*e4b17023SJohn Marino "create runtime check for data references ");
2499*e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2500*e4b17023SJohn Marino fprintf (vect_dump, " and ");
2501*e4b17023SJohn Marino print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2502*e4b17023SJohn Marino }
2503*e4b17023SJohn Marino
2504*e4b17023SJohn Marino seg_a_min = addr_base_a;
2505*e4b17023SJohn Marino seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2506*e4b17023SJohn Marino if (tree_int_cst_compare (DR_STEP (dr_a), size_zero_node) < 0)
2507*e4b17023SJohn Marino seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2508*e4b17023SJohn Marino
2509*e4b17023SJohn Marino seg_b_min = addr_base_b;
2510*e4b17023SJohn Marino seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2511*e4b17023SJohn Marino if (tree_int_cst_compare (DR_STEP (dr_b), size_zero_node) < 0)
2512*e4b17023SJohn Marino seg_b_min = seg_b_max, seg_b_max = addr_base_b;
2513*e4b17023SJohn Marino
2514*e4b17023SJohn Marino part_cond_expr =
2515*e4b17023SJohn Marino fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2516*e4b17023SJohn Marino fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2517*e4b17023SJohn Marino fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
2518*e4b17023SJohn Marino
2519*e4b17023SJohn Marino if (*cond_expr)
2520*e4b17023SJohn Marino *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2521*e4b17023SJohn Marino *cond_expr, part_cond_expr);
2522*e4b17023SJohn Marino else
2523*e4b17023SJohn Marino *cond_expr = part_cond_expr;
2524*e4b17023SJohn Marino }
2525*e4b17023SJohn Marino
2526*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2527*e4b17023SJohn Marino fprintf (vect_dump, "created %u versioning for alias checks.\n",
2528*e4b17023SJohn Marino VEC_length (ddr_p, may_alias_ddrs));
2529*e4b17023SJohn Marino }
2530*e4b17023SJohn Marino
2531*e4b17023SJohn Marino
2532*e4b17023SJohn Marino /* Function vect_loop_versioning.
2533*e4b17023SJohn Marino
2534*e4b17023SJohn Marino If the loop has data references that may or may not be aligned or/and
2535*e4b17023SJohn Marino has data reference relations whose independence was not proven then
2536*e4b17023SJohn Marino two versions of the loop need to be generated, one which is vectorized
2537*e4b17023SJohn Marino and one which isn't. A test is then generated to control which of the
2538*e4b17023SJohn Marino loops is executed. The test checks for the alignment of all of the
2539*e4b17023SJohn Marino data references that may or may not be aligned. An additional
2540*e4b17023SJohn Marino sequence of runtime tests is generated for each pairs of DDRs whose
2541*e4b17023SJohn Marino independence was not proven. The vectorized version of loop is
2542*e4b17023SJohn Marino executed only if both alias and alignment tests are passed.
2543*e4b17023SJohn Marino
2544*e4b17023SJohn Marino The test generated to check which version of loop is executed
2545*e4b17023SJohn Marino is modified to also check for profitability as indicated by the
2546*e4b17023SJohn Marino cost model initially.
2547*e4b17023SJohn Marino
2548*e4b17023SJohn Marino The versioning precondition(s) are placed in *COND_EXPR and
2549*e4b17023SJohn Marino *COND_EXPR_STMT_LIST. If DO_VERSIONING is true versioning is
2550*e4b17023SJohn Marino also performed, otherwise only the conditions are generated. */
2551*e4b17023SJohn Marino
2552*e4b17023SJohn Marino void
vect_loop_versioning(loop_vec_info loop_vinfo,bool do_versioning,tree * cond_expr,gimple_seq * cond_expr_stmt_list)2553*e4b17023SJohn Marino vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2554*e4b17023SJohn Marino tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2555*e4b17023SJohn Marino {
2556*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2557*e4b17023SJohn Marino basic_block condition_bb;
2558*e4b17023SJohn Marino gimple_stmt_iterator gsi, cond_exp_gsi;
2559*e4b17023SJohn Marino basic_block merge_bb;
2560*e4b17023SJohn Marino basic_block new_exit_bb;
2561*e4b17023SJohn Marino edge new_exit_e, e;
2562*e4b17023SJohn Marino gimple orig_phi, new_phi;
2563*e4b17023SJohn Marino tree arg;
2564*e4b17023SJohn Marino unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2565*e4b17023SJohn Marino gimple_seq gimplify_stmt_list = NULL;
2566*e4b17023SJohn Marino tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2567*e4b17023SJohn Marino int min_profitable_iters = 0;
2568*e4b17023SJohn Marino unsigned int th;
2569*e4b17023SJohn Marino
2570*e4b17023SJohn Marino /* Get profitability threshold for vectorized loop. */
2571*e4b17023SJohn Marino min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2572*e4b17023SJohn Marino
2573*e4b17023SJohn Marino th = conservative_cost_threshold (loop_vinfo,
2574*e4b17023SJohn Marino min_profitable_iters);
2575*e4b17023SJohn Marino
2576*e4b17023SJohn Marino *cond_expr =
2577*e4b17023SJohn Marino fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2578*e4b17023SJohn Marino build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2579*e4b17023SJohn Marino
2580*e4b17023SJohn Marino *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2581*e4b17023SJohn Marino false, NULL_TREE);
2582*e4b17023SJohn Marino
2583*e4b17023SJohn Marino if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2584*e4b17023SJohn Marino vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2585*e4b17023SJohn Marino cond_expr_stmt_list);
2586*e4b17023SJohn Marino
2587*e4b17023SJohn Marino if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2588*e4b17023SJohn Marino vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2589*e4b17023SJohn Marino cond_expr_stmt_list);
2590*e4b17023SJohn Marino
2591*e4b17023SJohn Marino *cond_expr =
2592*e4b17023SJohn Marino fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2593*e4b17023SJohn Marino *cond_expr =
2594*e4b17023SJohn Marino force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2595*e4b17023SJohn Marino gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2596*e4b17023SJohn Marino
2597*e4b17023SJohn Marino /* If we only needed the extra conditions and a new loop copy
2598*e4b17023SJohn Marino bail out here. */
2599*e4b17023SJohn Marino if (!do_versioning)
2600*e4b17023SJohn Marino return;
2601*e4b17023SJohn Marino
2602*e4b17023SJohn Marino initialize_original_copy_tables ();
2603*e4b17023SJohn Marino loop_version (loop, *cond_expr, &condition_bb,
2604*e4b17023SJohn Marino prob, prob, REG_BR_PROB_BASE - prob, true);
2605*e4b17023SJohn Marino free_original_copy_tables();
2606*e4b17023SJohn Marino
2607*e4b17023SJohn Marino /* Loop versioning violates an assumption we try to maintain during
2608*e4b17023SJohn Marino vectorization - that the loop exit block has a single predecessor.
2609*e4b17023SJohn Marino After versioning, the exit block of both loop versions is the same
2610*e4b17023SJohn Marino basic block (i.e. it has two predecessors). Just in order to simplify
2611*e4b17023SJohn Marino following transformations in the vectorizer, we fix this situation
2612*e4b17023SJohn Marino here by adding a new (empty) block on the exit-edge of the loop,
2613*e4b17023SJohn Marino with the proper loop-exit phis to maintain loop-closed-form. */
2614*e4b17023SJohn Marino
2615*e4b17023SJohn Marino merge_bb = single_exit (loop)->dest;
2616*e4b17023SJohn Marino gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2617*e4b17023SJohn Marino new_exit_bb = split_edge (single_exit (loop));
2618*e4b17023SJohn Marino new_exit_e = single_exit (loop);
2619*e4b17023SJohn Marino e = EDGE_SUCC (new_exit_bb, 0);
2620*e4b17023SJohn Marino
2621*e4b17023SJohn Marino for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2622*e4b17023SJohn Marino {
2623*e4b17023SJohn Marino orig_phi = gsi_stmt (gsi);
2624*e4b17023SJohn Marino new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2625*e4b17023SJohn Marino new_exit_bb);
2626*e4b17023SJohn Marino arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2627*e4b17023SJohn Marino add_phi_arg (new_phi, arg, new_exit_e,
2628*e4b17023SJohn Marino gimple_phi_arg_location_from_edge (orig_phi, e));
2629*e4b17023SJohn Marino adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2630*e4b17023SJohn Marino }
2631*e4b17023SJohn Marino
2632*e4b17023SJohn Marino /* End loop-exit-fixes after versioning. */
2633*e4b17023SJohn Marino
2634*e4b17023SJohn Marino update_ssa (TODO_update_ssa);
2635*e4b17023SJohn Marino if (*cond_expr_stmt_list)
2636*e4b17023SJohn Marino {
2637*e4b17023SJohn Marino cond_exp_gsi = gsi_last_bb (condition_bb);
2638*e4b17023SJohn Marino gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2639*e4b17023SJohn Marino GSI_SAME_STMT);
2640*e4b17023SJohn Marino *cond_expr_stmt_list = NULL;
2641*e4b17023SJohn Marino }
2642*e4b17023SJohn Marino *cond_expr = NULL_TREE;
2643*e4b17023SJohn Marino }
2644*e4b17023SJohn Marino
2645