/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"

/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					      tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				       tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
						 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
					       tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						       tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					  tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
					tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						   tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *,
						   tree *);
static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
						  tree *);

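/* An entry in the table of pattern recognizers below: the recognizer
   callback together with a short name used in dump output.  */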
struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt
   is taken which means usually the more complex one needs to precede
   the less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_over_widening_pattern, "over_widening" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

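/* Append STMT to the pattern definition sequence of STMT_INFO.  */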
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

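/* Start a fresh pattern definition sequence for STMT_INFO, containing
   just STMT.  */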
static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

214*38fd1498Szrj
215*38fd1498Szrj /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
216*38fd1498Szrj is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
217*38fd1498Szrj
218*38fd1498Szrj static tree
vect_recog_temp_ssa_var(tree type,gimple * stmt)219*38fd1498Szrj vect_recog_temp_ssa_var (tree type, gimple *stmt)
220*38fd1498Szrj {
221*38fd1498Szrj return make_temp_ssa_name (type, stmt, "patt");
222*38fd1498Szrj }
223*38fd1498Szrj
224*38fd1498Szrj /* Return true if STMT_VINFO describes a reduction for which reassociation
225*38fd1498Szrj is allowed. If STMT_INFO is part of a group, assume that it's part of
226*38fd1498Szrj a reduction chain and optimistically assume that all statements
227*38fd1498Szrj except the last allow reassociation. */
228*38fd1498Szrj
229*38fd1498Szrj static bool
vect_reassociating_reduction_p(stmt_vec_info stmt_vinfo)230*38fd1498Szrj vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
231*38fd1498Szrj {
232*38fd1498Szrj return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
233*38fd1498Szrj ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
234*38fd1498Szrj : GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
235*38fd1498Szrj }
236*38fd1498Szrj
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
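
/* For example, with 'short' arrays A and B, a source loop such as the
   following (an illustrative sketch; the names are ours, not from the
   sources) matches this pattern:

       int sum = 0;
       for (i = 0; i < N; i++)
	 sum += (int) A[i] * (int) B[i];

   and the multiply-accumulate is replaced by a DOT_PROD_EXPR reduction.  */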

static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (!vect_reassociating_reduction_p (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
	return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
	{
	  stmt = def_stmt;
	  oprnd0 = gimple_assign_rhs1 (stmt);
	}
      else
	half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
	return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	= STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
	return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
	return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
	return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
   same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
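
/* For example, with 'unsigned char' arrays A and B, a source loop such
   as the following (an illustrative sketch; the names are ours) matches
   this pattern:

       int sum = 0;
       for (i = 0; i < N; i++)
	 sum += abs ((int) A[i] - (int) B[i]);

   and the body of the reduction is replaced by a SAD_EXPR.  */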

static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;     #optional
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (!vect_reassociating_reduction_p (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
	return NULL;

      /* The type conversion could be promotion, demotion,
	 or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
			     &half_type, &def_stmt, &promotion))
	plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
	half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in the
     loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It could not be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It could not be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
			  &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
			  &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */

static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T, prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
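
/* For example, with 'short' arrays A and B and an 'int' array C, a source
   loop such as the following (an illustrative sketch; the names are ours)
   matches the first form above:

       for (i = 0; i < N; i++)
	 C[i] = (int) A[i] * (int) B[i];

   and the multiplication is replaced by a WIDEN_MULT_EXPR.  */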

static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
			       tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
			      &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
	  && TREE_CODE (half_type0) == INTEGER_TYPE
	  && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
					    &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
	return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot give up.  */
      if (new_stmt)
	return NULL;

      tree *oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
	return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
	return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
	  || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
	  || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
	return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
	= new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
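
/* For example (illustrative only):

       x = pow (y, 2.0);   is replaced by   x = y * y;
       x = pow (y, 0.5);   is replaced by   x = sqrt (y);  */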

static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && !gimple_call_internal_p (last_stmt))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      new_pattern_def_seq (stmt_vinfo, g);
	      *type_in = TREE_TYPE (base);
	      *type_out = NULL_TREE;
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */
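
/* For example, with a 'short' array A, a source loop such as the
   following (an illustrative sketch; the names are ours) matches this
   pattern:

       int sum = 0;
       for (i = 0; i < N; i++)
	 sum += (int) A[i];

   and the accumulation is replaced by a WIDEN_SUM_EXPR reduction.  */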
1217*38fd1498Szrj
static gimple *
vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
			      tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     in which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (!vect_reassociating_reduction_p (stmt_vinfo))
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
			  &promotion)
      || !promotion)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern:  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_widen_sum_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

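/* A minimal sketch of source code this recognizer is meant to match
   (a hypothetical example, not taken from this file):

     int
     sum_bytes (unsigned char *p, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; i++)
	 sum += p[i];
       return sum;
     }

   The load of p[i] is S1, the implicit promotion to int is S2 and the
   accumulation is S3; 'type' is unsigned char, 'TYPE' is int, and the
   stmt built above is WIDEN_SUM <p[i], sum>.  */
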
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   Input:
   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
	 The other operand can be defined by a demotion operation, or by a
	 previous statement in a sequence of over-promoted operations.  In
	 the latter case DEF is used to replace that operand.  (It is defined
	 by a pattern statement we created for the previous statement in the
	 sequence).

   Input/output:
   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if
	 not NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
	 conversion) is created in this function, its original statement is
	 added to STMTS.

   Output:
   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
	 operands to use in the new pattern statement for STMT (will be
	 created in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
	 statements for STMT: the first one is a type promotion and the
	 second one is the operation itself.  We return the type promotion
	 statement in NEW_DEF_STMT and further store it in
	 STMT_VINFO_PATTERN_DEF_SEQ of the second pattern statement.  */

static bool
vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
				  tree *op0, tree *op1, gimple **new_def_stmt,
				  vec<gimple *> *stmts)
{
  enum tree_code code;
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, new_oprnd, type;
  gimple *def_stmt, *new_stmt;
  bool first = false;
  bool promotion;

  *op0 = NULL_TREE;
  *op1 = NULL_TREE;
  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
    return false;

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)
    return false;

  /* If oprnd has other uses besides that in stmt we cannot mark it
     as being part of a pattern only.  */
  if (!has_single_use (oprnd))
    return false;

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
  if (*new_type)
    {
      half_type = *new_type;
      oprnd = def;
    }
  else
    {
      first = true;
      if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
			      &promotion)
	  || !promotion
	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
	return false;
    }

  /* Can we perform the operation on a smaller type?  */
  switch (code)
    {
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
      if (!int_fits_type_p (const_oprnd, half_type))
	{
	  /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
	  if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	    return false;

	  interm_type = build_nonstandard_integer_type (
			  TYPE_PRECISION (half_type) * 2,
			  TYPE_UNSIGNED (type));
	  if (!int_fits_type_p (const_oprnd, interm_type))
	    return false;
	}

      break;

    case LSHIFT_EXPR:
      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
	 (e.g., if the original value was char, the shift amount is at most 8
	 if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    case RSHIFT_EXPR:
      if (vect_supportable_shift (code, half_type))
	break;

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
	return false;

      interm_type = build_nonstandard_integer_type (
		      TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))
	return false;

      break;

    default:
      gcc_unreachable ();
    }

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE,
	it's the first statement in the sequence)
	a. The original, HALF_TYPE, is not enough - we replace the promotion
	   from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
	b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
	   promotion.
     2. OPRND is defined by a pattern statement we created.
	a. Its type is not sufficient for the operation, we create a new
	   stmt: a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.
	   We store this statement in NEW_DEF_STMT, and it is later put in
	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
	b. OPRND is good to use in the new statement.  */
  if (first)
    {
      if (interm_type)
	{
	  /* Replace the original type conversion HALF_TYPE->TYPE with
	     HALF_TYPE->INTERM_TYPE.  */
	  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
	    {
	      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	      /* Check if the already created pattern stmt is what we need.  */
	      if (!is_gimple_assign (new_stmt)
		  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
		  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
		return false;

	      stmts->safe_push (def_stmt);
	      oprnd = gimple_assign_lhs (new_stmt);
	    }
	  else
	    {
	      /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
	      oprnd = gimple_assign_rhs1 (def_stmt);
	      new_oprnd = make_ssa_name (interm_type);
	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
	      stmts->safe_push (def_stmt);
	      oprnd = new_oprnd;
	    }
	}
      else
	{
	  /* Retrieve the operand before the type promotion.  */
	  oprnd = gimple_assign_rhs1 (def_stmt);
	}
    }
  else
    {
      if (interm_type)
	{
	  /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
	  new_oprnd = make_ssa_name (interm_type);
	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
	  oprnd = new_oprnd;
	  *new_def_stmt = new_stmt;
	}

      /* Otherwise, OPRND is already set.  */
    }

  if (interm_type)
    *new_type = interm_type;
  else
    *new_type = half_type;

  *op0 = oprnd;
  *op1 = fold_convert (*new_type, const_oprnd);

  return true;
}

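/* For illustration (hypothetical GIMPLE, not taken from this file):
   with HALF_TYPE = unsigned char and TYPE = int,

     _2 = (int) c_1;
     _3 = _2 & 15;
     _4 = _2 << 6;

   the AND can be done directly in unsigned char because 15 fits that
   type, while the shift needs an INTERM_TYPE of twice the precision
   (unsigned short), which is only tried when TYPE is at least four
   times as wide as HALF_TYPE.  */
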
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     type x_t;
     TYPE x_T, res0_T, res1_T;
   loop:
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', and C0 and
   C1 are constants.  Check if S3 and S4 can be done on a smaller type than
   'TYPE'; it can either be 'type' or some intermediate type.  For now, we
   expect S5 to be a type demotion operation.  We also check that S3 and S4
   have only one use.  */

static gimple *
vect_recog_over_widening_pattern (vec<gimple *> *stmts,
				  tree *type_in, tree *type_out)
{
  gimple *stmt = stmts->pop ();
  gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
	 *use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
	  || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
	return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
					     &op0, &op1, &new_def_stmt,
					     stmts))
	{
	  if (first)
	    return NULL;
	  else
	    break;
	}

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
	return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
	return NULL;

      /* We want to collect all the statements for which we create pattern
	 statements, except for the case when the last statement in the
	 sequence doesn't have a corresponding pattern statement.  In such
	 a case we associate the last pattern statement with the last
	 statement in the sequence.  Therefore, we only add the original
	 statement to the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "created pattern stmt: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
	}

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
	return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
	  || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
	{
	  /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type);
	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
	  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

	  *type_in = get_vectype_for_scalar_type (new_type);
	  *type_out = get_vectype_for_scalar_type (use_type);

	  /* We created a pattern statement for the last statement in the
	     sequence, so we don't need to associate it with the pattern
	     statement created for PREV_STMT.  Therefore, we add PREV_STMT
	     to the list in order to mark it later in vect_pattern_recog_1.  */
	  if (prev_stmt)
	    stmts->safe_push (prev_stmt);
	}
      else
	{
	  if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	      = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

	  *type_in = vectype;
	  *type_out = NULL_TREE;
	}

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}

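/* A minimal sketch of a loop this recognizer targets (a hypothetical
   example, not taken from this file):

     void
     scale_bytes (unsigned char *a, unsigned char *b, int n)
     {
       for (int i = 0; i < n; i++)
	 b[i] = (a[i] << 2) | 3;
     }

   The shift and the IOR are performed in int only because of the C
   integer promotions, and the store demotes the result back to
   unsigned char (S5 above), so both operations can be redone in a
   narrower type and more elements packed per vector.  */
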
/* Detect widening shift pattern:

     type a_t;
     TYPE a_T, res_T;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect cases where the shift result is immediately converted
   to another type 'result_type' that is no larger in size than 'TYPE'.
   In those cases we perform a widen-shift that directly results in
   'result_type', to avoid a possible over-widening situation:

     type a_t;
     TYPE a_T, res_T;
     result_type res_result;

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
     S3 res_T = a_T << CONST;
     S4 res_result = (result_type) res_T;
	 '--> res_result' = a_t w<< CONST;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     type a_t;
     interm_type a_it;
     TYPE a_T, res_T, res_T';

     S1 a_t = ;
     S2 a_T = (TYPE) a_t;
	 '--> a_it = (interm_type) a_t;
     S3 res_T = a_T << CONST;
	 '--> res_T' = a_it <<* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.
     In case of unsigned widen-shift, the original stmt (S3) is replaced
     with S4 in STMTS.  When an intermediate type is used and a pattern
     statement is created for S2, we also put S2 here (before S3).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
     stmts that constitute the pattern.  In this case it will be:
     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */

static gimple *
vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
				tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple *pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple *use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
			  &promotion)
      || !promotion)
    return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this is a widening operation.  */
  gimple *wstmt = NULL;
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
				      &oprnd0, &wstmt,
				      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_widen_shift_pattern: detected:\n");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt
    = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
  if (wstmt)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
      new_pattern_def_seq (stmt_vinfo, wstmt);
      stmt_vec_info new_stmt_info
	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (wstmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

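/* For illustration (a hypothetical example, not taken from this file):

     void
     widen_shift (unsigned char *a, unsigned short *b, int n)
     {
       for (int i = 0; i < n; i++)
	 b[i] = (unsigned short) (a[i] << 7);
     }

   Here the promotion of a[i] to int (S2), the shift (S3) and the
   conversion to unsigned short (S4) can collapse into a single
   WIDEN_LSHIFT_EXPR <a[i], 7> producing unsigned short directly,
   assuming the target supports a widening vector shift.  */
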
/* Detect a rotate pattern that wouldn't otherwise be vectorized:

     type a_t, b_t, c_t;

     S0 a_t = b_t r<< c_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
     with a sequence:

     S1 d_t = -c_t;
     S2 e_t = d_t & (B - 1);
     S3 f_t = b_t << c_t;
     S4 g_t = b_t >> e_t;
     S0 a_t = f_t | g_t;

   where B is the element bitsize of type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the rotate
     S0 stmt.  */

static gimple *
vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;
  optab optab1, optab2;
  edge ext_def = NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  type = TREE_TYPE (oprnd0);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
      || !INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def
      && dt != vect_constant_def
      && dt != vect_external_def)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If vector/vector or vector/scalar rotate is supported by the target,
     don't do anything here.  */
  optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
  if (optab1
      && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
    return NULL;

  if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
    {
      optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
      if (optab2
	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	return NULL;
    }

  /* If vector/vector or vector/scalar shifts aren't supported by the target,
     don't do anything here either.  */
  optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
  optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
  if (!optab1
      || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
      || !optab2
      || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
	return NULL;
      optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
      optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
      if (!optab1
	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
	  || !optab2
	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
	return NULL;
    }

  *type_in = vectype;
  *type_out = vectype;
  if (*type_in == NULL_TREE)
    return NULL;

  if (dt == vect_external_def
      && TREE_CODE (oprnd1) == SSA_NAME
      && is_a <loop_vec_info> (vinfo))
    {
      struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
      ext_def = loop_preheader_edge (loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
	    ext_def = NULL;
	}
    }

  def = NULL_TREE;
  scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
  if (TREE_CODE (oprnd1) == INTEGER_CST
      || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
    def = oprnd1;
  else if (def_stmt && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (type))
	def = rhs1;
    }

  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (type, NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	append_pattern_def_seq (stmt_vinfo, def_stmt);
    }
  stype = TREE_TYPE (def);
  scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);

  if (TREE_CODE (def) == INTEGER_CST)
    {
      if (!tree_fits_uhwi_p (def)
	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
	  || integer_zerop (def))
	return NULL;
      def2 = build_int_cst (stype,
			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
    }
  else
    {
      tree vecstype = get_vectype_for_scalar_type (stype);
      stmt_vec_info def_stmt_vinfo;

      if (vecstype == NULL_TREE)
	return NULL;
      def2 = vect_recog_temp_ssa_var (stype, NULL);
      def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}

      def2 = vect_recog_temp_ssa_var (stype, NULL);
      tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
      def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
				      gimple_assign_lhs (def_stmt), mask);
      if (ext_def)
	{
	  basic_block new_bb
	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
	  gcc_assert (!new_bb);
	}
      else
	{
	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
	  append_pattern_def_seq (stmt_vinfo, def_stmt);
	}
    }

  var1 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
					? LSHIFT_EXPR : RSHIFT_EXPR,
				  oprnd0, def);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  var2 = vect_recog_temp_ssa_var (type, NULL);
  def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
					? RSHIFT_EXPR : LSHIFT_EXPR,
				  oprnd0, def2);
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_rotate_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

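/* A sketch of the lowering this function performs (hypothetical SSA
   names, not taken from this file): for an 8-bit left rotate

     x_5 = b_3 r<< c_4;

   on a target without vector rotates, the pattern def sequence built
   above is, with B = 8:

     d_6 = -c_4;
     e_7 = d_6 & 7;
     f_8 = b_3 << c_4;
     g_9 = b_3 >> e_7;
     x_10 = f_8 | g_9;

   which only needs vector shifts, AND and IOR.  */
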
/* Detect a vector by vector shift pattern that wouldn't otherwise be
   vectorized:

     type a_t;
     TYPE b_T, res_T;

     S1 a_t = ;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   where 'TYPE' is a type of a different size than 'type',
   and op is <<, >> or rotate.

   Also detect cases:

     type a_t;
     TYPE b_T, c_T, res_T;

     S0 c_T = ;
     S1 a_t = (type) c_T;
     S2 b_T = ;
     S3 res_T = b_T op a_t;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins,
     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
     with a shift/rotate which has the same type on both operands, in the
     second case just b_T op c_T, in the first case with an added cast
     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
     S3 stmt.  */

static gimple *
vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
					tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple *pattern_stmt, *def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  vec_info *vinfo = stmt_vinfo->vinfo;
  enum vect_def_type dt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  tree def = NULL_TREE;
  stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
  if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	{
	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
	    def = rhs1;
	  else
	    {
	      tree mask
		= build_low_bits_mask (TREE_TYPE (rhs1),
				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
	      new_pattern_def_seq (stmt_vinfo, def_stmt);
	    }
	}
    }

  if (def == NULL_TREE)
    {
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_recog_vector_vector_shift_pattern: detected:\n");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}

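/* A minimal example of the operand mismatch this pattern repairs
   (hypothetical, not taken from this file):

     void
     shift_elems (long long *a, int *s, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] <<= s[i];
     }

   A loop along these lines can leave the shift amount in a narrower
   mode than the shifted value; vector shifts want both operands in the
   same vector type, so the pattern statement built above shifts by a
   copy of s[i] cast (or masked) to the type of a[i].  */
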
/* Return true iff the target has a vector optab implementing the operation
   CODE on type VECTYPE.  */

static bool
target_has_vecop_for_code (tree_code code, tree vectype)
{
  optab voptab = optab_for_tree_code (code, vectype, optab_vector);
  return voptab
	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
}

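/* Example use (a hypothetical caller, in the spirit of the
   synthesis-support check below):

     if (!target_has_vecop_for_code (PLUS_EXPR, vectype))
       return false;

   i.e. bail out of a synthesis attempt early when the target lacks the
   needed vector operation.  */
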
/* Verify that the target has optabs of VECTYPE to perform all the steps
   needed by the multiplication-by-immediate synthesis algorithm described by
   ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
   present.  Return true iff the target supports all the steps.  */

static bool
target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
				tree vectype, bool synth_shift_p)
{
  if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
    return false;

  bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
  bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);

  if (var == negate_variant
      && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
    return false;

  /* If we must synthesize shifts with additions make sure that vector
     addition is available.  */
  if ((var == add_variant || synth_shift_p) && !supports_vplus)
    return false;

  for (int i = 1; i < alg->ops; i++)
    {
      switch (alg->op[i])
	{
	case alg_shift:
	  break;
	case alg_add_t_m2:
	case alg_add_t2_m:
	case alg_add_factor:
	  if (!supports_vplus)
	    return false;
	  break;
	case alg_sub_t_m2:
	case alg_sub_t2_m:
	case alg_sub_factor:
	  if (!supports_vminus)
	    return false;
	  break;
	case alg_unknown:
	case alg_m:
	case alg_zero:
	case alg_impossible:
	  return false;
	default:
	  gcc_unreachable ();
	}
    }

  return true;
}

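/* For instance (a hypothetical algorithm trace), multiplying by 10 can
   be synthesized roughly as

     t1 = x << 2;
     t2 = t1 + x;
     t3 = t2 << 1;

   so the add step requires a vector PLUS_EXPR here, and when
   SYNTH_SHIFT_P holds the shifts themselves are later expanded into
   additions as well.  */
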
/* Synthesize a left shift of OP by AMNT bits using a series of additions and
   putting the final result in DEST.  Append all statements but the last into
   VINFO.  Return the last statement.  */

static gimple *
synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
			   stmt_vec_info vinfo)
{
  HOST_WIDE_INT i;
  tree itype = TREE_TYPE (op);
  tree prev_res = op;
  gcc_assert (amnt >= 0);
  for (i = 0; i < amnt; i++)
    {
      tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
				    : dest;
      gimple *stmt
	= gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
      prev_res = tmp_var;
      if (i < amnt - 1)
	append_pattern_def_seq (vinfo, stmt);
      else
	return stmt;
    }
  gcc_unreachable ();
  return NULL;
}

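/* Worked example (hypothetical SSA names): for AMNT = 3 the loop above
   emits

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   i.e. AMNT successive doublings; only the statements before the last
   are appended to the def sequence, and the final one is handed back
   to the caller.  */
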
/* Helper for vect_synth_mult_by_constant.  Apply a binary operation
   CODE to operands OP1 and OP2, creating a new temporary SSA var in
   the process if necessary.  Append the resulting assignment statements
   to the sequence in STMT_VINFO.  Return the SSA variable that holds the
   result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
   left shifts using additions.  */

static tree
apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
{
  if (integer_zerop (op2)
      && (code == LSHIFT_EXPR
	  || code == PLUS_EXPR))
    {
      gcc_assert (TREE_CODE (op1) == SSA_NAME);
      return op1;
    }

  gimple *stmt;
  tree itype = TREE_TYPE (op1);
  tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);

  if (code == LSHIFT_EXPR
      && synth_shift_p)
    {
      stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
					stmt_vinfo);
      append_pattern_def_seq (stmt_vinfo, stmt);
      return tmp_var;
    }

  stmt = gimple_build_assign (tmp_var, code, op1, op2);
  append_pattern_def_seq (stmt_vinfo, stmt);
  return tmp_var;
}

2346*38fd1498Szrj /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2347*38fd1498Szrj and simple arithmetic operations to be vectorized. Record the statements
2348*38fd1498Szrj produced in STMT_VINFO and return the last statement in the sequence or
2349*38fd1498Szrj NULL if it's not possible to synthesize such a multiplication.
2350*38fd1498Szrj This function mirrors the behavior of expand_mult_const in expmed.c but
2351*38fd1498Szrj works on tree-ssa form. */
2352*38fd1498Szrj
2353*38fd1498Szrj static gimple *
vect_synth_mult_by_constant(tree op,tree val,stmt_vec_info stmt_vinfo)2354*38fd1498Szrj vect_synth_mult_by_constant (tree op, tree val,
2355*38fd1498Szrj stmt_vec_info stmt_vinfo)
2356*38fd1498Szrj {
2357*38fd1498Szrj tree itype = TREE_TYPE (op);
2358*38fd1498Szrj machine_mode mode = TYPE_MODE (itype);
2359*38fd1498Szrj struct algorithm alg;
2360*38fd1498Szrj mult_variant variant;
2361*38fd1498Szrj if (!tree_fits_shwi_p (val))
2362*38fd1498Szrj return NULL;
2363*38fd1498Szrj
2364*38fd1498Szrj /* Multiplication synthesis by shifts, adds and subs can introduce
2365*38fd1498Szrj signed overflow where the original operation didn't. Perform the
2366*38fd1498Szrj operations on an unsigned type and cast back to avoid this.
2367*38fd1498Szrj In the future we may want to relax this for synthesis algorithms
2368*38fd1498Szrj that we can prove do not cause unexpected overflow. */
2369*38fd1498Szrj bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2370*38fd1498Szrj
2371*38fd1498Szrj tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2372*38fd1498Szrj
2373*38fd1498Szrj /* Targets that don't support vector shifts but support vector additions
2374*38fd1498Szrj can synthesize shifts that way. */
2375*38fd1498Szrj bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2376*38fd1498Szrj
2377*38fd1498Szrj HOST_WIDE_INT hwval = tree_to_shwi (val);
2378*38fd1498Szrj /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2379*38fd1498Szrj The vectorizer's benefit analysis will decide whether it's beneficial
2380*38fd1498Szrj to do this. */
2381*38fd1498Szrj bool possible = choose_mult_variant (mode, hwval, &alg,
2382*38fd1498Szrj &variant, MAX_COST);
2383*38fd1498Szrj if (!possible)
2384*38fd1498Szrj return NULL;
2385*38fd1498Szrj
2386*38fd1498Szrj tree vectype = get_vectype_for_scalar_type (multtype);
2387*38fd1498Szrj
2388*38fd1498Szrj if (!vectype
2389*38fd1498Szrj || !target_supports_mult_synth_alg (&alg, variant,
2390*38fd1498Szrj vectype, synth_shift_p))
2391*38fd1498Szrj return NULL;
2392*38fd1498Szrj
2393*38fd1498Szrj tree accumulator;
2394*38fd1498Szrj
2395*38fd1498Szrj /* Clear out the sequence of statements so we can populate it below. */
2396*38fd1498Szrj STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2397*38fd1498Szrj gimple *stmt = NULL;
2398*38fd1498Szrj
2399*38fd1498Szrj if (cast_to_unsigned_p)
2400*38fd1498Szrj {
2401*38fd1498Szrj tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2402*38fd1498Szrj stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2403*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, stmt);
2404*38fd1498Szrj op = tmp_op;
2405*38fd1498Szrj }
2406*38fd1498Szrj
2407*38fd1498Szrj if (alg.op[0] == alg_zero)
2408*38fd1498Szrj accumulator = build_int_cst (multtype, 0);
2409*38fd1498Szrj else
2410*38fd1498Szrj accumulator = op;
2411*38fd1498Szrj
2412*38fd1498Szrj bool needs_fixup = (variant == negate_variant)
2413*38fd1498Szrj || (variant == add_variant);
2414*38fd1498Szrj
2415*38fd1498Szrj for (int i = 1; i < alg.ops; i++)
2416*38fd1498Szrj {
2417*38fd1498Szrj tree shft_log = build_int_cst (multtype, alg.log[i]);
2418*38fd1498Szrj tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2419*38fd1498Szrj tree tmp_var = NULL_TREE;
2420*38fd1498Szrj
2421*38fd1498Szrj switch (alg.op[i])
2422*38fd1498Szrj {
2423*38fd1498Szrj case alg_shift:
2424*38fd1498Szrj if (synth_shift_p)
2425*38fd1498Szrj stmt
2426*38fd1498Szrj = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2427*38fd1498Szrj stmt_vinfo);
2428*38fd1498Szrj else
2429*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2430*38fd1498Szrj shft_log);
2431*38fd1498Szrj break;
2432*38fd1498Szrj case alg_add_t_m2:
2433*38fd1498Szrj tmp_var
2434*38fd1498Szrj = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2435*38fd1498Szrj stmt_vinfo, synth_shift_p);
2436*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2437*38fd1498Szrj tmp_var);
2438*38fd1498Szrj break;
2439*38fd1498Szrj case alg_sub_t_m2:
2440*38fd1498Szrj tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2441*38fd1498Szrj shft_log, stmt_vinfo,
2442*38fd1498Szrj synth_shift_p);
2443*38fd1498Szrj /* In some algorithms the first step involves zeroing the
2444*38fd1498Szrj accumulator. If subtracting from such an accumulator
2445*38fd1498Szrj just emit the negation directly. */
2446*38fd1498Szrj if (integer_zerop (accumulator))
2447*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2448*38fd1498Szrj else
2449*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2450*38fd1498Szrj tmp_var);
2451*38fd1498Szrj break;
2452*38fd1498Szrj case alg_add_t2_m:
2453*38fd1498Szrj tmp_var
2454*38fd1498Szrj = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2455*38fd1498Szrj stmt_vinfo, synth_shift_p);
2456*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2457*38fd1498Szrj break;
2458*38fd1498Szrj case alg_sub_t2_m:
2459*38fd1498Szrj tmp_var
2460*38fd1498Szrj = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2461*38fd1498Szrj stmt_vinfo, synth_shift_p);
2462*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2463*38fd1498Szrj break;
2464*38fd1498Szrj case alg_add_factor:
2465*38fd1498Szrj tmp_var
2466*38fd1498Szrj = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2467*38fd1498Szrj stmt_vinfo, synth_shift_p);
2468*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2469*38fd1498Szrj tmp_var);
2470*38fd1498Szrj break;
2471*38fd1498Szrj case alg_sub_factor:
2472*38fd1498Szrj tmp_var
2473*38fd1498Szrj = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2474*38fd1498Szrj stmt_vinfo, synth_shift_p);
2475*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2476*38fd1498Szrj accumulator);
2477*38fd1498Szrj break;
2478*38fd1498Szrj default:
2479*38fd1498Szrj gcc_unreachable ();
2480*38fd1498Szrj }
2481*38fd1498Szrj /* We don't want to append the last stmt in the sequence to stmt_vinfo
2482*38fd1498Szrj but rather return it directly. */
2483*38fd1498Szrj
2484*38fd1498Szrj if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2485*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, stmt);
2486*38fd1498Szrj accumulator = accum_tmp;
2487*38fd1498Szrj }
2488*38fd1498Szrj if (variant == negate_variant)
2489*38fd1498Szrj {
2490*38fd1498Szrj tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2491*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2492*38fd1498Szrj accumulator = accum_tmp;
2493*38fd1498Szrj if (cast_to_unsigned_p)
2494*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, stmt);
2495*38fd1498Szrj }
2496*38fd1498Szrj else if (variant == add_variant)
2497*38fd1498Szrj {
2498*38fd1498Szrj tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2499*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2500*38fd1498Szrj accumulator = accum_tmp;
2501*38fd1498Szrj if (cast_to_unsigned_p)
2502*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, stmt);
2503*38fd1498Szrj }
2504*38fd1498Szrj /* Move back to a signed type if needed. */
2505*38fd1498Szrj if (cast_to_unsigned_p)
2506*38fd1498Szrj {
2507*38fd1498Szrj tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2508*38fd1498Szrj stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2509*38fd1498Szrj }
2510*38fd1498Szrj
2511*38fd1498Szrj return stmt;
2512*38fd1498Szrj }
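
/* For illustration, assuming choose_mult_variant decomposes a
multiplication by 10 into alg_add_t2_m followed by alg_shift (the
exact decomposition is target cost dependent), the loop above emits:

t1 = (op << 2) + op;
t2 = t1 << 1;

Only the final stmt is returned to become the pattern statement;
the earlier ones end up on the pattern def sequence. */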
2513*38fd1498Szrj
2514*38fd1498Szrj /* Detect multiplication by a constant and convert it into a sequence of
2515*38fd1498Szrj shifts, additions, subtractions and negations. We reuse the
2516*38fd1498Szrj choose_mult_variant algorithms from expmed.c.
2517*38fd1498Szrj
2518*38fd1498Szrj Input/Output:
2519*38fd1498Szrj
2520*38fd1498Szrj * STMTS: Contains a stmt from which the pattern search begins,
2521*38fd1498Szrj i.e. the mult stmt.
2522*38fd1498Szrj
2523*38fd1498Szrj Output:
2524*38fd1498Szrj
2525*38fd1498Szrj * TYPE_IN: The type of the input arguments to the pattern.
2526*38fd1498Szrj
2527*38fd1498Szrj * TYPE_OUT: The type of the output of this pattern.
2528*38fd1498Szrj
2529*38fd1498Szrj * Return value: A new stmt that will be used to replace
2530*38fd1498Szrj the multiplication. */
2531*38fd1498Szrj
2532*38fd1498Szrj static gimple *
2533*38fd1498Szrj vect_recog_mult_pattern (vec<gimple *> *stmts,
2534*38fd1498Szrj tree *type_in, tree *type_out)
2535*38fd1498Szrj {
2536*38fd1498Szrj gimple *last_stmt = stmts->pop ();
2537*38fd1498Szrj tree oprnd0, oprnd1, vectype, itype;
2538*38fd1498Szrj gimple *pattern_stmt;
2539*38fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2540*38fd1498Szrj
2541*38fd1498Szrj if (!is_gimple_assign (last_stmt))
2542*38fd1498Szrj return NULL;
2543*38fd1498Szrj
2544*38fd1498Szrj if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2545*38fd1498Szrj return NULL;
2546*38fd1498Szrj
2547*38fd1498Szrj oprnd0 = gimple_assign_rhs1 (last_stmt);
2548*38fd1498Szrj oprnd1 = gimple_assign_rhs2 (last_stmt);
2549*38fd1498Szrj itype = TREE_TYPE (oprnd0);
2550*38fd1498Szrj
2551*38fd1498Szrj if (TREE_CODE (oprnd0) != SSA_NAME
2552*38fd1498Szrj || TREE_CODE (oprnd1) != INTEGER_CST
2553*38fd1498Szrj || !INTEGRAL_TYPE_P (itype)
2554*38fd1498Szrj || !type_has_mode_precision_p (itype))
2555*38fd1498Szrj return NULL;
2556*38fd1498Szrj
2557*38fd1498Szrj vectype = get_vectype_for_scalar_type (itype);
2558*38fd1498Szrj if (vectype == NULL_TREE)
2559*38fd1498Szrj return NULL;
2560*38fd1498Szrj
2561*38fd1498Szrj /* If the target can handle vectorized multiplication natively,
2562*38fd1498Szrj don't attempt to optimize this. */
2563*38fd1498Szrj optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2564*38fd1498Szrj if (mul_optab != unknown_optab)
2565*38fd1498Szrj {
2566*38fd1498Szrj machine_mode vec_mode = TYPE_MODE (vectype);
2567*38fd1498Szrj int icode = (int) optab_handler (mul_optab, vec_mode);
2568*38fd1498Szrj if (icode != CODE_FOR_nothing)
2569*38fd1498Szrj return NULL;
2570*38fd1498Szrj }
2571*38fd1498Szrj
2572*38fd1498Szrj pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2573*38fd1498Szrj if (!pattern_stmt)
2574*38fd1498Szrj return NULL;
2575*38fd1498Szrj
2576*38fd1498Szrj /* Pattern detected. */
2577*38fd1498Szrj if (dump_enabled_p ())
2578*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
2579*38fd1498Szrj "vect_recog_mult_pattern: detected:\n");
2580*38fd1498Szrj
2581*38fd1498Szrj if (dump_enabled_p ())
2582*38fd1498Szrj dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2583*38fd1498Szrj pattern_stmt, 0);
2584*38fd1498Szrj
2585*38fd1498Szrj stmts->safe_push (last_stmt);
2586*38fd1498Szrj *type_in = vectype;
2587*38fd1498Szrj *type_out = vectype;
2588*38fd1498Szrj
2589*38fd1498Szrj return pattern_stmt;
2590*38fd1498Szrj }
2591*38fd1498Szrj
2592*38fd1498Szrj /* Detect a division by a constant that wouldn't otherwise be
2593*38fd1498Szrj vectorized:
2594*38fd1498Szrj
2595*38fd1498Szrj type a_t, b_t;
2596*38fd1498Szrj
2597*38fd1498Szrj S1 a_t = b_t / N;
2598*38fd1498Szrj
2599*38fd1498Szrj where type 'type' is an integral type and N is a constant.
2600*38fd1498Szrj
2601*38fd1498Szrj Similarly handle modulo by a constant:
2602*38fd1498Szrj
2603*38fd1498Szrj S4 a_t = b_t % N;
2604*38fd1498Szrj
2605*38fd1498Szrj Input/Output:
2606*38fd1498Szrj
2607*38fd1498Szrj * STMTS: Contains a stmt from which the pattern search begins,
2608*38fd1498Szrj i.e. the division stmt. S1 is replaced, if N is a power
2609*38fd1498Szrj of two constant and type is signed, by:
2610*38fd1498Szrj S3 y_t = b_t < 0 ? N - 1 : 0;
2611*38fd1498Szrj S2 x_t = b_t + y_t;
2612*38fd1498Szrj S1' a_t = x_t >> log2 (N);
2613*38fd1498Szrj
2614*38fd1498Szrj S4 is replaced, if N is a power of two constant and type is
2615*38fd1498Szrj signed, by the following (where *_T temporaries have unsigned type):
2616*38fd1498Szrj S9 y_T = b_t < 0 ? -1U : 0U;
2617*38fd1498Szrj S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2618*38fd1498Szrj S7 z_t = (type) z_T;
2619*38fd1498Szrj S6 w_t = b_t + z_t;
2620*38fd1498Szrj S5 x_t = w_t & (N - 1);
2621*38fd1498Szrj S4' a_t = x_t - z_t;
2622*38fd1498Szrj
2623*38fd1498Szrj Output:
2624*38fd1498Szrj
2625*38fd1498Szrj * TYPE_IN: The type of the input arguments to the pattern.
2626*38fd1498Szrj
2627*38fd1498Szrj * TYPE_OUT: The type of the output of this pattern.
2628*38fd1498Szrj
2629*38fd1498Szrj * Return value: A new stmt that will be used to replace the division
2630*38fd1498Szrj S1 or modulo S4 stmt. */
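
/* As a worked example of the two sequences above, let N = 4 and
let b_t be an 8-bit signed value of -7:

division: S3 y_t = 3; S2 x_t = -7 + 3 = -4;
S1' a_t = -4 >> 2 = -1, which matches trunc (-7 / 4);

modulo: S9 y_T = 0xffU; S8 z_T = 0xffU >> 6 = 3; S7 z_t = 3;
S6 w_t = -7 + 3 = -4; S5 x_t = -4 & 3 = 0;
S4' a_t = 0 - 3 = -3, which matches -7 % 4. */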
2631*38fd1498Szrj
2632*38fd1498Szrj static gimple *
2633*38fd1498Szrj vect_recog_divmod_pattern (vec<gimple *> *stmts,
2634*38fd1498Szrj tree *type_in, tree *type_out)
2635*38fd1498Szrj {
2636*38fd1498Szrj gimple *last_stmt = stmts->pop ();
2637*38fd1498Szrj tree oprnd0, oprnd1, vectype, itype, cond;
2638*38fd1498Szrj gimple *pattern_stmt, *def_stmt;
2639*38fd1498Szrj enum tree_code rhs_code;
2640*38fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2641*38fd1498Szrj vec_info *vinfo = stmt_vinfo->vinfo;
2642*38fd1498Szrj optab optab;
2643*38fd1498Szrj tree q;
2644*38fd1498Szrj int dummy_int, prec;
2645*38fd1498Szrj stmt_vec_info def_stmt_vinfo;
2646*38fd1498Szrj
2647*38fd1498Szrj if (!is_gimple_assign (last_stmt))
2648*38fd1498Szrj return NULL;
2649*38fd1498Szrj
2650*38fd1498Szrj rhs_code = gimple_assign_rhs_code (last_stmt);
2651*38fd1498Szrj switch (rhs_code)
2652*38fd1498Szrj {
2653*38fd1498Szrj case TRUNC_DIV_EXPR:
2654*38fd1498Szrj case TRUNC_MOD_EXPR:
2655*38fd1498Szrj break;
2656*38fd1498Szrj default:
2657*38fd1498Szrj return NULL;
2658*38fd1498Szrj }
2659*38fd1498Szrj
2660*38fd1498Szrj if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2661*38fd1498Szrj return NULL;
2662*38fd1498Szrj
2663*38fd1498Szrj oprnd0 = gimple_assign_rhs1 (last_stmt);
2664*38fd1498Szrj oprnd1 = gimple_assign_rhs2 (last_stmt);
2665*38fd1498Szrj itype = TREE_TYPE (oprnd0);
2666*38fd1498Szrj if (TREE_CODE (oprnd0) != SSA_NAME
2667*38fd1498Szrj || TREE_CODE (oprnd1) != INTEGER_CST
2668*38fd1498Szrj || TREE_CODE (itype) != INTEGER_TYPE
2669*38fd1498Szrj || !type_has_mode_precision_p (itype))
2670*38fd1498Szrj return NULL;
2671*38fd1498Szrj
2672*38fd1498Szrj scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2673*38fd1498Szrj vectype = get_vectype_for_scalar_type (itype);
2674*38fd1498Szrj if (vectype == NULL_TREE)
2675*38fd1498Szrj return NULL;
2676*38fd1498Szrj
2677*38fd1498Szrj /* If the target can handle vectorized division or modulo natively,
2678*38fd1498Szrj don't attempt to optimize this. */
2679*38fd1498Szrj optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2680*38fd1498Szrj if (optab != unknown_optab)
2681*38fd1498Szrj {
2682*38fd1498Szrj machine_mode vec_mode = TYPE_MODE (vectype);
2683*38fd1498Szrj int icode = (int) optab_handler (optab, vec_mode);
2684*38fd1498Szrj if (icode != CODE_FOR_nothing)
2685*38fd1498Szrj return NULL;
2686*38fd1498Szrj }
2687*38fd1498Szrj
2688*38fd1498Szrj prec = TYPE_PRECISION (itype);
2689*38fd1498Szrj if (integer_pow2p (oprnd1))
2690*38fd1498Szrj {
2691*38fd1498Szrj if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2692*38fd1498Szrj return NULL;
2693*38fd1498Szrj
2694*38fd1498Szrj /* Pattern detected. */
2695*38fd1498Szrj if (dump_enabled_p ())
2696*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
2697*38fd1498Szrj "vect_recog_divmod_pattern: detected:\n");
2698*38fd1498Szrj
2699*38fd1498Szrj cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2700*38fd1498Szrj build_int_cst (itype, 0));
2701*38fd1498Szrj if (rhs_code == TRUNC_DIV_EXPR)
2702*38fd1498Szrj {
2703*38fd1498Szrj tree var = vect_recog_temp_ssa_var (itype, NULL);
2704*38fd1498Szrj tree shift;
2705*38fd1498Szrj def_stmt
2706*38fd1498Szrj = gimple_build_assign (var, COND_EXPR, cond,
2707*38fd1498Szrj fold_build2 (MINUS_EXPR, itype, oprnd1,
2708*38fd1498Szrj build_int_cst (itype, 1)),
2709*38fd1498Szrj build_int_cst (itype, 0));
2710*38fd1498Szrj new_pattern_def_seq (stmt_vinfo, def_stmt);
2711*38fd1498Szrj var = vect_recog_temp_ssa_var (itype, NULL);
2712*38fd1498Szrj def_stmt
2713*38fd1498Szrj = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2714*38fd1498Szrj gimple_assign_lhs (def_stmt));
2715*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2716*38fd1498Szrj
2717*38fd1498Szrj shift = build_int_cst (itype, tree_log2 (oprnd1));
2718*38fd1498Szrj pattern_stmt
2719*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2720*38fd1498Szrj RSHIFT_EXPR, var, shift);
2721*38fd1498Szrj }
2722*38fd1498Szrj else
2723*38fd1498Szrj {
2724*38fd1498Szrj tree signmask;
2725*38fd1498Szrj STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2726*38fd1498Szrj if (compare_tree_int (oprnd1, 2) == 0)
2727*38fd1498Szrj {
2728*38fd1498Szrj signmask = vect_recog_temp_ssa_var (itype, NULL);
2729*38fd1498Szrj def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2730*38fd1498Szrj build_int_cst (itype, 1),
2731*38fd1498Szrj build_int_cst (itype, 0));
2732*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2733*38fd1498Szrj }
2734*38fd1498Szrj else
2735*38fd1498Szrj {
2736*38fd1498Szrj tree utype
2737*38fd1498Szrj = build_nonstandard_integer_type (prec, 1);
2738*38fd1498Szrj tree vecutype = get_vectype_for_scalar_type (utype);
2739*38fd1498Szrj tree shift
2740*38fd1498Szrj = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2741*38fd1498Szrj - tree_log2 (oprnd1));
2742*38fd1498Szrj tree var = vect_recog_temp_ssa_var (utype, NULL);
2743*38fd1498Szrj
2744*38fd1498Szrj def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2745*38fd1498Szrj build_int_cst (utype, -1),
2746*38fd1498Szrj build_int_cst (utype, 0));
2747*38fd1498Szrj def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2748*38fd1498Szrj set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2749*38fd1498Szrj STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2750*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2751*38fd1498Szrj var = vect_recog_temp_ssa_var (utype, NULL);
2752*38fd1498Szrj def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2753*38fd1498Szrj gimple_assign_lhs (def_stmt),
2754*38fd1498Szrj shift);
2755*38fd1498Szrj def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2756*38fd1498Szrj set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2757*38fd1498Szrj STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2758*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2759*38fd1498Szrj signmask = vect_recog_temp_ssa_var (itype, NULL);
2760*38fd1498Szrj def_stmt
2761*38fd1498Szrj = gimple_build_assign (signmask, NOP_EXPR, var);
2762*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2763*38fd1498Szrj }
2764*38fd1498Szrj def_stmt
2765*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2766*38fd1498Szrj PLUS_EXPR, oprnd0, signmask);
2767*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2768*38fd1498Szrj def_stmt
2769*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2770*38fd1498Szrj BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2771*38fd1498Szrj fold_build2 (MINUS_EXPR, itype, oprnd1,
2772*38fd1498Szrj build_int_cst (itype, 1)));
2773*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2774*38fd1498Szrj
2775*38fd1498Szrj pattern_stmt
2776*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2777*38fd1498Szrj MINUS_EXPR, gimple_assign_lhs (def_stmt),
2778*38fd1498Szrj signmask);
2779*38fd1498Szrj }
2780*38fd1498Szrj
2781*38fd1498Szrj if (dump_enabled_p ())
2782*38fd1498Szrj dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2783*38fd1498Szrj 0);
2784*38fd1498Szrj
2785*38fd1498Szrj stmts->safe_push (last_stmt);
2786*38fd1498Szrj
2787*38fd1498Szrj *type_in = vectype;
2788*38fd1498Szrj *type_out = vectype;
2789*38fd1498Szrj return pattern_stmt;
2790*38fd1498Szrj }
2791*38fd1498Szrj
2792*38fd1498Szrj if (prec > HOST_BITS_PER_WIDE_INT
2793*38fd1498Szrj || integer_zerop (oprnd1))
2794*38fd1498Szrj return NULL;
2795*38fd1498Szrj
2796*38fd1498Szrj if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2797*38fd1498Szrj return NULL;
2798*38fd1498Szrj
2799*38fd1498Szrj STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2800*38fd1498Szrj
2801*38fd1498Szrj if (TYPE_UNSIGNED (itype))
2802*38fd1498Szrj {
2803*38fd1498Szrj unsigned HOST_WIDE_INT mh, ml;
2804*38fd1498Szrj int pre_shift, post_shift;
2805*38fd1498Szrj unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2806*38fd1498Szrj & GET_MODE_MASK (itype_mode));
2807*38fd1498Szrj tree t1, t2, t3, t4;
2808*38fd1498Szrj
2809*38fd1498Szrj if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2810*38fd1498Szrj /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2811*38fd1498Szrj return NULL;
2812*38fd1498Szrj
2813*38fd1498Szrj /* Find a suitable multiplier and right shift count
2814*38fd1498Szrj instead of multiplying with D. */
2815*38fd1498Szrj mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2816*38fd1498Szrj
2817*38fd1498Szrj /* If the suggested multiplier is more than PREC bits, we can do better
2818*38fd1498Szrj for even divisors, using an initial right shift. */
2819*38fd1498Szrj if (mh != 0 && (d & 1) == 0)
2820*38fd1498Szrj {
2821*38fd1498Szrj pre_shift = ctz_or_zero (d);
2822*38fd1498Szrj mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2823*38fd1498Szrj &ml, &post_shift, &dummy_int);
2824*38fd1498Szrj gcc_assert (!mh);
2825*38fd1498Szrj }
2826*38fd1498Szrj else
2827*38fd1498Szrj pre_shift = 0;
2828*38fd1498Szrj
2829*38fd1498Szrj if (mh != 0)
2830*38fd1498Szrj {
2831*38fd1498Szrj if (post_shift - 1 >= prec)
2832*38fd1498Szrj return NULL;
2833*38fd1498Szrj
2834*38fd1498Szrj /* t1 = oprnd0 h* ml;
2835*38fd1498Szrj t2 = oprnd0 - t1;
2836*38fd1498Szrj t3 = t2 >> 1;
2837*38fd1498Szrj t4 = t1 + t3;
2838*38fd1498Szrj q = t4 >> (post_shift - 1); */
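/* E.g. dividing 8-bit unsigned values by 7 ends up here: following
the Granlund/Montgomery derivation, choose_multiplier yields the
multiplier 2^8 + 37, i.e. mh != 0, ml = 37 and post_shift = 3.
For oprnd0 = 49 the sequence computes t1 = (49 * 37) >> 8 = 7,
t2 = 42, t3 = 21, t4 = 28 and q = 28 >> 2 = 7 = 49 / 7. */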
2839*38fd1498Szrj t1 = vect_recog_temp_ssa_var (itype, NULL);
2840*38fd1498Szrj def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2841*38fd1498Szrj build_int_cst (itype, ml));
2842*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2843*38fd1498Szrj
2844*38fd1498Szrj t2 = vect_recog_temp_ssa_var (itype, NULL);
2845*38fd1498Szrj def_stmt
2846*38fd1498Szrj = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2847*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2848*38fd1498Szrj
2849*38fd1498Szrj t3 = vect_recog_temp_ssa_var (itype, NULL);
2850*38fd1498Szrj def_stmt
2851*38fd1498Szrj = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2852*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2853*38fd1498Szrj
2854*38fd1498Szrj t4 = vect_recog_temp_ssa_var (itype, NULL);
2855*38fd1498Szrj def_stmt
2856*38fd1498Szrj = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2857*38fd1498Szrj
2858*38fd1498Szrj if (post_shift != 1)
2859*38fd1498Szrj {
2860*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2861*38fd1498Szrj
2862*38fd1498Szrj q = vect_recog_temp_ssa_var (itype, NULL);
2863*38fd1498Szrj pattern_stmt
2864*38fd1498Szrj = gimple_build_assign (q, RSHIFT_EXPR, t4,
2865*38fd1498Szrj build_int_cst (itype, post_shift - 1));
2866*38fd1498Szrj }
2867*38fd1498Szrj else
2868*38fd1498Szrj {
2869*38fd1498Szrj q = t4;
2870*38fd1498Szrj pattern_stmt = def_stmt;
2871*38fd1498Szrj }
2872*38fd1498Szrj }
2873*38fd1498Szrj else
2874*38fd1498Szrj {
2875*38fd1498Szrj if (pre_shift >= prec || post_shift >= prec)
2876*38fd1498Szrj return NULL;
2877*38fd1498Szrj
2878*38fd1498Szrj /* t1 = oprnd0 >> pre_shift;
2879*38fd1498Szrj t2 = t1 h* ml;
2880*38fd1498Szrj q = t2 >> post_shift; */
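/* E.g. for an 8-bit unsigned division by 6, pre_shift = 1 reduces
the problem to dividing by 3; assuming choose_multiplier then
yields ml = 86 with post_shift = 0, oprnd0 = 250 computes
t1 = 250 >> 1 = 125 and q = t2 = (125 * 86) >> 8 = 41 = 250 / 6. */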
2881*38fd1498Szrj if (pre_shift)
2882*38fd1498Szrj {
2883*38fd1498Szrj t1 = vect_recog_temp_ssa_var (itype, NULL);
2884*38fd1498Szrj def_stmt
2885*38fd1498Szrj = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2886*38fd1498Szrj build_int_cst (NULL, pre_shift));
2887*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2888*38fd1498Szrj }
2889*38fd1498Szrj else
2890*38fd1498Szrj t1 = oprnd0;
2891*38fd1498Szrj
2892*38fd1498Szrj t2 = vect_recog_temp_ssa_var (itype, NULL);
2893*38fd1498Szrj def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2894*38fd1498Szrj build_int_cst (itype, ml));
2895*38fd1498Szrj
2896*38fd1498Szrj if (post_shift)
2897*38fd1498Szrj {
2898*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2899*38fd1498Szrj
2900*38fd1498Szrj q = vect_recog_temp_ssa_var (itype, NULL);
2901*38fd1498Szrj def_stmt
2902*38fd1498Szrj = gimple_build_assign (q, RSHIFT_EXPR, t2,
2903*38fd1498Szrj build_int_cst (itype, post_shift));
2904*38fd1498Szrj }
2905*38fd1498Szrj else
2906*38fd1498Szrj q = t2;
2907*38fd1498Szrj
2908*38fd1498Szrj pattern_stmt = def_stmt;
2909*38fd1498Szrj }
2910*38fd1498Szrj }
2911*38fd1498Szrj else
2912*38fd1498Szrj {
2913*38fd1498Szrj unsigned HOST_WIDE_INT ml;
2914*38fd1498Szrj int post_shift;
2915*38fd1498Szrj HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2916*38fd1498Szrj unsigned HOST_WIDE_INT abs_d;
2917*38fd1498Szrj bool add = false;
2918*38fd1498Szrj tree t1, t2, t3, t4;
2919*38fd1498Szrj
2920*38fd1498Szrj /* Give up for -1. */
2921*38fd1498Szrj if (d == -1)
2922*38fd1498Szrj return NULL;
2923*38fd1498Szrj
2924*38fd1498Szrj /* Since d might be INT_MIN, we have to cast to
2925*38fd1498Szrj unsigned HOST_WIDE_INT before negating to avoid
2926*38fd1498Szrj undefined signed overflow. */
2927*38fd1498Szrj abs_d = (d >= 0
2928*38fd1498Szrj ? (unsigned HOST_WIDE_INT) d
2929*38fd1498Szrj : - (unsigned HOST_WIDE_INT) d);
2930*38fd1498Szrj
2931*38fd1498Szrj /* n rem d = n rem -d */
2932*38fd1498Szrj if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2933*38fd1498Szrj {
2934*38fd1498Szrj d = abs_d;
2935*38fd1498Szrj oprnd1 = build_int_cst (itype, abs_d);
2936*38fd1498Szrj }
2937*38fd1498Szrj else if (HOST_BITS_PER_WIDE_INT >= prec
2938*38fd1498Szrj && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2939*38fd1498Szrj /* This case is not handled correctly below. */
2940*38fd1498Szrj return NULL;
2941*38fd1498Szrj
2942*38fd1498Szrj choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2943*38fd1498Szrj if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2944*38fd1498Szrj {
2945*38fd1498Szrj add = true;
2946*38fd1498Szrj ml |= HOST_WIDE_INT_M1U << (prec - 1);
2947*38fd1498Szrj }
2948*38fd1498Szrj if (post_shift >= prec)
2949*38fd1498Szrj return NULL;
2950*38fd1498Szrj
2951*38fd1498Szrj /* t1 = oprnd0 h* ml; */
2952*38fd1498Szrj t1 = vect_recog_temp_ssa_var (itype, NULL);
2953*38fd1498Szrj def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2954*38fd1498Szrj build_int_cst (itype, ml));
2955*38fd1498Szrj
2956*38fd1498Szrj if (add)
2957*38fd1498Szrj {
2958*38fd1498Szrj /* t2 = t1 + oprnd0; */
2959*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2960*38fd1498Szrj t2 = vect_recog_temp_ssa_var (itype, NULL);
2961*38fd1498Szrj def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2962*38fd1498Szrj }
2963*38fd1498Szrj else
2964*38fd1498Szrj t2 = t1;
2965*38fd1498Szrj
2966*38fd1498Szrj if (post_shift)
2967*38fd1498Szrj {
2968*38fd1498Szrj /* t3 = t2 >> post_shift; */
2969*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
2970*38fd1498Szrj t3 = vect_recog_temp_ssa_var (itype, NULL);
2971*38fd1498Szrj def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2972*38fd1498Szrj build_int_cst (itype, post_shift));
2973*38fd1498Szrj }
2974*38fd1498Szrj else
2975*38fd1498Szrj t3 = t2;
2976*38fd1498Szrj
2977*38fd1498Szrj wide_int oprnd0_min, oprnd0_max;
2978*38fd1498Szrj int msb = 1;
2979*38fd1498Szrj if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2980*38fd1498Szrj {
2981*38fd1498Szrj if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2982*38fd1498Szrj msb = 0;
2983*38fd1498Szrj else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2984*38fd1498Szrj msb = -1;
2985*38fd1498Szrj }
2986*38fd1498Szrj
2987*38fd1498Szrj if (msb == 0 && d >= 0)
2988*38fd1498Szrj {
2989*38fd1498Szrj /* q = t3; */
2990*38fd1498Szrj q = t3;
2991*38fd1498Szrj pattern_stmt = def_stmt;
2992*38fd1498Szrj }
2993*38fd1498Szrj else
2994*38fd1498Szrj {
2995*38fd1498Szrj /* t4 = oprnd0 >> (prec - 1);
2996*38fd1498Szrj or if we know from VRP that oprnd0 >= 0
2997*38fd1498Szrj t4 = 0;
2998*38fd1498Szrj or if we know from VRP that oprnd0 < 0
2999*38fd1498Szrj t4 = -1; */
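/* E.g. a 32-bit signed division by 3 uses ml = 0x55555556 with
post_shift = 0, so t3 = oprnd0 h* ml. For oprnd0 = -7 this
gives t3 = -3 and t4 = -7 >> 31 = -1, hence the final
q = t3 - t4 = -2 = trunc (-7 / 3). */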
3000*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
3001*38fd1498Szrj t4 = vect_recog_temp_ssa_var (itype, NULL);
3002*38fd1498Szrj if (msb != 1)
3003*38fd1498Szrj def_stmt = gimple_build_assign (t4, INTEGER_CST,
3004*38fd1498Szrj build_int_cst (itype, msb));
3005*38fd1498Szrj else
3006*38fd1498Szrj def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3007*38fd1498Szrj build_int_cst (itype, prec - 1));
3008*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
3009*38fd1498Szrj
3010*38fd1498Szrj /* q = t3 - t4; or q = t4 - t3; */
3011*38fd1498Szrj q = vect_recog_temp_ssa_var (itype, NULL);
3012*38fd1498Szrj pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3013*38fd1498Szrj d < 0 ? t3 : t4);
3014*38fd1498Szrj }
3015*38fd1498Szrj }
3016*38fd1498Szrj
3017*38fd1498Szrj if (rhs_code == TRUNC_MOD_EXPR)
3018*38fd1498Szrj {
3019*38fd1498Szrj tree r, t1;
3020*38fd1498Szrj
3021*38fd1498Szrj /* We divided. Now finish by:
3022*38fd1498Szrj t1 = q * oprnd1;
3023*38fd1498Szrj r = oprnd0 - t1; */
3024*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3025*38fd1498Szrj
3026*38fd1498Szrj t1 = vect_recog_temp_ssa_var (itype, NULL);
3027*38fd1498Szrj def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3028*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, def_stmt);
3029*38fd1498Szrj
3030*38fd1498Szrj r = vect_recog_temp_ssa_var (itype, NULL);
3031*38fd1498Szrj pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3032*38fd1498Szrj }
3033*38fd1498Szrj
3034*38fd1498Szrj /* Pattern detected. */
3035*38fd1498Szrj if (dump_enabled_p ())
3036*38fd1498Szrj {
3037*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
3038*38fd1498Szrj "vect_recog_divmod_pattern: detected: ");
3039*38fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3040*38fd1498Szrj }
3041*38fd1498Szrj
3042*38fd1498Szrj stmts->safe_push (last_stmt);
3043*38fd1498Szrj
3044*38fd1498Szrj *type_in = vectype;
3045*38fd1498Szrj *type_out = vectype;
3046*38fd1498Szrj return pattern_stmt;
3047*38fd1498Szrj }
3048*38fd1498Szrj
3049*38fd1498Szrj /* Function vect_recog_mixed_size_cond_pattern
3050*38fd1498Szrj
3051*38fd1498Szrj Try to find the following pattern:
3052*38fd1498Szrj
3053*38fd1498Szrj type x_t, y_t;
3054*38fd1498Szrj TYPE a_T, b_T, c_T;
3055*38fd1498Szrj loop:
3056*38fd1498Szrj S1 a_T = x_t CMP y_t ? b_T : c_T;
3057*38fd1498Szrj
3058*38fd1498Szrj where type 'TYPE' is an integral type which has a different size
3059*38fd1498Szrj from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3060*38fd1498Szrj than 'type', the constants need to fit into an integer type
3061*38fd1498Szrj with the same width as 'type') or results of conversion from 'type'.
3062*38fd1498Szrj
3063*38fd1498Szrj Input:
3064*38fd1498Szrj
3065*38fd1498Szrj * LAST_STMT: A stmt from which the pattern search begins.
3066*38fd1498Szrj
3067*38fd1498Szrj Output:
3068*38fd1498Szrj
3069*38fd1498Szrj * TYPE_IN: The type of the input arguments to the pattern.
3070*38fd1498Szrj
3071*38fd1498Szrj * TYPE_OUT: The type of the output of this pattern.
3072*38fd1498Szrj
3073*38fd1498Szrj * Return value: A new stmt that will be used to replace the pattern.
3074*38fd1498Szrj Additionally a def_stmt is added.
3075*38fd1498Szrj
3076*38fd1498Szrj a_it = x_t CMP y_t ? b_it : c_it;
3077*38fd1498Szrj a_T = (TYPE) a_it; */
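
/* For instance, with 16-bit x_t, y_t and 32-bit TYPE,

S1 a_T = x_t < y_t ? 3 : -1;

becomes a 16-bit COND_EXPR followed by a widening conversion,
whereas a then/else constant such as 70000, which does not fit
into 16 bits, makes the pattern inapplicable. */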
3078*38fd1498Szrj
3079*38fd1498Szrj static gimple *
3080*38fd1498Szrj vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3081*38fd1498Szrj tree *type_out)
3082*38fd1498Szrj {
3083*38fd1498Szrj gimple *last_stmt = (*stmts)[0];
3084*38fd1498Szrj tree cond_expr, then_clause, else_clause;
3085*38fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3086*38fd1498Szrj tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3087*38fd1498Szrj gimple *pattern_stmt, *def_stmt;
3088*38fd1498Szrj vec_info *vinfo = stmt_vinfo->vinfo;
3089*38fd1498Szrj tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3090*38fd1498Szrj gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3091*38fd1498Szrj bool promotion;
3092*38fd1498Szrj tree comp_scalar_type;
3093*38fd1498Szrj
3094*38fd1498Szrj if (!is_gimple_assign (last_stmt)
3095*38fd1498Szrj || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3096*38fd1498Szrj || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3097*38fd1498Szrj return NULL;
3098*38fd1498Szrj
3099*38fd1498Szrj cond_expr = gimple_assign_rhs1 (last_stmt);
3100*38fd1498Szrj then_clause = gimple_assign_rhs2 (last_stmt);
3101*38fd1498Szrj else_clause = gimple_assign_rhs3 (last_stmt);
3102*38fd1498Szrj
3103*38fd1498Szrj if (!COMPARISON_CLASS_P (cond_expr))
3104*38fd1498Szrj return NULL;
3105*38fd1498Szrj
3106*38fd1498Szrj comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3107*38fd1498Szrj comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3108*38fd1498Szrj if (comp_vectype == NULL_TREE)
3109*38fd1498Szrj return NULL;
3110*38fd1498Szrj
3111*38fd1498Szrj type = gimple_expr_type (last_stmt);
3112*38fd1498Szrj if (types_compatible_p (type, comp_scalar_type)
3113*38fd1498Szrj || ((TREE_CODE (then_clause) != INTEGER_CST
3114*38fd1498Szrj || TREE_CODE (else_clause) != INTEGER_CST)
3115*38fd1498Szrj && !INTEGRAL_TYPE_P (comp_scalar_type))
3116*38fd1498Szrj || !INTEGRAL_TYPE_P (type))
3117*38fd1498Szrj return NULL;
3118*38fd1498Szrj
3119*38fd1498Szrj if ((TREE_CODE (then_clause) != INTEGER_CST
3120*38fd1498Szrj && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3121*38fd1498Szrj &def_stmt0, &promotion))
3122*38fd1498Szrj || (TREE_CODE (else_clause) != INTEGER_CST
3123*38fd1498Szrj && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3124*38fd1498Szrj &def_stmt1, &promotion)))
3125*38fd1498Szrj return NULL;
3126*38fd1498Szrj
3127*38fd1498Szrj if (orig_type0 && orig_type1
3128*38fd1498Szrj && !types_compatible_p (orig_type0, orig_type1))
3129*38fd1498Szrj return NULL;
3130*38fd1498Szrj
3131*38fd1498Szrj if (orig_type0)
3132*38fd1498Szrj {
3133*38fd1498Szrj if (!types_compatible_p (orig_type0, comp_scalar_type))
3134*38fd1498Szrj return NULL;
3135*38fd1498Szrj then_clause = gimple_assign_rhs1 (def_stmt0);
3136*38fd1498Szrj itype = orig_type0;
3137*38fd1498Szrj }
3138*38fd1498Szrj
3139*38fd1498Szrj if (orig_type1)
3140*38fd1498Szrj {
3141*38fd1498Szrj if (!types_compatible_p (orig_type1, comp_scalar_type))
3142*38fd1498Szrj return NULL;
3143*38fd1498Szrj else_clause = gimple_assign_rhs1 (def_stmt1);
3144*38fd1498Szrj itype = orig_type1;
3145*38fd1498Szrj }
3146*38fd1498Szrj
3148*38fd1498Szrj HOST_WIDE_INT cmp_mode_size
3149*38fd1498Szrj = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3150*38fd1498Szrj
3151*38fd1498Szrj scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3152*38fd1498Szrj if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3153*38fd1498Szrj return NULL;
3154*38fd1498Szrj
3155*38fd1498Szrj vectype = get_vectype_for_scalar_type (type);
3156*38fd1498Szrj if (vectype == NULL_TREE)
3157*38fd1498Szrj return NULL;
3158*38fd1498Szrj
3159*38fd1498Szrj if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3160*38fd1498Szrj return NULL;
3161*38fd1498Szrj
3162*38fd1498Szrj if (itype == NULL_TREE)
3163*38fd1498Szrj itype = build_nonstandard_integer_type (cmp_mode_size,
3164*38fd1498Szrj TYPE_UNSIGNED (type));
3165*38fd1498Szrj
3166*38fd1498Szrj if (itype == NULL_TREE
3167*38fd1498Szrj || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3168*38fd1498Szrj return NULL;
3169*38fd1498Szrj
3170*38fd1498Szrj vecitype = get_vectype_for_scalar_type (itype);
3171*38fd1498Szrj if (vecitype == NULL_TREE)
3172*38fd1498Szrj return NULL;
3173*38fd1498Szrj
3174*38fd1498Szrj if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3175*38fd1498Szrj return NULL;
3176*38fd1498Szrj
3177*38fd1498Szrj if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3178*38fd1498Szrj {
3179*38fd1498Szrj if ((TREE_CODE (then_clause) == INTEGER_CST
3180*38fd1498Szrj && !int_fits_type_p (then_clause, itype))
3181*38fd1498Szrj || (TREE_CODE (else_clause) == INTEGER_CST
3182*38fd1498Szrj && !int_fits_type_p (else_clause, itype)))
3183*38fd1498Szrj return NULL;
3184*38fd1498Szrj }
3185*38fd1498Szrj
3186*38fd1498Szrj def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3187*38fd1498Szrj COND_EXPR, unshare_expr (cond_expr),
3188*38fd1498Szrj fold_convert (itype, then_clause),
3189*38fd1498Szrj fold_convert (itype, else_clause));
3190*38fd1498Szrj pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3191*38fd1498Szrj NOP_EXPR, gimple_assign_lhs (def_stmt));
3192*38fd1498Szrj
3193*38fd1498Szrj new_pattern_def_seq (stmt_vinfo, def_stmt);
3194*38fd1498Szrj def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3195*38fd1498Szrj set_vinfo_for_stmt (def_stmt, def_stmt_info);
3196*38fd1498Szrj STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3197*38fd1498Szrj *type_in = vecitype;
3198*38fd1498Szrj *type_out = vectype;
3199*38fd1498Szrj
3200*38fd1498Szrj if (dump_enabled_p ())
3201*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
3202*38fd1498Szrj "vect_recog_mixed_size_cond_pattern: detected:\n");
3203*38fd1498Szrj
3204*38fd1498Szrj return pattern_stmt;
3205*38fd1498Szrj }
3206*38fd1498Szrj
3207*38fd1498Szrj
3208*38fd1498Szrj /* Helper function of vect_recog_bool_pattern. Called recursively, return
3209*38fd1498Szrj true if bool VAR can and should be converted to an integer type by this
3210*38fd1498Szrj pattern. Assume it shouldn't in case it's a result of a comparison which
3211*38fd1498Szrj can be directly vectorized into a vector comparison. Fills in STMTS with
3212*38fd1498Szrj all stmts visited during the walk. */
3213*38fd1498Szrj
3214*38fd1498Szrj static bool
3215*38fd1498Szrj check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3216*38fd1498Szrj {
3217*38fd1498Szrj gimple *def_stmt;
3218*38fd1498Szrj enum vect_def_type dt;
3219*38fd1498Szrj tree rhs1;
3220*38fd1498Szrj enum tree_code rhs_code;
3221*38fd1498Szrj
3222*38fd1498Szrj if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3223*38fd1498Szrj return false;
3224*38fd1498Szrj
3225*38fd1498Szrj if (dt != vect_internal_def)
3226*38fd1498Szrj return false;
3227*38fd1498Szrj
3228*38fd1498Szrj if (!is_gimple_assign (def_stmt))
3229*38fd1498Szrj return false;
3230*38fd1498Szrj
3231*38fd1498Szrj if (stmts.contains (def_stmt))
3232*38fd1498Szrj return true;
3233*38fd1498Szrj
3234*38fd1498Szrj rhs1 = gimple_assign_rhs1 (def_stmt);
3235*38fd1498Szrj rhs_code = gimple_assign_rhs_code (def_stmt);
3236*38fd1498Szrj switch (rhs_code)
3237*38fd1498Szrj {
3238*38fd1498Szrj case SSA_NAME:
3239*38fd1498Szrj if (! check_bool_pattern (rhs1, vinfo, stmts))
3240*38fd1498Szrj return false;
3241*38fd1498Szrj break;
3242*38fd1498Szrj
3243*38fd1498Szrj CASE_CONVERT:
3244*38fd1498Szrj if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3245*38fd1498Szrj return false;
3246*38fd1498Szrj if (! check_bool_pattern (rhs1, vinfo, stmts))
3247*38fd1498Szrj return false;
3248*38fd1498Szrj break;
3249*38fd1498Szrj
3250*38fd1498Szrj case BIT_NOT_EXPR:
3251*38fd1498Szrj if (! check_bool_pattern (rhs1, vinfo, stmts))
3252*38fd1498Szrj return false;
3253*38fd1498Szrj break;
3254*38fd1498Szrj
3255*38fd1498Szrj case BIT_AND_EXPR:
3256*38fd1498Szrj case BIT_IOR_EXPR:
3257*38fd1498Szrj case BIT_XOR_EXPR:
3258*38fd1498Szrj if (! check_bool_pattern (rhs1, vinfo, stmts)
3259*38fd1498Szrj || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3260*38fd1498Szrj return false;
3261*38fd1498Szrj break;
3262*38fd1498Szrj
3263*38fd1498Szrj default:
3264*38fd1498Szrj if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3265*38fd1498Szrj {
3266*38fd1498Szrj tree vecitype, comp_vectype;
3267*38fd1498Szrj
3268*38fd1498Szrj /* If the comparison can throw, then is_gimple_condexpr will be
3269*38fd1498Szrj false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
3270*38fd1498Szrj if (stmt_could_throw_p (def_stmt))
3271*38fd1498Szrj return false;
3272*38fd1498Szrj
3273*38fd1498Szrj comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3274*38fd1498Szrj if (comp_vectype == NULL_TREE)
3275*38fd1498Szrj return false;
3276*38fd1498Szrj
3277*38fd1498Szrj tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3278*38fd1498Szrj if (mask_type
3279*38fd1498Szrj && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3280*38fd1498Szrj return false;
3281*38fd1498Szrj
3282*38fd1498Szrj if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3283*38fd1498Szrj {
3284*38fd1498Szrj scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3285*38fd1498Szrj tree itype
3286*38fd1498Szrj = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3287*38fd1498Szrj vecitype = get_vectype_for_scalar_type (itype);
3288*38fd1498Szrj if (vecitype == NULL_TREE)
3289*38fd1498Szrj return false;
3290*38fd1498Szrj }
3291*38fd1498Szrj else
3292*38fd1498Szrj vecitype = comp_vectype;
3293*38fd1498Szrj if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3294*38fd1498Szrj return false;
3295*38fd1498Szrj }
3296*38fd1498Szrj else
3297*38fd1498Szrj return false;
3298*38fd1498Szrj break;
3299*38fd1498Szrj }
3300*38fd1498Szrj
3301*38fd1498Szrj bool res = stmts.add (def_stmt);
3302*38fd1498Szrj /* We can't end up in a cycle when just visiting SSA defs but not PHIs. */
3303*38fd1498Szrj gcc_assert (!res);
3304*38fd1498Szrj
3305*38fd1498Szrj return true;
3306*38fd1498Szrj }
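
/* E.g. given

S1 a_b = x1 < y1;
S2 b_b = x2 != y2;
S3 c_b = a_b & b_b;

a call on c_b recurses through S1 and S2, checks that each
comparison can be turned into a COND_EXPR on a suitable integer
type, and collects { S1, S2, S3 } in STMTS. */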
3307*38fd1498Szrj
3308*38fd1498Szrj
3309*38fd1498Szrj /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
3310*38fd1498Szrj previous pattern stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending
3311*38fd1498Szrj the cast stmt to STMT_INFO's pattern def sequence. */
3312*38fd1498Szrj
3313*38fd1498Szrj static tree
3314*38fd1498Szrj adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3315*38fd1498Szrj {
3316*38fd1498Szrj gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3317*38fd1498Szrj NOP_EXPR, var);
3318*38fd1498Szrj stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3319*38fd1498Szrj set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3320*38fd1498Szrj STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3321*38fd1498Szrj append_pattern_def_seq (stmt_info, cast_stmt);
3322*38fd1498Szrj return gimple_assign_lhs (cast_stmt);
3323*38fd1498Szrj }
3324*38fd1498Szrj
3325*38fd1498Szrj /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3326*38fd1498Szrj VAR is an SSA_NAME that should be transformed from bool to a wider integer
3327*38fd1498Szrj type, OUT_TYPE is the desired final integer type of the whole pattern.
3328*38fd1498Szrj STMT_INFO is the info of the pattern root and is where pattern stmts should
3329*38fd1498Szrj be associated with. DEFS is a map of pattern defs. */
3330*38fd1498Szrj
3331*38fd1498Szrj static void
3332*38fd1498Szrj adjust_bool_pattern (tree var, tree out_type,
3333*38fd1498Szrj stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3334*38fd1498Szrj {
3335*38fd1498Szrj gimple *stmt = SSA_NAME_DEF_STMT (var);
3336*38fd1498Szrj enum tree_code rhs_code, def_rhs_code;
3337*38fd1498Szrj tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3338*38fd1498Szrj location_t loc;
3339*38fd1498Szrj gimple *pattern_stmt, *def_stmt;
3340*38fd1498Szrj tree trueval = NULL_TREE;
3341*38fd1498Szrj
3342*38fd1498Szrj rhs1 = gimple_assign_rhs1 (stmt);
3343*38fd1498Szrj rhs2 = gimple_assign_rhs2 (stmt);
3344*38fd1498Szrj rhs_code = gimple_assign_rhs_code (stmt);
3345*38fd1498Szrj loc = gimple_location (stmt);
3346*38fd1498Szrj switch (rhs_code)
3347*38fd1498Szrj {
3348*38fd1498Szrj case SSA_NAME:
3349*38fd1498Szrj CASE_CONVERT:
3350*38fd1498Szrj irhs1 = *defs.get (rhs1);
3351*38fd1498Szrj itype = TREE_TYPE (irhs1);
3352*38fd1498Szrj pattern_stmt
3353*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3354*38fd1498Szrj SSA_NAME, irhs1);
3355*38fd1498Szrj break;
3356*38fd1498Szrj
3357*38fd1498Szrj case BIT_NOT_EXPR:
3358*38fd1498Szrj irhs1 = *defs.get (rhs1);
3359*38fd1498Szrj itype = TREE_TYPE (irhs1);
3360*38fd1498Szrj pattern_stmt
3361*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3362*38fd1498Szrj BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3363*38fd1498Szrj break;
3364*38fd1498Szrj
3365*38fd1498Szrj case BIT_AND_EXPR:
3366*38fd1498Szrj /* Try to optimize x = y & (a < b ? 1 : 0); into
3367*38fd1498Szrj x = (a < b ? y : 0);
3368*38fd1498Szrj
3369*38fd1498Szrj E.g. for:
3370*38fd1498Szrj bool a_b, b_b, c_b;
3371*38fd1498Szrj TYPE d_T;
3372*38fd1498Szrj
3373*38fd1498Szrj S1 a_b = x1 CMP1 y1;
3374*38fd1498Szrj S2 b_b = x2 CMP2 y2;
3375*38fd1498Szrj S3 c_b = a_b & b_b;
3376*38fd1498Szrj S4 d_T = (TYPE) c_b;
3377*38fd1498Szrj
3378*38fd1498Szrj we would normally emit:
3379*38fd1498Szrj
3380*38fd1498Szrj S1' a_T = x1 CMP1 y1 ? 1 : 0;
3381*38fd1498Szrj S2' b_T = x2 CMP2 y2 ? 1 : 0;
3382*38fd1498Szrj S3' c_T = a_T & b_T;
3383*38fd1498Szrj S4' d_T = c_T;
3384*38fd1498Szrj
3385*38fd1498Szrj but we can save one stmt by using the
3386*38fd1498Szrj result of one of the COND_EXPRs in the other COND_EXPR and leave
3387*38fd1498Szrj BIT_AND_EXPR stmt out:
3388*38fd1498Szrj
3389*38fd1498Szrj S1' a_T = x1 CMP1 y1 ? 1 : 0;
3390*38fd1498Szrj S3' c_T = x2 CMP2 y2 ? a_T : 0;
3391*38fd1498Szrj S4' f_T = c_T;
3392*38fd1498Szrj
3393*38fd1498Szrj At least when VEC_COND_EXPR is implemented using masks,
3394*38fd1498Szrj cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3395*38fd1498Szrj computes the comparison mask and ANDs it, in one case with an
3396*38fd1498Szrj all-ones vector, in the other case with a vector register.
3397*38fd1498Szrj Don't do this for BIT_IOR_EXPR, because cond ? 1 : var is
3398*38fd1498Szrj often more expensive. */
3399*38fd1498Szrj def_stmt = SSA_NAME_DEF_STMT (rhs2);
3400*38fd1498Szrj def_rhs_code = gimple_assign_rhs_code (def_stmt);
3401*38fd1498Szrj if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3402*38fd1498Szrj {
3403*38fd1498Szrj irhs1 = *defs.get (rhs1);
3404*38fd1498Szrj tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3405*38fd1498Szrj if (TYPE_PRECISION (TREE_TYPE (irhs1))
3406*38fd1498Szrj == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3407*38fd1498Szrj {
3408*38fd1498Szrj rhs_code = def_rhs_code;
3409*38fd1498Szrj rhs1 = def_rhs1;
3410*38fd1498Szrj rhs2 = gimple_assign_rhs2 (def_stmt);
3411*38fd1498Szrj trueval = irhs1;
3412*38fd1498Szrj goto do_compare;
3413*38fd1498Szrj }
3414*38fd1498Szrj else
3415*38fd1498Szrj irhs2 = *defs.get (rhs2);
3416*38fd1498Szrj goto and_ior_xor;
3417*38fd1498Szrj }
3418*38fd1498Szrj def_stmt = SSA_NAME_DEF_STMT (rhs1);
3419*38fd1498Szrj def_rhs_code = gimple_assign_rhs_code (def_stmt);
3420*38fd1498Szrj if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3421*38fd1498Szrj {
3422*38fd1498Szrj irhs2 = *defs.get (rhs2);
3423*38fd1498Szrj tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3424*38fd1498Szrj if (TYPE_PRECISION (TREE_TYPE (irhs2))
3425*38fd1498Szrj == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3426*38fd1498Szrj {
3427*38fd1498Szrj rhs_code = def_rhs_code;
3428*38fd1498Szrj rhs1 = def_rhs1;
3429*38fd1498Szrj rhs2 = gimple_assign_rhs2 (def_stmt);
3430*38fd1498Szrj trueval = irhs2;
3431*38fd1498Szrj goto do_compare;
3432*38fd1498Szrj }
3433*38fd1498Szrj else
3434*38fd1498Szrj irhs1 = *defs.get (rhs1);
3435*38fd1498Szrj goto and_ior_xor;
3436*38fd1498Szrj }
3437*38fd1498Szrj /* FALLTHRU */
3438*38fd1498Szrj case BIT_IOR_EXPR:
3439*38fd1498Szrj case BIT_XOR_EXPR:
3440*38fd1498Szrj irhs1 = *defs.get (rhs1);
3441*38fd1498Szrj irhs2 = *defs.get (rhs2);
3442*38fd1498Szrj and_ior_xor:
3443*38fd1498Szrj if (TYPE_PRECISION (TREE_TYPE (irhs1))
3444*38fd1498Szrj != TYPE_PRECISION (TREE_TYPE (irhs2)))
3445*38fd1498Szrj {
3446*38fd1498Szrj int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3447*38fd1498Szrj int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3448*38fd1498Szrj int out_prec = TYPE_PRECISION (out_type);
3449*38fd1498Szrj if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3450*38fd1498Szrj irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3451*38fd1498Szrj stmt_info);
3452*38fd1498Szrj else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3453*38fd1498Szrj irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3454*38fd1498Szrj stmt_info);
3455*38fd1498Szrj else
3456*38fd1498Szrj {
3457*38fd1498Szrj irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3458*38fd1498Szrj irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3459*38fd1498Szrj }
3460*38fd1498Szrj }
3461*38fd1498Szrj itype = TREE_TYPE (irhs1);
3462*38fd1498Szrj pattern_stmt
3463*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3464*38fd1498Szrj rhs_code, irhs1, irhs2);
3465*38fd1498Szrj break;
3466*38fd1498Szrj
3467*38fd1498Szrj default:
3468*38fd1498Szrj do_compare:
3469*38fd1498Szrj gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3470*38fd1498Szrj if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3471*38fd1498Szrj || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3472*38fd1498Szrj || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3473*38fd1498Szrj GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3474*38fd1498Szrj {
3475*38fd1498Szrj scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3476*38fd1498Szrj itype
3477*38fd1498Szrj = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3478*38fd1498Szrj }
3479*38fd1498Szrj else
3480*38fd1498Szrj itype = TREE_TYPE (rhs1);
3481*38fd1498Szrj cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3482*38fd1498Szrj if (trueval == NULL_TREE)
3483*38fd1498Szrj trueval = build_int_cst (itype, 1);
3484*38fd1498Szrj else
3485*38fd1498Szrj gcc_checking_assert (useless_type_conversion_p (itype,
3486*38fd1498Szrj TREE_TYPE (trueval)));
3487*38fd1498Szrj pattern_stmt
3488*38fd1498Szrj = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3489*38fd1498Szrj COND_EXPR, cond_expr, trueval,
3490*38fd1498Szrj build_int_cst (itype, 0));
3491*38fd1498Szrj break;
3492*38fd1498Szrj }
3493*38fd1498Szrj
3494*38fd1498Szrj gimple_set_location (pattern_stmt, loc);
3495*38fd1498Szrj /* ??? Why does vect_mark_pattern_stmts set the vector type on all
3496*38fd1498Szrj pattern def seq stmts instead of just letting auto-detection do
3497*38fd1498Szrj its work? */
3498*38fd1498Szrj stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3499*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3500*38fd1498Szrj STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3501*38fd1498Szrj append_pattern_def_seq (stmt_info, pattern_stmt);
3502*38fd1498Szrj defs.put (var, gimple_assign_lhs (pattern_stmt));
3503*38fd1498Szrj }
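
/* E.g. for c_b = a_b | b_b where a_b was rewritten to a 32-bit
value, b_b to an 8-bit value and OUT_TYPE is 32 bits wide, the
and_ior_xor path above casts the 8-bit operand up to 32 bits
before emitting the BIT_IOR_EXPR, because the wider operand is
closer to OUT_TYPE's precision. */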
3504*38fd1498Szrj
3505*38fd1498Szrj /* Comparison function to qsort a vector of gimple stmts by UID. */
3506*38fd1498Szrj
3507*38fd1498Szrj static int
3508*38fd1498Szrj sort_after_uid (const void *p1, const void *p2)
3509*38fd1498Szrj {
3510*38fd1498Szrj const gimple *stmt1 = *(const gimple * const *)p1;
3511*38fd1498Szrj const gimple *stmt2 = *(const gimple * const *)p2;
3512*38fd1498Szrj return gimple_uid (stmt1) - gimple_uid (stmt2);
3513*38fd1498Szrj }
3514*38fd1498Szrj
3515*38fd1498Szrj /* Create pattern stmts for all stmts participating in the bool pattern
3516*38fd1498Szrj specified by BOOL_STMT_SET and its root STMT with the desired type
3517*38fd1498Szrj OUT_TYPE. Return the def of the pattern root. */
3518*38fd1498Szrj
3519*38fd1498Szrj static tree
3520*38fd1498Szrj adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3521*38fd1498Szrj tree out_type, gimple *stmt)
3522*38fd1498Szrj {
3523*38fd1498Szrj /* Gather original stmts in the bool pattern in their order of appearance
3524*38fd1498Szrj in the IL. */
3525*38fd1498Szrj auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3526*38fd1498Szrj for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3527*38fd1498Szrj i != bool_stmt_set.end (); ++i)
3528*38fd1498Szrj bool_stmts.quick_push (*i);
3529*38fd1498Szrj bool_stmts.qsort (sort_after_uid);
3530*38fd1498Szrj
3531*38fd1498Szrj /* Now process them in that order, producing pattern stmts. */
3532*38fd1498Szrj hash_map <tree, tree> defs;
3533*38fd1498Szrj for (unsigned i = 0; i < bool_stmts.length (); ++i)
3534*38fd1498Szrj adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3535*38fd1498Szrj out_type, vinfo_for_stmt (stmt), defs);
3536*38fd1498Szrj
3537*38fd1498Szrj /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3538*38fd1498Szrj gimple *pattern_stmt
3539*38fd1498Szrj = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3540*38fd1498Szrj return gimple_assign_lhs (pattern_stmt);
3541*38fd1498Szrj }
3542*38fd1498Szrj
3543*38fd1498Szrj /* Helper for search_type_for_mask. */
3544*38fd1498Szrj
3545*38fd1498Szrj static tree
3546*38fd1498Szrj search_type_for_mask_1 (tree var, vec_info *vinfo,
3547*38fd1498Szrj hash_map<gimple *, tree> &cache)
3548*38fd1498Szrj {
3549*38fd1498Szrj gimple *def_stmt;
3550*38fd1498Szrj enum vect_def_type dt;
3551*38fd1498Szrj tree rhs1;
3552*38fd1498Szrj enum tree_code rhs_code;
3553*38fd1498Szrj tree res = NULL_TREE, res2;
3554*38fd1498Szrj
3555*38fd1498Szrj if (TREE_CODE (var) != SSA_NAME)
3556*38fd1498Szrj return NULL_TREE;
3557*38fd1498Szrj
3558*38fd1498Szrj if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3559*38fd1498Szrj return NULL_TREE;
3560*38fd1498Szrj
3561*38fd1498Szrj if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3562*38fd1498Szrj return NULL_TREE;
3563*38fd1498Szrj
3564*38fd1498Szrj if (dt != vect_internal_def)
3565*38fd1498Szrj return NULL_TREE;
3566*38fd1498Szrj
3567*38fd1498Szrj if (!is_gimple_assign (def_stmt))
3568*38fd1498Szrj return NULL_TREE;
3569*38fd1498Szrj
3570*38fd1498Szrj tree *c = cache.get (def_stmt);
3571*38fd1498Szrj if (c)
3572*38fd1498Szrj return *c;
3573*38fd1498Szrj
3574*38fd1498Szrj rhs_code = gimple_assign_rhs_code (def_stmt);
3575*38fd1498Szrj rhs1 = gimple_assign_rhs1 (def_stmt);
3576*38fd1498Szrj
3577*38fd1498Szrj switch (rhs_code)
3578*38fd1498Szrj {
3579*38fd1498Szrj case SSA_NAME:
3580*38fd1498Szrj case BIT_NOT_EXPR:
3581*38fd1498Szrj CASE_CONVERT:
3582*38fd1498Szrj res = search_type_for_mask_1 (rhs1, vinfo, cache);
3583*38fd1498Szrj break;
3584*38fd1498Szrj
3585*38fd1498Szrj case BIT_AND_EXPR:
3586*38fd1498Szrj case BIT_IOR_EXPR:
3587*38fd1498Szrj case BIT_XOR_EXPR:
3588*38fd1498Szrj res = search_type_for_mask_1 (rhs1, vinfo, cache);
3589*38fd1498Szrj res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3590*38fd1498Szrj cache);
3591*38fd1498Szrj if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3592*38fd1498Szrj res = res2;
3593*38fd1498Szrj break;
3594*38fd1498Szrj
3595*38fd1498Szrj default:
3596*38fd1498Szrj if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3597*38fd1498Szrj {
3598*38fd1498Szrj tree comp_vectype, mask_type;
3599*38fd1498Szrj
3600*38fd1498Szrj if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3601*38fd1498Szrj {
3602*38fd1498Szrj res = search_type_for_mask_1 (rhs1, vinfo, cache);
3603*38fd1498Szrj res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3604*38fd1498Szrj vinfo, cache);
3605*38fd1498Szrj if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3606*38fd1498Szrj res = res2;
3607*38fd1498Szrj break;
3608*38fd1498Szrj }
3609*38fd1498Szrj
3610*38fd1498Szrj comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3611*38fd1498Szrj if (comp_vectype == NULL_TREE)
3612*38fd1498Szrj {
3613*38fd1498Szrj res = NULL_TREE;
3614*38fd1498Szrj break;
3615*38fd1498Szrj }
3616*38fd1498Szrj
3617*38fd1498Szrj mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3618*38fd1498Szrj if (!mask_type
3619*38fd1498Szrj || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3620*38fd1498Szrj {
3621*38fd1498Szrj res = NULL_TREE;
3622*38fd1498Szrj break;
3623*38fd1498Szrj }
3624*38fd1498Szrj
3625*38fd1498Szrj if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3626*38fd1498Szrj || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3627*38fd1498Szrj {
3628*38fd1498Szrj scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3629*38fd1498Szrj res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3630*38fd1498Szrj }
3631*38fd1498Szrj else
3632*38fd1498Szrj res = TREE_TYPE (rhs1);
3633*38fd1498Szrj }
3634*38fd1498Szrj }
3635*38fd1498Szrj
3636*38fd1498Szrj cache.put (def_stmt, res);
3637*38fd1498Szrj return res;
3638*38fd1498Szrj }
3639*38fd1498Szrj
3640*38fd1498Szrj /* Return the proper type for converting bool VAR into
3641*38fd1498Szrj an integer value or NULL_TREE if no such type exists.
3642*38fd1498Szrj The type is chosen so that the converted value has the
3643*38fd1498Szrj same number of elements as VAR's vector type. */
3644*38fd1498Szrj
3645*38fd1498Szrj static tree
3646*38fd1498Szrj search_type_for_mask (tree var, vec_info *vinfo)
3647*38fd1498Szrj {
3648*38fd1498Szrj hash_map<gimple *, tree> cache;
3649*38fd1498Szrj return search_type_for_mask_1 (var, vinfo, cache);
3650*38fd1498Szrj }
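
/* E.g. for m_b = (a_i < b_i) & (c_c < d_c) with 32-bit a_i, b_i
and 8-bit c_c, d_c, the result is the 8-bit unsigned type: for
bitwise operations the helper above keeps the operand type of
lowest precision, so that a single narrow COND_EXPR can produce
the whole mask. */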
3651*38fd1498Szrj
3652*38fd1498Szrj /* Function vect_recog_bool_pattern
3653*38fd1498Szrj
3654*38fd1498Szrj Try to find a pattern like the following:
3655*38fd1498Szrj
3656*38fd1498Szrj bool a_b, b_b, c_b, d_b, e_b;
3657*38fd1498Szrj TYPE f_T;
3658*38fd1498Szrj loop:
3659*38fd1498Szrj S1 a_b = x1 CMP1 y1;
3660*38fd1498Szrj S2 b_b = x2 CMP2 y2;
3661*38fd1498Szrj S3 c_b = a_b & b_b;
3662*38fd1498Szrj S4 d_b = x3 CMP3 y3;
3663*38fd1498Szrj S5 e_b = c_b | d_b;
3664*38fd1498Szrj S6 f_T = (TYPE) e_b;
3665*38fd1498Szrj
3666*38fd1498Szrj where type 'TYPE' is an integral type. Or a similar pattern
3667*38fd1498Szrj ending in
3668*38fd1498Szrj
3669*38fd1498Szrj S6 f_Y = e_b ? r_Y : s_Y;
3670*38fd1498Szrj
3671*38fd1498Szrj as results from if-conversion of a complex condition.
3672*38fd1498Szrj
3673*38fd1498Szrj Input:
3674*38fd1498Szrj
3675*38fd1498Szrj * LAST_STMT: A stmt at the end from which the pattern
3676*38fd1498Szrj search begins, i.e. cast of a bool to
3677*38fd1498Szrj an integer type.
3678*38fd1498Szrj
3679*38fd1498Szrj Output:
3680*38fd1498Szrj
3681*38fd1498Szrj * TYPE_IN: The type of the input arguments to the pattern.
3682*38fd1498Szrj
3683*38fd1498Szrj * TYPE_OUT: The type of the output of this pattern.
3684*38fd1498Szrj
3685*38fd1498Szrj * Return value: A new stmt that will be used to replace the pattern.
3686*38fd1498Szrj
3687*38fd1498Szrj Assuming the size of TYPE is the same as the size of all comparisons
3688*38fd1498Szrj (otherwise some casts would be added where needed), for the above
3689*38fd1498Szrj sequence we create related pattern stmts:
3690*38fd1498Szrj S1' a_T = x1 CMP1 y1 ? 1 : 0;
3691*38fd1498Szrj S3' c_T = x2 CMP2 y2 ? a_T : 0;
3692*38fd1498Szrj S4' d_T = x3 CMP3 y3 ? 1 : 0;
3693*38fd1498Szrj S5' e_T = c_T | d_T;
3694*38fd1498Szrj S6' f_T = e_T;
3695*38fd1498Szrj
3696*38fd1498Szrj Instead of the above S3' we could emit:
3697*38fd1498Szrj S2' b_T = x2 CMP2 y2 ? 1 : 0;
3698*38fd1498Szrj S3' c_T = a_T | b_T;
3699*38fd1498Szrj but the above is more efficient. */
3700*38fd1498Szrj
3701*38fd1498Szrj static gimple *
3702*38fd1498Szrj vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3703*38fd1498Szrj tree *type_out)
3704*38fd1498Szrj {
3705*38fd1498Szrj gimple *last_stmt = stmts->pop ();
3706*38fd1498Szrj enum tree_code rhs_code;
3707*38fd1498Szrj tree var, lhs, rhs, vectype;
3708*38fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3709*38fd1498Szrj stmt_vec_info new_stmt_info;
3710*38fd1498Szrj vec_info *vinfo = stmt_vinfo->vinfo;
3711*38fd1498Szrj gimple *pattern_stmt;
3712*38fd1498Szrj
3713*38fd1498Szrj if (!is_gimple_assign (last_stmt))
3714*38fd1498Szrj return NULL;
3715*38fd1498Szrj
3716*38fd1498Szrj var = gimple_assign_rhs1 (last_stmt);
3717*38fd1498Szrj lhs = gimple_assign_lhs (last_stmt);
3718*38fd1498Szrj
3719*38fd1498Szrj if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3720*38fd1498Szrj return NULL;
3721*38fd1498Szrj
3722*38fd1498Szrj hash_set<gimple *> bool_stmts;
3723*38fd1498Szrj
3724*38fd1498Szrj rhs_code = gimple_assign_rhs_code (last_stmt);
3725*38fd1498Szrj if (CONVERT_EXPR_CODE_P (rhs_code))
3726*38fd1498Szrj {
3727*38fd1498Szrj if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3728*38fd1498Szrj || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3729*38fd1498Szrj return NULL;
3730*38fd1498Szrj vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3731*38fd1498Szrj if (vectype == NULL_TREE)
3732*38fd1498Szrj return NULL;
3733*38fd1498Szrj
3734*38fd1498Szrj if (check_bool_pattern (var, vinfo, bool_stmts))
3735*38fd1498Szrj {
3736*38fd1498Szrj rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3737*38fd1498Szrj lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3738*38fd1498Szrj if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3739*38fd1498Szrj pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3740*38fd1498Szrj else
3741*38fd1498Szrj pattern_stmt
3742*38fd1498Szrj = gimple_build_assign (lhs, NOP_EXPR, rhs);
3743*38fd1498Szrj }
3744*38fd1498Szrj else
3745*38fd1498Szrj {
3746*38fd1498Szrj tree type = search_type_for_mask (var, vinfo);
3747*38fd1498Szrj tree cst0, cst1, tmp;
3748*38fd1498Szrj
3749*38fd1498Szrj if (!type)
3750*38fd1498Szrj return NULL;
3751*38fd1498Szrj
3752*38fd1498Szrj /* We may directly use the cond with a narrowed type to avoid
3753*38fd1498Szrj multiple cond exprs with subsequent result packing, and
3754*38fd1498Szrj perform a single cond with a packed mask instead. In case
3755*38fd1498Szrj of widening it is better to do the cond first and then
3756*38fd1498Szrj extract the results. */
3757*38fd1498Szrj if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3758*38fd1498Szrj type = TREE_TYPE (lhs);
3759*38fd1498Szrj
3760*38fd1498Szrj cst0 = build_int_cst (type, 0);
3761*38fd1498Szrj cst1 = build_int_cst (type, 1);
3762*38fd1498Szrj tmp = vect_recog_temp_ssa_var (type, NULL);
3763*38fd1498Szrj pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3764*38fd1498Szrj
3765*38fd1498Szrj if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3766*38fd1498Szrj {
3767*38fd1498Szrj tree new_vectype = get_vectype_for_scalar_type (type);
3768*38fd1498Szrj new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3769*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3770*38fd1498Szrj STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3771*38fd1498Szrj new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3772*38fd1498Szrj
3773*38fd1498Szrj lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3774*38fd1498Szrj pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3775*38fd1498Szrj }
3776*38fd1498Szrj }
3777*38fd1498Szrj
3778*38fd1498Szrj *type_out = vectype;
3779*38fd1498Szrj *type_in = vectype;
3780*38fd1498Szrj stmts->safe_push (last_stmt);
3781*38fd1498Szrj if (dump_enabled_p ())
3782*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
3783*38fd1498Szrj "vect_recog_bool_pattern: detected:\n");
3784*38fd1498Szrj
3785*38fd1498Szrj return pattern_stmt;
3786*38fd1498Szrj }
3787*38fd1498Szrj else if (rhs_code == COND_EXPR
3788*38fd1498Szrj && TREE_CODE (var) == SSA_NAME)
3789*38fd1498Szrj {
3790*38fd1498Szrj vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3791*38fd1498Szrj if (vectype == NULL_TREE)
3792*38fd1498Szrj return NULL;
3793*38fd1498Szrj
3794*38fd1498Szrj /* Build a scalar type for the boolean result that when
3795*38fd1498Szrj vectorized matches the vector type of the result in
3796*38fd1498Szrj size and number of elements. */
3797*38fd1498Szrj unsigned prec
3798*38fd1498Szrj = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3799*38fd1498Szrj TYPE_VECTOR_SUBPARTS (vectype));
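      /* Editorial note: for example, for a V4SI result VECTYPE,
	 TYPE_SIZE is 128 bits and TYPE_VECTOR_SUBPARTS is 4, so PREC
	 is 32 and each boolean occupies a full 32-bit element.  */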
3800*38fd1498Szrj
3801*38fd1498Szrj tree type
3802*38fd1498Szrj = build_nonstandard_integer_type (prec,
3803*38fd1498Szrj TYPE_UNSIGNED (TREE_TYPE (var)));
3804*38fd1498Szrj if (get_vectype_for_scalar_type (type) == NULL_TREE)
3805*38fd1498Szrj return NULL;
3806*38fd1498Szrj
3807*38fd1498Szrj if (!check_bool_pattern (var, vinfo, bool_stmts))
3808*38fd1498Szrj return NULL;
3809*38fd1498Szrj
3810*38fd1498Szrj rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3811*38fd1498Szrj
3812*38fd1498Szrj lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3813*38fd1498Szrj pattern_stmt
3814*38fd1498Szrj = gimple_build_assign (lhs, COND_EXPR,
3815*38fd1498Szrj build2 (NE_EXPR, boolean_type_node,
3816*38fd1498Szrj rhs, build_int_cst (type, 0)),
3817*38fd1498Szrj gimple_assign_rhs2 (last_stmt),
3818*38fd1498Szrj gimple_assign_rhs3 (last_stmt));
3819*38fd1498Szrj *type_out = vectype;
3820*38fd1498Szrj *type_in = vectype;
3821*38fd1498Szrj stmts->safe_push (last_stmt);
3822*38fd1498Szrj if (dump_enabled_p ())
3823*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
3824*38fd1498Szrj "vect_recog_bool_pattern: detected:\n");
3825*38fd1498Szrj
3826*38fd1498Szrj return pattern_stmt;
3827*38fd1498Szrj }
3828*38fd1498Szrj else if (rhs_code == SSA_NAME
3829*38fd1498Szrj && STMT_VINFO_DATA_REF (stmt_vinfo))
3830*38fd1498Szrj {
3831*38fd1498Szrj stmt_vec_info pattern_stmt_info;
3832*38fd1498Szrj vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3833*38fd1498Szrj gcc_assert (vectype != NULL_TREE);
3834*38fd1498Szrj if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3835*38fd1498Szrj return NULL;
3836*38fd1498Szrj
3837*38fd1498Szrj if (check_bool_pattern (var, vinfo, bool_stmts))
3838*38fd1498Szrj rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3839*38fd1498Szrj else
3840*38fd1498Szrj {
3841*38fd1498Szrj tree type = search_type_for_mask (var, vinfo);
3842*38fd1498Szrj tree cst0, cst1, new_vectype;
3843*38fd1498Szrj
3844*38fd1498Szrj if (!type)
3845*38fd1498Szrj return NULL;
3846*38fd1498Szrj
3847*38fd1498Szrj if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3848*38fd1498Szrj type = TREE_TYPE (vectype);
3849*38fd1498Szrj
3850*38fd1498Szrj cst0 = build_int_cst (type, 0);
3851*38fd1498Szrj cst1 = build_int_cst (type, 1);
3852*38fd1498Szrj new_vectype = get_vectype_for_scalar_type (type);
3853*38fd1498Szrj
3854*38fd1498Szrj rhs = vect_recog_temp_ssa_var (type, NULL);
3855*38fd1498Szrj pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3856*38fd1498Szrj
3857*38fd1498Szrj pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3858*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3859*38fd1498Szrj STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3860*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3861*38fd1498Szrj }
3862*38fd1498Szrj
3863*38fd1498Szrj lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3864*38fd1498Szrj if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3865*38fd1498Szrj {
3866*38fd1498Szrj tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3867*38fd1498Szrj gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3868*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, cast_stmt);
3869*38fd1498Szrj rhs = rhs2;
3870*38fd1498Szrj }
3871*38fd1498Szrj pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3872*38fd1498Szrj pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3873*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3874*38fd1498Szrj STMT_VINFO_DATA_REF (pattern_stmt_info)
3875*38fd1498Szrj = STMT_VINFO_DATA_REF (stmt_vinfo);
3876*38fd1498Szrj STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3877*38fd1498Szrj = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3878*38fd1498Szrj DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3879*38fd1498Szrj *type_out = vectype;
3880*38fd1498Szrj *type_in = vectype;
3881*38fd1498Szrj stmts->safe_push (last_stmt);
3882*38fd1498Szrj if (dump_enabled_p ())
3883*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
3884*38fd1498Szrj "vect_recog_bool_pattern: detected:\n");
3885*38fd1498Szrj return pattern_stmt;
3886*38fd1498Szrj }
3887*38fd1498Szrj else
3888*38fd1498Szrj return NULL;
3889*38fd1498Szrj }
3890*38fd1498Szrj
3891*38fd1498Szrj
3892*38fd1498Szrj /* A helper for vect_recog_mask_conversion_pattern. Build
3893*38fd1498Szrj conversion of MASK to a type suitable for masking VECTYPE.
3894*38fd1498Szrj Built statement gets required vectype and is appended to
3895*38fd1498Szrj a pattern sequence of STMT_VINFO.
3896*38fd1498Szrj
3897*38fd1498Szrj Return converted mask. */
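/* Editorial note, a sketch under assumptions: for a V4SI VECTYPE the
   MASKTYPE below is the same-sized truth vector type, i.e. 4 lanes of
   <signed-boolean:32>, and the statement built is in effect

     tmp = (mask_element_type) mask;

   which vectorizes into a single mask widening/narrowing operation.  */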
3898*38fd1498Szrj
3899*38fd1498Szrj static tree
3900*38fd1498Szrj build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3901*38fd1498Szrj vec_info *vinfo)
3902*38fd1498Szrj {
3903*38fd1498Szrj gimple *stmt;
3904*38fd1498Szrj tree masktype, tmp;
3905*38fd1498Szrj stmt_vec_info new_stmt_info;
3906*38fd1498Szrj
3907*38fd1498Szrj masktype = build_same_sized_truth_vector_type (vectype);
3908*38fd1498Szrj tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3909*38fd1498Szrj stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3910*38fd1498Szrj new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3911*38fd1498Szrj set_vinfo_for_stmt (stmt, new_stmt_info);
3912*38fd1498Szrj STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3913*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, stmt);
3914*38fd1498Szrj
3915*38fd1498Szrj return tmp;
3916*38fd1498Szrj }
3917*38fd1498Szrj
3918*38fd1498Szrj
3919*38fd1498Szrj /* Function vect_recog_mask_conversion_pattern
3920*38fd1498Szrj
3921*38fd1498Szrj Try to find statements which require boolean type
3922*38fd1498Szrj conversion. Additional conversion statements are
3923*38fd1498Szrj added to handle such cases. For example:
3924*38fd1498Szrj
3925*38fd1498Szrj bool m_1, m_2, m_3;
3926*38fd1498Szrj int i_4, i_5;
3927*38fd1498Szrj double d_6, d_7;
3928*38fd1498Szrj char c_1, c_2, c_3;
3929*38fd1498Szrj
3930*38fd1498Szrj S1 m_1 = i_4 > i_5;
3931*38fd1498Szrj S2 m_2 = d_6 < d_7;
3932*38fd1498Szrj S3 m_3 = m_1 & m_2;
3933*38fd1498Szrj S4 c_1 = m_3 ? c_2 : c_3;
3934*38fd1498Szrj
3935*38fd1498Szrj Will be transformed into:
3936*38fd1498Szrj
3937*38fd1498Szrj S1 m_1 = i_4 > i_5;
3938*38fd1498Szrj S2 m_2 = d_6 < d_7;
3939*38fd1498Szrj S3'' m_2' = (_Bool[bitsize=32])m_2;
3940*38fd1498Szrj S3' m_3' = m_1 & m_2';
3941*38fd1498Szrj S4'' m_3'' = (_Bool[bitsize=8])m_3';
3942*38fd1498Szrj S4' c_1' = m_3'' ? c_2 : c_3; */
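/* Editorial illustration, not from the original sources: a scalar
   kernel needing such conversions, because the int comparison yields
   a 32-bit-element mask while the double comparison yields a
   64-bit-element one (names are made up for the example):

     void
     f (char *c, int *i4, int *i5, double *d6, double *d7,
        char *c2, char *c3, int n)
     {
       for (int k = 0; k < n; k++)
         c[k] = ((i4[k] > i5[k]) & (d6[k] < d7[k])) ? c2[k] : c3[k];
     }
 */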
3943*38fd1498Szrj
3944*38fd1498Szrj static gimple *
3945*38fd1498Szrj vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3946*38fd1498Szrj tree *type_out)
3947*38fd1498Szrj {
3948*38fd1498Szrj gimple *last_stmt = stmts->pop ();
3949*38fd1498Szrj enum tree_code rhs_code;
3950*38fd1498Szrj tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3951*38fd1498Szrj tree vectype1, vectype2;
3952*38fd1498Szrj stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3953*38fd1498Szrj stmt_vec_info pattern_stmt_info;
3954*38fd1498Szrj vec_info *vinfo = stmt_vinfo->vinfo;
3955*38fd1498Szrj
3956*38fd1498Szrj /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
3957*38fd1498Szrj if (is_gimple_call (last_stmt)
3958*38fd1498Szrj && gimple_call_internal_p (last_stmt)
3959*38fd1498Szrj && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3960*38fd1498Szrj || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3961*38fd1498Szrj {
3962*38fd1498Szrj gcall *pattern_stmt;
3963*38fd1498Szrj bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3964*38fd1498Szrj
3965*38fd1498Szrj if (load)
3966*38fd1498Szrj {
3967*38fd1498Szrj lhs = gimple_call_lhs (last_stmt);
3968*38fd1498Szrj vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3969*38fd1498Szrj }
3970*38fd1498Szrj else
3971*38fd1498Szrj {
3972*38fd1498Szrj rhs2 = gimple_call_arg (last_stmt, 3);
3973*38fd1498Szrj vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3974*38fd1498Szrj }
3975*38fd1498Szrj
3976*38fd1498Szrj rhs1 = gimple_call_arg (last_stmt, 2);
3977*38fd1498Szrj rhs1_type = search_type_for_mask (rhs1, vinfo);
3978*38fd1498Szrj if (!rhs1_type)
3979*38fd1498Szrj return NULL;
3980*38fd1498Szrj vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3981*38fd1498Szrj
3982*38fd1498Szrj if (!vectype1 || !vectype2
3983*38fd1498Szrj || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3984*38fd1498Szrj TYPE_VECTOR_SUBPARTS (vectype2)))
3985*38fd1498Szrj return NULL;
3986*38fd1498Szrj
3987*38fd1498Szrj tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3988*38fd1498Szrj
3989*38fd1498Szrj if (load)
3990*38fd1498Szrj {
3991*38fd1498Szrj lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3992*38fd1498Szrj pattern_stmt
3993*38fd1498Szrj = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3994*38fd1498Szrj gimple_call_arg (last_stmt, 0),
3995*38fd1498Szrj gimple_call_arg (last_stmt, 1),
3996*38fd1498Szrj tmp);
3997*38fd1498Szrj gimple_call_set_lhs (pattern_stmt, lhs);
3998*38fd1498Szrj }
3999*38fd1498Szrj else
4000*38fd1498Szrj pattern_stmt
4001*38fd1498Szrj = gimple_build_call_internal (IFN_MASK_STORE, 4,
4002*38fd1498Szrj gimple_call_arg (last_stmt, 0),
4003*38fd1498Szrj gimple_call_arg (last_stmt, 1),
4004*38fd1498Szrj tmp,
4005*38fd1498Szrj gimple_call_arg (last_stmt, 3));
4006*38fd1498Szrj
4007*38fd1498Szrj gimple_call_set_nothrow (pattern_stmt, true);
4008*38fd1498Szrj
4009*38fd1498Szrj pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4010*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4011*38fd1498Szrj STMT_VINFO_DATA_REF (pattern_stmt_info)
4012*38fd1498Szrj = STMT_VINFO_DATA_REF (stmt_vinfo);
4013*38fd1498Szrj STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4014*38fd1498Szrj = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4015*38fd1498Szrj DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
4016*38fd1498Szrj
4017*38fd1498Szrj *type_out = vectype1;
4018*38fd1498Szrj *type_in = vectype1;
4019*38fd1498Szrj stmts->safe_push (last_stmt);
4020*38fd1498Szrj if (dump_enabled_p ())
4021*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
4022*38fd1498Szrj "vect_recog_mask_conversion_pattern: detected:\n");
4023*38fd1498Szrj
4024*38fd1498Szrj return pattern_stmt;
4025*38fd1498Szrj }
4026*38fd1498Szrj
4027*38fd1498Szrj if (!is_gimple_assign (last_stmt))
4028*38fd1498Szrj return NULL;
4029*38fd1498Szrj
4030*38fd1498Szrj gimple *pattern_stmt;
4031*38fd1498Szrj lhs = gimple_assign_lhs (last_stmt);
4032*38fd1498Szrj rhs1 = gimple_assign_rhs1 (last_stmt);
4033*38fd1498Szrj rhs_code = gimple_assign_rhs_code (last_stmt);
4034*38fd1498Szrj
4035*38fd1498Szrj /* Check for cond expression requiring mask conversion. */
4036*38fd1498Szrj if (rhs_code == COND_EXPR)
4037*38fd1498Szrj {
4038*38fd1498Szrj /* vect_recog_mixed_size_cond_pattern could apply;
4039*38fd1498Szrj do nothing in that case. */
4040*38fd1498Szrj if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4041*38fd1498Szrj return NULL;
4042*38fd1498Szrj
4043*38fd1498Szrj vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4044*38fd1498Szrj
4045*38fd1498Szrj if (TREE_CODE (rhs1) == SSA_NAME)
4046*38fd1498Szrj {
4047*38fd1498Szrj rhs1_type = search_type_for_mask (rhs1, vinfo);
4048*38fd1498Szrj if (!rhs1_type)
4049*38fd1498Szrj return NULL;
4050*38fd1498Szrj }
4051*38fd1498Szrj else if (COMPARISON_CLASS_P (rhs1))
4052*38fd1498Szrj {
4053*38fd1498Szrj /* Check whether we're comparing scalar booleans and (if so)
4054*38fd1498Szrj whether a better mask type exists than the mask associated
4055*38fd1498Szrj with boolean-sized elements. This avoids unnecessary packs
4056*38fd1498Szrj and unpacks if the booleans are set from comparisons of
4057*38fd1498Szrj wider types. E.g. in:
4058*38fd1498Szrj
4059*38fd1498Szrj int x1, x2, x3, x4, y1, y2;
4060*38fd1498Szrj ...
4061*38fd1498Szrj bool b1 = (x1 == x2);
4062*38fd1498Szrj bool b2 = (x3 == x4);
4063*38fd1498Szrj ... = b1 == b2 ? y1 : y2;
4064*38fd1498Szrj
4065*38fd1498Szrj it is better for b1 and b2 to use the mask type associated
4066*38fd1498Szrj with int elements rather than bool (byte) elements. */
4067*38fd1498Szrj rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4068*38fd1498Szrj if (!rhs1_type)
4069*38fd1498Szrj rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4070*38fd1498Szrj }
4071*38fd1498Szrj else
4072*38fd1498Szrj return NULL;
4073*38fd1498Szrj
4074*38fd1498Szrj vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4075*38fd1498Szrj
4076*38fd1498Szrj if (!vectype1 || !vectype2)
4077*38fd1498Szrj return NULL;
4078*38fd1498Szrj
4079*38fd1498Szrj /* Continue if a conversion is needed. Also continue if we have
4080*38fd1498Szrj a comparison whose vector type would normally be different from
4081*38fd1498Szrj VECTYPE2 when considered in isolation. In that case we'll
4082*38fd1498Szrj replace the comparison with an SSA name (so that we can record
4083*38fd1498Szrj its vector type) and behave as though the comparison was an SSA
4084*38fd1498Szrj name from the outset. */
4085*38fd1498Szrj if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4086*38fd1498Szrj TYPE_VECTOR_SUBPARTS (vectype2))
4087*38fd1498Szrj && (TREE_CODE (rhs1) == SSA_NAME
4088*38fd1498Szrj || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4089*38fd1498Szrj return NULL;
4090*38fd1498Szrj
4091*38fd1498Szrj /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4092*38fd1498Szrj in place, we can handle it in vectorizable_condition. This avoids
4093*38fd1498Szrj unnecessary promotion stmts and increased vectorization factor. */
4094*38fd1498Szrj if (COMPARISON_CLASS_P (rhs1)
4095*38fd1498Szrj && INTEGRAL_TYPE_P (rhs1_type)
4096*38fd1498Szrj && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4097*38fd1498Szrj TYPE_VECTOR_SUBPARTS (vectype2)))
4098*38fd1498Szrj {
4099*38fd1498Szrj gimple *dummy;
4100*38fd1498Szrj enum vect_def_type dt;
4101*38fd1498Szrj if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4102*38fd1498Szrj &dummy, &dt)
4103*38fd1498Szrj && dt == vect_external_def
4104*38fd1498Szrj && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4105*38fd1498Szrj &dummy, &dt)
4106*38fd1498Szrj && (dt == vect_external_def
4107*38fd1498Szrj || dt == vect_constant_def))
4108*38fd1498Szrj {
4109*38fd1498Szrj tree wide_scalar_type = build_nonstandard_integer_type
4110*38fd1498Szrj (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4111*38fd1498Szrj TYPE_UNSIGNED (rhs1_type));
4112*38fd1498Szrj tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4113*38fd1498Szrj if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4114*38fd1498Szrj return NULL;
4115*38fd1498Szrj }
4116*38fd1498Szrj }
4117*38fd1498Szrj
4118*38fd1498Szrj /* If rhs1 is a comparison, we need to move it into a
4119*38fd1498Szrj separate statement. */
4120*38fd1498Szrj if (TREE_CODE (rhs1) != SSA_NAME)
4121*38fd1498Szrj {
4122*38fd1498Szrj tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4123*38fd1498Szrj pattern_stmt = gimple_build_assign (tmp, rhs1);
4124*38fd1498Szrj rhs1 = tmp;
4125*38fd1498Szrj
4126*38fd1498Szrj pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4127*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4128*38fd1498Szrj STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4129*38fd1498Szrj append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4130*38fd1498Szrj }
4131*38fd1498Szrj
4132*38fd1498Szrj if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4133*38fd1498Szrj TYPE_VECTOR_SUBPARTS (vectype2)))
4134*38fd1498Szrj tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4135*38fd1498Szrj else
4136*38fd1498Szrj tmp = rhs1;
4137*38fd1498Szrj
4138*38fd1498Szrj lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4139*38fd1498Szrj pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4140*38fd1498Szrj gimple_assign_rhs2 (last_stmt),
4141*38fd1498Szrj gimple_assign_rhs3 (last_stmt));
4142*38fd1498Szrj
4143*38fd1498Szrj *type_out = vectype1;
4144*38fd1498Szrj *type_in = vectype1;
4145*38fd1498Szrj stmts->safe_push (last_stmt);
4146*38fd1498Szrj if (dump_enabled_p ())
4147*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
4148*38fd1498Szrj "vect_recog_mask_conversion_pattern: detected:\n");
4149*38fd1498Szrj
4150*38fd1498Szrj return pattern_stmt;
4151*38fd1498Szrj }
4152*38fd1498Szrj
4153*38fd1498Szrj /* Now check for binary boolean operations requiring conversion for
4154*38fd1498Szrj one of the operands. */
4155*38fd1498Szrj if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4156*38fd1498Szrj return NULL;
4157*38fd1498Szrj
4158*38fd1498Szrj if (rhs_code != BIT_IOR_EXPR
4159*38fd1498Szrj && rhs_code != BIT_XOR_EXPR
4160*38fd1498Szrj && rhs_code != BIT_AND_EXPR
4161*38fd1498Szrj && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4162*38fd1498Szrj return NULL;
4163*38fd1498Szrj
4164*38fd1498Szrj rhs2 = gimple_assign_rhs2 (last_stmt);
4165*38fd1498Szrj
4166*38fd1498Szrj rhs1_type = search_type_for_mask (rhs1, vinfo);
4167*38fd1498Szrj rhs2_type = search_type_for_mask (rhs2, vinfo);
4168*38fd1498Szrj
4169*38fd1498Szrj if (!rhs1_type || !rhs2_type
4170*38fd1498Szrj || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4171*38fd1498Szrj return NULL;
4172*38fd1498Szrj
4173*38fd1498Szrj if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4174*38fd1498Szrj {
4175*38fd1498Szrj vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4176*38fd1498Szrj if (!vectype1)
4177*38fd1498Szrj return NULL;
4178*38fd1498Szrj rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4179*38fd1498Szrj }
4180*38fd1498Szrj else
4181*38fd1498Szrj {
4182*38fd1498Szrj vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4183*38fd1498Szrj if (!vectype1)
4184*38fd1498Szrj return NULL;
4185*38fd1498Szrj rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4186*38fd1498Szrj }
4187*38fd1498Szrj
4188*38fd1498Szrj lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4189*38fd1498Szrj pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4190*38fd1498Szrj
4191*38fd1498Szrj *type_out = vectype1;
4192*38fd1498Szrj *type_in = vectype1;
4193*38fd1498Szrj stmts->safe_push (last_stmt);
4194*38fd1498Szrj if (dump_enabled_p ())
4195*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
4196*38fd1498Szrj "vect_recog_mask_conversion_pattern: detected:\n");
4197*38fd1498Szrj
4198*38fd1498Szrj return pattern_stmt;
4199*38fd1498Szrj }
4200*38fd1498Szrj
4201*38fd1498Szrj /* STMT is a load or store. If the load or store is conditional, return
4202*38fd1498Szrj the boolean condition under which it occurs, otherwise return null. */
4203*38fd1498Szrj
4204*38fd1498Szrj static tree
4205*38fd1498Szrj vect_get_load_store_mask (gimple *stmt)
4206*38fd1498Szrj {
4207*38fd1498Szrj if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4208*38fd1498Szrj {
4209*38fd1498Szrj gcc_assert (gimple_assign_single_p (def_assign));
4210*38fd1498Szrj return NULL_TREE;
4211*38fd1498Szrj }
4212*38fd1498Szrj
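  /* Editorial note: for IFN_MASK_LOAD and IFN_MASK_STORE the mask is
     expected to be argument 2, following the base address and the
     alignment operand, which is what internal_fn_mask_index should
     return for them.  */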
4213*38fd1498Szrj if (gcall *def_call = dyn_cast <gcall *> (stmt))
4214*38fd1498Szrj {
4215*38fd1498Szrj internal_fn ifn = gimple_call_internal_fn (def_call);
4216*38fd1498Szrj int mask_index = internal_fn_mask_index (ifn);
4217*38fd1498Szrj return gimple_call_arg (def_call, mask_index);
4218*38fd1498Szrj }
4219*38fd1498Szrj
4220*38fd1498Szrj gcc_unreachable ();
4221*38fd1498Szrj }
4222*38fd1498Szrj
4223*38fd1498Szrj /* Return the scalar offset type that an internal gather/scatter function
4224*38fd1498Szrj should use. GS_INFO describes the gather/scatter operation. */
4225*38fd1498Szrj
4226*38fd1498Szrj static tree
4227*38fd1498Szrj vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4228*38fd1498Szrj {
4229*38fd1498Szrj tree offset_type = TREE_TYPE (gs_info->offset);
4230*38fd1498Szrj unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4231*38fd1498Szrj
4232*38fd1498Szrj /* Enforced by vect_check_gather_scatter. */
4233*38fd1498Szrj unsigned int offset_bits = TYPE_PRECISION (offset_type);
4234*38fd1498Szrj gcc_assert (element_bits >= offset_bits);
4235*38fd1498Szrj
4236*38fd1498Szrj /* If the offset is narrower than the elements, extend it according
4237*38fd1498Szrj to its sign. */
4238*38fd1498Szrj if (element_bits > offset_bits)
4239*38fd1498Szrj return build_nonstandard_integer_type (element_bits,
4240*38fd1498Szrj TYPE_UNSIGNED (offset_type));
4241*38fd1498Szrj
4242*38fd1498Szrj return offset_type;
4243*38fd1498Szrj }
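/* Editorial example: for a gather of 64-bit double elements indexed by
   a 32-bit signed offset, the function above returns a signed 64-bit
   integer type, so offsets get sign-extended up to the element width.  */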
4244*38fd1498Szrj
4245*38fd1498Szrj /* Return MASK if MASK is suitable for masking an operation on vectors
4246*38fd1498Szrj of type VECTYPE, otherwise convert it into such a form and return
4247*38fd1498Szrj the result. Associate any conversion statements with STMT_INFO's
4248*38fd1498Szrj pattern. */
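/* Editorial note: the conversion is only emitted when the lane counts
   differ, e.g. a mask produced by a 4-lane double comparison masking an
   operation on 8-lane floats; a mask whose lane count already matches
   VECTYPE is returned unchanged.  */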
4249*38fd1498Szrj
4250*38fd1498Szrj static tree
4251*38fd1498Szrj vect_convert_mask_for_vectype (tree mask, tree vectype,
4252*38fd1498Szrj stmt_vec_info stmt_info, vec_info *vinfo)
4253*38fd1498Szrj {
4254*38fd1498Szrj tree mask_type = search_type_for_mask (mask, vinfo);
4255*38fd1498Szrj if (mask_type)
4256*38fd1498Szrj {
4257*38fd1498Szrj tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4258*38fd1498Szrj if (mask_vectype
4259*38fd1498Szrj && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4260*38fd1498Szrj TYPE_VECTOR_SUBPARTS (mask_vectype)))
4261*38fd1498Szrj mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4262*38fd1498Szrj }
4263*38fd1498Szrj return mask;
4264*38fd1498Szrj }
4265*38fd1498Szrj
4266*38fd1498Szrj /* Return the equivalent of:
4267*38fd1498Szrj
4268*38fd1498Szrj fold_convert (TYPE, VALUE)
4269*38fd1498Szrj
4270*38fd1498Szrj with the expectation that the operation will be vectorized.
4271*38fd1498Szrj If new statements are needed, add them as pattern statements
4272*38fd1498Szrj to STMT_INFO. */
4273*38fd1498Szrj
4274*38fd1498Szrj static tree
4275*38fd1498Szrj vect_add_conversion_to_pattern (tree type, tree value,
4276*38fd1498Szrj stmt_vec_info stmt_info,
4277*38fd1498Szrj vec_info *vinfo)
4278*38fd1498Szrj {
4279*38fd1498Szrj if (useless_type_conversion_p (type, TREE_TYPE (value)))
4280*38fd1498Szrj return value;
4281*38fd1498Szrj
4282*38fd1498Szrj tree new_value = vect_recog_temp_ssa_var (type, NULL);
4283*38fd1498Szrj gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4284*38fd1498Szrj stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4285*38fd1498Szrj set_vinfo_for_stmt (conversion, new_stmt_info);
4286*38fd1498Szrj STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4287*38fd1498Szrj append_pattern_def_seq (stmt_info, conversion);
4288*38fd1498Szrj return new_value;
4289*38fd1498Szrj }
4290*38fd1498Szrj
4291*38fd1498Szrj /* Try to convert STMT into a call to a gather load or scatter store
4292*38fd1498Szrj internal function. Return the final statement on success and set
4293*38fd1498Szrj *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4294*38fd1498Szrj
4295*38fd1498Szrj This function only handles gathers and scatters that were recognized
4296*38fd1498Szrj as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
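/* Editorial illustration, not from the original sources: a loop such as

     void
     f (double *out, double *base, int *idx, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = base[idx[i]];
     }

   is the kind of gather which, once flagged via
   STMT_VINFO_GATHER_SCATTER_P and supported by an internal function
   such as IFN_GATHER_LOAD, is rewritten here into an explicit call.  */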
4297*38fd1498Szrj
4298*38fd1498Szrj static gimple *
4299*38fd1498Szrj vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4300*38fd1498Szrj tree *type_in, tree *type_out)
4301*38fd1498Szrj {
4302*38fd1498Szrj /* Currently we only support this for loop vectorization. */
4303*38fd1498Szrj stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4304*38fd1498Szrj loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4305*38fd1498Szrj if (!loop_vinfo)
4306*38fd1498Szrj return NULL;
4307*38fd1498Szrj
4308*38fd1498Szrj /* Make sure that we're looking at a gather load or scatter store. */
4309*38fd1498Szrj data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4310*38fd1498Szrj if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4311*38fd1498Szrj return NULL;
4312*38fd1498Szrj
4313*38fd1498Szrj /* Get the boolean that controls whether the load or store happens.
4314*38fd1498Szrj This is null if the operation is unconditional. */
4315*38fd1498Szrj tree mask = vect_get_load_store_mask (stmt);
4316*38fd1498Szrj
4317*38fd1498Szrj /* Make sure that the target supports an appropriate internal
4318*38fd1498Szrj function for the gather/scatter operation. */
4319*38fd1498Szrj gather_scatter_info gs_info;
4320*38fd1498Szrj if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4321*38fd1498Szrj || gs_info.decl)
4322*38fd1498Szrj return NULL;
4323*38fd1498Szrj
4324*38fd1498Szrj /* Convert the mask to the right form. */
4325*38fd1498Szrj tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4326*38fd1498Szrj if (mask)
4327*38fd1498Szrj mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4328*38fd1498Szrj loop_vinfo);
4329*38fd1498Szrj
4330*38fd1498Szrj /* Get the invariant base and non-invariant offset, converting the
4331*38fd1498Szrj latter to the same width as the vector elements. */
4332*38fd1498Szrj tree base = gs_info.base;
4333*38fd1498Szrj tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4334*38fd1498Szrj tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4335*38fd1498Szrj last_stmt_info, loop_vinfo);
4336*38fd1498Szrj
4337*38fd1498Szrj /* Build the new pattern statement. */
4338*38fd1498Szrj tree scale = size_int (gs_info.scale);
4339*38fd1498Szrj gcall *pattern_stmt;
4340*38fd1498Szrj if (DR_IS_READ (dr))
4341*38fd1498Szrj {
4342*38fd1498Szrj if (mask != NULL)
4343*38fd1498Szrj pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4344*38fd1498Szrj offset, scale, mask);
4345*38fd1498Szrj else
4346*38fd1498Szrj pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4347*38fd1498Szrj offset, scale);
4348*38fd1498Szrj tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4349*38fd1498Szrj gimple_call_set_lhs (pattern_stmt, load_lhs);
4350*38fd1498Szrj }
4351*38fd1498Szrj else
4352*38fd1498Szrj {
4353*38fd1498Szrj tree rhs = vect_get_store_rhs (stmt);
4354*38fd1498Szrj if (mask != NULL)
4355*38fd1498Szrj pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4356*38fd1498Szrj base, offset, scale, rhs,
4357*38fd1498Szrj mask);
4358*38fd1498Szrj else
4359*38fd1498Szrj pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4360*38fd1498Szrj base, offset, scale, rhs);
4361*38fd1498Szrj }
4362*38fd1498Szrj gimple_call_set_nothrow (pattern_stmt, true);
4363*38fd1498Szrj
4364*38fd1498Szrj /* Copy across relevant vectorization info and associate DR with the
4365*38fd1498Szrj new pattern statement instead of the original statement. */
4366*38fd1498Szrj stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4367*38fd1498Szrj loop_vinfo);
4368*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4369*38fd1498Szrj STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4370*38fd1498Szrj STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4371*38fd1498Szrj = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4372*38fd1498Szrj STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4373*38fd1498Szrj = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4374*38fd1498Szrj DR_STMT (dr) = pattern_stmt;
4375*38fd1498Szrj
4376*38fd1498Szrj tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4377*38fd1498Szrj *type_out = vectype;
4378*38fd1498Szrj *type_in = vectype;
4379*38fd1498Szrj
4380*38fd1498Szrj if (dump_enabled_p ())
4381*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
4382*38fd1498Szrj "gather/scatter pattern detected:\n");
4383*38fd1498Szrj
4384*38fd1498Szrj return pattern_stmt;
4385*38fd1498Szrj }
4386*38fd1498Szrj
4387*38fd1498Szrj /* Pattern wrapper around vect_try_gather_scatter_pattern. */
4388*38fd1498Szrj
4389*38fd1498Szrj static gimple *
4390*38fd1498Szrj vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4391*38fd1498Szrj tree *type_out)
4392*38fd1498Szrj {
4393*38fd1498Szrj gimple *last_stmt = stmts->pop ();
4394*38fd1498Szrj stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4395*38fd1498Szrj gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4396*38fd1498Szrj last_stmt_info,
4397*38fd1498Szrj type_in, type_out);
4398*38fd1498Szrj if (pattern_stmt)
4399*38fd1498Szrj stmts->safe_push (last_stmt);
4400*38fd1498Szrj return pattern_stmt;
4401*38fd1498Szrj }
4402*38fd1498Szrj
4403*38fd1498Szrj /* Mark statements that are involved in a pattern. */
4404*38fd1498Szrj
4405*38fd1498Szrj static inline void
4406*38fd1498Szrj vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4407*38fd1498Szrj tree pattern_vectype)
4408*38fd1498Szrj {
4409*38fd1498Szrj stmt_vec_info pattern_stmt_info, def_stmt_info;
4410*38fd1498Szrj stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4411*38fd1498Szrj vec_info *vinfo = orig_stmt_info->vinfo;
4412*38fd1498Szrj gimple *def_stmt;
4413*38fd1498Szrj
4414*38fd1498Szrj pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4415*38fd1498Szrj if (pattern_stmt_info == NULL)
4416*38fd1498Szrj {
4417*38fd1498Szrj pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4418*38fd1498Szrj set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4419*38fd1498Szrj }
4420*38fd1498Szrj gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4421*38fd1498Szrj
4422*38fd1498Szrj STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4423*38fd1498Szrj STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4424*38fd1498Szrj = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4425*38fd1498Szrj STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4426*38fd1498Szrj STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4427*38fd1498Szrj STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4428*38fd1498Szrj STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4429*38fd1498Szrj = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4430*38fd1498Szrj if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4431*38fd1498Szrj {
4432*38fd1498Szrj gimple_stmt_iterator si;
4433*38fd1498Szrj for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4434*38fd1498Szrj !gsi_end_p (si); gsi_next (&si))
4435*38fd1498Szrj {
4436*38fd1498Szrj def_stmt = gsi_stmt (si);
4437*38fd1498Szrj def_stmt_info = vinfo_for_stmt (def_stmt);
4438*38fd1498Szrj if (def_stmt_info == NULL)
4439*38fd1498Szrj {
4440*38fd1498Szrj def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4441*38fd1498Szrj set_vinfo_for_stmt (def_stmt, def_stmt_info);
4442*38fd1498Szrj }
4443*38fd1498Szrj gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4444*38fd1498Szrj STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4445*38fd1498Szrj STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4446*38fd1498Szrj if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4447*38fd1498Szrj STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4448*38fd1498Szrj }
4449*38fd1498Szrj }
4450*38fd1498Szrj }
4451*38fd1498Szrj
4452*38fd1498Szrj /* Function vect_pattern_recog_1
4453*38fd1498Szrj
4454*38fd1498Szrj Input:
4455*38fd1498Szrj PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4456*38fd1498Szrj computation pattern.
4457*38fd1498Szrj STMT: A stmt from which the pattern search should start.
4458*38fd1498Szrj
4459*38fd1498Szrj If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4460*38fd1498Szrj expression that computes the same functionality and can be used to
4461*38fd1498Szrj replace the sequence of stmts that are involved in the pattern.
4462*38fd1498Szrj
4463*38fd1498Szrj Output:
4464*38fd1498Szrj This function checks if the expression returned by PATTERN_RECOG_FUNC is
4465*38fd1498Szrj supported in vector form by the target. We use 'TYPE_IN' to obtain the
4466*38fd1498Szrj relevant vector type. If 'TYPE_IN' is already a vector type, then this
4467*38fd1498Szrj indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4468*38fd1498Szrj If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
4469*38fd1498Szrj the available target pattern.
4470*38fd1498Szrj
4471*38fd1498Szrj This function also does some bookkeeping, as explained in the documentation
4472*38fd1498Szrj for vect_pattern_recog. */
4473*38fd1498Szrj
4474*38fd1498Szrj static bool
4475*38fd1498Szrj vect_pattern_recog_1 (vect_recog_func *recog_func,
4476*38fd1498Szrj gimple_stmt_iterator si,
4477*38fd1498Szrj vec<gimple *> *stmts_to_replace)
4478*38fd1498Szrj {
4479*38fd1498Szrj gimple *stmt = gsi_stmt (si), *pattern_stmt;
4480*38fd1498Szrj stmt_vec_info stmt_info;
4481*38fd1498Szrj loop_vec_info loop_vinfo;
4482*38fd1498Szrj tree pattern_vectype;
4483*38fd1498Szrj tree type_in, type_out;
4484*38fd1498Szrj enum tree_code code;
4485*38fd1498Szrj int i;
4486*38fd1498Szrj gimple *next;
4487*38fd1498Szrj
4488*38fd1498Szrj stmts_to_replace->truncate (0);
4489*38fd1498Szrj stmts_to_replace->quick_push (stmt);
4490*38fd1498Szrj pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4491*38fd1498Szrj if (!pattern_stmt)
4492*38fd1498Szrj return false;
4493*38fd1498Szrj
4494*38fd1498Szrj stmt = stmts_to_replace->last ();
4495*38fd1498Szrj stmt_info = vinfo_for_stmt (stmt);
4496*38fd1498Szrj loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4497*38fd1498Szrj
4498*38fd1498Szrj if (VECTOR_BOOLEAN_TYPE_P (type_in)
4499*38fd1498Szrj || VECTOR_TYPE_P (type_in))
4500*38fd1498Szrj {
4501*38fd1498Szrj /* No need to check target support (already checked by the pattern
4502*38fd1498Szrj recognition function). */
4503*38fd1498Szrj pattern_vectype = type_out ? type_out : type_in;
4504*38fd1498Szrj }
4505*38fd1498Szrj else
4506*38fd1498Szrj {
4507*38fd1498Szrj /* Check target support.  */
4508*38fd1498Szrj type_in = get_vectype_for_scalar_type (type_in);
4509*38fd1498Szrj if (!type_in)
4510*38fd1498Szrj return false;
4511*38fd1498Szrj if (type_out)
4512*38fd1498Szrj type_out = get_vectype_for_scalar_type (type_out);
4513*38fd1498Szrj else
4514*38fd1498Szrj type_out = type_in;
4515*38fd1498Szrj if (!type_out)
4516*38fd1498Szrj return false;
4517*38fd1498Szrj pattern_vectype = type_out;
4518*38fd1498Szrj
4519*38fd1498Szrj if (is_gimple_assign (pattern_stmt))
4520*38fd1498Szrj {
4521*38fd1498Szrj enum insn_code icode;
4522*38fd1498Szrj code = gimple_assign_rhs_code (pattern_stmt);
4523*38fd1498Szrj optab optab = optab_for_tree_code (code, type_in, optab_default);
4524*38fd1498Szrj machine_mode vec_mode = TYPE_MODE (type_in);
4525*38fd1498Szrj if (!optab
4526*38fd1498Szrj || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4527*38fd1498Szrj || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4528*38fd1498Szrj return false;
4529*38fd1498Szrj }
4530*38fd1498Szrj else
4531*38fd1498Szrj gcc_assert (is_gimple_call (pattern_stmt));
4532*38fd1498Szrj }
4533*38fd1498Szrj
4534*38fd1498Szrj /* Found a vectorizable pattern. */
4535*38fd1498Szrj if (dump_enabled_p ())
4536*38fd1498Szrj {
4537*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
4538*38fd1498Szrj "%s pattern recognized: ", recog_func->name);
4539*38fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4540*38fd1498Szrj }
4541*38fd1498Szrj
4542*38fd1498Szrj /* Mark the stmts that are involved in the pattern. */
4543*38fd1498Szrj vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4544*38fd1498Szrj
4545*38fd1498Szrj /* Patterns cannot be vectorized using SLP, because they change the order of
4546*38fd1498Szrj computation. */
4547*38fd1498Szrj if (loop_vinfo)
4548*38fd1498Szrj FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4549*38fd1498Szrj if (next == stmt)
4550*38fd1498Szrj LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4551*38fd1498Szrj
4552*38fd1498Szrj /* It is possible that additional pattern stmts are created and inserted in
4553*38fd1498Szrj STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
4554*38fd1498Szrj relevant statements. */
4555*38fd1498Szrj for (i = 0; stmts_to_replace->iterate (i, &stmt)
4556*38fd1498Szrj && (unsigned) i < (stmts_to_replace->length () - 1);
4557*38fd1498Szrj i++)
4558*38fd1498Szrj {
4559*38fd1498Szrj stmt_info = vinfo_for_stmt (stmt);
4560*38fd1498Szrj pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4561*38fd1498Szrj if (dump_enabled_p ())
4562*38fd1498Szrj {
4563*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
4564*38fd1498Szrj "additional pattern stmt: ");
4565*38fd1498Szrj dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4566*38fd1498Szrj }
4567*38fd1498Szrj
4568*38fd1498Szrj vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4569*38fd1498Szrj }
4570*38fd1498Szrj
4571*38fd1498Szrj return true;
4572*38fd1498Szrj }
4573*38fd1498Szrj
4574*38fd1498Szrj
4575*38fd1498Szrj /* Function vect_pattern_recog
4576*38fd1498Szrj
4577*38fd1498Szrj Input:
4578*38fd1498Szrj LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4579*38fd1498Szrj computation idioms.
4580*38fd1498Szrj
4581*38fd1498Szrj Output - for each computation idiom that is detected we create a new stmt
4582*38fd1498Szrj that provides the same functionality and that can be vectorized. We
4583*38fd1498Szrj also record some information in the stmt_vec_info of the relevant
4584*38fd1498Szrj stmts, as explained below:
4585*38fd1498Szrj
4586*38fd1498Szrj At the entry to this function we have the following stmts, with the
4587*38fd1498Szrj following initial value in the STMT_VINFO fields:
4588*38fd1498Szrj following initial values in the STMT_VINFO fields:
4589*38fd1498Szrj stmt in_pattern_p related_stmt vec_stmt
4590*38fd1498Szrj S1: a_i = .... - - -
4591*38fd1498Szrj S2: a_2 = ..use(a_i).. - - -
4592*38fd1498Szrj S3: a_1 = ..use(a_2).. - - -
4593*38fd1498Szrj S4: a_0 = ..use(a_1).. - - -
4594*38fd1498Szrj S5: ... = ..use(a_0).. - - -
4595*38fd1498Szrj
4596*38fd1498Szrj Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4597*38fd1498Szrj represented by a single stmt. We then:
4598*38fd1498Szrj - create a new stmt S6 equivalent to the pattern (the stmt is not
4599*38fd1498Szrj inserted into the code)
4600*38fd1498Szrj - fill in the STMT_VINFO fields as follows:
4601*38fd1498Szrj
4602*38fd1498Szrj in_pattern_p related_stmt vec_stmt
4603*38fd1498Szrj S1: a_i = .... - - -
4604*38fd1498Szrj S2: a_2 = ..use(a_i).. - - -
4605*38fd1498Szrj S3: a_1 = ..use(a_2).. - - -
4606*38fd1498Szrj S4: a_0 = ..use(a_1).. true S6 -
4607*38fd1498Szrj '---> S6: a_new = .... - S4 -
4608*38fd1498Szrj S5: ... = ..use(a_0).. - - -
4609*38fd1498Szrj
4610*38fd1498Szrj (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4611*38fd1498Szrj to each other through the RELATED_STMT field).
4612*38fd1498Szrj
4613*38fd1498Szrj S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4614*38fd1498Szrj of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4615*38fd1498Szrj remain irrelevant unless used by stmts other than S4.
4616*38fd1498Szrj
4617*38fd1498Szrj If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4618*38fd1498Szrj (because they are marked as irrelevant). It will vectorize S6, and record
4619*38fd1498Szrj a pointer to the new vector stmt VS6 from S6 (as usual).
4620*38fd1498Szrj S4 will be skipped, and S5 will be vectorized as usual:
4621*38fd1498Szrj
4622*38fd1498Szrj in_pattern_p related_stmt vec_stmt
4623*38fd1498Szrj S1: a_i = .... - - -
4624*38fd1498Szrj S2: a_2 = ..use(a_i).. - - -
4625*38fd1498Szrj S3: a_1 = ..use(a_2).. - - -
4626*38fd1498Szrj > VS6: va_new = .... - - -
4627*38fd1498Szrj S4: a_0 = ..use(a_1).. true S6 VS6
4628*38fd1498Szrj '---> S6: a_new = .... - S4 VS6
4629*38fd1498Szrj > VS5: ... = ..vuse(va_new).. - - -
4630*38fd1498Szrj S5: ... = ..use(a_0).. - - -
4631*38fd1498Szrj
4632*38fd1498Szrj DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4633*38fd1498Szrj elsewhere), and we'll end up with:
4634*38fd1498Szrj
4635*38fd1498Szrj VS6: va_new = ....
4636*38fd1498Szrj VS5: ... = ..vuse(va_new)..
4637*38fd1498Szrj
4638*38fd1498Szrj In the case of more than one pattern statement, e.g., widen-mult with
4639*38fd1498Szrj intermediate type:
4640*38fd1498Szrj
4641*38fd1498Szrj S1 a_t = ;
4642*38fd1498Szrj S2 a_T = (TYPE) a_t;
4643*38fd1498Szrj '--> S3: a_it = (interm_type) a_t;
4644*38fd1498Szrj S4 prod_T = a_T * CONST;
4645*38fd1498Szrj '--> S5: prod_T' = a_it w* CONST;
4646*38fd1498Szrj
4647*38fd1498Szrj there may be other users of a_T outside the pattern. In that case S2 will
4648*38fd1498Szrj be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4649*38fd1498Szrj and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4650*38fd1498Szrj be recorded in S3. */
4651*38fd1498Szrj
4652*38fd1498Szrj void
4653*38fd1498Szrj vect_pattern_recog (vec_info *vinfo)
4654*38fd1498Szrj {
4655*38fd1498Szrj struct loop *loop;
4656*38fd1498Szrj basic_block *bbs;
4657*38fd1498Szrj unsigned int nbbs;
4658*38fd1498Szrj gimple_stmt_iterator si;
4659*38fd1498Szrj unsigned int i, j;
4660*38fd1498Szrj auto_vec<gimple *, 1> stmts_to_replace;
4661*38fd1498Szrj gimple *stmt;
4662*38fd1498Szrj
4663*38fd1498Szrj if (dump_enabled_p ())
4664*38fd1498Szrj dump_printf_loc (MSG_NOTE, vect_location,
4665*38fd1498Szrj "=== vect_pattern_recog ===\n");
4666*38fd1498Szrj
4667*38fd1498Szrj if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4668*38fd1498Szrj {
4669*38fd1498Szrj loop = LOOP_VINFO_LOOP (loop_vinfo);
4670*38fd1498Szrj bbs = LOOP_VINFO_BBS (loop_vinfo);
4671*38fd1498Szrj nbbs = loop->num_nodes;
4672*38fd1498Szrj
4673*38fd1498Szrj /* Scan through the loop stmts, applying the pattern recognition
4674*38fd1498Szrj functions starting at each stmt visited: */
4675*38fd1498Szrj for (i = 0; i < nbbs; i++)
4676*38fd1498Szrj {
4677*38fd1498Szrj basic_block bb = bbs[i];
4678*38fd1498Szrj for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4679*38fd1498Szrj {
4680*38fd1498Szrj /* Scan over all generic vect_recog_xxx_pattern functions. */
4681*38fd1498Szrj for (j = 0; j < NUM_PATTERNS; j++)
4682*38fd1498Szrj if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4683*38fd1498Szrj &stmts_to_replace))
4684*38fd1498Szrj break;
4685*38fd1498Szrj }
4686*38fd1498Szrj }
4687*38fd1498Szrj }
4688*38fd1498Szrj else
4689*38fd1498Szrj {
4690*38fd1498Szrj bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4691*38fd1498Szrj for (si = bb_vinfo->region_begin;
4692*38fd1498Szrj gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4693*38fd1498Szrj {
4694*38fd1498Szrj if ((stmt = gsi_stmt (si))
4695*38fd1498Szrj && vinfo_for_stmt (stmt)
4696*38fd1498Szrj && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4697*38fd1498Szrj continue;
4698*38fd1498Szrj
4699*38fd1498Szrj /* Scan over all generic vect_recog_xxx_pattern functions. */
4700*38fd1498Szrj for (j = 0; j < NUM_PATTERNS; j++)
4701*38fd1498Szrj if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4702*38fd1498Szrj &stmts_to_replace))
4703*38fd1498Szrj break;
4704*38fd1498Szrj }
4705*38fd1498Szrj }
4706*38fd1498Szrj }