xref: /dflybsd-src/contrib/gcc-8.0/gcc/tree-vect-patterns.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"

/* Pattern recognition functions  */
static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
					    tree *);
static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
					     tree *);
static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
					   tree *);
static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
				      tree *);
static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
                                                 tree *);
static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
	                                tree *, tree *);
static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
						      tree *, tree *);
static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
					 tree *, tree *);

static gimple *vect_recog_mult_pattern (vec<gimple *> *,
				       tree *, tree *);

static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
						  tree *, tree *);
static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
						  tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt
   is taken, which means the more complex patterns usually need to precede
   the less complex ones (widen_sum only after dot_prod or sad, for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
      { vect_recog_widen_mult_pattern, "widen_mult" },
      { vect_recog_dot_prod_pattern, "dot_prod" },
      { vect_recog_sad_pattern, "sad" },
      { vect_recog_widen_sum_pattern, "widen_sum" },
      { vect_recog_pow_pattern, "pow" },
      { vect_recog_widen_shift_pattern, "widen_shift" },
      { vect_recog_over_widening_pattern, "over_widening" },
      { vect_recog_rotate_pattern, "rotate" },
      { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
      { vect_recog_divmod_pattern, "divmod" },
      { vect_recog_mult_pattern, "mult" },
      { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
      { vect_recog_bool_pattern, "bool" },
      /* This must come before mask conversion, and includes the parts
	 of mask conversion that are needed for gather and scatter
	 internal functions.  */
      { vect_recog_gather_scatter_pattern, "gather_scatter" },
      { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

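/* Append STMT to the pattern definition sequence of STMT_INFO, i.e. the
   sequence of auxiliary statements that a recognized pattern needs in
   addition to the main pattern statement.  */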
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}

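/* Start a new pattern definition sequence for STMT_INFO containing just
   STMT, discarding any previously recorded sequence.  */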
static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}

/* Check whether STMT2 is in the same loop or basic block as STMT1.
   Which of the two applies depends on whether we're currently doing
   loop-based or basic-block-based vectorization, as determined by
   the vinfo_for_stmt for STMT1 (which must be defined).

   If this returns true, vinfo_for_stmt for STMT2 is guaranteed
   to be defined as well.  */

static bool
vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
  return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
}

/* If the LHS of DEF_STMT has a single use, and that statement is
   in the same loop or basic block, return it.  */

static gimple *
vect_single_imm_use (gimple *def_stmt)
{
  tree lhs = gimple_assign_lhs (def_stmt);
  use_operand_p use_p;
  gimple *use_stmt;

  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return NULL;

  if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
    return NULL;

  return use_stmt;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
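
/* For illustration (not part of the original source): given the statements

     short x_t;
     int   x_T = (int) x_t;       <-- DEF_STMT
     ...       = x_T + sum_0;     <-- USE_STMT

   and assuming 16-bit short and 32-bit int, calling this function with
   NAME = x_T would set *ORIG_TYPE to short, *DEF_STMT to the conversion
   statement, and *PROMOTION to true, since int is at least twice as wide
   as short.  */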

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  gimple *dummy_gimple;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (dt == vect_internal_def)
    {
      stmt_vec_info def_vinfo = vinfo_for_stmt (*def_stmt);
      if (STMT_VINFO_IN_PATTERN_P (def_vinfo))
	return false;
    }

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
    return false;

  return true;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_VINFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
	  : GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
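
/* For illustration (not part of the original source): a scalar loop this
   pattern is meant to catch, assuming 16-bit short and 32-bit int, with
   hypothetical arrays a and b:

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) a[i] * (int) b[i];

   which the vectorizer can then implement with a DOT_PROD_EXPR.  */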

static gimple *
vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *stmt, *last_stmt = (*stmts)[0];
  tree oprnd0, oprnd1;
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  gimple *pattern_stmt;
  tree prod_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  tree var;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DPROD = DX * DY;
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (!vect_reassociating_reduction_p (stmt_vinfo))
	return NULL;
      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
	  || !types_compatible_p (TREE_TYPE (oprnd1), type))
        return NULL;
      stmt = last_stmt;

      if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
			     &promotion)
	  && promotion)
        {
          stmt = def_stmt;
          oprnd0 = gimple_assign_rhs1 (stmt);
        }
      else
        half_type = type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     It remains to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (TREE_CODE (oprnd0) != SSA_NAME)
    return NULL;

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It cannot be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))
    return NULL;
  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
    return NULL;
  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
        return NULL;
      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);
      STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
	  = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
    }
  else
    {
      tree half_type0, half_type1;
      gimple *def_stmt;
      tree oprnd0, oprnd1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
        return NULL;
      if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
			      &promotion)
	  || !promotion)
        return NULL;
      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))
        return NULL;
      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
	return NULL;
    }

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;
  *type_out = type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      oprnd00, oprnd01, oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_dot_prod_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        SAD_EXPR <x_t, y_t, sum_0>
  */
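
/* For illustration (not part of the original source): a scalar loop this
   pattern is meant to catch, assuming 8-bit unsigned char and 32-bit int,
   with hypothetical arrays a and b:

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs ((int) a[i] - (int) b[i]);

   which the vectorizer can then implement with a SAD_EXPR.  */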

static gimple *
vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
			     tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree sad_oprnd0, sad_oprnd1;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop;
  bool promotion;

  if (!loop_info)
    return NULL;

  loop = LOOP_VINFO_LOOP (loop_info);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  if (loop && nested_in_vect_loop_p (loop, last_stmt))
    return NULL;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE1) X;
          DY = (TYPE1) Y;
          DDIFF = DX - DY;
          DAD = ABS_EXPR <DDIFF>;
          DAD = (TYPE2) DAD;
          sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
    return NULL;

  tree plus_oprnd0, plus_oprnd1;

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    {
      /* Has been detected as widening-summation?  */

      gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      sum_type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
        return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (stmt);
      plus_oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (plus_oprnd0);
    }
  else
    {
      gimple *def_stmt;

      if (!vect_reassociating_reduction_p (stmt_vinfo))
	return NULL;
      plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
      plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
	  || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
        return NULL;

      /* The type conversion could be promotion, demotion,
         or just signed -> unsigned.  */
      if (type_conversion_p (plus_oprnd0, last_stmt, false,
                             &half_type, &def_stmt, &promotion))
        plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
      else
        half_type = sum_type;
    }

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a loop-header
     phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
     Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (TREE_CODE (plus_oprnd0) != SSA_NAME)
    return NULL;

  tree abs_type = half_type;
  gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);

  /* It cannot be the sad pattern if the abs_stmt is outside the loop.  */
  if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (abs_stmt))
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
  gcc_assert (abs_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
    return NULL;
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */

  if (TREE_CODE (abs_oprnd) != SSA_NAME)
    return NULL;

  gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);

  /* It cannot be the sad pattern if the diff_stmt is outside the loop.  */
  if (!gimple_bb (diff_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a phi
     inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (diff_stmt))
    return NULL;

  stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
  gcc_assert (diff_stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
    return NULL;
  if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
    return NULL;

  tree half_type0, half_type1;
  gimple *def_stmt;

  tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
  tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);

  if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
      || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
    return NULL;
  if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
                          &half_type0, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd0 = gimple_assign_rhs1 (def_stmt);

  if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
                          &half_type1, &def_stmt, &promotion)
      || !promotion)
    return NULL;
  sad_oprnd1 = gimple_assign_rhs1 (def_stmt);

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;
  if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
      || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
    return NULL;

  *type_in = TREE_TYPE (sad_oprnd0);
  *type_out = sum_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
					      sad_oprnd1, plus_oprnd1);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "vect_recog_sad_pattern: detected: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}


/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  Store such operation in *WSTMT.  */
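
/* For illustration (not part of the original source): with 8-bit char and
   32-bit int, a hypothetical statement such as

     char a_t;
     int  prod = (int) a_t * 300;

   has a constant that does not fit char but does fit a 16-bit intermediate
   type, so the conversion can be replaced by a_it = (short) a_t and the
   multiplication done as a widening operation from 16 to 32 bits.  */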

static bool
vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
		               tree const_oprnd, tree *oprnd,
			       gimple **wstmt, tree type,
			       tree *half_type, gimple *def_stmt)
{
  tree new_type, new_oprnd;

  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
        || (code == LSHIFT_EXPR
            && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	    	!= 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t;  */
  *oprnd = gimple_assign_rhs1 (def_stmt);
  new_oprnd = make_ssa_name (new_type);
  *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
  *oprnd = new_oprnd;

  *half_type = new_type;
  return true;
}


/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     type1 a_t;
     type2 b_t;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type1' and 'type2'.

   Also detect unsigned cases:

     unsigned type1 a_t;
     unsigned type2 b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;

     S1  a_t = ;
     S2  b_t = ;
     S3  a_T = (TYPE) a_t;
     S4  b_T = (TYPE) b_t;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     type a_t;
     TYPE a_T, prod_T;

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     type a_t;
     interm_type a_it;
     TYPE a_T, prod_T,  prod_T';

     S1  a_t = ;
     S3  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   Input/Output:

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>
   If the result of WIDEN_MULT needs to be converted to a larger type, the
   returned stmt will be this type conversion stmt.
*/
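
/* For illustration (not part of the original source): a statement sequence
   this pattern is meant to catch, assuming 16-bit short and 32-bit int,
   with hypothetical variables:

     short a_t, b_t;
     int prod_T = (int) a_t * (int) b_t;

   which the vectorizer can then implement with a WIDEN_MULT_EXPR.  */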

static gimple *
vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
                               tree *type_in, tree *type_out)
{
  gimple *last_stmt = stmts->pop ();
  gimple *def_stmt0, *def_stmt1;
  tree oprnd0, oprnd1;
  tree type, half_type0, half_type1;
  gimple *new_stmt = NULL, *pattern_stmt = NULL;
  tree vectype, vecitype;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  bool op1_ok;
  bool promotion;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))
    return NULL;

  /* Check argument 0.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                         &promotion)
      || !promotion)
     return NULL;
  /* Check argument 1.  */
  op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
                              &def_stmt1, &promotion);

  if (op1_ok && promotion)
    {
      oprnd0 = gimple_assign_rhs1 (def_stmt0);
      oprnd1 = gimple_assign_rhs1 (def_stmt1);
    }
  else
    {
      if (TREE_CODE (oprnd1) == INTEGER_CST
          && TREE_CODE (half_type0) == INTEGER_TYPE
          && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
		                            &oprnd0, &new_stmt, type,
					    &half_type0, def_stmt0))
	{
	  half_type1 = half_type0;
	  oprnd1 = fold_convert (half_type1, oprnd1);
	}
      else
        return NULL;
    }

  /* If the two arguments have different sizes, convert the one with
     the smaller type into the larger type.  */
  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
    {
      /* If we already used up the single-stmt slot, give up.  */
      if (new_stmt)
	return NULL;

      tree* oprnd = NULL;
      gimple *def_stmt = NULL;

      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
	{
	  def_stmt = def_stmt0;
	  half_type0 = half_type1;
	  oprnd = &oprnd0;
	}
      else
	{
	  def_stmt = def_stmt1;
	  half_type1 = half_type0;
	  oprnd = &oprnd1;
	}

      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
      tree new_oprnd = make_ssa_name (half_type0);
      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
      *oprnd = new_oprnd;
    }

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
    {
      gimple *use_stmt;
      tree use_lhs;
      tree use_type;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
        return NULL;

      use_stmt = vect_single_imm_use (last_stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt)
	  || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
        return NULL;

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
        return NULL;

      type = use_type;
      last_stmt = use_stmt;
    }

  if (!types_compatible_p (half_type0, half_type1))
    return NULL;

  /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
     to get an intermediate result of type ITYPE.  In this case we need
     to build a statement to convert this intermediate result to type TYPE.  */
  tree itype = type;
  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
    itype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (half_type0)) * 2,
	       TYPE_UNSIGNED (type));

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_recog_widen_mult_pattern: detected:\n");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vecitype = get_vectype_for_scalar_type (itype);
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = get_vectype_for_scalar_type (type);

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (itype, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);

  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;

  /* If the original two operands have different sizes, we may need to convert
     the smaller one into the larger type.  If this is the case, at this point
     the new stmt is already built.  */
  if (new_stmt)
    {
      append_pattern_def_seq (stmt_vinfo, new_stmt);
      stmt_vec_info new_stmt_info
        = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (new_stmt, new_stmt_info);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }

  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
     the result of the widen-mult operation into type TYPE.  */
  if (itype != type)
    {
      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
      stmt_vec_info pattern_stmt_info
        = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					  NOP_EXPR,
					  gimple_assign_lhs (pattern_stmt));
    }

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}


/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x = x * x
   or
	x = sqrt (x)
*/
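
/* For illustration (not part of the original source): with a hypothetical
   double y, this pattern rewrites

     x = pow (y, 2.0);   as   x = y * y;
     x = pow (y, 0.5);   as   x = sqrt (y);

   the latter only when the target supports a vectorized square root.  */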

static gimple *
vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
			tree *type_out)
{
  gimple *last_stmt = (*stmts)[0];
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && !gimple_call_internal_p (last_stmt))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      new_pattern_def_seq (stmt_vinfo, g);
	      *type_in = TREE_TYPE (base);
	      *type_out = NULL_TREE;
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_in
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_in,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}


1179*38fd1498Szrj /* Function vect_recog_widen_sum_pattern
1180*38fd1498Szrj 
1181*38fd1498Szrj    Try to find the following pattern:
1182*38fd1498Szrj 
1183*38fd1498Szrj      type x_t;
1184*38fd1498Szrj      TYPE x_T, sum = init;
1185*38fd1498Szrj    loop:
1186*38fd1498Szrj      sum_0 = phi <init, sum_1>
1187*38fd1498Szrj      S1  x_t = *p;
1188*38fd1498Szrj      S2  x_T = (TYPE) x_t;
1189*38fd1498Szrj      S3  sum_1 = x_T + sum_0;
1190*38fd1498Szrj 
1191*38fd1498Szrj    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1192*38fd1498Szrj    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1193*38fd1498Szrj    a special case of a reduction computation.
1194*38fd1498Szrj 
1195*38fd1498Szrj    Input:
1196*38fd1498Szrj 
1197*38fd1498Szrj    * LAST_STMT: A stmt from which the pattern search begins. In the example,
1198*38fd1498Szrj    when this function is called with S3, the pattern {S2,S3} will be detected.
1199*38fd1498Szrj 
1200*38fd1498Szrj    Output:
1201*38fd1498Szrj 
1202*38fd1498Szrj    * TYPE_IN: The type of the input arguments to the pattern.
1203*38fd1498Szrj 
1204*38fd1498Szrj    * TYPE_OUT: The type of the output of this pattern.
1205*38fd1498Szrj 
1206*38fd1498Szrj    * Return value: A new stmt that will be used to replace the sequence of
1207*38fd1498Szrj    stmts that constitute the pattern. In this case it will be:
1208*38fd1498Szrj         WIDEN_SUM <x_t, sum_0>
1209*38fd1498Szrj 
1210*38fd1498Szrj    Note: The widening-sum idiom is a widening reduction pattern that is
1211*38fd1498Szrj 	 vectorized without preserving all the intermediate results. It
1212*38fd1498Szrj          produces only N/2 (widened) results (by summing up pairs of
1213*38fd1498Szrj 	 intermediate results) rather than all N results.  Therefore, we
1214*38fd1498Szrj 	 cannot allow this pattern when we want to get all the results and in
1215*38fd1498Szrj 	 the correct order (as is the case when this computation is in an
1216*38fd1498Szrj 	 inner-loop nested in an outer-loop that is being vectorized).  */
1217*38fd1498Szrj 
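/* A concrete source-level example (hypothetical, for illustration):

     short x[256];
     int sum = 0;
     for (int i = 0; i < 256; i++)
       sum += x[i];

   Here 'type' is short and 'TYPE' is int, so S2/S3 can be replaced by
   WIDEN_SUM <x[i], sum>.  */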
1218*38fd1498Szrj static gimple *
1219*38fd1498Szrj vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
1220*38fd1498Szrj 			      tree *type_out)
1221*38fd1498Szrj {
1222*38fd1498Szrj   gimple *stmt, *last_stmt = (*stmts)[0];
1223*38fd1498Szrj   tree oprnd0, oprnd1;
1224*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1225*38fd1498Szrj   tree type, half_type;
1226*38fd1498Szrj   gimple *pattern_stmt;
1227*38fd1498Szrj   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228*38fd1498Szrj   struct loop *loop;
1229*38fd1498Szrj   tree var;
1230*38fd1498Szrj   bool promotion;
1231*38fd1498Szrj 
1232*38fd1498Szrj   if (!loop_info)
1233*38fd1498Szrj     return NULL;
1234*38fd1498Szrj 
1235*38fd1498Szrj   loop = LOOP_VINFO_LOOP (loop_info);
1236*38fd1498Szrj 
1237*38fd1498Szrj   /* We don't allow changing the order of the computation in the inner-loop
1238*38fd1498Szrj      when doing outer-loop vectorization.  */
1239*38fd1498Szrj   if (loop && nested_in_vect_loop_p (loop, last_stmt))
1240*38fd1498Szrj     return NULL;
1241*38fd1498Szrj 
1242*38fd1498Szrj   if (!is_gimple_assign (last_stmt))
1243*38fd1498Szrj     return NULL;
1244*38fd1498Szrj 
1245*38fd1498Szrj   type = gimple_expr_type (last_stmt);
1246*38fd1498Szrj 
1247*38fd1498Szrj   /* Look for the following pattern
1248*38fd1498Szrj           DX = (TYPE) X;
1249*38fd1498Szrj           sum_1 = DX + sum_0;
1250*38fd1498Szrj      In which DX is at least double the size of X, and sum_1 has been
1251*38fd1498Szrj      recognized as a reduction variable.
1252*38fd1498Szrj    */
1253*38fd1498Szrj 
1254*38fd1498Szrj   /* Starting from LAST_STMT, follow the defs of its uses in search
1255*38fd1498Szrj      of the above pattern.  */
1256*38fd1498Szrj 
1257*38fd1498Szrj   if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
1258*38fd1498Szrj     return NULL;
1259*38fd1498Szrj 
1260*38fd1498Szrj   if (!vect_reassociating_reduction_p (stmt_vinfo))
1261*38fd1498Szrj     return NULL;
1262*38fd1498Szrj 
1263*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (last_stmt);
1264*38fd1498Szrj   oprnd1 = gimple_assign_rhs2 (last_stmt);
1265*38fd1498Szrj   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1266*38fd1498Szrj       || !types_compatible_p (TREE_TYPE (oprnd1), type))
1267*38fd1498Szrj     return NULL;
1268*38fd1498Szrj 
1269*38fd1498Szrj   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1270*38fd1498Szrj      we know that oprnd1 is the reduction variable (defined by a loop-header
1271*38fd1498Szrj      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1272*38fd1498Szrj      It remains to check that oprnd0 is defined by a cast from type 'type'
1273*38fd1498Szrj      to type 'TYPE'.  */
1274*38fd1498Szrj 
1275*38fd1498Szrj   if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1276*38fd1498Szrj                           &promotion)
1277*38fd1498Szrj       || !promotion)
1278*38fd1498Szrj      return NULL;
1279*38fd1498Szrj 
1280*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (stmt);
1281*38fd1498Szrj   *type_in = half_type;
1282*38fd1498Szrj   *type_out = type;
1283*38fd1498Szrj 
1284*38fd1498Szrj   /* Pattern detected. Create a stmt to be used to replace the pattern: */
1285*38fd1498Szrj   var = vect_recog_temp_ssa_var (type, NULL);
1286*38fd1498Szrj   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
1287*38fd1498Szrj 
1288*38fd1498Szrj   if (dump_enabled_p ())
1289*38fd1498Szrj     {
1290*38fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
1291*38fd1498Szrj                        "vect_recog_widen_sum_pattern: detected: ");
1292*38fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1293*38fd1498Szrj     }
1294*38fd1498Szrj 
1295*38fd1498Szrj   return pattern_stmt;
1296*38fd1498Szrj }
1297*38fd1498Szrj 
1298*38fd1498Szrj 
1299*38fd1498Szrj /* Return TRUE if the operation in STMT can be performed on a smaller type.
1300*38fd1498Szrj 
1301*38fd1498Szrj    Input:
1302*38fd1498Szrj    STMT - a statement to check.
1303*38fd1498Szrj    DEF - we support operations with two operands, one of which is constant.
1304*38fd1498Szrj          The other operand can be defined by a demotion operation, or by a
1305*38fd1498Szrj          previous statement in a sequence of over-promoted operations.  In the
1306*38fd1498Szrj          latter case DEF is used to replace that operand.  (It is defined by a
1307*38fd1498Szrj          pattern statement we created for the previous statement in the
1308*38fd1498Szrj          sequence).
1309*38fd1498Szrj 
1310*38fd1498Szrj    Input/output:
1311*38fd1498Szrj    NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
1312*38fd1498Szrj          NULL, it's the type of DEF.
1313*38fd1498Szrj    STMTS - additional pattern statements.  If a pattern statement (type
1314*38fd1498Szrj          conversion) is created in this function, its original statement is
1315*38fd1498Szrj          added to STMTS.
1316*38fd1498Szrj 
1317*38fd1498Szrj    Output:
1318*38fd1498Szrj    OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1319*38fd1498Szrj          operands to use in the new pattern statement for STMT (will be created
1320*38fd1498Szrj          in vect_recog_over_widening_pattern ()).
1321*38fd1498Szrj    NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1322*38fd1498Szrj          statements for STMT: the first one is a type promotion and the second
1323*38fd1498Szrj          one is the operation itself.  We return the type promotion statement
1324*38fd1498Szrj 	 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1325*38fd1498Szrj          the second pattern statement.  */
1326*38fd1498Szrj 
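/* For example (an illustrative sketch): if an unsigned char value c has
   been over-promoted as ((int) c) << 5, HALF_TYPE (unsigned char,
   precision 8) cannot hold the shift result, but the intermediate type
   unsigned short can, because 8 + 5 <= 16; the shift is then performed
   as ((unsigned short) c) << 5.  */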
1327*38fd1498Szrj static bool
1328*38fd1498Szrj vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1329*38fd1498Szrj 				  tree *op0, tree *op1, gimple **new_def_stmt,
1330*38fd1498Szrj 				  vec<gimple *> *stmts)
1331*38fd1498Szrj {
1332*38fd1498Szrj   enum tree_code code;
1333*38fd1498Szrj   tree const_oprnd, oprnd;
1334*38fd1498Szrj   tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1335*38fd1498Szrj   gimple *def_stmt, *new_stmt;
1336*38fd1498Szrj   bool first = false;
1337*38fd1498Szrj   bool promotion;
1338*38fd1498Szrj 
1339*38fd1498Szrj   *op0 = NULL_TREE;
1340*38fd1498Szrj   *op1 = NULL_TREE;
1341*38fd1498Szrj   *new_def_stmt = NULL;
1342*38fd1498Szrj 
1343*38fd1498Szrj   if (!is_gimple_assign (stmt))
1344*38fd1498Szrj     return false;
1345*38fd1498Szrj 
1346*38fd1498Szrj   code = gimple_assign_rhs_code (stmt);
1347*38fd1498Szrj   if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1348*38fd1498Szrj       && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1349*38fd1498Szrj     return false;
1350*38fd1498Szrj 
1351*38fd1498Szrj   oprnd = gimple_assign_rhs1 (stmt);
1352*38fd1498Szrj   const_oprnd = gimple_assign_rhs2 (stmt);
1353*38fd1498Szrj   type = gimple_expr_type (stmt);
1354*38fd1498Szrj 
1355*38fd1498Szrj   if (TREE_CODE (oprnd) != SSA_NAME
1356*38fd1498Szrj       || TREE_CODE (const_oprnd) != INTEGER_CST)
1357*38fd1498Szrj     return false;
1358*38fd1498Szrj 
1359*38fd1498Szrj   /* If oprnd has other uses besides that in stmt we cannot mark it
1360*38fd1498Szrj      as being part of a pattern only.  */
1361*38fd1498Szrj   if (!has_single_use (oprnd))
1362*38fd1498Szrj     return false;
1363*38fd1498Szrj 
1364*38fd1498Szrj   /* If we are in the middle of a sequence, we use DEF from a previous
1365*38fd1498Szrj      statement.  Otherwise, OPRND has to be a result of type promotion.  */
1366*38fd1498Szrj   if (*new_type)
1367*38fd1498Szrj     {
1368*38fd1498Szrj       half_type = *new_type;
1369*38fd1498Szrj       oprnd = def;
1370*38fd1498Szrj     }
1371*38fd1498Szrj   else
1372*38fd1498Szrj     {
1373*38fd1498Szrj       first = true;
1374*38fd1498Szrj       if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
1375*38fd1498Szrj 			      &promotion)
1376*38fd1498Szrj 	  || !promotion
1377*38fd1498Szrj 	  || !vect_same_loop_or_bb_p (stmt, def_stmt))
1378*38fd1498Szrj         return false;
1379*38fd1498Szrj     }
1380*38fd1498Szrj 
1381*38fd1498Szrj   /* Can we perform the operation on a smaller type?  */
1382*38fd1498Szrj   switch (code)
1383*38fd1498Szrj     {
1384*38fd1498Szrj       case BIT_IOR_EXPR:
1385*38fd1498Szrj       case BIT_XOR_EXPR:
1386*38fd1498Szrj       case BIT_AND_EXPR:
1387*38fd1498Szrj         if (!int_fits_type_p (const_oprnd, half_type))
1388*38fd1498Szrj           {
1389*38fd1498Szrj             /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
1390*38fd1498Szrj             if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1391*38fd1498Szrj               return false;
1392*38fd1498Szrj 
1393*38fd1498Szrj             interm_type = build_nonstandard_integer_type (
1394*38fd1498Szrj                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1395*38fd1498Szrj             if (!int_fits_type_p (const_oprnd, interm_type))
1396*38fd1498Szrj               return false;
1397*38fd1498Szrj           }
1398*38fd1498Szrj 
1399*38fd1498Szrj         break;
1400*38fd1498Szrj 
1401*38fd1498Szrj       case LSHIFT_EXPR:
1402*38fd1498Szrj         /* Try an intermediate type - HALF_TYPE is certainly not enough.  */
1403*38fd1498Szrj         if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1404*38fd1498Szrj           return false;
1405*38fd1498Szrj 
1406*38fd1498Szrj         /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1407*38fd1498Szrj           (e.g., if the original value was char, the shift amount is at most 8
1408*38fd1498Szrj            if we want to use short).  */
1409*38fd1498Szrj         if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1410*38fd1498Szrj           return false;
1411*38fd1498Szrj 
1412*38fd1498Szrj         interm_type = build_nonstandard_integer_type (
1413*38fd1498Szrj                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1414*38fd1498Szrj 
1415*38fd1498Szrj         if (!vect_supportable_shift (code, interm_type))
1416*38fd1498Szrj           return false;
1417*38fd1498Szrj 
1418*38fd1498Szrj         break;
1419*38fd1498Szrj 
1420*38fd1498Szrj       case RSHIFT_EXPR:
1421*38fd1498Szrj         if (vect_supportable_shift (code, half_type))
1422*38fd1498Szrj           break;
1423*38fd1498Szrj 
1424*38fd1498Szrj         /* Try an intermediate type - HALF_TYPE is not supported.  */
1425*38fd1498Szrj         if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1426*38fd1498Szrj           return false;
1427*38fd1498Szrj 
1428*38fd1498Szrj         interm_type = build_nonstandard_integer_type (
1429*38fd1498Szrj                         TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1430*38fd1498Szrj 
1431*38fd1498Szrj         if (!vect_supportable_shift (code, interm_type))
1432*38fd1498Szrj           return false;
1433*38fd1498Szrj 
1434*38fd1498Szrj         break;
1435*38fd1498Szrj 
1436*38fd1498Szrj       default:
1437*38fd1498Szrj         gcc_unreachable ();
1438*38fd1498Szrj     }
1439*38fd1498Szrj 
1440*38fd1498Szrj   /* There are four possible cases:
1441*38fd1498Szrj      1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1442*38fd1498Szrj         the first statement in the sequence)
1443*38fd1498Szrj         a. The original, HALF_TYPE, is not enough - we replace the promotion
1444*38fd1498Szrj            from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1445*38fd1498Szrj         b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1446*38fd1498Szrj            promotion.
1447*38fd1498Szrj      2. OPRND is defined by a pattern statement we created.
1448*38fd1498Szrj         a. Its type is not sufficient for the operation, we create a new stmt:
1449*38fd1498Szrj            a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
1450*38fd1498Szrj            this statement in NEW_DEF_STMT, and it is later put in
1451*38fd1498Szrj 	   STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1452*38fd1498Szrj         b. OPRND is good to use in the new statement.  */
1453*38fd1498Szrj   if (first)
1454*38fd1498Szrj     {
1455*38fd1498Szrj       if (interm_type)
1456*38fd1498Szrj         {
1457*38fd1498Szrj           /* Replace the original type conversion HALF_TYPE->TYPE with
1458*38fd1498Szrj              HALF_TYPE->INTERM_TYPE.  */
1459*38fd1498Szrj           if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1460*38fd1498Szrj             {
1461*38fd1498Szrj               new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1462*38fd1498Szrj               /* Check if the already created pattern stmt is what we need.  */
1463*38fd1498Szrj               if (!is_gimple_assign (new_stmt)
1464*38fd1498Szrj                   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1465*38fd1498Szrj                   || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1466*38fd1498Szrj                 return false;
1467*38fd1498Szrj 
1468*38fd1498Szrj 	      stmts->safe_push (def_stmt);
1469*38fd1498Szrj               oprnd = gimple_assign_lhs (new_stmt);
1470*38fd1498Szrj             }
1471*38fd1498Szrj           else
1472*38fd1498Szrj             {
1473*38fd1498Szrj               /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
1474*38fd1498Szrj               oprnd = gimple_assign_rhs1 (def_stmt);
1475*38fd1498Szrj 	      new_oprnd = make_ssa_name (interm_type);
1476*38fd1498Szrj 	      new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1477*38fd1498Szrj               STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1478*38fd1498Szrj               stmts->safe_push (def_stmt);
1479*38fd1498Szrj               oprnd = new_oprnd;
1480*38fd1498Szrj             }
1481*38fd1498Szrj         }
1482*38fd1498Szrj       else
1483*38fd1498Szrj         {
1484*38fd1498Szrj           /* Retrieve the operand before the type promotion.  */
1485*38fd1498Szrj           oprnd = gimple_assign_rhs1 (def_stmt);
1486*38fd1498Szrj         }
1487*38fd1498Szrj     }
1488*38fd1498Szrj   else
1489*38fd1498Szrj     {
1490*38fd1498Szrj       if (interm_type)
1491*38fd1498Szrj         {
1492*38fd1498Szrj           /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
1493*38fd1498Szrj 	  new_oprnd = make_ssa_name (interm_type);
1494*38fd1498Szrj 	  new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1495*38fd1498Szrj           oprnd = new_oprnd;
1496*38fd1498Szrj           *new_def_stmt = new_stmt;
1497*38fd1498Szrj         }
1498*38fd1498Szrj 
1499*38fd1498Szrj       /* Otherwise, OPRND is already set.  */
1500*38fd1498Szrj     }
1501*38fd1498Szrj 
1502*38fd1498Szrj   if (interm_type)
1503*38fd1498Szrj     *new_type = interm_type;
1504*38fd1498Szrj   else
1505*38fd1498Szrj     *new_type = half_type;
1506*38fd1498Szrj 
1507*38fd1498Szrj   *op0 = oprnd;
1508*38fd1498Szrj   *op1 = fold_convert (*new_type, const_oprnd);
1509*38fd1498Szrj 
1510*38fd1498Szrj   return true;
1511*38fd1498Szrj }
1512*38fd1498Szrj 
1513*38fd1498Szrj 
1514*38fd1498Szrj /* Try to find a statement or a sequence of statements that can be performed
1515*38fd1498Szrj    on a smaller type:
1516*38fd1498Szrj 
1517*38fd1498Szrj      type x_t;
1518*38fd1498Szrj      TYPE x_T, res0_T, res1_T;
1519*38fd1498Szrj    loop:
1520*38fd1498Szrj      S1  x_t = *p;
1521*38fd1498Szrj      S2  x_T = (TYPE) x_t;
1522*38fd1498Szrj      S3  res0_T = op (x_T, C0);
1523*38fd1498Szrj      S4  res1_T = op (res0_T, C1);
1524*38fd1498Szrj      S5  ... = () res1_T;  - type demotion
1525*38fd1498Szrj 
1526*38fd1498Szrj    where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1527*38fd1498Szrj    constants.
1528*38fd1498Szrj    Check if S3 and S4 can be done on a smaller type than 'TYPE'; it can either
1529*38fd1498Szrj    be 'type' or some intermediate type.  For now, we expect S5 to be a type
1530*38fd1498Szrj    demotion operation.  We also check that S3 and S4 have only one use.  */
1531*38fd1498Szrj 
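/* An illustrative C sketch of such a sequence:

     unsigned char a[N], b[N];
     for (int i = 0; i < N; i++)
       b[i] = (a[i] << 2) | 3;

   The C front end computes the shift (S3) and the ior (S4) in int, but
   both fit an unsigned short (the ior even an unsigned char), and the
   store back to b[i] provides the demotion S5.  */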
1532*38fd1498Szrj static gimple *
1533*38fd1498Szrj vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1534*38fd1498Szrj                                   tree *type_in, tree *type_out)
1535*38fd1498Szrj {
1536*38fd1498Szrj   gimple *stmt = stmts->pop ();
1537*38fd1498Szrj   gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1538*38fd1498Szrj 	 *use_stmt = NULL;
1539*38fd1498Szrj   tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
1540*38fd1498Szrj   tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1541*38fd1498Szrj   bool first;
1542*38fd1498Szrj   tree type = NULL;
1543*38fd1498Szrj 
1544*38fd1498Szrj   first = true;
1545*38fd1498Szrj   while (1)
1546*38fd1498Szrj     {
1547*38fd1498Szrj       if (!vinfo_for_stmt (stmt)
1548*38fd1498Szrj           || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1549*38fd1498Szrj         return NULL;
1550*38fd1498Szrj 
1551*38fd1498Szrj       new_def_stmt = NULL;
1552*38fd1498Szrj       if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1553*38fd1498Szrj                                              &op0, &op1, &new_def_stmt,
1554*38fd1498Szrj                                              stmts))
1555*38fd1498Szrj         {
1556*38fd1498Szrj           if (first)
1557*38fd1498Szrj             return NULL;
1558*38fd1498Szrj           else
1559*38fd1498Szrj             break;
1560*38fd1498Szrj         }
1561*38fd1498Szrj 
1562*38fd1498Szrj       /* STMT can be performed on a smaller type.  Check its uses.  */
1563*38fd1498Szrj       use_stmt = vect_single_imm_use (stmt);
1564*38fd1498Szrj       if (!use_stmt || !is_gimple_assign (use_stmt))
1565*38fd1498Szrj         return NULL;
1566*38fd1498Szrj 
1567*38fd1498Szrj       /* Create pattern statement for STMT.  */
1568*38fd1498Szrj       vectype = get_vectype_for_scalar_type (new_type);
1569*38fd1498Szrj       if (!vectype)
1570*38fd1498Szrj         return NULL;
1571*38fd1498Szrj 
1572*38fd1498Szrj       /* We want to collect all the statements for which we create pattern
1573*38fd1498Szrj          statements, except for the case when the last statement in the
1574*38fd1498Szrj          sequence doesn't have a corresponding pattern statement.  In such
1575*38fd1498Szrj          a case we associate the last pattern statement with the last statement
1576*38fd1498Szrj          in the sequence.  Therefore, we only add the original statement to
1577*38fd1498Szrj          the list if we know that it is not the last.  */
1578*38fd1498Szrj       if (prev_stmt)
1579*38fd1498Szrj         stmts->safe_push (prev_stmt);
1580*38fd1498Szrj 
1581*38fd1498Szrj       var = vect_recog_temp_ssa_var (new_type, NULL);
1582*38fd1498Szrj       pattern_stmt
1583*38fd1498Szrj 	= gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1584*38fd1498Szrj       STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
1585*38fd1498Szrj       new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1586*38fd1498Szrj 
1587*38fd1498Szrj       if (dump_enabled_p ())
1588*38fd1498Szrj         {
1589*38fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
1590*38fd1498Szrj                            "created pattern stmt: ");
1591*38fd1498Szrj           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1592*38fd1498Szrj         }
1593*38fd1498Szrj 
1594*38fd1498Szrj       type = gimple_expr_type (stmt);
1595*38fd1498Szrj       prev_stmt = stmt;
1596*38fd1498Szrj       stmt = use_stmt;
1597*38fd1498Szrj 
1598*38fd1498Szrj       first = false;
1599*38fd1498Szrj     }
1600*38fd1498Szrj 
1601*38fd1498Szrj   /* We got a sequence.  We expect it to end with a type demotion operation.
1602*38fd1498Szrj      Otherwise, we quit (for now).  There are three possible cases: the
1603*38fd1498Szrj      conversion is to NEW_TYPE (we don't do anything), the conversion is to
1604*38fd1498Szrj      a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1605*38fd1498Szrj      NEW_TYPE differs (we create a new conversion statement).  */
1606*38fd1498Szrj   if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1607*38fd1498Szrj     {
1608*38fd1498Szrj       use_lhs = gimple_assign_lhs (use_stmt);
1609*38fd1498Szrj       use_type = TREE_TYPE (use_lhs);
1610*38fd1498Szrj       /* Support only type demotion or signedness change.  */
1611*38fd1498Szrj       if (!INTEGRAL_TYPE_P (use_type)
1612*38fd1498Szrj 	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1613*38fd1498Szrj         return NULL;
1614*38fd1498Szrj 
1615*38fd1498Szrj       /* Check that NEW_TYPE is not bigger than the conversion result.  */
1616*38fd1498Szrj       if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1617*38fd1498Szrj 	return NULL;
1618*38fd1498Szrj 
1619*38fd1498Szrj       if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1620*38fd1498Szrj           || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1621*38fd1498Szrj         {
1622*38fd1498Szrj           /* Create NEW_TYPE->USE_TYPE conversion.  */
1623*38fd1498Szrj 	  new_oprnd = make_ssa_name (use_type);
1624*38fd1498Szrj 	  pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1625*38fd1498Szrj           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1626*38fd1498Szrj 
1627*38fd1498Szrj           *type_in = get_vectype_for_scalar_type (new_type);
1628*38fd1498Szrj           *type_out = get_vectype_for_scalar_type (use_type);
1629*38fd1498Szrj 
1630*38fd1498Szrj           /* We created a pattern statement for the last statement in the
1631*38fd1498Szrj              sequence, so we don't need to associate it with the pattern
1632*38fd1498Szrj              statement created for PREV_STMT.  Therefore, we add PREV_STMT
1633*38fd1498Szrj              to the list in order to mark it later in vect_pattern_recog_1.  */
1634*38fd1498Szrj           if (prev_stmt)
1635*38fd1498Szrj             stmts->safe_push (prev_stmt);
1636*38fd1498Szrj         }
1637*38fd1498Szrj       else
1638*38fd1498Szrj         {
1639*38fd1498Szrj           if (prev_stmt)
1640*38fd1498Szrj 	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1641*38fd1498Szrj 	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1642*38fd1498Szrj 
1643*38fd1498Szrj           *type_in = vectype;
1644*38fd1498Szrj           *type_out = NULL_TREE;
1645*38fd1498Szrj         }
1646*38fd1498Szrj 
1647*38fd1498Szrj       stmts->safe_push (use_stmt);
1648*38fd1498Szrj     }
1649*38fd1498Szrj   else
1650*38fd1498Szrj     /* TODO: support general case, create a conversion to the correct type.  */
1651*38fd1498Szrj     return NULL;
1652*38fd1498Szrj 
1653*38fd1498Szrj   /* Pattern detected.  */
1654*38fd1498Szrj   if (dump_enabled_p ())
1655*38fd1498Szrj     {
1656*38fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
1657*38fd1498Szrj                        "vect_recog_over_widening_pattern: detected: ");
1658*38fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1659*38fd1498Szrj     }
1660*38fd1498Szrj 
1661*38fd1498Szrj   return pattern_stmt;
1662*38fd1498Szrj }
1663*38fd1498Szrj 
1664*38fd1498Szrj /* Detect widening shift pattern:
1665*38fd1498Szrj 
1666*38fd1498Szrj    type a_t;
1667*38fd1498Szrj    TYPE a_T, res_T;
1668*38fd1498Szrj 
1669*38fd1498Szrj    S1 a_t = ;
1670*38fd1498Szrj    S2 a_T = (TYPE) a_t;
1671*38fd1498Szrj    S3 res_T = a_T << CONST;
1672*38fd1498Szrj 
1673*38fd1498Szrj   where type 'TYPE' is at least double the size of type 'type'.
1674*38fd1498Szrj 
1675*38fd1498Szrj   Also detect cases where the shift result is immediately converted
1676*38fd1498Szrj   to another type 'result_type' that is no larger in size than 'TYPE'.
1677*38fd1498Szrj   In those cases we perform a widen-shift that directly results in
1678*38fd1498Szrj   'result_type', to avoid a possible over-widening situation:
1679*38fd1498Szrj 
1680*38fd1498Szrj   type a_t;
1681*38fd1498Szrj   TYPE a_T, res_T;
1682*38fd1498Szrj   result_type res_result;
1683*38fd1498Szrj 
1684*38fd1498Szrj   S1 a_t = ;
1685*38fd1498Szrj   S2 a_T = (TYPE) a_t;
1686*38fd1498Szrj   S3 res_T = a_T << CONST;
1687*38fd1498Szrj   S4 res_result = (result_type) res_T;
1688*38fd1498Szrj       '--> res_result' = a_t w<< CONST;
1689*38fd1498Szrj 
1690*38fd1498Szrj   We also detect the case when 'TYPE' is 4 times bigger than 'type'.  In that
1691*38fd1498Szrj   case we create an additional pattern stmt for S2 to create a variable of an
1692*38fd1498Szrj   intermediate type, and perform widen-shift on the intermediate type:
1693*38fd1498Szrj 
1694*38fd1498Szrj   type a_t;
1695*38fd1498Szrj   interm_type a_it;
1696*38fd1498Szrj   TYPE a_T, res_T, res_T';
1697*38fd1498Szrj 
1698*38fd1498Szrj   S1 a_t = ;
1699*38fd1498Szrj   S2 a_T = (TYPE) a_t;
1700*38fd1498Szrj       '--> a_it = (interm_type) a_t;
1701*38fd1498Szrj   S3 res_T = a_T << CONST;
1702*38fd1498Szrj       '--> res_T' = a_it <<* CONST;
1703*38fd1498Szrj 
1704*38fd1498Szrj   Input/Output:
1705*38fd1498Szrj 
1706*38fd1498Szrj   * STMTS: Contains a stmt from which the pattern search begins.
1707*38fd1498Szrj     In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1708*38fd1498Szrj     in STMTS.  When an intermediate type is used and a pattern statement is
1709*38fd1498Szrj     created for S2, we also put S2 here (before S3).
1710*38fd1498Szrj 
1711*38fd1498Szrj   Output:
1712*38fd1498Szrj 
1713*38fd1498Szrj   * TYPE_IN: The type of the input arguments to the pattern.
1714*38fd1498Szrj 
1715*38fd1498Szrj   * TYPE_OUT: The type of the output of this pattern.
1716*38fd1498Szrj 
1717*38fd1498Szrj   * Return value: A new stmt that will be used to replace the sequence of
1718*38fd1498Szrj     stmts that constitute the pattern.  In this case it will be:
1719*38fd1498Szrj     WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
1720*38fd1498Szrj 
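/* An illustrative source-level example (hypothetical):

     unsigned char a[N];
     unsigned short r[N];
     for (int i = 0; i < N; i++)
       r[i] = a[i] << 4;

   a[i] is promoted to int (S2), shifted (S3) and demoted by the store
   (S4); the whole sequence becomes r[i] = a[i] w<< 4 on targets that
   provide WIDEN_LSHIFT_EXPR.  */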
1721*38fd1498Szrj static gimple *
1722*38fd1498Szrj vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
1723*38fd1498Szrj 				tree *type_in, tree *type_out)
1724*38fd1498Szrj {
1725*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
1726*38fd1498Szrj   gimple *def_stmt0;
1727*38fd1498Szrj   tree oprnd0, oprnd1;
1728*38fd1498Szrj   tree type, half_type0;
1729*38fd1498Szrj   gimple *pattern_stmt;
1730*38fd1498Szrj   tree vectype, vectype_out = NULL_TREE;
1731*38fd1498Szrj   tree var;
1732*38fd1498Szrj   enum tree_code dummy_code;
1733*38fd1498Szrj   int dummy_int;
1734*38fd1498Szrj   vec<tree>  dummy_vec;
1735*38fd1498Szrj   gimple *use_stmt;
1736*38fd1498Szrj   bool promotion;
1737*38fd1498Szrj 
1738*38fd1498Szrj   if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1739*38fd1498Szrj     return NULL;
1740*38fd1498Szrj 
1741*38fd1498Szrj   if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1742*38fd1498Szrj     return NULL;
1743*38fd1498Szrj 
1744*38fd1498Szrj   if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1745*38fd1498Szrj     return NULL;
1746*38fd1498Szrj 
1747*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (last_stmt);
1748*38fd1498Szrj   oprnd1 = gimple_assign_rhs2 (last_stmt);
1749*38fd1498Szrj   if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1750*38fd1498Szrj     return NULL;
1751*38fd1498Szrj 
1752*38fd1498Szrj   /* Check operand 0: it has to be defined by a type promotion.  */
1753*38fd1498Szrj   if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1754*38fd1498Szrj 			  &promotion)
1755*38fd1498Szrj       || !promotion)
1756*38fd1498Szrj      return NULL;
1757*38fd1498Szrj 
1758*38fd1498Szrj   /* Check operand 1: it has to be positive.  We check that it fits the type
1759*38fd1498Szrj      in vect_handle_widen_op_by_const ().  */
1760*38fd1498Szrj   if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1761*38fd1498Szrj     return NULL;
1762*38fd1498Szrj 
1763*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (def_stmt0);
1764*38fd1498Szrj   type = gimple_expr_type (last_stmt);
1765*38fd1498Szrj 
1766*38fd1498Szrj   /* Check for subsequent conversion to another type.  */
1767*38fd1498Szrj   use_stmt = vect_single_imm_use (last_stmt);
1768*38fd1498Szrj   if (use_stmt && is_gimple_assign (use_stmt)
1769*38fd1498Szrj       && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1770*38fd1498Szrj       && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1771*38fd1498Szrj     {
1772*38fd1498Szrj       tree use_lhs = gimple_assign_lhs (use_stmt);
1773*38fd1498Szrj       tree use_type = TREE_TYPE (use_lhs);
1774*38fd1498Szrj 
1775*38fd1498Szrj       if (INTEGRAL_TYPE_P (use_type)
1776*38fd1498Szrj 	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1777*38fd1498Szrj 	{
1778*38fd1498Szrj 	  last_stmt = use_stmt;
1779*38fd1498Szrj 	  type = use_type;
1780*38fd1498Szrj 	}
1781*38fd1498Szrj     }
1782*38fd1498Szrj 
1783*38fd1498Szrj   /* Check if this is a widening operation.  */
1784*38fd1498Szrj   gimple *wstmt = NULL;
1785*38fd1498Szrj   if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1786*38fd1498Szrj        				      &oprnd0, &wstmt,
1787*38fd1498Szrj 	                              type, &half_type0, def_stmt0))
1788*38fd1498Szrj     return NULL;
1789*38fd1498Szrj 
1790*38fd1498Szrj   /* Pattern detected.  */
1791*38fd1498Szrj   if (dump_enabled_p ())
1792*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
1793*38fd1498Szrj                      "vect_recog_widen_shift_pattern: detected:\n");
1794*38fd1498Szrj 
1795*38fd1498Szrj   /* Check target support.  */
1796*38fd1498Szrj   vectype = get_vectype_for_scalar_type (half_type0);
1797*38fd1498Szrj   vectype_out = get_vectype_for_scalar_type (type);
1798*38fd1498Szrj 
1799*38fd1498Szrj   if (!vectype
1800*38fd1498Szrj       || !vectype_out
1801*38fd1498Szrj       || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1802*38fd1498Szrj 					  vectype_out, vectype,
1803*38fd1498Szrj 					  &dummy_code, &dummy_code,
1804*38fd1498Szrj 					  &dummy_int, &dummy_vec))
1805*38fd1498Szrj     return NULL;
1806*38fd1498Szrj 
1807*38fd1498Szrj   *type_in = vectype;
1808*38fd1498Szrj   *type_out = vectype_out;
1809*38fd1498Szrj 
1810*38fd1498Szrj   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
1811*38fd1498Szrj   var = vect_recog_temp_ssa_var (type, NULL);
1812*38fd1498Szrj   pattern_stmt
1813*38fd1498Szrj     = gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
1814*38fd1498Szrj   if (wstmt)
1815*38fd1498Szrj     {
1816*38fd1498Szrj       stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1817*38fd1498Szrj       new_pattern_def_seq (stmt_vinfo, wstmt);
1818*38fd1498Szrj       stmt_vec_info new_stmt_info
1819*38fd1498Szrj 	= new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
1820*38fd1498Szrj       set_vinfo_for_stmt (wstmt, new_stmt_info);
1821*38fd1498Szrj       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1822*38fd1498Szrj     }
1823*38fd1498Szrj 
1824*38fd1498Szrj   if (dump_enabled_p ())
1825*38fd1498Szrj     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
1826*38fd1498Szrj 
1827*38fd1498Szrj   stmts->safe_push (last_stmt);
1828*38fd1498Szrj   return pattern_stmt;
1829*38fd1498Szrj }
1830*38fd1498Szrj 
1831*38fd1498Szrj /* Detect a rotate pattern that wouldn't otherwise be vectorized:
1832*38fd1498Szrj 
1833*38fd1498Szrj    type a_t, b_t, c_t;
1834*38fd1498Szrj 
1835*38fd1498Szrj    S0 a_t = b_t r<< c_t;
1836*38fd1498Szrj 
1837*38fd1498Szrj   Input/Output:
1838*38fd1498Szrj 
1839*38fd1498Szrj   * STMTS: Contains a stmt from which the pattern search begins,
1840*38fd1498Szrj     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
1841*38fd1498Szrj     with a sequence:
1842*38fd1498Szrj 
1843*38fd1498Szrj    S1 d_t = -c_t;
1844*38fd1498Szrj    S2 e_t = d_t & (B - 1);
1845*38fd1498Szrj    S3 f_t = b_t << c_t;
1846*38fd1498Szrj    S4 g_t = b_t >> e_t;
1847*38fd1498Szrj    S0 a_t = f_t | g_t;
1848*38fd1498Szrj 
1849*38fd1498Szrj     where B is the element bitsize of the type.
1850*38fd1498Szrj 
1851*38fd1498Szrj   Output:
1852*38fd1498Szrj 
1853*38fd1498Szrj   * TYPE_IN: The type of the input arguments to the pattern.
1854*38fd1498Szrj 
1855*38fd1498Szrj   * TYPE_OUT: The type of the output of this pattern.
1856*38fd1498Szrj 
1857*38fd1498Szrj   * Return value: A new stmt that will be used to replace the rotate
1858*38fd1498Szrj     S0 stmt.  */
1859*38fd1498Szrj 
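/* A scalar C equivalent of the emitted sequence for an 8-bit unsigned
   type (B == 8; a sketch, not the GIMPLE that is actually built):

     unsigned char rotl (unsigned char b, unsigned char c)
     {
       unsigned char e = (unsigned char) -c & 7;  // S1 and S2
       return (b << c) | (b >> e);                // S3, S4 and S0
     }

   Using b >> e rather than b >> (8 - c) keeps the shift amount in range
   when c == 0.  */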
1860*38fd1498Szrj static gimple *
1861*38fd1498Szrj vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
1862*38fd1498Szrj {
1863*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
1864*38fd1498Szrj   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1865*38fd1498Szrj   gimple *pattern_stmt, *def_stmt;
1866*38fd1498Szrj   enum tree_code rhs_code;
1867*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1868*38fd1498Szrj   vec_info *vinfo = stmt_vinfo->vinfo;
1869*38fd1498Szrj   enum vect_def_type dt;
1870*38fd1498Szrj   optab optab1, optab2;
1871*38fd1498Szrj   edge ext_def = NULL;
1872*38fd1498Szrj 
1873*38fd1498Szrj   if (!is_gimple_assign (last_stmt))
1874*38fd1498Szrj     return NULL;
1875*38fd1498Szrj 
1876*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (last_stmt);
1877*38fd1498Szrj   switch (rhs_code)
1878*38fd1498Szrj     {
1879*38fd1498Szrj     case LROTATE_EXPR:
1880*38fd1498Szrj     case RROTATE_EXPR:
1881*38fd1498Szrj       break;
1882*38fd1498Szrj     default:
1883*38fd1498Szrj       return NULL;
1884*38fd1498Szrj     }
1885*38fd1498Szrj 
1886*38fd1498Szrj   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1887*38fd1498Szrj     return NULL;
1888*38fd1498Szrj 
1889*38fd1498Szrj   lhs = gimple_assign_lhs (last_stmt);
1890*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (last_stmt);
1891*38fd1498Szrj   type = TREE_TYPE (oprnd0);
1892*38fd1498Szrj   oprnd1 = gimple_assign_rhs2 (last_stmt);
1893*38fd1498Szrj   if (TREE_CODE (oprnd0) != SSA_NAME
1894*38fd1498Szrj       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1895*38fd1498Szrj       || !INTEGRAL_TYPE_P (type)
1896*38fd1498Szrj       || !TYPE_UNSIGNED (type))
1897*38fd1498Szrj     return NULL;
1898*38fd1498Szrj 
1899*38fd1498Szrj   if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
1900*38fd1498Szrj     return NULL;
1901*38fd1498Szrj 
1902*38fd1498Szrj   if (dt != vect_internal_def
1903*38fd1498Szrj       && dt != vect_constant_def
1904*38fd1498Szrj       && dt != vect_external_def)
1905*38fd1498Szrj     return NULL;
1906*38fd1498Szrj 
1907*38fd1498Szrj   vectype = get_vectype_for_scalar_type (type);
1908*38fd1498Szrj   if (vectype == NULL_TREE)
1909*38fd1498Szrj     return NULL;
1910*38fd1498Szrj 
1911*38fd1498Szrj   /* If vector/vector or vector/scalar rotate is supported by the target,
1912*38fd1498Szrj      don't do anything here.  */
1913*38fd1498Szrj   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1914*38fd1498Szrj   if (optab1
1915*38fd1498Szrj       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1916*38fd1498Szrj     return NULL;
1917*38fd1498Szrj 
1918*38fd1498Szrj   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
1919*38fd1498Szrj     {
1920*38fd1498Szrj       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1921*38fd1498Szrj       if (optab2
1922*38fd1498Szrj 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1923*38fd1498Szrj 	return NULL;
1924*38fd1498Szrj     }
1925*38fd1498Szrj 
1926*38fd1498Szrj   /* If vector/vector or vector/scalar shifts aren't supported by the target,
1927*38fd1498Szrj      don't do anything here either.  */
1928*38fd1498Szrj   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1929*38fd1498Szrj   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1930*38fd1498Szrj   if (!optab1
1931*38fd1498Szrj       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1932*38fd1498Szrj       || !optab2
1933*38fd1498Szrj       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1934*38fd1498Szrj     {
1935*38fd1498Szrj       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
1936*38fd1498Szrj 	return NULL;
1937*38fd1498Szrj       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1938*38fd1498Szrj       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1939*38fd1498Szrj       if (!optab1
1940*38fd1498Szrj 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1941*38fd1498Szrj 	  || !optab2
1942*38fd1498Szrj 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1943*38fd1498Szrj 	return NULL;
1944*38fd1498Szrj     }
1945*38fd1498Szrj 
1946*38fd1498Szrj   *type_in = vectype;
1947*38fd1498Szrj   *type_out = vectype;
1948*38fd1498Szrj   if (*type_in == NULL_TREE)
1949*38fd1498Szrj     return NULL;
1950*38fd1498Szrj 
1951*38fd1498Szrj   if (dt == vect_external_def
1952*38fd1498Szrj       && TREE_CODE (oprnd1) == SSA_NAME
1953*38fd1498Szrj       && is_a <loop_vec_info> (vinfo))
1954*38fd1498Szrj     {
1955*38fd1498Szrj       struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
1956*38fd1498Szrj       ext_def = loop_preheader_edge (loop);
1957*38fd1498Szrj       if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1958*38fd1498Szrj 	{
1959*38fd1498Szrj 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1960*38fd1498Szrj 	  if (bb == NULL
1961*38fd1498Szrj 	      || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1962*38fd1498Szrj 	    ext_def = NULL;
1963*38fd1498Szrj 	}
1964*38fd1498Szrj     }
1965*38fd1498Szrj 
1966*38fd1498Szrj   def = NULL_TREE;
1967*38fd1498Szrj   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
1968*38fd1498Szrj   if (TREE_CODE (oprnd1) == INTEGER_CST
1969*38fd1498Szrj       || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
1970*38fd1498Szrj     def = oprnd1;
1971*38fd1498Szrj   else if (def_stmt && gimple_assign_cast_p (def_stmt))
1972*38fd1498Szrj     {
1973*38fd1498Szrj       tree rhs1 = gimple_assign_rhs1 (def_stmt);
1974*38fd1498Szrj       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
1975*38fd1498Szrj 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
1976*38fd1498Szrj 	     == TYPE_PRECISION (type))
1977*38fd1498Szrj 	def = rhs1;
1978*38fd1498Szrj     }
1979*38fd1498Szrj 
1980*38fd1498Szrj   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1981*38fd1498Szrj   if (def == NULL_TREE)
1982*38fd1498Szrj     {
1983*38fd1498Szrj       def = vect_recog_temp_ssa_var (type, NULL);
1984*38fd1498Szrj       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
1985*38fd1498Szrj       if (ext_def)
1986*38fd1498Szrj 	{
1987*38fd1498Szrj 	  basic_block new_bb
1988*38fd1498Szrj 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1989*38fd1498Szrj 	  gcc_assert (!new_bb);
1990*38fd1498Szrj 	}
1991*38fd1498Szrj       else
1992*38fd1498Szrj 	append_pattern_def_seq (stmt_vinfo, def_stmt);
1993*38fd1498Szrj     }
1994*38fd1498Szrj   stype = TREE_TYPE (def);
1995*38fd1498Szrj   scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
1996*38fd1498Szrj 
1997*38fd1498Szrj   if (TREE_CODE (def) == INTEGER_CST)
1998*38fd1498Szrj     {
1999*38fd1498Szrj       if (!tree_fits_uhwi_p (def)
2000*38fd1498Szrj 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2001*38fd1498Szrj 	  || integer_zerop (def))
2002*38fd1498Szrj 	return NULL;
2003*38fd1498Szrj       def2 = build_int_cst (stype,
2004*38fd1498Szrj 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2005*38fd1498Szrj     }
2006*38fd1498Szrj   else
2007*38fd1498Szrj     {
2008*38fd1498Szrj       tree vecstype = get_vectype_for_scalar_type (stype);
2009*38fd1498Szrj       stmt_vec_info def_stmt_vinfo;
2010*38fd1498Szrj 
2011*38fd1498Szrj       if (vecstype == NULL_TREE)
2012*38fd1498Szrj 	return NULL;
2013*38fd1498Szrj       def2 = vect_recog_temp_ssa_var (stype, NULL);
2014*38fd1498Szrj       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2015*38fd1498Szrj       if (ext_def)
2016*38fd1498Szrj 	{
2017*38fd1498Szrj 	  basic_block new_bb
2018*38fd1498Szrj 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2019*38fd1498Szrj 	  gcc_assert (!new_bb);
2020*38fd1498Szrj 	}
2021*38fd1498Szrj       else
2022*38fd1498Szrj 	{
2023*38fd1498Szrj 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2024*38fd1498Szrj 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2025*38fd1498Szrj 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2026*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2027*38fd1498Szrj 	}
2028*38fd1498Szrj 
2029*38fd1498Szrj       def2 = vect_recog_temp_ssa_var (stype, NULL);
2030*38fd1498Szrj       tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2031*38fd1498Szrj       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2032*38fd1498Szrj 				      gimple_assign_lhs (def_stmt), mask);
2033*38fd1498Szrj       if (ext_def)
2034*38fd1498Szrj 	{
2035*38fd1498Szrj 	  basic_block new_bb
2036*38fd1498Szrj 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2037*38fd1498Szrj 	  gcc_assert (!new_bb);
2038*38fd1498Szrj 	}
2039*38fd1498Szrj       else
2040*38fd1498Szrj 	{
2041*38fd1498Szrj 	  def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2042*38fd1498Szrj 	  set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2043*38fd1498Szrj 	  STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2044*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2045*38fd1498Szrj 	}
2046*38fd1498Szrj     }
2047*38fd1498Szrj 
2048*38fd1498Szrj   var1 = vect_recog_temp_ssa_var (type, NULL);
2049*38fd1498Szrj   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2050*38fd1498Szrj 					? LSHIFT_EXPR : RSHIFT_EXPR,
2051*38fd1498Szrj 				  oprnd0, def);
2052*38fd1498Szrj   append_pattern_def_seq (stmt_vinfo, def_stmt);
2053*38fd1498Szrj 
2054*38fd1498Szrj   var2 = vect_recog_temp_ssa_var (type, NULL);
2055*38fd1498Szrj   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2056*38fd1498Szrj 					? RSHIFT_EXPR : LSHIFT_EXPR,
2057*38fd1498Szrj 				  oprnd0, def2);
2058*38fd1498Szrj   append_pattern_def_seq (stmt_vinfo, def_stmt);
2059*38fd1498Szrj 
2060*38fd1498Szrj   /* Pattern detected.  */
2061*38fd1498Szrj   if (dump_enabled_p ())
2062*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
2063*38fd1498Szrj 		     "vect_recog_rotate_pattern: detected:\n");
2064*38fd1498Szrj 
2065*38fd1498Szrj   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2066*38fd1498Szrj   var = vect_recog_temp_ssa_var (type, NULL);
2067*38fd1498Szrj   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2068*38fd1498Szrj 
2069*38fd1498Szrj   if (dump_enabled_p ())
2070*38fd1498Szrj     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2071*38fd1498Szrj 
2072*38fd1498Szrj   stmts->safe_push (last_stmt);
2073*38fd1498Szrj   return pattern_stmt;
2074*38fd1498Szrj }
2075*38fd1498Szrj 
2076*38fd1498Szrj /* Detect a vector by vector shift pattern that wouldn't otherwise be
2077*38fd1498Szrj    vectorized:
2078*38fd1498Szrj 
2079*38fd1498Szrj    type a_t;
2080*38fd1498Szrj    TYPE b_T, res_T;
2081*38fd1498Szrj 
2082*38fd1498Szrj    S1 a_t = ;
2083*38fd1498Szrj    S2 b_T = ;
2084*38fd1498Szrj    S3 res_T = b_T op a_t;
2085*38fd1498Szrj 
2086*38fd1498Szrj   where type 'TYPE' is a type with a different size than 'type',
2087*38fd1498Szrj   and op is <<, >> or rotate.
2088*38fd1498Szrj 
2089*38fd1498Szrj   Also detect cases:
2090*38fd1498Szrj 
2091*38fd1498Szrj    type a_t;
2092*38fd1498Szrj    TYPE b_T, c_T, res_T;
2093*38fd1498Szrj 
2094*38fd1498Szrj    S0 c_T = ;
2095*38fd1498Szrj    S1 a_t = (type) c_T;
2096*38fd1498Szrj    S2 b_T = ;
2097*38fd1498Szrj    S3 res_T = b_T op a_t;
2098*38fd1498Szrj 
2099*38fd1498Szrj   Input/Output:
2100*38fd1498Szrj 
2101*38fd1498Szrj   * STMTS: Contains a stmt from which the pattern search begins,
2102*38fd1498Szrj     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2103*38fd1498Szrj     with a shift/rotate which has the same type on both operands, in the
2104*38fd1498Szrj     second case just b_T op c_T, in the first case with added cast
2105*38fd1498Szrj     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2106*38fd1498Szrj 
2107*38fd1498Szrj   Output:
2108*38fd1498Szrj 
2109*38fd1498Szrj   * TYPE_IN: The type of the input arguments to the pattern.
2110*38fd1498Szrj 
2111*38fd1498Szrj   * TYPE_OUT: The type of the output of this pattern.
2112*38fd1498Szrj 
2113*38fd1498Szrj   * Return value: A new stmt that will be used to replace the shift/rotate
2114*38fd1498Szrj     S3 stmt.  */
2115*38fd1498Szrj 
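/* An illustrative source-level example (hypothetical):

     int b[N];
     char s[N];
     for (int i = 0; i < N; i++)
       b[i] = b[i] << s[i];

   The shift amount s[i] has a narrower type than the shifted value, so a
   cast of it (or of c_T in the second case above) to the type of b[i] is
   added to STMT_VINFO_PATTERN_DEF_SEQ, giving a shift whose vector
   operands have the same type.  */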
2116*38fd1498Szrj static gimple *
2117*38fd1498Szrj vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
2118*38fd1498Szrj 					tree *type_in, tree *type_out)
2119*38fd1498Szrj {
2120*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
2121*38fd1498Szrj   tree oprnd0, oprnd1, lhs, var;
2122*38fd1498Szrj   gimple *pattern_stmt, *def_stmt;
2123*38fd1498Szrj   enum tree_code rhs_code;
2124*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2125*38fd1498Szrj   vec_info *vinfo = stmt_vinfo->vinfo;
2126*38fd1498Szrj   enum vect_def_type dt;
2127*38fd1498Szrj 
2128*38fd1498Szrj   if (!is_gimple_assign (last_stmt))
2129*38fd1498Szrj     return NULL;
2130*38fd1498Szrj 
2131*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (last_stmt);
2132*38fd1498Szrj   switch (rhs_code)
2133*38fd1498Szrj     {
2134*38fd1498Szrj     case LSHIFT_EXPR:
2135*38fd1498Szrj     case RSHIFT_EXPR:
2136*38fd1498Szrj     case LROTATE_EXPR:
2137*38fd1498Szrj     case RROTATE_EXPR:
2138*38fd1498Szrj       break;
2139*38fd1498Szrj     default:
2140*38fd1498Szrj       return NULL;
2141*38fd1498Szrj     }
2142*38fd1498Szrj 
2143*38fd1498Szrj   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2144*38fd1498Szrj     return NULL;
2145*38fd1498Szrj 
2146*38fd1498Szrj   lhs = gimple_assign_lhs (last_stmt);
2147*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (last_stmt);
2148*38fd1498Szrj   oprnd1 = gimple_assign_rhs2 (last_stmt);
2149*38fd1498Szrj   if (TREE_CODE (oprnd0) != SSA_NAME
2150*38fd1498Szrj       || TREE_CODE (oprnd1) != SSA_NAME
2151*38fd1498Szrj       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2152*38fd1498Szrj       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2153*38fd1498Szrj       || TYPE_PRECISION (TREE_TYPE (lhs))
2154*38fd1498Szrj 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2155*38fd1498Szrj     return NULL;
2156*38fd1498Szrj 
2157*38fd1498Szrj   if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
2158*38fd1498Szrj     return NULL;
2159*38fd1498Szrj 
2160*38fd1498Szrj   if (dt != vect_internal_def)
2161*38fd1498Szrj     return NULL;
2162*38fd1498Szrj 
2163*38fd1498Szrj   *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2164*38fd1498Szrj   *type_out = *type_in;
2165*38fd1498Szrj   if (*type_in == NULL_TREE)
2166*38fd1498Szrj     return NULL;
2167*38fd1498Szrj 
2168*38fd1498Szrj   tree def = NULL_TREE;
2169*38fd1498Szrj   stmt_vec_info def_vinfo = vinfo_for_stmt (def_stmt);
2170*38fd1498Szrj   if (!STMT_VINFO_IN_PATTERN_P (def_vinfo) && gimple_assign_cast_p (def_stmt))
2171*38fd1498Szrj     {
2172*38fd1498Szrj       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2173*38fd1498Szrj       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2174*38fd1498Szrj 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2175*38fd1498Szrj 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2176*38fd1498Szrj 	{
2177*38fd1498Szrj 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2178*38fd1498Szrj 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2179*38fd1498Szrj 	    def = rhs1;
2180*38fd1498Szrj 	  else
2181*38fd1498Szrj 	    {
2182*38fd1498Szrj 	      tree mask
2183*38fd1498Szrj 		= build_low_bits_mask (TREE_TYPE (rhs1),
2184*38fd1498Szrj 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2185*38fd1498Szrj 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2186*38fd1498Szrj 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2187*38fd1498Szrj 	      new_pattern_def_seq (stmt_vinfo, def_stmt);
2188*38fd1498Szrj 	    }
2189*38fd1498Szrj 	}
2190*38fd1498Szrj     }
2191*38fd1498Szrj 
2192*38fd1498Szrj   if (def == NULL_TREE)
2193*38fd1498Szrj     {
2194*38fd1498Szrj       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2195*38fd1498Szrj       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2196*38fd1498Szrj       new_pattern_def_seq (stmt_vinfo, def_stmt);
2197*38fd1498Szrj     }
2198*38fd1498Szrj 
2199*38fd1498Szrj   /* Pattern detected.  */
2200*38fd1498Szrj   if (dump_enabled_p ())
2201*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
2202*38fd1498Szrj                      "vect_recog_vector_vector_shift_pattern: detected:\n");
2203*38fd1498Szrj 
2204*38fd1498Szrj   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2205*38fd1498Szrj   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2206*38fd1498Szrj   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2207*38fd1498Szrj 
2208*38fd1498Szrj   if (dump_enabled_p ())
2209*38fd1498Szrj     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
2210*38fd1498Szrj 
2211*38fd1498Szrj   stmts->safe_push (last_stmt);
2212*38fd1498Szrj   return pattern_stmt;
2213*38fd1498Szrj }
2214*38fd1498Szrj 
2215*38fd1498Szrj /* Return true iff the target has a vector optab implementing the operation
2216*38fd1498Szrj    CODE on type VECTYPE.  */
2217*38fd1498Szrj 
2218*38fd1498Szrj static bool
2219*38fd1498Szrj target_has_vecop_for_code (tree_code code, tree vectype)
2220*38fd1498Szrj {
2221*38fd1498Szrj   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2222*38fd1498Szrj   return voptab
2223*38fd1498Szrj 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2224*38fd1498Szrj }
2225*38fd1498Szrj 
2226*38fd1498Szrj /* Verify that the target has optabs of VECTYPE to perform all the steps
2227*38fd1498Szrj    needed by the multiplication-by-immediate synthesis algorithm described by
2228*38fd1498Szrj    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
2229*38fd1498Szrj    present.  Return true iff the target supports all the steps.  */
2230*38fd1498Szrj 
2231*38fd1498Szrj static bool
2232*38fd1498Szrj target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2233*38fd1498Szrj 				 tree vectype, bool synth_shift_p)
2234*38fd1498Szrj {
2235*38fd1498Szrj   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2236*38fd1498Szrj     return false;
2237*38fd1498Szrj 
2238*38fd1498Szrj   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2239*38fd1498Szrj   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2240*38fd1498Szrj 
2241*38fd1498Szrj   if (var == negate_variant
2242*38fd1498Szrj       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2243*38fd1498Szrj     return false;
2244*38fd1498Szrj 
2245*38fd1498Szrj   /* If we must synthesize shifts with additions, make sure that vector
2246*38fd1498Szrj      addition is available.  */
2247*38fd1498Szrj   if ((var == add_variant || synth_shift_p) && !supports_vplus)
2248*38fd1498Szrj     return false;
2249*38fd1498Szrj 
2250*38fd1498Szrj   for (int i = 1; i < alg->ops; i++)
2251*38fd1498Szrj     {
2252*38fd1498Szrj       switch (alg->op[i])
2253*38fd1498Szrj 	{
2254*38fd1498Szrj 	case alg_shift:
2255*38fd1498Szrj 	  break;
2256*38fd1498Szrj 	case alg_add_t_m2:
2257*38fd1498Szrj 	case alg_add_t2_m:
2258*38fd1498Szrj 	case alg_add_factor:
2259*38fd1498Szrj 	  if (!supports_vplus)
2260*38fd1498Szrj 	    return false;
2261*38fd1498Szrj 	  break;
2262*38fd1498Szrj 	case alg_sub_t_m2:
2263*38fd1498Szrj 	case alg_sub_t2_m:
2264*38fd1498Szrj 	case alg_sub_factor:
2265*38fd1498Szrj 	  if (!supports_vminus)
2266*38fd1498Szrj 	    return false;
2267*38fd1498Szrj 	  break;
2268*38fd1498Szrj 	case alg_unknown:
2269*38fd1498Szrj 	case alg_m:
2270*38fd1498Szrj 	case alg_zero:
2271*38fd1498Szrj 	case alg_impossible:
2272*38fd1498Szrj 	  return false;
2273*38fd1498Szrj 	default:
2274*38fd1498Szrj 	  gcc_unreachable ();
2275*38fd1498Szrj 	}
2276*38fd1498Szrj     }
2277*38fd1498Szrj 
2278*38fd1498Szrj   return true;
2279*38fd1498Szrj }
2280*38fd1498Szrj 
2281*38fd1498Szrj /* Synthesize a left shift of OP by AMNT bits using a series of additions,
2282*38fd1498Szrj    putting the final result in DEST.  Append all statements but the last into
2283*38fd1498Szrj    VINFO.  Return the last statement.  */
2284*38fd1498Szrj 
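/* E.g. for AMNT == 3 this emits (a sketch):

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   appending the first two statements to VINFO's pattern def sequence and
   returning the last one.  */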
2285*38fd1498Szrj static gimple *
2286*38fd1498Szrj synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2287*38fd1498Szrj 			   stmt_vec_info vinfo)
2288*38fd1498Szrj {
2289*38fd1498Szrj   HOST_WIDE_INT i;
2290*38fd1498Szrj   tree itype = TREE_TYPE (op);
2291*38fd1498Szrj   tree prev_res = op;
2292*38fd1498Szrj   gcc_assert (amnt >= 0);
2293*38fd1498Szrj   for (i = 0; i < amnt; i++)
2294*38fd1498Szrj     {
2295*38fd1498Szrj       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2296*38fd1498Szrj 		      : dest;
2297*38fd1498Szrj       gimple *stmt
2298*38fd1498Szrj         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2299*38fd1498Szrj       prev_res = tmp_var;
2300*38fd1498Szrj       if (i < amnt - 1)
2301*38fd1498Szrj 	append_pattern_def_seq (vinfo, stmt);
2302*38fd1498Szrj       else
2303*38fd1498Szrj 	return stmt;
2304*38fd1498Szrj     }
2305*38fd1498Szrj   gcc_unreachable ();
2306*38fd1498Szrj   return NULL;
2307*38fd1498Szrj }
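/* The same idea in standalone C (illustration only): a left shift by AMNT
   implemented as AMNT self-additions, one doubling per step.  */
#if 0
#include <assert.h>

static unsigned int
lshift_by_additions (unsigned int x, int amnt)
{
  unsigned int r = x;
  for (int i = 0; i < amnt; i++)
    r = r + r;			/* r <<= 1 */
  return r;
}

int
main (void)
{
  assert (lshift_by_additions (5, 3) == 5u << 3);
  return 0;
}
#endif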
2308*38fd1498Szrj 
2309*38fd1498Szrj /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
2310*38fd1498Szrj    CODE to operands OP1 and OP2, creating a new temporary SSA var in
2311*38fd1498Szrj    the process if necessary.  Append the resulting assignment statements
2312*38fd1498Szrj    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
2313*38fd1498Szrj    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
2314*38fd1498Szrj    left shifts using additions.  */
2315*38fd1498Szrj 
2316*38fd1498Szrj static tree
2317*38fd1498Szrj apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2318*38fd1498Szrj 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
2319*38fd1498Szrj {
2320*38fd1498Szrj   if (integer_zerop (op2)
2321*38fd1498Szrj       && (code == LSHIFT_EXPR
2322*38fd1498Szrj 	  || code == PLUS_EXPR))
2323*38fd1498Szrj     {
2324*38fd1498Szrj       gcc_assert (TREE_CODE (op1) == SSA_NAME);
2325*38fd1498Szrj       return op1;
2326*38fd1498Szrj     }
2327*38fd1498Szrj 
2328*38fd1498Szrj   gimple *stmt;
2329*38fd1498Szrj   tree itype = TREE_TYPE (op1);
2330*38fd1498Szrj   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2331*38fd1498Szrj 
2332*38fd1498Szrj   if (code == LSHIFT_EXPR
2333*38fd1498Szrj       && synth_shift_p)
2334*38fd1498Szrj     {
2335*38fd1498Szrj       stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2336*38fd1498Szrj 					 stmt_vinfo);
2337*38fd1498Szrj       append_pattern_def_seq (stmt_vinfo, stmt);
2338*38fd1498Szrj       return tmp_var;
2339*38fd1498Szrj     }
2340*38fd1498Szrj 
2341*38fd1498Szrj   stmt = gimple_build_assign (tmp_var, code, op1, op2);
2342*38fd1498Szrj   append_pattern_def_seq (stmt_vinfo, stmt);
2343*38fd1498Szrj   return tmp_var;
2344*38fd1498Szrj }
2345*38fd1498Szrj 
2346*38fd1498Szrj /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2347*38fd1498Szrj    and simple arithmetic operations to be vectorized.  Record the statements
2348*38fd1498Szrj    produced in STMT_VINFO and return the last statement in the sequence or
2349*38fd1498Szrj    NULL if it's not possible to synthesize such a multiplication.
2350*38fd1498Szrj    This function mirrors the behavior of expand_mult_const in expmed.c but
2351*38fd1498Szrj    works on tree-ssa form.  */
2352*38fd1498Szrj 
2353*38fd1498Szrj static gimple *
2354*38fd1498Szrj vect_synth_mult_by_constant (tree op, tree val,
2355*38fd1498Szrj 			     stmt_vec_info stmt_vinfo)
2356*38fd1498Szrj {
2357*38fd1498Szrj   tree itype = TREE_TYPE (op);
2358*38fd1498Szrj   machine_mode mode = TYPE_MODE (itype);
2359*38fd1498Szrj   struct algorithm alg;
2360*38fd1498Szrj   mult_variant variant;
2361*38fd1498Szrj   if (!tree_fits_shwi_p (val))
2362*38fd1498Szrj     return NULL;
2363*38fd1498Szrj 
2364*38fd1498Szrj   /* Multiplication synthesis by shifts, adds and subs can introduce
2365*38fd1498Szrj      signed overflow where the original operation didn't.  Perform the
2366*38fd1498Szrj      operations on an unsigned type and cast back to avoid this.
2367*38fd1498Szrj      In the future we may want to relax this for synthesis algorithms
2368*38fd1498Szrj      that we can prove do not cause unexpected overflow.  */
2369*38fd1498Szrj   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2370*38fd1498Szrj 
2371*38fd1498Szrj   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2372*38fd1498Szrj 
2373*38fd1498Szrj   /* Targets that don't support vector shifts but support vector additions
2374*38fd1498Szrj      can synthesize shifts that way.  */
2375*38fd1498Szrj   bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2376*38fd1498Szrj 
2377*38fd1498Szrj   HOST_WIDE_INT hwval = tree_to_shwi (val);
2378*38fd1498Szrj   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2379*38fd1498Szrj      The vectorizer's benefit analysis will decide whether it's beneficial
2380*38fd1498Szrj      to do this.  */
2381*38fd1498Szrj   bool possible = choose_mult_variant (mode, hwval, &alg,
2382*38fd1498Szrj 					&variant, MAX_COST);
2383*38fd1498Szrj   if (!possible)
2384*38fd1498Szrj     return NULL;
2385*38fd1498Szrj 
2386*38fd1498Szrj   tree vectype = get_vectype_for_scalar_type (multtype);
2387*38fd1498Szrj 
2388*38fd1498Szrj   if (!vectype
2389*38fd1498Szrj       || !target_supports_mult_synth_alg (&alg, variant,
2390*38fd1498Szrj 					   vectype, synth_shift_p))
2391*38fd1498Szrj     return NULL;
2392*38fd1498Szrj 
2393*38fd1498Szrj   tree accumulator;
2394*38fd1498Szrj 
2395*38fd1498Szrj   /* Clear out the sequence of statements so we can populate it below.  */
2396*38fd1498Szrj   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2397*38fd1498Szrj   gimple *stmt = NULL;
2398*38fd1498Szrj 
2399*38fd1498Szrj   if (cast_to_unsigned_p)
2400*38fd1498Szrj     {
2401*38fd1498Szrj       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2402*38fd1498Szrj       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2403*38fd1498Szrj       append_pattern_def_seq (stmt_vinfo, stmt);
2404*38fd1498Szrj       op = tmp_op;
2405*38fd1498Szrj     }
2406*38fd1498Szrj 
2407*38fd1498Szrj   if (alg.op[0] == alg_zero)
2408*38fd1498Szrj     accumulator = build_int_cst (multtype, 0);
2409*38fd1498Szrj   else
2410*38fd1498Szrj     accumulator = op;
2411*38fd1498Szrj 
2412*38fd1498Szrj   bool needs_fixup = (variant == negate_variant)
2413*38fd1498Szrj 		      || (variant == add_variant);
2414*38fd1498Szrj 
2415*38fd1498Szrj   for (int i = 1; i < alg.ops; i++)
2416*38fd1498Szrj     {
2417*38fd1498Szrj       tree shft_log = build_int_cst (multtype, alg.log[i]);
2418*38fd1498Szrj       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2419*38fd1498Szrj       tree tmp_var = NULL_TREE;
2420*38fd1498Szrj 
2421*38fd1498Szrj       switch (alg.op[i])
2422*38fd1498Szrj 	{
2423*38fd1498Szrj 	case alg_shift:
2424*38fd1498Szrj 	  if (synth_shift_p)
2425*38fd1498Szrj 	    stmt
2426*38fd1498Szrj 	      = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2427*38fd1498Szrj 					    stmt_vinfo);
2428*38fd1498Szrj 	  else
2429*38fd1498Szrj 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2430*38fd1498Szrj 					 shft_log);
2431*38fd1498Szrj 	  break;
2432*38fd1498Szrj 	case alg_add_t_m2:
2433*38fd1498Szrj 	  tmp_var
2434*38fd1498Szrj 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2435*38fd1498Szrj 					    stmt_vinfo, synth_shift_p);
2436*38fd1498Szrj 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2437*38fd1498Szrj 				       tmp_var);
2438*38fd1498Szrj 	  break;
2439*38fd1498Szrj 	case alg_sub_t_m2:
2440*38fd1498Szrj 	  tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2441*38fd1498Szrj 						  shft_log, stmt_vinfo,
2442*38fd1498Szrj 						  synth_shift_p);
2443*38fd1498Szrj 	  /* In some algorithms the first step involves zeroing the
2444*38fd1498Szrj 	     accumulator.  If subtracting from such an accumulator,
2445*38fd1498Szrj 	     just emit the negation directly.  */
2446*38fd1498Szrj 	  if (integer_zerop (accumulator))
2447*38fd1498Szrj 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2448*38fd1498Szrj 	  else
2449*38fd1498Szrj 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2450*38fd1498Szrj 					tmp_var);
2451*38fd1498Szrj 	  break;
2452*38fd1498Szrj 	case alg_add_t2_m:
2453*38fd1498Szrj 	  tmp_var
2454*38fd1498Szrj 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2455*38fd1498Szrj 					   stmt_vinfo, synth_shift_p);
2456*38fd1498Szrj 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2457*38fd1498Szrj 	  break;
2458*38fd1498Szrj 	case alg_sub_t2_m:
2459*38fd1498Szrj 	  tmp_var
2460*38fd1498Szrj 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2461*38fd1498Szrj 					   stmt_vinfo, synth_shift_p);
2462*38fd1498Szrj 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2463*38fd1498Szrj 	  break;
2464*38fd1498Szrj 	case alg_add_factor:
2465*38fd1498Szrj 	  tmp_var
2466*38fd1498Szrj 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2467*38fd1498Szrj 					    stmt_vinfo, synth_shift_p);
2468*38fd1498Szrj 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2469*38fd1498Szrj 				       tmp_var);
2470*38fd1498Szrj 	  break;
2471*38fd1498Szrj 	case alg_sub_factor:
2472*38fd1498Szrj 	  tmp_var
2473*38fd1498Szrj 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2474*38fd1498Szrj 					   stmt_vinfo, synth_shift_p);
2475*38fd1498Szrj 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2476*38fd1498Szrj 				      accumulator);
2477*38fd1498Szrj 	  break;
2478*38fd1498Szrj 	default:
2479*38fd1498Szrj 	  gcc_unreachable ();
2480*38fd1498Szrj 	}
2481*38fd1498Szrj       /* We don't want to append the last stmt in the sequence to stmt_vinfo
2482*38fd1498Szrj 	 but rather return it directly.  */
2483*38fd1498Szrj 
2484*38fd1498Szrj       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2485*38fd1498Szrj 	append_pattern_def_seq (stmt_vinfo, stmt);
2486*38fd1498Szrj       accumulator = accum_tmp;
2487*38fd1498Szrj     }
2488*38fd1498Szrj   if (variant == negate_variant)
2489*38fd1498Szrj     {
2490*38fd1498Szrj       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2491*38fd1498Szrj       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2492*38fd1498Szrj       accumulator = accum_tmp;
2493*38fd1498Szrj       if (cast_to_unsigned_p)
2494*38fd1498Szrj 	append_pattern_def_seq (stmt_vinfo, stmt);
2495*38fd1498Szrj     }
2496*38fd1498Szrj   else if (variant == add_variant)
2497*38fd1498Szrj     {
2498*38fd1498Szrj       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2499*38fd1498Szrj       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2500*38fd1498Szrj       accumulator = accum_tmp;
2501*38fd1498Szrj       if (cast_to_unsigned_p)
2502*38fd1498Szrj 	append_pattern_def_seq (stmt_vinfo, stmt);
2503*38fd1498Szrj     }
2504*38fd1498Szrj   /* Move back to a signed type if needed.  */
2505*38fd1498Szrj   if (cast_to_unsigned_p)
2506*38fd1498Szrj     {
2507*38fd1498Szrj       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2508*38fd1498Szrj       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2509*38fd1498Szrj     }
2510*38fd1498Szrj 
2511*38fd1498Szrj   return stmt;
2512*38fd1498Szrj }
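/* Illustration (standalone sketch, not part of this file) of why the
   cast_to_unsigned_p handling above is needed: the shift/add expansion can
   overflow where the original multiplication would not.  Assumes the usual
   modulo-2^32 conversion from unsigned back to int, as GCC defines it.  */
#if 0
static int
mul7_no_signed_ub (int x)
{
  /* x * 7 as (x << 3) - x.  The intermediate 8 * x can overflow int even
     when 7 * x is representable (e.g. x == 300000000), so the steps are
     performed on unsigned, mirroring the code above.  */
  unsigned int ux = (unsigned int) x;
  return (int) ((ux << 3) - ux);
}
#endif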
2513*38fd1498Szrj 
2514*38fd1498Szrj /* Detect multiplication by a constant and convert it into a sequence of
2515*38fd1498Szrj    shifts, additions, subtractions and negations.  We reuse the
2516*38fd1498Szrj    choose_mult_variant algorithms from expmed.c.
2517*38fd1498Szrj 
2518*38fd1498Szrj    Input/Output:
2519*38fd1498Szrj 
2520*38fd1498Szrj    STMTS: Contains a stmt from which the pattern search begins,
2521*38fd1498Szrj    i.e. the mult stmt.
2522*38fd1498Szrj 
2523*38fd1498Szrj  Output:
2524*38fd1498Szrj 
2525*38fd1498Szrj   * TYPE_IN: The type of the input arguments to the pattern.
2526*38fd1498Szrj 
2527*38fd1498Szrj   * TYPE_OUT: The type of the output of this pattern.
2528*38fd1498Szrj 
2529*38fd1498Szrj   * Return value: A new stmt that will be used to replace
2530*38fd1498Szrj     the multiplication.  */
2531*38fd1498Szrj 
2532*38fd1498Szrj static gimple *
2533*38fd1498Szrj vect_recog_mult_pattern (vec<gimple *> *stmts,
2534*38fd1498Szrj 			 tree *type_in, tree *type_out)
2535*38fd1498Szrj {
2536*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
2537*38fd1498Szrj   tree oprnd0, oprnd1, vectype, itype;
2538*38fd1498Szrj   gimple *pattern_stmt;
2539*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2540*38fd1498Szrj 
2541*38fd1498Szrj   if (!is_gimple_assign (last_stmt))
2542*38fd1498Szrj     return NULL;
2543*38fd1498Szrj 
2544*38fd1498Szrj   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2545*38fd1498Szrj     return NULL;
2546*38fd1498Szrj 
2547*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (last_stmt);
2548*38fd1498Szrj   oprnd1 = gimple_assign_rhs2 (last_stmt);
2549*38fd1498Szrj   itype = TREE_TYPE (oprnd0);
2550*38fd1498Szrj 
2551*38fd1498Szrj   if (TREE_CODE (oprnd0) != SSA_NAME
2552*38fd1498Szrj       || TREE_CODE (oprnd1) != INTEGER_CST
2553*38fd1498Szrj       || !INTEGRAL_TYPE_P (itype)
2554*38fd1498Szrj       || !type_has_mode_precision_p (itype))
2555*38fd1498Szrj     return NULL;
2556*38fd1498Szrj 
2557*38fd1498Szrj   vectype = get_vectype_for_scalar_type (itype);
2558*38fd1498Szrj   if (vectype == NULL_TREE)
2559*38fd1498Szrj     return NULL;
2560*38fd1498Szrj 
2561*38fd1498Szrj   /* If the target can handle vectorized multiplication natively,
2562*38fd1498Szrj      don't attempt to optimize this.  */
2563*38fd1498Szrj   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2564*38fd1498Szrj   if (mul_optab != unknown_optab)
2565*38fd1498Szrj     {
2566*38fd1498Szrj       machine_mode vec_mode = TYPE_MODE (vectype);
2567*38fd1498Szrj       int icode = (int) optab_handler (mul_optab, vec_mode);
2568*38fd1498Szrj       if (icode != CODE_FOR_nothing)
2569*38fd1498Szrj 	return NULL;
2570*38fd1498Szrj     }
2571*38fd1498Szrj 
2572*38fd1498Szrj   pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2573*38fd1498Szrj   if (!pattern_stmt)
2574*38fd1498Szrj     return NULL;
2575*38fd1498Szrj 
2576*38fd1498Szrj   /* Pattern detected.  */
2577*38fd1498Szrj   if (dump_enabled_p ())
2578*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
2579*38fd1498Szrj 		     "vect_recog_mult_pattern: detected:\n");
2580*38fd1498Szrj 
2581*38fd1498Szrj   if (dump_enabled_p ())
2582*38fd1498Szrj     dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2583*38fd1498Szrj 			  pattern_stmt, 0);
2584*38fd1498Szrj 
2585*38fd1498Szrj   stmts->safe_push (last_stmt);
2586*38fd1498Szrj   *type_in = vectype;
2587*38fd1498Szrj   *type_out = vectype;
2588*38fd1498Szrj 
2589*38fd1498Szrj   return pattern_stmt;
2590*38fd1498Szrj }
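/* Illustration (standalone sketch; the function and parameter names are
   hypothetical): the kind of scalar loop this pattern helps on a target
   without a vector multiply but with vector shifts and adds.  */
#if 0
void
scale_by_10 (short *restrict out, const short *restrict in, int n)
{
  for (int i = 0; i < n; i++)
    /* Rewritten per lane as ((in[i] << 2) + in[i]) << 1.  */
    out[i] = in[i] * 10;
}
#endif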
2591*38fd1498Szrj 
2592*38fd1498Szrj /* Detect a signed division by a constant that wouldn't be
2593*38fd1498Szrj    otherwise vectorized:
2594*38fd1498Szrj 
2595*38fd1498Szrj    type a_t, b_t;
2596*38fd1498Szrj 
2597*38fd1498Szrj    S1 a_t = b_t / N;
2598*38fd1498Szrj 
2599*38fd1498Szrj   where type 'type' is an integral type and N is a constant.
2600*38fd1498Szrj 
2601*38fd1498Szrj   Similarly handle modulo by a constant:
2602*38fd1498Szrj 
2603*38fd1498Szrj    S4 a_t = b_t % N;
2604*38fd1498Szrj 
2605*38fd1498Szrj   Input/Output:
2606*38fd1498Szrj 
2607*38fd1498Szrj   * STMTS: Contains a stmt from which the pattern search begins,
2608*38fd1498Szrj     i.e. the division stmt.  If N is a power of two constant and the
2609*38fd1498Szrj     type is signed, S1 is replaced by:
2610*38fd1498Szrj   S3  y_t = b_t < 0 ? N - 1 : 0;
2611*38fd1498Szrj   S2  x_t = b_t + y_t;
2612*38fd1498Szrj   S1' a_t = x_t >> log2 (N);
2613*38fd1498Szrj 
2614*38fd1498Szrj     Similarly, if N is a power of two constant and the type is
2615*38fd1498Szrj     signed, S4 is replaced by (where the *_T temporaries have unsigned type):
2616*38fd1498Szrj   S9  y_T = b_t < 0 ? -1U : 0U;
2617*38fd1498Szrj   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2618*38fd1498Szrj   S7  z_t = (type) z_T;
2619*38fd1498Szrj   S6  w_t = b_t + z_t;
2620*38fd1498Szrj   S5  x_t = w_t & (N - 1);
2621*38fd1498Szrj   S4' a_t = x_t - z_t;
2622*38fd1498Szrj 
2623*38fd1498Szrj   Output:
2624*38fd1498Szrj 
2625*38fd1498Szrj   * TYPE_IN: The type of the input arguments to the pattern.
2626*38fd1498Szrj 
2627*38fd1498Szrj   * TYPE_OUT: The type of the output of this pattern.
2628*38fd1498Szrj 
2629*38fd1498Szrj   * Return value: A new stmt that will be used to replace the division
2630*38fd1498Szrj     S1 or modulo S4 stmt.  */
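/* Standalone check of the two rewrites above (illustration only; assumes
   32-bit int and an arithmetic right shift of negative values, as GCC
   provides), for N == 8.  */
#if 0
#include <assert.h>

static int
div8 (int b)
{
  int y = b < 0 ? 8 - 1 : 0;	/* S3 */
  int x = b + y;		/* S2 */
  return x >> 3;		/* S1': log2 (8) == 3 */
}

static int
mod8 (int b)
{
  unsigned int y = b < 0 ? -1U : 0U;	/* S9 */
  unsigned int zu = y >> (32 - 3);	/* S8 */
  int z = (int) zu;			/* S7 */
  int w = b + z;			/* S6 */
  int x = w & (8 - 1);			/* S5 */
  return x - z;				/* S4' */
}

int
main (void)
{
  for (int b = -64; b <= 64; b++)
    {
      assert (div8 (b) == b / 8);
      assert (mod8 (b) == b % 8);
    }
  return 0;
}
#endif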
2631*38fd1498Szrj 
2632*38fd1498Szrj static gimple *
2633*38fd1498Szrj vect_recog_divmod_pattern (vec<gimple *> *stmts,
2634*38fd1498Szrj 			   tree *type_in, tree *type_out)
2635*38fd1498Szrj {
2636*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
2637*38fd1498Szrj   tree oprnd0, oprnd1, vectype, itype, cond;
2638*38fd1498Szrj   gimple *pattern_stmt, *def_stmt;
2639*38fd1498Szrj   enum tree_code rhs_code;
2640*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2641*38fd1498Szrj   vec_info *vinfo = stmt_vinfo->vinfo;
2642*38fd1498Szrj   optab optab;
2643*38fd1498Szrj   tree q;
2644*38fd1498Szrj   int dummy_int, prec;
2645*38fd1498Szrj   stmt_vec_info def_stmt_vinfo;
2646*38fd1498Szrj 
2647*38fd1498Szrj   if (!is_gimple_assign (last_stmt))
2648*38fd1498Szrj     return NULL;
2649*38fd1498Szrj 
2650*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (last_stmt);
2651*38fd1498Szrj   switch (rhs_code)
2652*38fd1498Szrj     {
2653*38fd1498Szrj     case TRUNC_DIV_EXPR:
2654*38fd1498Szrj     case TRUNC_MOD_EXPR:
2655*38fd1498Szrj       break;
2656*38fd1498Szrj     default:
2657*38fd1498Szrj       return NULL;
2658*38fd1498Szrj     }
2659*38fd1498Szrj 
2660*38fd1498Szrj   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2661*38fd1498Szrj     return NULL;
2662*38fd1498Szrj 
2663*38fd1498Szrj   oprnd0 = gimple_assign_rhs1 (last_stmt);
2664*38fd1498Szrj   oprnd1 = gimple_assign_rhs2 (last_stmt);
2665*38fd1498Szrj   itype = TREE_TYPE (oprnd0);
2666*38fd1498Szrj   if (TREE_CODE (oprnd0) != SSA_NAME
2667*38fd1498Szrj       || TREE_CODE (oprnd1) != INTEGER_CST
2668*38fd1498Szrj       || TREE_CODE (itype) != INTEGER_TYPE
2669*38fd1498Szrj       || !type_has_mode_precision_p (itype))
2670*38fd1498Szrj     return NULL;
2671*38fd1498Szrj 
2672*38fd1498Szrj   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2673*38fd1498Szrj   vectype = get_vectype_for_scalar_type (itype);
2674*38fd1498Szrj   if (vectype == NULL_TREE)
2675*38fd1498Szrj     return NULL;
2676*38fd1498Szrj 
2677*38fd1498Szrj   /* If the target can handle vectorized division or modulo natively,
2678*38fd1498Szrj      don't attempt to optimize this.  */
2679*38fd1498Szrj   optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2680*38fd1498Szrj   if (optab != unknown_optab)
2681*38fd1498Szrj     {
2682*38fd1498Szrj       machine_mode vec_mode = TYPE_MODE (vectype);
2683*38fd1498Szrj       int icode = (int) optab_handler (optab, vec_mode);
2684*38fd1498Szrj       if (icode != CODE_FOR_nothing)
2685*38fd1498Szrj 	return NULL;
2686*38fd1498Szrj     }
2687*38fd1498Szrj 
2688*38fd1498Szrj   prec = TYPE_PRECISION (itype);
2689*38fd1498Szrj   if (integer_pow2p (oprnd1))
2690*38fd1498Szrj     {
2691*38fd1498Szrj       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2692*38fd1498Szrj 	return NULL;
2693*38fd1498Szrj 
2694*38fd1498Szrj       /* Pattern detected.  */
2695*38fd1498Szrj       if (dump_enabled_p ())
2696*38fd1498Szrj         dump_printf_loc (MSG_NOTE, vect_location,
2697*38fd1498Szrj                          "vect_recog_divmod_pattern: detected:\n");
2698*38fd1498Szrj 
2699*38fd1498Szrj       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2700*38fd1498Szrj 		     build_int_cst (itype, 0));
2701*38fd1498Szrj       if (rhs_code == TRUNC_DIV_EXPR)
2702*38fd1498Szrj 	{
2703*38fd1498Szrj 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
2704*38fd1498Szrj 	  tree shift;
2705*38fd1498Szrj 	  def_stmt
2706*38fd1498Szrj 	    = gimple_build_assign (var, COND_EXPR, cond,
2707*38fd1498Szrj 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2708*38fd1498Szrj 						build_int_cst (itype, 1)),
2709*38fd1498Szrj 				   build_int_cst (itype, 0));
2710*38fd1498Szrj 	  new_pattern_def_seq (stmt_vinfo, def_stmt);
2711*38fd1498Szrj 	  var = vect_recog_temp_ssa_var (itype, NULL);
2712*38fd1498Szrj 	  def_stmt
2713*38fd1498Szrj 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2714*38fd1498Szrj 				   gimple_assign_lhs (def_stmt));
2715*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2716*38fd1498Szrj 
2717*38fd1498Szrj 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
2718*38fd1498Szrj 	  pattern_stmt
2719*38fd1498Szrj 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2720*38fd1498Szrj 				   RSHIFT_EXPR, var, shift);
2721*38fd1498Szrj 	}
2722*38fd1498Szrj       else
2723*38fd1498Szrj 	{
2724*38fd1498Szrj 	  tree signmask;
2725*38fd1498Szrj 	  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2726*38fd1498Szrj 	  if (compare_tree_int (oprnd1, 2) == 0)
2727*38fd1498Szrj 	    {
2728*38fd1498Szrj 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2729*38fd1498Szrj 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2730*38fd1498Szrj 					      build_int_cst (itype, 1),
2731*38fd1498Szrj 					      build_int_cst (itype, 0));
2732*38fd1498Szrj 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2733*38fd1498Szrj 	    }
2734*38fd1498Szrj 	  else
2735*38fd1498Szrj 	    {
2736*38fd1498Szrj 	      tree utype
2737*38fd1498Szrj 		= build_nonstandard_integer_type (prec, 1);
2738*38fd1498Szrj 	      tree vecutype = get_vectype_for_scalar_type (utype);
2739*38fd1498Szrj 	      tree shift
2740*38fd1498Szrj 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2741*38fd1498Szrj 					- tree_log2 (oprnd1));
2742*38fd1498Szrj 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
2743*38fd1498Szrj 
2744*38fd1498Szrj 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2745*38fd1498Szrj 					      build_int_cst (utype, -1),
2746*38fd1498Szrj 					      build_int_cst (utype, 0));
2747*38fd1498Szrj 	      def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2748*38fd1498Szrj 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2749*38fd1498Szrj 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2750*38fd1498Szrj 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2751*38fd1498Szrj 	      var = vect_recog_temp_ssa_var (utype, NULL);
2752*38fd1498Szrj 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2753*38fd1498Szrj 					      gimple_assign_lhs (def_stmt),
2754*38fd1498Szrj 					      shift);
2755*38fd1498Szrj 	      def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
2756*38fd1498Szrj 	      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2757*38fd1498Szrj 	      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2758*38fd1498Szrj 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2759*38fd1498Szrj 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2760*38fd1498Szrj 	      def_stmt
2761*38fd1498Szrj 		= gimple_build_assign (signmask, NOP_EXPR, var);
2762*38fd1498Szrj 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2763*38fd1498Szrj 	    }
2764*38fd1498Szrj 	  def_stmt
2765*38fd1498Szrj 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2766*38fd1498Szrj 				   PLUS_EXPR, oprnd0, signmask);
2767*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2768*38fd1498Szrj 	  def_stmt
2769*38fd1498Szrj 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2770*38fd1498Szrj 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2771*38fd1498Szrj 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2772*38fd1498Szrj 						build_int_cst (itype, 1)));
2773*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2774*38fd1498Szrj 
2775*38fd1498Szrj 	  pattern_stmt
2776*38fd1498Szrj 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2777*38fd1498Szrj 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
2778*38fd1498Szrj 				   signmask);
2779*38fd1498Szrj 	}
2780*38fd1498Szrj 
2781*38fd1498Szrj       if (dump_enabled_p ())
2782*38fd1498Szrj 	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2783*38fd1498Szrj                               0);
2784*38fd1498Szrj 
2785*38fd1498Szrj       stmts->safe_push (last_stmt);
2786*38fd1498Szrj 
2787*38fd1498Szrj       *type_in = vectype;
2788*38fd1498Szrj       *type_out = vectype;
2789*38fd1498Szrj       return pattern_stmt;
2790*38fd1498Szrj     }
2791*38fd1498Szrj 
2792*38fd1498Szrj   if (prec > HOST_BITS_PER_WIDE_INT
2793*38fd1498Szrj       || integer_zerop (oprnd1))
2794*38fd1498Szrj     return NULL;
2795*38fd1498Szrj 
2796*38fd1498Szrj   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2797*38fd1498Szrj     return NULL;
2798*38fd1498Szrj 
2799*38fd1498Szrj   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2800*38fd1498Szrj 
2801*38fd1498Szrj   if (TYPE_UNSIGNED (itype))
2802*38fd1498Szrj     {
2803*38fd1498Szrj       unsigned HOST_WIDE_INT mh, ml;
2804*38fd1498Szrj       int pre_shift, post_shift;
2805*38fd1498Szrj       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2806*38fd1498Szrj 				  & GET_MODE_MASK (itype_mode));
2807*38fd1498Szrj       tree t1, t2, t3, t4;
2808*38fd1498Szrj 
2809*38fd1498Szrj       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2810*38fd1498Szrj 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
2811*38fd1498Szrj 	return NULL;
2812*38fd1498Szrj 
2813*38fd1498Szrj       /* Find a suitable multiplier and right shift count
2814*38fd1498Szrj 	 instead of multiplying with D.  */
2815*38fd1498Szrj       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2816*38fd1498Szrj 
2817*38fd1498Szrj       /* If the suggested multiplier is more than SIZE bits, we can do better
2818*38fd1498Szrj 	 for even divisors, using an initial right shift.  */
2819*38fd1498Szrj       if (mh != 0 && (d & 1) == 0)
2820*38fd1498Szrj 	{
2821*38fd1498Szrj 	  pre_shift = ctz_or_zero (d);
2822*38fd1498Szrj 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2823*38fd1498Szrj 				  &ml, &post_shift, &dummy_int);
2824*38fd1498Szrj 	  gcc_assert (!mh);
2825*38fd1498Szrj 	}
2826*38fd1498Szrj       else
2827*38fd1498Szrj 	pre_shift = 0;
2828*38fd1498Szrj 
2829*38fd1498Szrj       if (mh != 0)
2830*38fd1498Szrj 	{
2831*38fd1498Szrj 	  if (post_shift - 1 >= prec)
2832*38fd1498Szrj 	    return NULL;
2833*38fd1498Szrj 
2834*38fd1498Szrj 	  /* t1 = oprnd0 h* ml;
2835*38fd1498Szrj 	     t2 = oprnd0 - t1;
2836*38fd1498Szrj 	     t3 = t2 >> 1;
2837*38fd1498Szrj 	     t4 = t1 + t3;
2838*38fd1498Szrj 	     q = t4 >> (post_shift - 1);  */
2839*38fd1498Szrj 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
2840*38fd1498Szrj 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2841*38fd1498Szrj 					  build_int_cst (itype, ml));
2842*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2843*38fd1498Szrj 
2844*38fd1498Szrj 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2845*38fd1498Szrj 	  def_stmt
2846*38fd1498Szrj 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2847*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2848*38fd1498Szrj 
2849*38fd1498Szrj 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2850*38fd1498Szrj 	  def_stmt
2851*38fd1498Szrj 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2852*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2853*38fd1498Szrj 
2854*38fd1498Szrj 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
2855*38fd1498Szrj 	  def_stmt
2856*38fd1498Szrj 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2857*38fd1498Szrj 
2858*38fd1498Szrj 	  if (post_shift != 1)
2859*38fd1498Szrj 	    {
2860*38fd1498Szrj 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2861*38fd1498Szrj 
2862*38fd1498Szrj 	      q = vect_recog_temp_ssa_var (itype, NULL);
2863*38fd1498Szrj 	      pattern_stmt
2864*38fd1498Szrj 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
2865*38fd1498Szrj 				       build_int_cst (itype, post_shift - 1));
2866*38fd1498Szrj 	    }
2867*38fd1498Szrj 	  else
2868*38fd1498Szrj 	    {
2869*38fd1498Szrj 	      q = t4;
2870*38fd1498Szrj 	      pattern_stmt = def_stmt;
2871*38fd1498Szrj 	    }
2872*38fd1498Szrj 	}
2873*38fd1498Szrj       else
2874*38fd1498Szrj 	{
2875*38fd1498Szrj 	  if (pre_shift >= prec || post_shift >= prec)
2876*38fd1498Szrj 	    return NULL;
2877*38fd1498Szrj 
2878*38fd1498Szrj 	  /* t1 = oprnd0 >> pre_shift;
2879*38fd1498Szrj 	     t2 = t1 h* ml;
2880*38fd1498Szrj 	     q = t2 >> post_shift;  */
2881*38fd1498Szrj 	  if (pre_shift)
2882*38fd1498Szrj 	    {
2883*38fd1498Szrj 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
2884*38fd1498Szrj 	      def_stmt
2885*38fd1498Szrj 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2886*38fd1498Szrj 				       build_int_cst (NULL, pre_shift));
2887*38fd1498Szrj 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2888*38fd1498Szrj 	    }
2889*38fd1498Szrj 	  else
2890*38fd1498Szrj 	    t1 = oprnd0;
2891*38fd1498Szrj 
2892*38fd1498Szrj 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2893*38fd1498Szrj 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2894*38fd1498Szrj 					  build_int_cst (itype, ml));
2895*38fd1498Szrj 
2896*38fd1498Szrj 	  if (post_shift)
2897*38fd1498Szrj 	    {
2898*38fd1498Szrj 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2899*38fd1498Szrj 
2900*38fd1498Szrj 	      q = vect_recog_temp_ssa_var (itype, NULL);
2901*38fd1498Szrj 	      def_stmt
2902*38fd1498Szrj 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
2903*38fd1498Szrj 				       build_int_cst (itype, post_shift));
2904*38fd1498Szrj 	    }
2905*38fd1498Szrj 	  else
2906*38fd1498Szrj 	    q = t2;
2907*38fd1498Szrj 
2908*38fd1498Szrj 	  pattern_stmt = def_stmt;
2909*38fd1498Szrj 	}
2910*38fd1498Szrj     }
2911*38fd1498Szrj   else
2912*38fd1498Szrj     {
2913*38fd1498Szrj       unsigned HOST_WIDE_INT ml;
2914*38fd1498Szrj       int post_shift;
2915*38fd1498Szrj       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2916*38fd1498Szrj       unsigned HOST_WIDE_INT abs_d;
2917*38fd1498Szrj       bool add = false;
2918*38fd1498Szrj       tree t1, t2, t3, t4;
2919*38fd1498Szrj 
2920*38fd1498Szrj       /* Give up for -1.  */
2921*38fd1498Szrj       if (d == -1)
2922*38fd1498Szrj 	return NULL;
2923*38fd1498Szrj 
2924*38fd1498Szrj       /* Since d might be INT_MIN, we have to cast to
2925*38fd1498Szrj 	 unsigned HOST_WIDE_INT before negating to avoid
2926*38fd1498Szrj 	 undefined signed overflow.  */
2927*38fd1498Szrj       abs_d = (d >= 0
2928*38fd1498Szrj 	       ? (unsigned HOST_WIDE_INT) d
2929*38fd1498Szrj 	       : - (unsigned HOST_WIDE_INT) d);
2930*38fd1498Szrj 
2931*38fd1498Szrj       /* n rem d = n rem -d.  */
2932*38fd1498Szrj       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2933*38fd1498Szrj 	{
2934*38fd1498Szrj 	  d = abs_d;
2935*38fd1498Szrj 	  oprnd1 = build_int_cst (itype, abs_d);
2936*38fd1498Szrj 	}
2937*38fd1498Szrj       else if (HOST_BITS_PER_WIDE_INT >= prec
2938*38fd1498Szrj 	       && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2939*38fd1498Szrj 	/* This case is not handled correctly below.  */
2940*38fd1498Szrj 	return NULL;
2941*38fd1498Szrj 
2942*38fd1498Szrj       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2943*38fd1498Szrj       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2944*38fd1498Szrj 	{
2945*38fd1498Szrj 	  add = true;
2946*38fd1498Szrj 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
2947*38fd1498Szrj 	}
2948*38fd1498Szrj       if (post_shift >= prec)
2949*38fd1498Szrj 	return NULL;
2950*38fd1498Szrj 
2951*38fd1498Szrj       /* t1 = oprnd0 h* ml;  */
2952*38fd1498Szrj       t1 = vect_recog_temp_ssa_var (itype, NULL);
2953*38fd1498Szrj       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2954*38fd1498Szrj 				      build_int_cst (itype, ml));
2955*38fd1498Szrj 
2956*38fd1498Szrj       if (add)
2957*38fd1498Szrj 	{
2958*38fd1498Szrj 	  /* t2 = t1 + oprnd0;  */
2959*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2960*38fd1498Szrj 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2961*38fd1498Szrj 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2962*38fd1498Szrj 	}
2963*38fd1498Szrj       else
2964*38fd1498Szrj 	t2 = t1;
2965*38fd1498Szrj 
2966*38fd1498Szrj       if (post_shift)
2967*38fd1498Szrj 	{
2968*38fd1498Szrj 	  /* t3 = t2 >> post_shift;  */
2969*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2970*38fd1498Szrj 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2971*38fd1498Szrj 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2972*38fd1498Szrj 					  build_int_cst (itype, post_shift));
2973*38fd1498Szrj 	}
2974*38fd1498Szrj       else
2975*38fd1498Szrj 	t3 = t2;
2976*38fd1498Szrj 
2977*38fd1498Szrj       wide_int oprnd0_min, oprnd0_max;
2978*38fd1498Szrj       int msb = 1;
2979*38fd1498Szrj       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2980*38fd1498Szrj 	{
2981*38fd1498Szrj 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
2982*38fd1498Szrj 	    msb = 0;
2983*38fd1498Szrj 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
2984*38fd1498Szrj 	    msb = -1;
2985*38fd1498Szrj 	}
2986*38fd1498Szrj 
2987*38fd1498Szrj       if (msb == 0 && d >= 0)
2988*38fd1498Szrj 	{
2989*38fd1498Szrj 	  /* q = t3;  */
2990*38fd1498Szrj 	  q = t3;
2991*38fd1498Szrj 	  pattern_stmt = def_stmt;
2992*38fd1498Szrj 	}
2993*38fd1498Szrj       else
2994*38fd1498Szrj 	{
2995*38fd1498Szrj 	  /* t4 = oprnd0 >> (prec - 1);
2996*38fd1498Szrj 	     or if we know from VRP that oprnd0 >= 0
2997*38fd1498Szrj 	     t4 = 0;
2998*38fd1498Szrj 	     or if we know from VRP that oprnd0 < 0
2999*38fd1498Szrj 	     t4 = -1;  */
3000*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3001*38fd1498Szrj 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3002*38fd1498Szrj 	  if (msb != 1)
3003*38fd1498Szrj 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
3004*38fd1498Szrj 					    build_int_cst (itype, msb));
3005*38fd1498Szrj 	  else
3006*38fd1498Szrj 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3007*38fd1498Szrj 					    build_int_cst (itype, prec - 1));
3008*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3009*38fd1498Szrj 
3010*38fd1498Szrj 	  /* q = t3 - t4;  or q = t4 - t3;  */
3011*38fd1498Szrj 	  q = vect_recog_temp_ssa_var (itype, NULL);
3012*38fd1498Szrj 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3013*38fd1498Szrj 					      d < 0 ? t3 : t4);
3014*38fd1498Szrj 	}
3015*38fd1498Szrj     }
3016*38fd1498Szrj 
3017*38fd1498Szrj   if (rhs_code == TRUNC_MOD_EXPR)
3018*38fd1498Szrj     {
3019*38fd1498Szrj       tree r, t1;
3020*38fd1498Szrj 
3021*38fd1498Szrj       /* We divided.  Now finish by:
3022*38fd1498Szrj 	 t1 = q * oprnd1;
3023*38fd1498Szrj 	 r = oprnd0 - t1;  */
3024*38fd1498Szrj       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3025*38fd1498Szrj 
3026*38fd1498Szrj       t1 = vect_recog_temp_ssa_var (itype, NULL);
3027*38fd1498Szrj       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3028*38fd1498Szrj       append_pattern_def_seq (stmt_vinfo, def_stmt);
3029*38fd1498Szrj 
3030*38fd1498Szrj       r = vect_recog_temp_ssa_var (itype, NULL);
3031*38fd1498Szrj       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3032*38fd1498Szrj     }
3033*38fd1498Szrj 
3034*38fd1498Szrj   /* Pattern detected.  */
3035*38fd1498Szrj   if (dump_enabled_p ())
3036*38fd1498Szrj     {
3037*38fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
3038*38fd1498Szrj                        "vect_recog_divmod_pattern: detected: ");
3039*38fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
3040*38fd1498Szrj     }
3041*38fd1498Szrj 
3042*38fd1498Szrj   stmts->safe_push (last_stmt);
3043*38fd1498Szrj 
3044*38fd1498Szrj   *type_in = vectype;
3045*38fd1498Szrj   *type_out = vectype;
3046*38fd1498Szrj   return pattern_stmt;
3047*38fd1498Szrj }
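/* Standalone illustration (not part of this file) of the mh != 0 path above
   for prec == 32 and d == 7.  The constants were computed offline in the
   manner of choose_multiplier: the 33-bit multiplier is 2^32 + 0x24924925
   with post_shift == 3.  */
#if 0
#include <stdint.h>
#include <assert.h>

static uint32_t
udiv7 (uint32_t x)
{
  uint32_t t1 = (uint32_t) (((uint64_t) x * 0x24924925u) >> 32);
					/* t1 = x h* ml */
  uint32_t t2 = x - t1;
  uint32_t t3 = t2 >> 1;
  uint32_t t4 = t1 + t3;
  return t4 >> (3 - 1);			/* q = t4 >> (post_shift - 1) */
}

int
main (void)
{
  assert (udiv7 (6) == 0);
  assert (udiv7 (7) == 1);
  assert (udiv7 (13) == 1);
  assert (udiv7 (0xFFFFFFFFu) == 0xFFFFFFFFu / 7);
  return 0;
}
#endif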
3048*38fd1498Szrj 
3049*38fd1498Szrj /* Function vect_recog_mixed_size_cond_pattern
3050*38fd1498Szrj 
3051*38fd1498Szrj    Try to find the following pattern:
3052*38fd1498Szrj 
3053*38fd1498Szrj      type x_t, y_t;
3054*38fd1498Szrj      TYPE a_T, b_T, c_T;
3055*38fd1498Szrj    loop:
3056*38fd1498Szrj      S1  a_T = x_t CMP y_t ? b_T : c_T;
3057*38fd1498Szrj 
3058*38fd1498Szrj    where type 'TYPE' is an integral type which has different size
3059*38fd1498Szrj    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3060*38fd1498Szrj    than 'type', the constants need to fit into an integer type
3061*38fd1498Szrj    with the same width as 'type') or results of conversion from 'type'.
3062*38fd1498Szrj 
3063*38fd1498Szrj    Input:
3064*38fd1498Szrj 
3065*38fd1498Szrj    * LAST_STMT: A stmt from which the pattern search begins.
3066*38fd1498Szrj 
3067*38fd1498Szrj    Output:
3068*38fd1498Szrj 
3069*38fd1498Szrj    * TYPE_IN: The type of the input arguments to the pattern.
3070*38fd1498Szrj 
3071*38fd1498Szrj    * TYPE_OUT: The type of the output of this pattern.
3072*38fd1498Szrj 
3073*38fd1498Szrj    * Return value: A new stmt that will be used to replace the pattern.
3074*38fd1498Szrj 	Additionally a def_stmt is added.
3075*38fd1498Szrj 
3076*38fd1498Szrj 	a_it = x_t CMP y_t ? b_it : c_it;
3077*38fd1498Szrj 	a_T = (TYPE) a_it;  */
3078*38fd1498Szrj 
3079*38fd1498Szrj static gimple *
3080*38fd1498Szrj vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
3081*38fd1498Szrj 				    tree *type_out)
3082*38fd1498Szrj {
3083*38fd1498Szrj   gimple *last_stmt = (*stmts)[0];
3084*38fd1498Szrj   tree cond_expr, then_clause, else_clause;
3085*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
3086*38fd1498Szrj   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3087*38fd1498Szrj   gimple *pattern_stmt, *def_stmt;
3088*38fd1498Szrj   vec_info *vinfo = stmt_vinfo->vinfo;
3089*38fd1498Szrj   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3090*38fd1498Szrj   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3091*38fd1498Szrj   bool promotion;
3092*38fd1498Szrj   tree comp_scalar_type;
3093*38fd1498Szrj 
3094*38fd1498Szrj   if (!is_gimple_assign (last_stmt)
3095*38fd1498Szrj       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3096*38fd1498Szrj       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3097*38fd1498Szrj     return NULL;
3098*38fd1498Szrj 
3099*38fd1498Szrj   cond_expr = gimple_assign_rhs1 (last_stmt);
3100*38fd1498Szrj   then_clause = gimple_assign_rhs2 (last_stmt);
3101*38fd1498Szrj   else_clause = gimple_assign_rhs3 (last_stmt);
3102*38fd1498Szrj 
3103*38fd1498Szrj   if (!COMPARISON_CLASS_P (cond_expr))
3104*38fd1498Szrj     return NULL;
3105*38fd1498Szrj 
3106*38fd1498Szrj   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3107*38fd1498Szrj   comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3108*38fd1498Szrj   if (comp_vectype == NULL_TREE)
3109*38fd1498Szrj     return NULL;
3110*38fd1498Szrj 
3111*38fd1498Szrj   type = gimple_expr_type (last_stmt);
3112*38fd1498Szrj   if (types_compatible_p (type, comp_scalar_type)
3113*38fd1498Szrj       || ((TREE_CODE (then_clause) != INTEGER_CST
3114*38fd1498Szrj 	   || TREE_CODE (else_clause) != INTEGER_CST)
3115*38fd1498Szrj 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3116*38fd1498Szrj       || !INTEGRAL_TYPE_P (type))
3117*38fd1498Szrj     return NULL;
3118*38fd1498Szrj 
3119*38fd1498Szrj   if ((TREE_CODE (then_clause) != INTEGER_CST
3120*38fd1498Szrj        && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3121*38fd1498Szrj                               &def_stmt0, &promotion))
3122*38fd1498Szrj       || (TREE_CODE (else_clause) != INTEGER_CST
3123*38fd1498Szrj           && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3124*38fd1498Szrj                                  &def_stmt1, &promotion)))
3125*38fd1498Szrj     return NULL;
3126*38fd1498Szrj 
3127*38fd1498Szrj   if (orig_type0 && orig_type1
3128*38fd1498Szrj       && !types_compatible_p (orig_type0, orig_type1))
3129*38fd1498Szrj     return NULL;
3130*38fd1498Szrj 
3131*38fd1498Szrj   if (orig_type0)
3132*38fd1498Szrj     {
3133*38fd1498Szrj       if (!types_compatible_p (orig_type0, comp_scalar_type))
3134*38fd1498Szrj 	return NULL;
3135*38fd1498Szrj       then_clause = gimple_assign_rhs1 (def_stmt0);
3136*38fd1498Szrj       itype = orig_type0;
3137*38fd1498Szrj     }
3138*38fd1498Szrj 
3139*38fd1498Szrj   if (orig_type1)
3140*38fd1498Szrj     {
3141*38fd1498Szrj       if (!types_compatible_p (orig_type1, comp_scalar_type))
3142*38fd1498Szrj 	return NULL;
3143*38fd1498Szrj       else_clause = gimple_assign_rhs1 (def_stmt1);
3144*38fd1498Szrj       itype = orig_type1;
3145*38fd1498Szrj     }
3146*38fd1498Szrj 
3147*38fd1498Szrj 
3148*38fd1498Szrj   HOST_WIDE_INT cmp_mode_size
3149*38fd1498Szrj     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3150*38fd1498Szrj 
3151*38fd1498Szrj   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3152*38fd1498Szrj   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3153*38fd1498Szrj     return NULL;
3154*38fd1498Szrj 
3155*38fd1498Szrj   vectype = get_vectype_for_scalar_type (type);
3156*38fd1498Szrj   if (vectype == NULL_TREE)
3157*38fd1498Szrj     return NULL;
3158*38fd1498Szrj 
3159*38fd1498Szrj   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3160*38fd1498Szrj     return NULL;
3161*38fd1498Szrj 
3162*38fd1498Szrj   if (itype == NULL_TREE)
3163*38fd1498Szrj     itype = build_nonstandard_integer_type (cmp_mode_size,
3164*38fd1498Szrj 					    TYPE_UNSIGNED (type));
3165*38fd1498Szrj 
3166*38fd1498Szrj   if (itype == NULL_TREE
3167*38fd1498Szrj       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3168*38fd1498Szrj     return NULL;
3169*38fd1498Szrj 
3170*38fd1498Szrj   vecitype = get_vectype_for_scalar_type (itype);
3171*38fd1498Szrj   if (vecitype == NULL_TREE)
3172*38fd1498Szrj     return NULL;
3173*38fd1498Szrj 
3174*38fd1498Szrj   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3175*38fd1498Szrj     return NULL;
3176*38fd1498Szrj 
3177*38fd1498Szrj   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3178*38fd1498Szrj     {
3179*38fd1498Szrj       if ((TREE_CODE (then_clause) == INTEGER_CST
3180*38fd1498Szrj 	   && !int_fits_type_p (then_clause, itype))
3181*38fd1498Szrj 	  || (TREE_CODE (else_clause) == INTEGER_CST
3182*38fd1498Szrj 	      && !int_fits_type_p (else_clause, itype)))
3183*38fd1498Szrj 	return NULL;
3184*38fd1498Szrj     }
3185*38fd1498Szrj 
3186*38fd1498Szrj   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3187*38fd1498Szrj 				  COND_EXPR, unshare_expr (cond_expr),
3188*38fd1498Szrj 				  fold_convert (itype, then_clause),
3189*38fd1498Szrj 				  fold_convert (itype, else_clause));
3190*38fd1498Szrj   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3191*38fd1498Szrj 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3192*38fd1498Szrj 
3193*38fd1498Szrj   new_pattern_def_seq (stmt_vinfo, def_stmt);
3194*38fd1498Szrj   def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
3195*38fd1498Szrj   set_vinfo_for_stmt (def_stmt, def_stmt_info);
3196*38fd1498Szrj   STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
3197*38fd1498Szrj   *type_in = vecitype;
3198*38fd1498Szrj   *type_out = vectype;
3199*38fd1498Szrj 
3200*38fd1498Szrj   if (dump_enabled_p ())
3201*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
3202*38fd1498Szrj                      "vect_recog_mixed_size_cond_pattern: detected:\n");
3203*38fd1498Szrj 
3204*38fd1498Szrj   return pattern_stmt;
3205*38fd1498Szrj }
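/* Illustration (standalone sketch; the names are hypothetical): a COND_EXPR
   whose comparison runs on a narrower type than its result.  The pattern
   above performs the select in the comparison's width and widens afterwards:
     a_it   = x[i] < y[i] ? 5 : 7;	(short-sized lanes)
     out[i] = (int) a_it;  */
#if 0
void
mixed_cond (int *restrict out, const short *restrict x,
	    const short *restrict y, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = x[i] < y[i] ? 5 : 7;
}
#endif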
3206*38fd1498Szrj 
3207*38fd1498Szrj 
3208*38fd1498Szrj /* Helper function of vect_recog_bool_pattern.  Called recursively; return
3209*38fd1498Szrj    true if bool VAR can and should be optimized this way, i.e. by rewriting
3210*38fd1498Szrj    the bool computation into integer operations.  Assume it shouldn't be if
3211*38fd1498Szrj    it's the result of a comparison which can be vectorized directly into a
3212*38fd1498Szrj    vector comparison.  Fill in STMTS with all stmts visited during the walk.  */
3213*38fd1498Szrj 
3214*38fd1498Szrj static bool
3215*38fd1498Szrj check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3216*38fd1498Szrj {
3217*38fd1498Szrj   gimple *def_stmt;
3218*38fd1498Szrj   enum vect_def_type dt;
3219*38fd1498Szrj   tree rhs1;
3220*38fd1498Szrj   enum tree_code rhs_code;
3221*38fd1498Szrj 
3222*38fd1498Szrj   if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3223*38fd1498Szrj     return false;
3224*38fd1498Szrj 
3225*38fd1498Szrj   if (dt != vect_internal_def)
3226*38fd1498Szrj     return false;
3227*38fd1498Szrj 
3228*38fd1498Szrj   if (!is_gimple_assign (def_stmt))
3229*38fd1498Szrj     return false;
3230*38fd1498Szrj 
3231*38fd1498Szrj   if (stmts.contains (def_stmt))
3232*38fd1498Szrj     return true;
3233*38fd1498Szrj 
3234*38fd1498Szrj   rhs1 = gimple_assign_rhs1 (def_stmt);
3235*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (def_stmt);
3236*38fd1498Szrj   switch (rhs_code)
3237*38fd1498Szrj     {
3238*38fd1498Szrj     case SSA_NAME:
3239*38fd1498Szrj       if (! check_bool_pattern (rhs1, vinfo, stmts))
3240*38fd1498Szrj 	return false;
3241*38fd1498Szrj       break;
3242*38fd1498Szrj 
3243*38fd1498Szrj     CASE_CONVERT:
3244*38fd1498Szrj       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3245*38fd1498Szrj 	return false;
3246*38fd1498Szrj       if (! check_bool_pattern (rhs1, vinfo, stmts))
3247*38fd1498Szrj 	return false;
3248*38fd1498Szrj       break;
3249*38fd1498Szrj 
3250*38fd1498Szrj     case BIT_NOT_EXPR:
3251*38fd1498Szrj       if (! check_bool_pattern (rhs1, vinfo, stmts))
3252*38fd1498Szrj 	return false;
3253*38fd1498Szrj       break;
3254*38fd1498Szrj 
3255*38fd1498Szrj     case BIT_AND_EXPR:
3256*38fd1498Szrj     case BIT_IOR_EXPR:
3257*38fd1498Szrj     case BIT_XOR_EXPR:
3258*38fd1498Szrj       if (! check_bool_pattern (rhs1, vinfo, stmts)
3259*38fd1498Szrj 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3260*38fd1498Szrj 	return false;
3261*38fd1498Szrj       break;
3262*38fd1498Szrj 
3263*38fd1498Szrj     default:
3264*38fd1498Szrj       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3265*38fd1498Szrj 	{
3266*38fd1498Szrj 	  tree vecitype, comp_vectype;
3267*38fd1498Szrj 
3268*38fd1498Szrj 	  /* If the comparison can throw, then is_gimple_condexpr will be
3269*38fd1498Szrj 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
3270*38fd1498Szrj 	  if (stmt_could_throw_p (def_stmt))
3271*38fd1498Szrj 	    return false;
3272*38fd1498Szrj 
3273*38fd1498Szrj 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3274*38fd1498Szrj 	  if (comp_vectype == NULL_TREE)
3275*38fd1498Szrj 	    return false;
3276*38fd1498Szrj 
3277*38fd1498Szrj 	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3278*38fd1498Szrj 	  if (mask_type
3279*38fd1498Szrj 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3280*38fd1498Szrj 	    return false;
3281*38fd1498Szrj 
3282*38fd1498Szrj 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3283*38fd1498Szrj 	    {
3284*38fd1498Szrj 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3285*38fd1498Szrj 	      tree itype
3286*38fd1498Szrj 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3287*38fd1498Szrj 	      vecitype = get_vectype_for_scalar_type (itype);
3288*38fd1498Szrj 	      if (vecitype == NULL_TREE)
3289*38fd1498Szrj 		return false;
3290*38fd1498Szrj 	    }
3291*38fd1498Szrj 	  else
3292*38fd1498Szrj 	    vecitype = comp_vectype;
3293*38fd1498Szrj 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3294*38fd1498Szrj 	    return false;
3295*38fd1498Szrj 	}
3296*38fd1498Szrj       else
3297*38fd1498Szrj 	return false;
3298*38fd1498Szrj       break;
3299*38fd1498Szrj     }
3300*38fd1498Szrj 
3301*38fd1498Szrj   bool res = stmts.add (def_stmt);
3302*38fd1498Szrj   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
3303*38fd1498Szrj   gcc_assert (!res);
3304*38fd1498Szrj 
3305*38fd1498Szrj   return true;
3306*38fd1498Szrj }
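/* Illustration (standalone sketch; the names are hypothetical): the shape of
   bool computation this walker accepts -- comparison leaves combined through
   !, &, | and ^, feeding a conditional root.  */
#if 0
#include <stdbool.h>

static int
select_masked (int a, int b, int c, int d, int x)
{
  bool p = a < b;		/* tcc_comparison leaf */
  bool q = c != d;		/* tcc_comparison leaf */
  bool r = !p | q;		/* BIT_NOT_EXPR + BIT_IOR_EXPR */
  return r ? x : 0;		/* root that vect_recog_bool_pattern rewrites */
}
#endif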
3307*38fd1498Szrj 
3308*38fd1498Szrj 
3309*38fd1498Szrj /* Helper function of adjust_bool_pattern.  Cast VAR to TYPE, appending
3310*38fd1498Szrj    the cast stmt to STMT_INFO's pattern def sequence.  Return the
3311*38fd1498Szrj    result of the cast.  */
3312*38fd1498Szrj 
3313*38fd1498Szrj static tree
3314*38fd1498Szrj adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3315*38fd1498Szrj {
3316*38fd1498Szrj   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3317*38fd1498Szrj 					   NOP_EXPR, var);
3318*38fd1498Szrj   stmt_vec_info patt_vinfo = new_stmt_vec_info (cast_stmt, stmt_info->vinfo);
3319*38fd1498Szrj   set_vinfo_for_stmt (cast_stmt, patt_vinfo);
3320*38fd1498Szrj   STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (type);
3321*38fd1498Szrj   append_pattern_def_seq (stmt_info, cast_stmt);
3322*38fd1498Szrj   return gimple_assign_lhs (cast_stmt);
3323*38fd1498Szrj }
3324*38fd1498Szrj 
3325*38fd1498Szrj /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
3326*38fd1498Szrj    VAR is an SSA_NAME that should be transformed from bool to a wider integer
3327*38fd1498Szrj    type, OUT_TYPE is the desired final integer type of the whole pattern.
3328*38fd1498Szrj    STMT_INFO is the info of the pattern root and is where pattern stmts should
3329*38fd1498Szrj    be associated with.  DEFS is a map of pattern defs.  */
3330*38fd1498Szrj 
3331*38fd1498Szrj static void
3332*38fd1498Szrj adjust_bool_pattern (tree var, tree out_type,
3333*38fd1498Szrj 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3334*38fd1498Szrj {
3335*38fd1498Szrj   gimple *stmt = SSA_NAME_DEF_STMT (var);
3336*38fd1498Szrj   enum tree_code rhs_code, def_rhs_code;
3337*38fd1498Szrj   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3338*38fd1498Szrj   location_t loc;
3339*38fd1498Szrj   gimple *pattern_stmt, *def_stmt;
3340*38fd1498Szrj   tree trueval = NULL_TREE;
3341*38fd1498Szrj 
3342*38fd1498Szrj   rhs1 = gimple_assign_rhs1 (stmt);
3343*38fd1498Szrj   rhs2 = gimple_assign_rhs2 (stmt);
3344*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (stmt);
3345*38fd1498Szrj   loc = gimple_location (stmt);
3346*38fd1498Szrj   switch (rhs_code)
3347*38fd1498Szrj     {
3348*38fd1498Szrj     case SSA_NAME:
3349*38fd1498Szrj     CASE_CONVERT:
3350*38fd1498Szrj       irhs1 = *defs.get (rhs1);
3351*38fd1498Szrj       itype = TREE_TYPE (irhs1);
3352*38fd1498Szrj       pattern_stmt
3353*38fd1498Szrj 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3354*38fd1498Szrj 			       SSA_NAME, irhs1);
3355*38fd1498Szrj       break;
3356*38fd1498Szrj 
3357*38fd1498Szrj     case BIT_NOT_EXPR:
3358*38fd1498Szrj       irhs1 = *defs.get (rhs1);
3359*38fd1498Szrj       itype = TREE_TYPE (irhs1);
3360*38fd1498Szrj       pattern_stmt
3361*38fd1498Szrj 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3362*38fd1498Szrj 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3363*38fd1498Szrj       break;
3364*38fd1498Szrj 
3365*38fd1498Szrj     case BIT_AND_EXPR:
3366*38fd1498Szrj       /* Try to optimize x = y & (a < b ? 1 : 0); into
3367*38fd1498Szrj 	 x = (a < b ? y : 0);
3368*38fd1498Szrj 
3369*38fd1498Szrj 	 E.g. for:
3370*38fd1498Szrj 	   bool a_b, b_b, c_b;
3371*38fd1498Szrj 	   TYPE d_T;
3372*38fd1498Szrj 
3373*38fd1498Szrj 	   S1  a_b = x1 CMP1 y1;
3374*38fd1498Szrj 	   S2  b_b = x2 CMP2 y2;
3375*38fd1498Szrj 	   S3  c_b = a_b & b_b;
3376*38fd1498Szrj 	   S4  d_T = (TYPE) c_b;
3377*38fd1498Szrj 
3378*38fd1498Szrj 	 we would normally emit:
3379*38fd1498Szrj 
3380*38fd1498Szrj 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3381*38fd1498Szrj 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3382*38fd1498Szrj 	   S3'  c_T = a_T & b_T;
3383*38fd1498Szrj 	   S4'  d_T = c_T;
3384*38fd1498Szrj 
3385*38fd1498Szrj 	 but we can save one stmt by using the
3386*38fd1498Szrj 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3387*38fd1498Szrj 	 BIT_AND_EXPR stmt out:
3388*38fd1498Szrj 
3389*38fd1498Szrj 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3390*38fd1498Szrj 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3391*38fd1498Szrj 	   S4'  f_T = c_T;
3392*38fd1498Szrj 
3393*38fd1498Szrj 	 At least when VEC_COND_EXPR is implemented using masks,
3394*38fd1498Szrj 	 cond ? 1 : 0 is as expensive as cond ? var : 0: in both cases it
3395*38fd1498Szrj 	 computes the comparison mask and ANDs it, in one case with an
3396*38fd1498Szrj 	 all-ones vector, in the other case with a vector register.
3397*38fd1498Szrj 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3398*38fd1498Szrj 	 often more expensive.  */
3399*38fd1498Szrj       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3400*38fd1498Szrj       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3401*38fd1498Szrj       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3402*38fd1498Szrj 	{
3403*38fd1498Szrj 	  irhs1 = *defs.get (rhs1);
3404*38fd1498Szrj 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3405*38fd1498Szrj 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3406*38fd1498Szrj 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3407*38fd1498Szrj 	    {
3408*38fd1498Szrj 	      rhs_code = def_rhs_code;
3409*38fd1498Szrj 	      rhs1 = def_rhs1;
3410*38fd1498Szrj 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3411*38fd1498Szrj 	      trueval = irhs1;
3412*38fd1498Szrj 	      goto do_compare;
3413*38fd1498Szrj 	    }
3414*38fd1498Szrj 	  else
3415*38fd1498Szrj 	    irhs2 = *defs.get (rhs2);
3416*38fd1498Szrj 	  goto and_ior_xor;
3417*38fd1498Szrj 	}
3418*38fd1498Szrj       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3419*38fd1498Szrj       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3420*38fd1498Szrj       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3421*38fd1498Szrj 	{
3422*38fd1498Szrj 	  irhs2 = *defs.get (rhs2);
3423*38fd1498Szrj 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3424*38fd1498Szrj 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3425*38fd1498Szrj 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3426*38fd1498Szrj 	    {
3427*38fd1498Szrj 	      rhs_code = def_rhs_code;
3428*38fd1498Szrj 	      rhs1 = def_rhs1;
3429*38fd1498Szrj 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3430*38fd1498Szrj 	      trueval = irhs2;
3431*38fd1498Szrj 	      goto do_compare;
3432*38fd1498Szrj 	    }
3433*38fd1498Szrj 	  else
3434*38fd1498Szrj 	    irhs1 = *defs.get (rhs1);
3435*38fd1498Szrj 	  goto and_ior_xor;
3436*38fd1498Szrj 	}
3437*38fd1498Szrj       /* FALLTHRU */
3438*38fd1498Szrj     case BIT_IOR_EXPR:
3439*38fd1498Szrj     case BIT_XOR_EXPR:
3440*38fd1498Szrj       irhs1 = *defs.get (rhs1);
3441*38fd1498Szrj       irhs2 = *defs.get (rhs2);
3442*38fd1498Szrj     and_ior_xor:
3443*38fd1498Szrj       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3444*38fd1498Szrj 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3445*38fd1498Szrj 	{
3446*38fd1498Szrj 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3447*38fd1498Szrj 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3448*38fd1498Szrj 	  int out_prec = TYPE_PRECISION (out_type);
3449*38fd1498Szrj 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3450*38fd1498Szrj 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3451*38fd1498Szrj 					      stmt_info);
3452*38fd1498Szrj 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3453*38fd1498Szrj 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3454*38fd1498Szrj 					      stmt_info);
3455*38fd1498Szrj 	  else
3456*38fd1498Szrj 	    {
3457*38fd1498Szrj 	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3458*38fd1498Szrj 	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3459*38fd1498Szrj 	    }
3460*38fd1498Szrj 	}
3461*38fd1498Szrj       itype = TREE_TYPE (irhs1);
3462*38fd1498Szrj       pattern_stmt
3463*38fd1498Szrj 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3464*38fd1498Szrj 			       rhs_code, irhs1, irhs2);
3465*38fd1498Szrj       break;
3466*38fd1498Szrj 
3467*38fd1498Szrj     default:
3468*38fd1498Szrj     do_compare:
3469*38fd1498Szrj       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3470*38fd1498Szrj       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3471*38fd1498Szrj 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3472*38fd1498Szrj 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3473*38fd1498Szrj 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3474*38fd1498Szrj 	{
3475*38fd1498Szrj 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3476*38fd1498Szrj 	  itype
3477*38fd1498Szrj 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3478*38fd1498Szrj 	}
3479*38fd1498Szrj       else
3480*38fd1498Szrj 	itype = TREE_TYPE (rhs1);
3481*38fd1498Szrj       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3482*38fd1498Szrj       if (trueval == NULL_TREE)
3483*38fd1498Szrj 	trueval = build_int_cst (itype, 1);
3484*38fd1498Szrj       else
3485*38fd1498Szrj 	gcc_checking_assert (useless_type_conversion_p (itype,
3486*38fd1498Szrj 							TREE_TYPE (trueval)));
3487*38fd1498Szrj       pattern_stmt
3488*38fd1498Szrj 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3489*38fd1498Szrj 			       COND_EXPR, cond_expr, trueval,
3490*38fd1498Szrj 			       build_int_cst (itype, 0));
3491*38fd1498Szrj       break;
3492*38fd1498Szrj     }
3493*38fd1498Szrj 
3494*38fd1498Szrj   gimple_set_location (pattern_stmt, loc);
3495*38fd1498Szrj   /* ???  Why does vect_mark_pattern_stmts set the vector type on all
3496*38fd1498Szrj      pattern def seq stmts instead of just letting auto-detection do
3497*38fd1498Szrj      its work?  */
3498*38fd1498Szrj   stmt_vec_info patt_vinfo = new_stmt_vec_info (pattern_stmt, stmt_info->vinfo);
3499*38fd1498Szrj   set_vinfo_for_stmt (pattern_stmt, patt_vinfo);
3500*38fd1498Szrj   STMT_VINFO_VECTYPE (patt_vinfo) = get_vectype_for_scalar_type (itype);
3501*38fd1498Szrj   append_pattern_def_seq (stmt_info, pattern_stmt);
3502*38fd1498Szrj   defs.put (var, gimple_assign_lhs (pattern_stmt));
3503*38fd1498Szrj }
3504*38fd1498Szrj 
3505*38fd1498Szrj /* Comparison function to qsort a vector of gimple stmts by UID.  */
3506*38fd1498Szrj 
3507*38fd1498Szrj static int
3508*38fd1498Szrj sort_after_uid (const void *p1, const void *p2)
3509*38fd1498Szrj {
3510*38fd1498Szrj   const gimple *stmt1 = *(const gimple * const *)p1;
3511*38fd1498Szrj   const gimple *stmt2 = *(const gimple * const *)p2;
3512*38fd1498Szrj   return gimple_uid (stmt1) - gimple_uid (stmt2);
3513*38fd1498Szrj }
3514*38fd1498Szrj 
3515*38fd1498Szrj /* Create pattern stmts for all stmts participating in the bool pattern
3516*38fd1498Szrj    specified by BOOL_STMT_SET and its root STMT with the desired type
3517*38fd1498Szrj    OUT_TYPE.  Return the def of the pattern root.  */
3518*38fd1498Szrj 
3519*38fd1498Szrj static tree
3520*38fd1498Szrj adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3521*38fd1498Szrj 		   tree out_type, gimple *stmt)
3522*38fd1498Szrj {
3523*38fd1498Szrj   /* Gather original stmts in the bool pattern in their order of appearance
3524*38fd1498Szrj      in the IL.  */
3525*38fd1498Szrj   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3526*38fd1498Szrj   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3527*38fd1498Szrj        i != bool_stmt_set.end (); ++i)
3528*38fd1498Szrj     bool_stmts.quick_push (*i);
3529*38fd1498Szrj   bool_stmts.qsort (sort_after_uid);
3530*38fd1498Szrj 
3531*38fd1498Szrj   /* Now process them in that order, producing pattern stmts.  */
3532*38fd1498Szrj   hash_map <tree, tree> defs;
3533*38fd1498Szrj   for (unsigned i = 0; i < bool_stmts.length (); ++i)
3534*38fd1498Szrj     adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3535*38fd1498Szrj 			 out_type, vinfo_for_stmt (stmt), defs);
3536*38fd1498Szrj 
3537*38fd1498Szrj   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
3538*38fd1498Szrj   gimple *pattern_stmt
3539*38fd1498Szrj     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
3540*38fd1498Szrj   return gimple_assign_lhs (pattern_stmt);
3541*38fd1498Szrj }
3542*38fd1498Szrj 
3543*38fd1498Szrj /* Helper for search_type_for_mask.  */
3544*38fd1498Szrj 
3545*38fd1498Szrj static tree
3546*38fd1498Szrj search_type_for_mask_1 (tree var, vec_info *vinfo,
3547*38fd1498Szrj 			hash_map<gimple *, tree> &cache)
3548*38fd1498Szrj {
3549*38fd1498Szrj   gimple *def_stmt;
3550*38fd1498Szrj   enum vect_def_type dt;
3551*38fd1498Szrj   tree rhs1;
3552*38fd1498Szrj   enum tree_code rhs_code;
3553*38fd1498Szrj   tree res = NULL_TREE, res2;
3554*38fd1498Szrj 
3555*38fd1498Szrj   if (TREE_CODE (var) != SSA_NAME)
3556*38fd1498Szrj     return NULL_TREE;
3557*38fd1498Szrj 
3558*38fd1498Szrj   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3559*38fd1498Szrj     return NULL_TREE;
3560*38fd1498Szrj 
3561*38fd1498Szrj   if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3562*38fd1498Szrj     return NULL_TREE;
3563*38fd1498Szrj 
3564*38fd1498Szrj   if (dt != vect_internal_def)
3565*38fd1498Szrj     return NULL_TREE;
3566*38fd1498Szrj 
3567*38fd1498Szrj   if (!is_gimple_assign (def_stmt))
3568*38fd1498Szrj     return NULL_TREE;
3569*38fd1498Szrj 
3570*38fd1498Szrj   tree *c = cache.get (def_stmt);
3571*38fd1498Szrj   if (c)
3572*38fd1498Szrj     return *c;
3573*38fd1498Szrj 
3574*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (def_stmt);
3575*38fd1498Szrj   rhs1 = gimple_assign_rhs1 (def_stmt);
3576*38fd1498Szrj 
3577*38fd1498Szrj   switch (rhs_code)
3578*38fd1498Szrj     {
3579*38fd1498Szrj     case SSA_NAME:
3580*38fd1498Szrj     case BIT_NOT_EXPR:
3581*38fd1498Szrj     CASE_CONVERT:
3582*38fd1498Szrj       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3583*38fd1498Szrj       break;
3584*38fd1498Szrj 
3585*38fd1498Szrj     case BIT_AND_EXPR:
3586*38fd1498Szrj     case BIT_IOR_EXPR:
3587*38fd1498Szrj     case BIT_XOR_EXPR:
3588*38fd1498Szrj       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3589*38fd1498Szrj       res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3590*38fd1498Szrj 				     cache);
3591*38fd1498Szrj       if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3592*38fd1498Szrj 	res = res2;
3593*38fd1498Szrj       break;
3594*38fd1498Szrj 
3595*38fd1498Szrj     default:
3596*38fd1498Szrj       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3597*38fd1498Szrj 	{
3598*38fd1498Szrj 	  tree comp_vectype, mask_type;
3599*38fd1498Szrj 
3600*38fd1498Szrj 	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3601*38fd1498Szrj 	    {
3602*38fd1498Szrj 	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
3603*38fd1498Szrj 	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3604*38fd1498Szrj 					     vinfo, cache);
3605*38fd1498Szrj 	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3606*38fd1498Szrj 		res = res2;
3607*38fd1498Szrj 	      break;
3608*38fd1498Szrj 	    }
3609*38fd1498Szrj 
3610*38fd1498Szrj 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3611*38fd1498Szrj 	  if (comp_vectype == NULL_TREE)
3612*38fd1498Szrj 	    {
3613*38fd1498Szrj 	      res = NULL_TREE;
3614*38fd1498Szrj 	      break;
3615*38fd1498Szrj 	    }
3616*38fd1498Szrj 
3617*38fd1498Szrj 	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3618*38fd1498Szrj 	  if (!mask_type
3619*38fd1498Szrj 	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3620*38fd1498Szrj 	    {
3621*38fd1498Szrj 	      res = NULL_TREE;
3622*38fd1498Szrj 	      break;
3623*38fd1498Szrj 	    }
3624*38fd1498Szrj 
3625*38fd1498Szrj 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3626*38fd1498Szrj 	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3627*38fd1498Szrj 	    {
3628*38fd1498Szrj 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3629*38fd1498Szrj 	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3630*38fd1498Szrj 	    }
3631*38fd1498Szrj 	  else
3632*38fd1498Szrj 	    res = TREE_TYPE (rhs1);
3633*38fd1498Szrj 	}
3634*38fd1498Szrj     }
3635*38fd1498Szrj 
3636*38fd1498Szrj   cache.put (def_stmt, res);
3637*38fd1498Szrj   return res;
3638*38fd1498Szrj }
3639*38fd1498Szrj 
3640*38fd1498Szrj /* Return the proper type for converting bool VAR into
3641*38fd1498Szrj    an integer value, or NULL_TREE if no such type exists.
3642*38fd1498Szrj    The type is chosen so that the converted value has the
3643*38fd1498Szrj    same number of elements as VAR's vector type.  */
3644*38fd1498Szrj 
3645*38fd1498Szrj static tree
3646*38fd1498Szrj search_type_for_mask (tree var, vec_info *vinfo)
3647*38fd1498Szrj {
3648*38fd1498Szrj   hash_map<gimple *, tree> cache;
3649*38fd1498Szrj   return search_type_for_mask_1 (var, vinfo, cache);
3650*38fd1498Szrj }
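
/* As a hedged illustration (hypothetical function, assuming 32-bit int,
   64-bit double and a target with the corresponding vector compare
   support): in

     _Bool
     example_mask_search (int i1, int i2, double d1, double d2)
     {
       _Bool m1 = i1 > i2;
       _Bool m2 = d1 < d2;
       return m1 & m2;
     }

   the int comparison defining m1 yields a 32-bit unsigned candidate type
   and the double comparison defining m2 a 64-bit one; for the
   BIT_AND_EXPR the candidate with the smaller precision (here the 32-bit
   type) is kept, per the precision check in search_type_for_mask_1.  */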
3651*38fd1498Szrj 
3652*38fd1498Szrj /* Function vect_recog_bool_pattern
3653*38fd1498Szrj 
3654*38fd1498Szrj    Try to find pattern like following:
3655*38fd1498Szrj 
3656*38fd1498Szrj      bool a_b, b_b, c_b, d_b, e_b;
3657*38fd1498Szrj      TYPE f_T;
3658*38fd1498Szrj    loop:
3659*38fd1498Szrj      S1  a_b = x1 CMP1 y1;
3660*38fd1498Szrj      S2  b_b = x2 CMP2 y2;
3661*38fd1498Szrj      S3  c_b = a_b & b_b;
3662*38fd1498Szrj      S4  d_b = x3 CMP3 y3;
3663*38fd1498Szrj      S5  e_b = c_b | d_b;
3664*38fd1498Szrj      S6  f_T = (TYPE) e_b;
3665*38fd1498Szrj 
3666*38fd1498Szrj    where type 'TYPE' is an integral type.  Or a similar pattern
3667*38fd1498Szrj    ending in
3668*38fd1498Szrj 
3669*38fd1498Szrj      S6  f_Y = e_b ? r_Y : s_Y;
3670*38fd1498Szrj 
3671*38fd1498Szrj    as results from if-conversion of a complex condition.
3672*38fd1498Szrj 
3673*38fd1498Szrj    Input:
3674*38fd1498Szrj 
3675*38fd1498Szrj    * LAST_STMT: A stmt at the end from which the pattern
3676*38fd1498Szrj 		search begins, i.e. cast of a bool to
3677*38fd1498Szrj 		an integer type.
3678*38fd1498Szrj 
3679*38fd1498Szrj    Output:
3680*38fd1498Szrj 
3681*38fd1498Szrj    * TYPE_IN: The type of the input arguments to the pattern.
3682*38fd1498Szrj 
3683*38fd1498Szrj    * TYPE_OUT: The type of the output of this pattern.
3684*38fd1498Szrj 
3685*38fd1498Szrj    * Return value: A new stmt that will be used to replace the pattern.
3686*38fd1498Szrj 
3687*38fd1498Szrj 	Assuming the size of TYPE is the same as the size of all comparisons
3688*38fd1498Szrj 	(otherwise some casts would be added where needed), for the above
3689*38fd1498Szrj 	sequence we create the related pattern stmts:
3690*38fd1498Szrj 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3691*38fd1498Szrj 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3692*38fd1498Szrj 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
3693*38fd1498Szrj 	S5'  e_T = c_T | d_T;
3694*38fd1498Szrj 	S6'  f_T = e_T;
3695*38fd1498Szrj 
3696*38fd1498Szrj 	Instead of the above S3' we could emit:
3697*38fd1498Szrj 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3698*38fd1498Szrj 	S3'  c_T = a_T | b_T;
3699*38fd1498Szrj 	but the above is more efficient.  */
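
/* For instance (a hedged sketch with hypothetical names), if-conversion
   of a source loop like

     void
     example_bool_to_int (int *out, int *x1, int *y1, int *x2, int *y2,
			  int *x3, int *y3, int n)
     {
       for (int i = 0; i < n; i++)
	 {
	   _Bool a_b = x1[i] < y1[i];
	   _Bool b_b = x2[i] < y2[i];
	   _Bool c_b = a_b & b_b;
	   _Bool d_b = x3[i] < y3[i];
	   _Bool e_b = c_b | d_b;
	   out[i] = (int) e_b;
	 }
     }

   produces a statement sequence of the S1-S6 shape above.  */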
3700*38fd1498Szrj 
3701*38fd1498Szrj static gimple *
3702*38fd1498Szrj vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
3703*38fd1498Szrj 			 tree *type_out)
3704*38fd1498Szrj {
3705*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
3706*38fd1498Szrj   enum tree_code rhs_code;
3707*38fd1498Szrj   tree var, lhs, rhs, vectype;
3708*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3709*38fd1498Szrj   stmt_vec_info new_stmt_info;
3710*38fd1498Szrj   vec_info *vinfo = stmt_vinfo->vinfo;
3711*38fd1498Szrj   gimple *pattern_stmt;
3712*38fd1498Szrj 
3713*38fd1498Szrj   if (!is_gimple_assign (last_stmt))
3714*38fd1498Szrj     return NULL;
3715*38fd1498Szrj 
3716*38fd1498Szrj   var = gimple_assign_rhs1 (last_stmt);
3717*38fd1498Szrj   lhs = gimple_assign_lhs (last_stmt);
3718*38fd1498Szrj 
3719*38fd1498Szrj   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3720*38fd1498Szrj     return NULL;
3721*38fd1498Szrj 
3722*38fd1498Szrj   hash_set<gimple *> bool_stmts;
3723*38fd1498Szrj 
3724*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (last_stmt);
3725*38fd1498Szrj   if (CONVERT_EXPR_CODE_P (rhs_code))
3726*38fd1498Szrj     {
3727*38fd1498Szrj       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3728*38fd1498Szrj 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3729*38fd1498Szrj 	return NULL;
3730*38fd1498Szrj       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3731*38fd1498Szrj       if (vectype == NULL_TREE)
3732*38fd1498Szrj 	return NULL;
3733*38fd1498Szrj 
3734*38fd1498Szrj       if (check_bool_pattern (var, vinfo, bool_stmts))
3735*38fd1498Szrj 	{
3736*38fd1498Szrj 	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
3737*38fd1498Szrj 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3738*38fd1498Szrj 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3739*38fd1498Szrj 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3740*38fd1498Szrj 	  else
3741*38fd1498Szrj 	    pattern_stmt
3742*38fd1498Szrj 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
3743*38fd1498Szrj 	}
3744*38fd1498Szrj       else
3745*38fd1498Szrj 	{
3746*38fd1498Szrj 	  tree type = search_type_for_mask (var, vinfo);
3747*38fd1498Szrj 	  tree cst0, cst1, tmp;
3748*38fd1498Szrj 
3749*38fd1498Szrj 	  if (!type)
3750*38fd1498Szrj 	    return NULL;
3751*38fd1498Szrj 
3752*38fd1498Szrj 	  /* We may use the COND_EXPR with the narrowed type directly,
3753*38fd1498Szrj 	     performing a single COND_EXPR on the packed mask instead of
3754*38fd1498Szrj 	     multiple COND_EXPRs followed by result packing.  In the
3755*38fd1498Szrj 	     widening case it is better to do the COND_EXPR first and
3756*38fd1498Szrj 	     then extract the results.  */
3757*38fd1498Szrj 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3758*38fd1498Szrj 	    type = TREE_TYPE (lhs);
3759*38fd1498Szrj 
3760*38fd1498Szrj 	  cst0 = build_int_cst (type, 0);
3761*38fd1498Szrj 	  cst1 = build_int_cst (type, 1);
3762*38fd1498Szrj 	  tmp = vect_recog_temp_ssa_var (type, NULL);
3763*38fd1498Szrj 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3764*38fd1498Szrj 
3765*38fd1498Szrj 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3766*38fd1498Szrj 	    {
3767*38fd1498Szrj 	      tree new_vectype = get_vectype_for_scalar_type (type);
3768*38fd1498Szrj 	      new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3769*38fd1498Szrj 	      set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3770*38fd1498Szrj 	      STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3771*38fd1498Szrj 	      new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3772*38fd1498Szrj 
3773*38fd1498Szrj 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3774*38fd1498Szrj 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3775*38fd1498Szrj 	    }
3776*38fd1498Szrj 	}
3777*38fd1498Szrj 
3778*38fd1498Szrj       *type_out = vectype;
3779*38fd1498Szrj       *type_in = vectype;
3780*38fd1498Szrj       stmts->safe_push (last_stmt);
3781*38fd1498Szrj       if (dump_enabled_p ())
3782*38fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
3783*38fd1498Szrj                          "vect_recog_bool_pattern: detected:\n");
3784*38fd1498Szrj 
3785*38fd1498Szrj       return pattern_stmt;
3786*38fd1498Szrj     }
3787*38fd1498Szrj   else if (rhs_code == COND_EXPR
3788*38fd1498Szrj 	   && TREE_CODE (var) == SSA_NAME)
3789*38fd1498Szrj     {
3790*38fd1498Szrj       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3791*38fd1498Szrj       if (vectype == NULL_TREE)
3792*38fd1498Szrj 	return NULL;
3793*38fd1498Szrj 
3794*38fd1498Szrj       /* Build a scalar type for the boolean result that when
3795*38fd1498Szrj          vectorized matches the vector type of the result in
3796*38fd1498Szrj 	 size and number of elements.  */
3797*38fd1498Szrj       unsigned prec
3798*38fd1498Szrj 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3799*38fd1498Szrj 			       TYPE_VECTOR_SUBPARTS (vectype));
3800*38fd1498Szrj 
3801*38fd1498Szrj       tree type
3802*38fd1498Szrj 	= build_nonstandard_integer_type (prec,
3803*38fd1498Szrj 					  TYPE_UNSIGNED (TREE_TYPE (var)));
3804*38fd1498Szrj       if (get_vectype_for_scalar_type (type) == NULL_TREE)
3805*38fd1498Szrj 	return NULL;
3806*38fd1498Szrj 
3807*38fd1498Szrj       if (!check_bool_pattern (var, vinfo, bool_stmts))
3808*38fd1498Szrj 	return NULL;
3809*38fd1498Szrj 
3810*38fd1498Szrj       rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);
3811*38fd1498Szrj 
3812*38fd1498Szrj       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3813*38fd1498Szrj       pattern_stmt
3814*38fd1498Szrj 	  = gimple_build_assign (lhs, COND_EXPR,
3815*38fd1498Szrj 				 build2 (NE_EXPR, boolean_type_node,
3816*38fd1498Szrj 					 rhs, build_int_cst (type, 0)),
3817*38fd1498Szrj 				 gimple_assign_rhs2 (last_stmt),
3818*38fd1498Szrj 				 gimple_assign_rhs3 (last_stmt));
3819*38fd1498Szrj       *type_out = vectype;
3820*38fd1498Szrj       *type_in = vectype;
3821*38fd1498Szrj       stmts->safe_push (last_stmt);
3822*38fd1498Szrj       if (dump_enabled_p ())
3823*38fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
3824*38fd1498Szrj                          "vect_recog_bool_pattern: detected:\n");
3825*38fd1498Szrj 
3826*38fd1498Szrj       return pattern_stmt;
3827*38fd1498Szrj     }
3828*38fd1498Szrj   else if (rhs_code == SSA_NAME
3829*38fd1498Szrj 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
3830*38fd1498Szrj     {
3831*38fd1498Szrj       stmt_vec_info pattern_stmt_info;
3832*38fd1498Szrj       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3833*38fd1498Szrj       gcc_assert (vectype != NULL_TREE);
3834*38fd1498Szrj       if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3835*38fd1498Szrj 	return NULL;
3836*38fd1498Szrj 
3837*38fd1498Szrj       if (check_bool_pattern (var, vinfo, bool_stmts))
3838*38fd1498Szrj 	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
3839*38fd1498Szrj       else
3840*38fd1498Szrj 	{
3841*38fd1498Szrj 	  tree type = search_type_for_mask (var, vinfo);
3842*38fd1498Szrj 	  tree cst0, cst1, new_vectype;
3843*38fd1498Szrj 
3844*38fd1498Szrj 	  if (!type)
3845*38fd1498Szrj 	    return NULL;
3846*38fd1498Szrj 
3847*38fd1498Szrj 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3848*38fd1498Szrj 	    type = TREE_TYPE (vectype);
3849*38fd1498Szrj 
3850*38fd1498Szrj 	  cst0 = build_int_cst (type, 0);
3851*38fd1498Szrj 	  cst1 = build_int_cst (type, 1);
3852*38fd1498Szrj 	  new_vectype = get_vectype_for_scalar_type (type);
3853*38fd1498Szrj 
3854*38fd1498Szrj 	  rhs = vect_recog_temp_ssa_var (type, NULL);
3855*38fd1498Szrj 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3856*38fd1498Szrj 
3857*38fd1498Szrj 	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3858*38fd1498Szrj 	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3859*38fd1498Szrj 	  STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3860*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3861*38fd1498Szrj 	}
3862*38fd1498Szrj 
3863*38fd1498Szrj       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3864*38fd1498Szrj       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3865*38fd1498Szrj 	{
3866*38fd1498Szrj 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3867*38fd1498Szrj 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3868*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
3869*38fd1498Szrj 	  rhs = rhs2;
3870*38fd1498Szrj 	}
3871*38fd1498Szrj       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3872*38fd1498Szrj       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3873*38fd1498Szrj       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3874*38fd1498Szrj       STMT_VINFO_DATA_REF (pattern_stmt_info)
3875*38fd1498Szrj 	= STMT_VINFO_DATA_REF (stmt_vinfo);
3876*38fd1498Szrj       STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
3877*38fd1498Szrj 	= STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
3878*38fd1498Szrj       DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3879*38fd1498Szrj       *type_out = vectype;
3880*38fd1498Szrj       *type_in = vectype;
3881*38fd1498Szrj       stmts->safe_push (last_stmt);
3882*38fd1498Szrj       if (dump_enabled_p ())
3883*38fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
3884*38fd1498Szrj                          "vect_recog_bool_pattern: detected:\n");
3885*38fd1498Szrj       return pattern_stmt;
3886*38fd1498Szrj     }
3887*38fd1498Szrj   else
3888*38fd1498Szrj     return NULL;
3889*38fd1498Szrj }
3890*38fd1498Szrj 
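/* A hedged example of the data-reference case handled above (hypothetical
   names): storing a comparison result directly to boolean memory, as in

     void
     example_bool_store (_Bool *out, int *x, int *y, int n)
     {
       for (int i = 0; i < n; i++)
	 out[i] = x[i] < y[i];
     }

   takes the rhs_code == SSA_NAME branch with STMT_VINFO_DATA_REF set,
   which materializes the mask as 0/1 values of the vector element type
   before the store.  */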
3891*38fd1498Szrj 
3892*38fd1498Szrj /* A helper for vect_recog_mask_conversion_pattern.  Build
3893*38fd1498Szrj    conversion of MASK to a type suitable for masking VECTYPE.
3894*38fd1498Szrj    The built statement gets the required vectype and is appended
3895*38fd1498Szrj    to the pattern def sequence of STMT_VINFO.
3896*38fd1498Szrj 
3897*38fd1498Szrj    Return converted mask.  */
3898*38fd1498Szrj 
3899*38fd1498Szrj static tree
3900*38fd1498Szrj build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3901*38fd1498Szrj 		       vec_info *vinfo)
3902*38fd1498Szrj {
3903*38fd1498Szrj   gimple *stmt;
3904*38fd1498Szrj   tree masktype, tmp;
3905*38fd1498Szrj   stmt_vec_info new_stmt_info;
3906*38fd1498Szrj 
3907*38fd1498Szrj   masktype = build_same_sized_truth_vector_type (vectype);
3908*38fd1498Szrj   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3909*38fd1498Szrj   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3910*38fd1498Szrj   new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3911*38fd1498Szrj   set_vinfo_for_stmt (stmt, new_stmt_info);
3912*38fd1498Szrj   STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3913*38fd1498Szrj   append_pattern_def_seq (stmt_vinfo, stmt);
3914*38fd1498Szrj 
3915*38fd1498Szrj   return tmp;
3916*38fd1498Szrj }
3917*38fd1498Szrj 
3918*38fd1498Szrj 
3919*38fd1498Szrj /* Function vect_recog_mask_conversion_pattern
3920*38fd1498Szrj 
3921*38fd1498Szrj    Try to find statements which require boolean type
3922*38fd1498Szrj    conversion.  Additional conversion statements are
3923*38fd1498Szrj    added to handle such cases.  For example:
3924*38fd1498Szrj 
3925*38fd1498Szrj    bool m_1, m_2, m_3;
3926*38fd1498Szrj    int i_4, i_5;
3927*38fd1498Szrj    double d_6, d_7;
3928*38fd1498Szrj    char c_1, c_2, c_3;
3929*38fd1498Szrj 
3930*38fd1498Szrj    S1   m_1 = i_4 > i_5;
3931*38fd1498Szrj    S2   m_2 = d_6 < d_7;
3932*38fd1498Szrj    S3   m_3 = m_1 & m_2;
3933*38fd1498Szrj    S4   c_1 = m_3 ? c_2 : c_3;
3934*38fd1498Szrj 
3935*38fd1498Szrj    Will be transformed into:
3936*38fd1498Szrj 
3937*38fd1498Szrj    S1   m_1 = i_4 > i_5;
3938*38fd1498Szrj    S2   m_2 = d_6 < d_7;
3939*38fd1498Szrj    S3'' m_2' = (_Bool[bitsize=32])m_2
3940*38fd1498Szrj    S3'  m_3' = m_1 & m_2';
3941*38fd1498Szrj    S4'' m_3'' = (_Bool[bitsize=8])m_3'
3942*38fd1498Szrj    S4'  c_1' = m_3'' ? c_2 : c_3;  */
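
/* A hedged source-level sketch of the above (hypothetical names,
   assuming 32-bit int and 64-bit double): in

     void
     example_mask_widths (char *c_1, int *i_4, int *i_5,
			  double *d_6, double *d_7,
			  char *c_2, char *c_3, int n)
     {
       for (int i = 0; i < n; i++)
	 c_1[i] = (i_4[i] > i_5[i]) & (d_6[i] < d_7[i]) ? c_2[i] : c_3[i];
     }

   the int comparison produces a mask with 32-bit elements, the double
   comparison one with 64-bit elements, and the COND_EXPR on char needs
   8-bit elements, hence the added conversions S3'' and S4''.  */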
3943*38fd1498Szrj 
3944*38fd1498Szrj static gimple *
3945*38fd1498Szrj vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3946*38fd1498Szrj 				    tree *type_out)
3947*38fd1498Szrj {
3948*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
3949*38fd1498Szrj   enum tree_code rhs_code;
3950*38fd1498Szrj   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3951*38fd1498Szrj   tree vectype1, vectype2;
3952*38fd1498Szrj   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3953*38fd1498Szrj   stmt_vec_info pattern_stmt_info;
3954*38fd1498Szrj   vec_info *vinfo = stmt_vinfo->vinfo;
3955*38fd1498Szrj 
3956*38fd1498Szrj   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
3957*38fd1498Szrj   if (is_gimple_call (last_stmt)
3958*38fd1498Szrj       && gimple_call_internal_p (last_stmt)
3959*38fd1498Szrj       && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3960*38fd1498Szrj 	  || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3961*38fd1498Szrj     {
3962*38fd1498Szrj       gcall *pattern_stmt;
3963*38fd1498Szrj       bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3964*38fd1498Szrj 
3965*38fd1498Szrj       if (load)
3966*38fd1498Szrj 	{
3967*38fd1498Szrj 	  lhs = gimple_call_lhs (last_stmt);
3968*38fd1498Szrj 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3969*38fd1498Szrj 	}
3970*38fd1498Szrj       else
3971*38fd1498Szrj 	{
3972*38fd1498Szrj 	  rhs2 = gimple_call_arg (last_stmt, 3);
3973*38fd1498Szrj 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3974*38fd1498Szrj 	}
3975*38fd1498Szrj 
3976*38fd1498Szrj       rhs1 = gimple_call_arg (last_stmt, 2);
3977*38fd1498Szrj       rhs1_type = search_type_for_mask (rhs1, vinfo);
3978*38fd1498Szrj       if (!rhs1_type)
3979*38fd1498Szrj 	return NULL;
3980*38fd1498Szrj       vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3981*38fd1498Szrj 
3982*38fd1498Szrj       if (!vectype1 || !vectype2
3983*38fd1498Szrj 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3984*38fd1498Szrj 		       TYPE_VECTOR_SUBPARTS (vectype2)))
3985*38fd1498Szrj 	return NULL;
3986*38fd1498Szrj 
3987*38fd1498Szrj       tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3988*38fd1498Szrj 
3989*38fd1498Szrj       if (load)
3990*38fd1498Szrj 	{
3991*38fd1498Szrj 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3992*38fd1498Szrj 	  pattern_stmt
3993*38fd1498Szrj 	    = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3994*38fd1498Szrj 					  gimple_call_arg (last_stmt, 0),
3995*38fd1498Szrj 					  gimple_call_arg (last_stmt, 1),
3996*38fd1498Szrj 					  tmp);
3997*38fd1498Szrj 	  gimple_call_set_lhs (pattern_stmt, lhs);
3998*38fd1498Szrj 	}
3999*38fd1498Szrj       else
4000*38fd1498Szrj 	  pattern_stmt
4001*38fd1498Szrj 	    = gimple_build_call_internal (IFN_MASK_STORE, 4,
4002*38fd1498Szrj 					  gimple_call_arg (last_stmt, 0),
4003*38fd1498Szrj 					  gimple_call_arg (last_stmt, 1),
4004*38fd1498Szrj 					  tmp,
4005*38fd1498Szrj 					  gimple_call_arg (last_stmt, 3));
4006*38fd1498Szrj 
4007*38fd1498Szrj       gimple_call_set_nothrow (pattern_stmt, true);
4008*38fd1498Szrj 
4009*38fd1498Szrj       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4010*38fd1498Szrj       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4011*38fd1498Szrj       STMT_VINFO_DATA_REF (pattern_stmt_info)
4012*38fd1498Szrj 	= STMT_VINFO_DATA_REF (stmt_vinfo);
4013*38fd1498Szrj       STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4014*38fd1498Szrj 	= STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
4015*38fd1498Szrj       DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
4016*38fd1498Szrj 
4017*38fd1498Szrj       *type_out = vectype1;
4018*38fd1498Szrj       *type_in = vectype1;
4019*38fd1498Szrj       stmts->safe_push (last_stmt);
4020*38fd1498Szrj       if (dump_enabled_p ())
4021*38fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
4022*38fd1498Szrj                          "vect_recog_mask_conversion_pattern: detected:\n");
4023*38fd1498Szrj 
4024*38fd1498Szrj       return pattern_stmt;
4025*38fd1498Szrj     }
4026*38fd1498Szrj 
4027*38fd1498Szrj   if (!is_gimple_assign (last_stmt))
4028*38fd1498Szrj     return NULL;
4029*38fd1498Szrj 
4030*38fd1498Szrj   gimple *pattern_stmt;
4031*38fd1498Szrj   lhs = gimple_assign_lhs (last_stmt);
4032*38fd1498Szrj   rhs1 = gimple_assign_rhs1 (last_stmt);
4033*38fd1498Szrj   rhs_code = gimple_assign_rhs_code (last_stmt);
4034*38fd1498Szrj 
4035*38fd1498Szrj   /* Check for cond expression requiring mask conversion.  */
4036*38fd1498Szrj   if (rhs_code == COND_EXPR)
4037*38fd1498Szrj     {
4038*38fd1498Szrj       /* vect_recog_mixed_size_cond_pattern could apply.
4039*38fd1498Szrj 	 Do nothing then.  */
4040*38fd1498Szrj       if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
4041*38fd1498Szrj 	return NULL;
4042*38fd1498Szrj 
4043*38fd1498Szrj       vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4044*38fd1498Szrj 
4045*38fd1498Szrj       if (TREE_CODE (rhs1) == SSA_NAME)
4046*38fd1498Szrj 	{
4047*38fd1498Szrj 	  rhs1_type = search_type_for_mask (rhs1, vinfo);
4048*38fd1498Szrj 	  if (!rhs1_type)
4049*38fd1498Szrj 	    return NULL;
4050*38fd1498Szrj 	}
4051*38fd1498Szrj       else if (COMPARISON_CLASS_P (rhs1))
4052*38fd1498Szrj 	{
4053*38fd1498Szrj 	  /* Check whether we're comparing scalar booleans and (if so)
4054*38fd1498Szrj 	     whether a better mask type exists than the mask associated
4055*38fd1498Szrj 	     with boolean-sized elements.  This avoids unnecessary packs
4056*38fd1498Szrj 	     and unpacks if the booleans are set from comparisons of
4057*38fd1498Szrj 	     wider types.  E.g. in:
4058*38fd1498Szrj 
4059*38fd1498Szrj 	       int x1, x2, x3, x4, y1, y2;
4060*38fd1498Szrj 	       ...
4061*38fd1498Szrj 	       bool b1 = (x1 == x2);
4062*38fd1498Szrj 	       bool b2 = (x3 == x4);
4063*38fd1498Szrj 	       ... = b1 == b2 ? y1 : y2;
4064*38fd1498Szrj 
4065*38fd1498Szrj 	     it is better for b1 and b2 to use the mask type associated
4066*38fd1498Szrj 	     with int elements rather than bool (byte) elements.  */
4067*38fd1498Szrj 	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4068*38fd1498Szrj 	  if (!rhs1_type)
4069*38fd1498Szrj 	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4070*38fd1498Szrj 	}
4071*38fd1498Szrj       else
4072*38fd1498Szrj 	return NULL;
4073*38fd1498Szrj 
4074*38fd1498Szrj       vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4075*38fd1498Szrj 
4076*38fd1498Szrj       if (!vectype1 || !vectype2)
4077*38fd1498Szrj 	return NULL;
4078*38fd1498Szrj 
4079*38fd1498Szrj       /* Continue if a conversion is needed.  Also continue if we have
4080*38fd1498Szrj 	 a comparison whose vector type would normally be different from
4081*38fd1498Szrj 	 VECTYPE2 when considered in isolation.  In that case we'll
4082*38fd1498Szrj 	 replace the comparison with an SSA name (so that we can record
4083*38fd1498Szrj 	 its vector type) and behave as though the comparison was an SSA
4084*38fd1498Szrj 	 name from the outset.  */
4085*38fd1498Szrj       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4086*38fd1498Szrj 		    TYPE_VECTOR_SUBPARTS (vectype2))
4087*38fd1498Szrj 	  && (TREE_CODE (rhs1) == SSA_NAME
4088*38fd1498Szrj 	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4089*38fd1498Szrj 	return NULL;
4090*38fd1498Szrj 
4091*38fd1498Szrj       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4092*38fd1498Szrj          in place; we can handle it in vectorizable_condition.  This avoids
4093*38fd1498Szrj 	 unnecessary promotion stmts and an increased vectorization factor.  */
4094*38fd1498Szrj       if (COMPARISON_CLASS_P (rhs1)
4095*38fd1498Szrj 	  && INTEGRAL_TYPE_P (rhs1_type)
4096*38fd1498Szrj 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4097*38fd1498Szrj 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4098*38fd1498Szrj 	{
4099*38fd1498Szrj 	  gimple *dummy;
4100*38fd1498Szrj 	  enum vect_def_type dt;
4101*38fd1498Szrj 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), stmt_vinfo->vinfo,
4102*38fd1498Szrj 				  &dummy, &dt)
4103*38fd1498Szrj 	      && dt == vect_external_def
4104*38fd1498Szrj 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), stmt_vinfo->vinfo,
4105*38fd1498Szrj 				     &dummy, &dt)
4106*38fd1498Szrj 	      && (dt == vect_external_def
4107*38fd1498Szrj 		  || dt == vect_constant_def))
4108*38fd1498Szrj 	    {
4109*38fd1498Szrj 	      tree wide_scalar_type = build_nonstandard_integer_type
4110*38fd1498Szrj 		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4111*38fd1498Szrj 		 TYPE_UNSIGNED (rhs1_type));
4112*38fd1498Szrj 	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4113*38fd1498Szrj 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4114*38fd1498Szrj 		return NULL;
4115*38fd1498Szrj 	    }
4116*38fd1498Szrj 	}
4117*38fd1498Szrj 
4118*38fd1498Szrj       /* If rhs1 is a comparison we need to move it into a
4119*38fd1498Szrj 	 separate statement.  */
4120*38fd1498Szrj       if (TREE_CODE (rhs1) != SSA_NAME)
4121*38fd1498Szrj 	{
4122*38fd1498Szrj 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4123*38fd1498Szrj 	  pattern_stmt = gimple_build_assign (tmp, rhs1);
4124*38fd1498Szrj 	  rhs1 = tmp;
4125*38fd1498Szrj 
4126*38fd1498Szrj 	  pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4127*38fd1498Szrj 	  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4128*38fd1498Szrj 	  STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
4129*38fd1498Szrj 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt);
4130*38fd1498Szrj 	}
4131*38fd1498Szrj 
4132*38fd1498Szrj       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4133*38fd1498Szrj 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4134*38fd1498Szrj 	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4135*38fd1498Szrj       else
4136*38fd1498Szrj 	tmp = rhs1;
4137*38fd1498Szrj 
4138*38fd1498Szrj       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4139*38fd1498Szrj       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4140*38fd1498Szrj 					  gimple_assign_rhs2 (last_stmt),
4141*38fd1498Szrj 					  gimple_assign_rhs3 (last_stmt));
4142*38fd1498Szrj 
4143*38fd1498Szrj       *type_out = vectype1;
4144*38fd1498Szrj       *type_in = vectype1;
4145*38fd1498Szrj       stmts->safe_push (last_stmt);
4146*38fd1498Szrj       if (dump_enabled_p ())
4147*38fd1498Szrj 	dump_printf_loc (MSG_NOTE, vect_location,
4148*38fd1498Szrj                          "vect_recog_mask_conversion_pattern: detected:\n");
4149*38fd1498Szrj 
4150*38fd1498Szrj       return pattern_stmt;
4151*38fd1498Szrj     }
4152*38fd1498Szrj 
4153*38fd1498Szrj   /* Now check for binary boolean operations requiring conversion for
4154*38fd1498Szrj      one of the operands.  */
4155*38fd1498Szrj   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4156*38fd1498Szrj     return NULL;
4157*38fd1498Szrj 
4158*38fd1498Szrj   if (rhs_code != BIT_IOR_EXPR
4159*38fd1498Szrj       && rhs_code != BIT_XOR_EXPR
4160*38fd1498Szrj       && rhs_code != BIT_AND_EXPR
4161*38fd1498Szrj       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4162*38fd1498Szrj     return NULL;
4163*38fd1498Szrj 
4164*38fd1498Szrj   rhs2 = gimple_assign_rhs2 (last_stmt);
4165*38fd1498Szrj 
4166*38fd1498Szrj   rhs1_type = search_type_for_mask (rhs1, vinfo);
4167*38fd1498Szrj   rhs2_type = search_type_for_mask (rhs2, vinfo);
4168*38fd1498Szrj 
4169*38fd1498Szrj   if (!rhs1_type || !rhs2_type
4170*38fd1498Szrj       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4171*38fd1498Szrj     return NULL;
4172*38fd1498Szrj 
4173*38fd1498Szrj   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4174*38fd1498Szrj     {
4175*38fd1498Szrj       vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4176*38fd1498Szrj       if (!vectype1)
4177*38fd1498Szrj 	return NULL;
4178*38fd1498Szrj       rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
4179*38fd1498Szrj     }
4180*38fd1498Szrj   else
4181*38fd1498Szrj     {
4182*38fd1498Szrj       vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4183*38fd1498Szrj       if (!vectype1)
4184*38fd1498Szrj 	return NULL;
4185*38fd1498Szrj       rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
4186*38fd1498Szrj     }
4187*38fd1498Szrj 
4188*38fd1498Szrj   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4189*38fd1498Szrj   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4190*38fd1498Szrj 
4191*38fd1498Szrj   *type_out = vectype1;
4192*38fd1498Szrj   *type_in = vectype1;
4193*38fd1498Szrj   stmts->safe_push (last_stmt);
4194*38fd1498Szrj   if (dump_enabled_p ())
4195*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
4196*38fd1498Szrj 		     "vect_recog_mask_conversion_pattern: detected:\n");
4197*38fd1498Szrj 
4198*38fd1498Szrj   return pattern_stmt;
4199*38fd1498Szrj }
4200*38fd1498Szrj 
4201*38fd1498Szrj /* STMT is a load or store.  If the load or store is conditional, return
4202*38fd1498Szrj    the boolean condition under which it occurs, otherwise return null.  */
4203*38fd1498Szrj 
4204*38fd1498Szrj static tree
4205*38fd1498Szrj vect_get_load_store_mask (gimple *stmt)
4206*38fd1498Szrj {
4207*38fd1498Szrj   if (gassign *def_assign = dyn_cast <gassign *> (stmt))
4208*38fd1498Szrj     {
4209*38fd1498Szrj       gcc_assert (gimple_assign_single_p (def_assign));
4210*38fd1498Szrj       return NULL_TREE;
4211*38fd1498Szrj     }
4212*38fd1498Szrj 
4213*38fd1498Szrj   if (gcall *def_call = dyn_cast <gcall *> (stmt))
4214*38fd1498Szrj     {
4215*38fd1498Szrj       internal_fn ifn = gimple_call_internal_fn (def_call);
4216*38fd1498Szrj       int mask_index = internal_fn_mask_index (ifn);
4217*38fd1498Szrj       return gimple_call_arg (def_call, mask_index);
4218*38fd1498Szrj     }
4219*38fd1498Szrj 
4220*38fd1498Szrj   gcc_unreachable ();
4221*38fd1498Szrj }
4222*38fd1498Szrj 
4223*38fd1498Szrj /* Return the scalar offset type that an internal gather/scatter function
4224*38fd1498Szrj    should use.  GS_INFO describes the gather/scatter operation.  */
4225*38fd1498Szrj 
4226*38fd1498Szrj static tree
4227*38fd1498Szrj vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4228*38fd1498Szrj {
4229*38fd1498Szrj   tree offset_type = TREE_TYPE (gs_info->offset);
4230*38fd1498Szrj   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4231*38fd1498Szrj 
4232*38fd1498Szrj   /* Enforced by vect_check_gather_scatter.  */
4233*38fd1498Szrj   unsigned int offset_bits = TYPE_PRECISION (offset_type);
4234*38fd1498Szrj   gcc_assert (element_bits >= offset_bits);
4235*38fd1498Szrj 
4236*38fd1498Szrj   /* If the offset is narrower than the elements, extend it according
4237*38fd1498Szrj      to its sign.  */
4238*38fd1498Szrj   if (element_bits > offset_bits)
4239*38fd1498Szrj     return build_nonstandard_integer_type (element_bits,
4240*38fd1498Szrj 					   TYPE_UNSIGNED (offset_type));
4241*38fd1498Szrj 
4242*38fd1498Szrj   return offset_type;
4243*38fd1498Szrj }
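
/* For example (a hedged sketch): a gather of 64-bit elements indexed by
   a 32-bit signed offset, as in

     double g (double *base, int *idx, int i) { return base[idx[i]]; }

   has element_bits == 64 and offset_bits == 32, so the function returns
   a signed 64-bit type and the offset is sign-extended to element
   width.  */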
4244*38fd1498Szrj 
4245*38fd1498Szrj /* Return MASK if MASK is suitable for masking an operation on vectors
4246*38fd1498Szrj    of type VECTYPE, otherwise convert it into such a form and return
4247*38fd1498Szrj    the result.  Associate any conversion statements with STMT_INFO's
4248*38fd1498Szrj    pattern.  */
4249*38fd1498Szrj 
4250*38fd1498Szrj static tree
4251*38fd1498Szrj vect_convert_mask_for_vectype (tree mask, tree vectype,
4252*38fd1498Szrj 			       stmt_vec_info stmt_info, vec_info *vinfo)
4253*38fd1498Szrj {
4254*38fd1498Szrj   tree mask_type = search_type_for_mask (mask, vinfo);
4255*38fd1498Szrj   if (mask_type)
4256*38fd1498Szrj     {
4257*38fd1498Szrj       tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4258*38fd1498Szrj       if (mask_vectype
4259*38fd1498Szrj 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4260*38fd1498Szrj 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4261*38fd1498Szrj 	mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
4262*38fd1498Szrj     }
4263*38fd1498Szrj   return mask;
4264*38fd1498Szrj }
4265*38fd1498Szrj 
4266*38fd1498Szrj /* Return the equivalent of:
4267*38fd1498Szrj 
4268*38fd1498Szrj      fold_convert (TYPE, VALUE)
4269*38fd1498Szrj 
4270*38fd1498Szrj    with the expectation that the operation will be vectorized.
4271*38fd1498Szrj    If new statements are needed, add them as pattern statements
4272*38fd1498Szrj    to STMT_INFO.  */
4273*38fd1498Szrj 
4274*38fd1498Szrj static tree
4275*38fd1498Szrj vect_add_conversion_to_patterm (tree type, tree value,
4276*38fd1498Szrj 				stmt_vec_info stmt_info,
4277*38fd1498Szrj 				vec_info *vinfo)
4278*38fd1498Szrj {
4279*38fd1498Szrj   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4280*38fd1498Szrj     return value;
4281*38fd1498Szrj 
4282*38fd1498Szrj   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4283*38fd1498Szrj   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4284*38fd1498Szrj   stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
4285*38fd1498Szrj   set_vinfo_for_stmt (conversion, new_stmt_info);
4286*38fd1498Szrj   STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
4287*38fd1498Szrj   append_pattern_def_seq (stmt_info, conversion);
4288*38fd1498Szrj   return new_value;
4289*38fd1498Szrj }
4290*38fd1498Szrj 
4291*38fd1498Szrj /* Try to convert STMT into a call to a gather load or scatter store
4292*38fd1498Szrj    internal function.  Return the final statement on success and set
4293*38fd1498Szrj    *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
4294*38fd1498Szrj 
4295*38fd1498Szrj    This function only handles gathers and scatters that were recognized
4296*38fd1498Szrj    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
4297*38fd1498Szrj 
4298*38fd1498Szrj static gimple *
4299*38fd1498Szrj vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
4300*38fd1498Szrj 				 tree *type_in, tree *type_out)
4301*38fd1498Szrj {
4302*38fd1498Szrj   /* Currently we only support this for loop vectorization.  */
4303*38fd1498Szrj   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4304*38fd1498Szrj   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4305*38fd1498Szrj   if (!loop_vinfo)
4306*38fd1498Szrj     return NULL;
4307*38fd1498Szrj 
4308*38fd1498Szrj   /* Make sure that we're looking at a gather load or scatter store.  */
4309*38fd1498Szrj   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4310*38fd1498Szrj   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4311*38fd1498Szrj     return NULL;
4312*38fd1498Szrj 
4313*38fd1498Szrj   /* Get the boolean that controls whether the load or store happens.
4314*38fd1498Szrj      This is null if the operation is unconditional.  */
4315*38fd1498Szrj   tree mask = vect_get_load_store_mask (stmt);
4316*38fd1498Szrj 
4317*38fd1498Szrj   /* Make sure that the target supports an appropriate internal
4318*38fd1498Szrj      function for the gather/scatter operation.  */
4319*38fd1498Szrj   gather_scatter_info gs_info;
4320*38fd1498Szrj   if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
4321*38fd1498Szrj       || gs_info.decl)
4322*38fd1498Szrj     return NULL;
4323*38fd1498Szrj 
4324*38fd1498Szrj   /* Convert the mask to the right form.  */
4325*38fd1498Szrj   tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4326*38fd1498Szrj   if (mask)
4327*38fd1498Szrj     mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
4328*38fd1498Szrj 					  loop_vinfo);
4329*38fd1498Szrj 
4330*38fd1498Szrj   /* Get the invariant base and non-invariant offset, converting the
4331*38fd1498Szrj      latter to the same width as the vector elements.  */
4332*38fd1498Szrj   tree base = gs_info.base;
4333*38fd1498Szrj   tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4334*38fd1498Szrj   tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
4335*38fd1498Szrj 						last_stmt_info, loop_vinfo);
4336*38fd1498Szrj 
4337*38fd1498Szrj   /* Build the new pattern statement.  */
4338*38fd1498Szrj   tree scale = size_int (gs_info.scale);
4339*38fd1498Szrj   gcall *pattern_stmt;
4340*38fd1498Szrj   if (DR_IS_READ (dr))
4341*38fd1498Szrj     {
4342*38fd1498Szrj       if (mask != NULL)
4343*38fd1498Szrj 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4344*38fd1498Szrj 						   offset, scale, mask);
4345*38fd1498Szrj       else
4346*38fd1498Szrj 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4347*38fd1498Szrj 						   offset, scale);
4348*38fd1498Szrj       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4349*38fd1498Szrj       gimple_call_set_lhs (pattern_stmt, load_lhs);
4350*38fd1498Szrj     }
4351*38fd1498Szrj   else
4352*38fd1498Szrj     {
4353*38fd1498Szrj       tree rhs = vect_get_store_rhs (stmt);
4354*38fd1498Szrj       if (mask != NULL)
4355*38fd1498Szrj 	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4356*38fd1498Szrj 						   base, offset, scale, rhs,
4357*38fd1498Szrj 						   mask);
4358*38fd1498Szrj       else
4359*38fd1498Szrj 	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4360*38fd1498Szrj 						   base, offset, scale, rhs);
4361*38fd1498Szrj     }
4362*38fd1498Szrj   gimple_call_set_nothrow (pattern_stmt, true);
4363*38fd1498Szrj 
4364*38fd1498Szrj   /* Copy across relevant vectorization info and associate DR with the
4365*38fd1498Szrj      new pattern statement instead of the original statement.  */
4366*38fd1498Szrj   stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
4367*38fd1498Szrj 						       loop_vinfo);
4368*38fd1498Szrj   set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4369*38fd1498Szrj   STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
4370*38fd1498Szrj   STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
4371*38fd1498Szrj     = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
4372*38fd1498Szrj   STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
4373*38fd1498Szrj     = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
4374*38fd1498Szrj   DR_STMT (dr) = pattern_stmt;
4375*38fd1498Szrj 
4376*38fd1498Szrj   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4377*38fd1498Szrj   *type_out = vectype;
4378*38fd1498Szrj   *type_in = vectype;
4379*38fd1498Szrj 
4380*38fd1498Szrj   if (dump_enabled_p ())
4381*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
4382*38fd1498Szrj 		     "gather/scatter pattern detected:\n");
4383*38fd1498Szrj 
4384*38fd1498Szrj   return pattern_stmt;
4385*38fd1498Szrj }
4386*38fd1498Szrj 
4387*38fd1498Szrj /* Pattern wrapper around vect_try_gather_scatter_pattern.  */
4388*38fd1498Szrj 
4389*38fd1498Szrj static gimple *
4390*38fd1498Szrj vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
4391*38fd1498Szrj 				   tree *type_out)
4392*38fd1498Szrj {
4393*38fd1498Szrj   gimple *last_stmt = stmts->pop ();
4394*38fd1498Szrj   stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
4395*38fd1498Szrj   gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
4396*38fd1498Szrj 							  last_stmt_info,
4397*38fd1498Szrj 							  type_in, type_out);
4398*38fd1498Szrj   if (pattern_stmt)
4399*38fd1498Szrj     stmts->safe_push (last_stmt);
4400*38fd1498Szrj   return pattern_stmt;
4401*38fd1498Szrj }
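
/* Hedged illustration (hypothetical names): an indexed access such as

     void
     example_gather (double *restrict dst, double *restrict base,
		     int *restrict idx, int n)
     {
       for (int i = 0; i < n; i++)
	 dst[i] = base[idx[i]];
     }

   is the kind of load this pattern rewrites into an internal gather
   call (IFN_GATHER_LOAD, or IFN_MASK_GATHER_LOAD when conditional),
   provided vect_check_gather_scatter reports target support.  */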
4402*38fd1498Szrj 
4403*38fd1498Szrj /* Mark statements that are involved in a pattern.  */
4404*38fd1498Szrj 
4405*38fd1498Szrj static inline void
4406*38fd1498Szrj vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
4407*38fd1498Szrj                          tree pattern_vectype)
4408*38fd1498Szrj {
4409*38fd1498Szrj   stmt_vec_info pattern_stmt_info, def_stmt_info;
4410*38fd1498Szrj   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
4411*38fd1498Szrj   vec_info *vinfo = orig_stmt_info->vinfo;
4412*38fd1498Szrj   gimple *def_stmt;
4413*38fd1498Szrj 
4414*38fd1498Szrj   pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
4415*38fd1498Szrj   if (pattern_stmt_info == NULL)
4416*38fd1498Szrj     {
4417*38fd1498Szrj       pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
4418*38fd1498Szrj       set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
4419*38fd1498Szrj     }
4420*38fd1498Szrj   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
4421*38fd1498Szrj 
4422*38fd1498Szrj   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
4423*38fd1498Szrj   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
4424*38fd1498Szrj     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
4425*38fd1498Szrj   STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
4426*38fd1498Szrj   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
4427*38fd1498Szrj   STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
4428*38fd1498Szrj   STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
4429*38fd1498Szrj     = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4430*38fd1498Szrj   if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
4431*38fd1498Szrj     {
4432*38fd1498Szrj       gimple_stmt_iterator si;
4433*38fd1498Szrj       for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
4434*38fd1498Szrj 	   !gsi_end_p (si); gsi_next (&si))
4435*38fd1498Szrj 	{
4436*38fd1498Szrj 	  def_stmt = gsi_stmt (si);
4437*38fd1498Szrj 	  def_stmt_info = vinfo_for_stmt (def_stmt);
4438*38fd1498Szrj 	  if (def_stmt_info == NULL)
4439*38fd1498Szrj 	    {
4440*38fd1498Szrj 	      def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
4441*38fd1498Szrj 	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
4442*38fd1498Szrj 	    }
4443*38fd1498Szrj 	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
4444*38fd1498Szrj 	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
4445*38fd1498Szrj 	  STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
4446*38fd1498Szrj 	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
4447*38fd1498Szrj 	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
4448*38fd1498Szrj 	}
4449*38fd1498Szrj     }
4450*38fd1498Szrj }
4451*38fd1498Szrj 
4452*38fd1498Szrj /* Function vect_pattern_recog_1
4453*38fd1498Szrj 
4454*38fd1498Szrj    Input:
4455*38fd1498Szrj    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4456*38fd1498Szrj         computation pattern.
4457*38fd1498Szrj    STMT: A stmt from which the pattern search should start.
4458*38fd1498Szrj 
4459*38fd1498Szrj    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
4460*38fd1498Szrj    expression that computes the same functionality and can be used to
4461*38fd1498Szrj    replace the sequence of stmts that are involved in the pattern.
4462*38fd1498Szrj 
4463*38fd1498Szrj    Output:
4464*38fd1498Szrj    This function checks if the expression returned by PATTERN_RECOG_FUNC is
4465*38fd1498Szrj    supported in vector form by the target.  We use 'TYPE_IN' to obtain the
4466*38fd1498Szrj    relevant vector type. If 'TYPE_IN' is already a vector type, then this
4467*38fd1498Szrj    indicates that target support had already been checked by PATTERN_RECOG_FUNC.
4468*38fd1498Szrj    If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it
4469*38fd1498Szrj    fits the available target pattern.
4470*38fd1498Szrj 
4471*38fd1498Szrj    This function also does some bookkeeping, as explained in the documentation
4472*38fd1498Szrj    for vect_pattern_recog.  */
4473*38fd1498Szrj 
4474*38fd1498Szrj static bool
4475*38fd1498Szrj vect_pattern_recog_1 (vect_recog_func *recog_func,
4476*38fd1498Szrj 		      gimple_stmt_iterator si,
4477*38fd1498Szrj 		      vec<gimple *> *stmts_to_replace)
4478*38fd1498Szrj {
4479*38fd1498Szrj   gimple *stmt = gsi_stmt (si), *pattern_stmt;
4480*38fd1498Szrj   stmt_vec_info stmt_info;
4481*38fd1498Szrj   loop_vec_info loop_vinfo;
4482*38fd1498Szrj   tree pattern_vectype;
4483*38fd1498Szrj   tree type_in, type_out;
4484*38fd1498Szrj   enum tree_code code;
4485*38fd1498Szrj   int i;
4486*38fd1498Szrj   gimple *next;
4487*38fd1498Szrj 
4488*38fd1498Szrj   stmts_to_replace->truncate (0);
4489*38fd1498Szrj   stmts_to_replace->quick_push (stmt);
4490*38fd1498Szrj   pattern_stmt = recog_func->fn (stmts_to_replace, &type_in, &type_out);
4491*38fd1498Szrj   if (!pattern_stmt)
4492*38fd1498Szrj     return false;
4493*38fd1498Szrj 
4494*38fd1498Szrj   stmt = stmts_to_replace->last ();
4495*38fd1498Szrj   stmt_info = vinfo_for_stmt (stmt);
4496*38fd1498Szrj   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4497*38fd1498Szrj 
4498*38fd1498Szrj   if (VECTOR_BOOLEAN_TYPE_P (type_in)
4499*38fd1498Szrj       || VECTOR_TYPE_P (type_in))
4500*38fd1498Szrj     {
4501*38fd1498Szrj       /* No need to check target support (already checked by the pattern
4502*38fd1498Szrj          recognition function).  */
4503*38fd1498Szrj       pattern_vectype = type_out ? type_out : type_in;
4504*38fd1498Szrj     }
4505*38fd1498Szrj   else
4506*38fd1498Szrj     {
4507*38fd1498Szrj       /* Check target support  */
4508*38fd1498Szrj       type_in = get_vectype_for_scalar_type (type_in);
4509*38fd1498Szrj       if (!type_in)
4510*38fd1498Szrj 	return false;
4511*38fd1498Szrj       if (type_out)
4512*38fd1498Szrj 	type_out = get_vectype_for_scalar_type (type_out);
4513*38fd1498Szrj       else
4514*38fd1498Szrj 	type_out = type_in;
4515*38fd1498Szrj       if (!type_out)
4516*38fd1498Szrj 	return false;
4517*38fd1498Szrj       pattern_vectype = type_out;
4518*38fd1498Szrj 
4519*38fd1498Szrj       if (is_gimple_assign (pattern_stmt))
4520*38fd1498Szrj 	{
4521*38fd1498Szrj 	  enum insn_code icode;
4522*38fd1498Szrj 	  code = gimple_assign_rhs_code (pattern_stmt);
4523*38fd1498Szrj 	  optab optab = optab_for_tree_code (code, type_in, optab_default);
4524*38fd1498Szrj 	  machine_mode vec_mode = TYPE_MODE (type_in);
4525*38fd1498Szrj 	  if (!optab
4526*38fd1498Szrj 	      || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
4527*38fd1498Szrj 	      || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4528*38fd1498Szrj 	    return false;
4529*38fd1498Szrj 	}
4530*38fd1498Szrj       else
4531*38fd1498Szrj 	gcc_assert (is_gimple_call (pattern_stmt));
4532*38fd1498Szrj     }
4533*38fd1498Szrj 
4534*38fd1498Szrj   /* Found a vectorizable pattern.  */
4535*38fd1498Szrj   if (dump_enabled_p ())
4536*38fd1498Szrj     {
4537*38fd1498Szrj       dump_printf_loc (MSG_NOTE, vect_location,
4538*38fd1498Szrj                        "%s pattern recognized: ", recog_func->name);
4539*38fd1498Szrj       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4540*38fd1498Szrj     }
4541*38fd1498Szrj 
4542*38fd1498Szrj   /* Mark the stmts that are involved in the pattern. */
4543*38fd1498Szrj   vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4544*38fd1498Szrj 
4545*38fd1498Szrj   /* Patterns cannot be vectorized using SLP, because they change the order of
4546*38fd1498Szrj      computation.  */
4547*38fd1498Szrj   if (loop_vinfo)
4548*38fd1498Szrj     FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4549*38fd1498Szrj       if (next == stmt)
4550*38fd1498Szrj         LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
4551*38fd1498Szrj 
4552*38fd1498Szrj   /* It is possible that additional pattern stmts are created and inserted in
4553*38fd1498Szrj      STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
4554*38fd1498Szrj      relevant statements.  */
4555*38fd1498Szrj   for (i = 0; stmts_to_replace->iterate (i, &stmt)
4556*38fd1498Szrj 	      && (unsigned) i < (stmts_to_replace->length () - 1);
4557*38fd1498Szrj        i++)
4558*38fd1498Szrj     {
4559*38fd1498Szrj       stmt_info = vinfo_for_stmt (stmt);
4560*38fd1498Szrj       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4561*38fd1498Szrj       if (dump_enabled_p ())
4562*38fd1498Szrj         {
4563*38fd1498Szrj           dump_printf_loc (MSG_NOTE, vect_location,
4564*38fd1498Szrj                            "additional pattern stmt: ");
4565*38fd1498Szrj           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4566*38fd1498Szrj         }
4567*38fd1498Szrj 
4568*38fd1498Szrj       vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
4569*38fd1498Szrj     }
4570*38fd1498Szrj 
4571*38fd1498Szrj   return true;
4572*38fd1498Szrj }
4573*38fd1498Szrj 
4574*38fd1498Szrj 
4575*38fd1498Szrj /* Function vect_pattern_recog
4576*38fd1498Szrj 
4577*38fd1498Szrj    Input:
4578*38fd1498Szrj    LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4579*38fd1498Szrj         computation idioms.
4580*38fd1498Szrj 
4581*38fd1498Szrj    Output - for each computation idiom that is detected we create a new stmt
4582*38fd1498Szrj         that provides the same functionality and that can be vectorized.  We
4583*38fd1498Szrj         also record some information in the stmt_vec_info of the relevant
4584*38fd1498Szrj         stmts, as explained below:
4585*38fd1498Szrj 
4586*38fd1498Szrj    At the entry to this function we have the following stmts, with the
4587*38fd1498Szrj    following initial value in the STMT_VINFO fields:
4588*38fd1498Szrj 
4589*38fd1498Szrj          stmt                     in_pattern_p  related_stmt    vec_stmt
4590*38fd1498Szrj          S1: a_i = ....                 -       -               -
4591*38fd1498Szrj          S2: a_2 = ..use(a_i)..         -       -               -
4592*38fd1498Szrj          S3: a_1 = ..use(a_2)..         -       -               -
4593*38fd1498Szrj          S4: a_0 = ..use(a_1)..         -       -               -
4594*38fd1498Szrj          S5: ... = ..use(a_0)..         -       -               -
4595*38fd1498Szrj 
4596*38fd1498Szrj    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4597*38fd1498Szrj    represented by a single stmt.  We then:
4598*38fd1498Szrj    - create a new stmt S6 equivalent to the pattern (the stmt is not
4599*38fd1498Szrj      inserted into the code)
4600*38fd1498Szrj    - fill in the STMT_VINFO fields as follows:
4601*38fd1498Szrj 
4602*38fd1498Szrj                                   in_pattern_p  related_stmt    vec_stmt
4603*38fd1498Szrj          S1: a_i = ....                 -       -               -
4604*38fd1498Szrj          S2: a_2 = ..use(a_i)..         -       -               -
4605*38fd1498Szrj          S3: a_1 = ..use(a_2)..         -       -               -
4606*38fd1498Szrj          S4: a_0 = ..use(a_1)..         true    S6              -
4607*38fd1498Szrj           '---> S6: a_new = ....        -       S4              -
4608*38fd1498Szrj          S5: ... = ..use(a_0)..         -       -               -
4609*38fd1498Szrj 
4610*38fd1498Szrj    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4611*38fd1498Szrj    to each other through the RELATED_STMT field).
4612*38fd1498Szrj 
4613*38fd1498Szrj    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4614*38fd1498Szrj    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
4615*38fd1498Szrj    remain irrelevant unless used by stmts other than S4.
4616*38fd1498Szrj 
4617*38fd1498Szrj    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4618*38fd1498Szrj    (because they are marked as irrelevant).  It will vectorize S6, and record
4619*38fd1498Szrj    a pointer to the new vector stmt VS6 from S6 (as usual).
4620*38fd1498Szrj    S4 will be skipped, and S5 will be vectorized as usual:
4621*38fd1498Szrj 
4622*38fd1498Szrj                                   in_pattern_p  related_stmt    vec_stmt
4623*38fd1498Szrj          S1: a_i = ....                 -       -               -
4624*38fd1498Szrj          S2: a_2 = ..use(a_i)..         -       -               -
4625*38fd1498Szrj          S3: a_1 = ..use(a_2)..         -       -               -
4626*38fd1498Szrj        > VS6: va_new = ....             -       -               -
4627*38fd1498Szrj          S4: a_0 = ..use(a_1)..         true    S6              VS6
4628*38fd1498Szrj           '---> S6: a_new = ....        -       S4              VS6
4629*38fd1498Szrj        > VS5: ... = ..vuse(va_new)..    -       -               -
4630*38fd1498Szrj          S5: ... = ..use(a_0)..         -       -               -
4631*38fd1498Szrj 
4632*38fd1498Szrj    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4633*38fd1498Szrj    elsewhere), and we'll end up with:
4634*38fd1498Szrj 
4635*38fd1498Szrj         VS6: va_new = ....
4636*38fd1498Szrj         VS5: ... = ..vuse(va_new)..
4637*38fd1498Szrj 
4638*38fd1498Szrj    In the case of more than one pattern statement, e.g., a widen-mult with an
4639*38fd1498Szrj    intermediate type:
4640*38fd1498Szrj 
4641*38fd1498Szrj      S1  a_t = ;
4642*38fd1498Szrj      S2  a_T = (TYPE) a_t;
4643*38fd1498Szrj            '--> S3: a_it = (interm_type) a_t;
4644*38fd1498Szrj      S4  prod_T = a_T * CONST;
4645*38fd1498Szrj            '--> S5: prod_T' = a_it w* CONST;
4646*38fd1498Szrj 
4647*38fd1498Szrj    there may be other users of a_T outside the pattern.  In that case S2 will
4648*38fd1498Szrj    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4649*38fd1498Szrj    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
4650*38fd1498Szrj    be recorded in S3.  */
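/* Editorial illustration (a sketch, not part of this file; all names below
   are hypothetical): the kind of source the dot-product recognizer matches.

     short a[256], b[256];

     int
     dot_prod_example (void)
     {
       int sum = 0;
       for (int i = 0; i < 256; i++)
	 sum += a[i] * b[i];
       return sum;
     }

   The widening multiply plus accumulation in the loop body plays the role
   of S1...S4 in the tables above; the recognizer emits a single pattern
   stmt of the form sum_1 = DOT_PROD_EXPR <a_t, b_t, sum_0>, linked to the
   original accumulation through RELATED_STMT.  */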
4651*38fd1498Szrj 
4652*38fd1498Szrj void
4653*38fd1498Szrj vect_pattern_recog (vec_info *vinfo)
4654*38fd1498Szrj {
4655*38fd1498Szrj   struct loop *loop;
4656*38fd1498Szrj   basic_block *bbs;
4657*38fd1498Szrj   unsigned int nbbs;
4658*38fd1498Szrj   gimple_stmt_iterator si;
4659*38fd1498Szrj   unsigned int i, j;
4660*38fd1498Szrj   auto_vec<gimple *, 1> stmts_to_replace;
4661*38fd1498Szrj   gimple *stmt;
4662*38fd1498Szrj 
4663*38fd1498Szrj   if (dump_enabled_p ())
4664*38fd1498Szrj     dump_printf_loc (MSG_NOTE, vect_location,
4665*38fd1498Szrj                      "=== vect_pattern_recog ===\n");
4666*38fd1498Szrj 
4667*38fd1498Szrj   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4668*38fd1498Szrj     {
4669*38fd1498Szrj       loop = LOOP_VINFO_LOOP (loop_vinfo);
4670*38fd1498Szrj       bbs = LOOP_VINFO_BBS (loop_vinfo);
4671*38fd1498Szrj       nbbs = loop->num_nodes;
4672*38fd1498Szrj 
4673*38fd1498Szrj       /* Scan through the loop stmts, applying the pattern recognition
4674*38fd1498Szrj 	 functions starting at each stmt visited:  */
4675*38fd1498Szrj       for (i = 0; i < nbbs; i++)
4676*38fd1498Szrj 	{
4677*38fd1498Szrj 	  basic_block bb = bbs[i];
4678*38fd1498Szrj 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4679*38fd1498Szrj 	    {
4680*38fd1498Szrj 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
4681*38fd1498Szrj 	      for (j = 0; j < NUM_PATTERNS; j++)
4682*38fd1498Szrj 		if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4683*38fd1498Szrj 					  &stmts_to_replace))
4684*38fd1498Szrj 		  break;
4685*38fd1498Szrj 	    }
4686*38fd1498Szrj 	}
4687*38fd1498Szrj     }
4688*38fd1498Szrj   else
4689*38fd1498Szrj     {
4690*38fd1498Szrj       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4691*38fd1498Szrj       for (si = bb_vinfo->region_begin;
4692*38fd1498Szrj 	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4693*38fd1498Szrj 	{
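	  /* Skip stmts already marked as not vectorizable; no pattern
	     search starts at them.  */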
4694*38fd1498Szrj 	  if ((stmt = gsi_stmt (si))
4695*38fd1498Szrj 	      && vinfo_for_stmt (stmt)
4696*38fd1498Szrj 	      && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
4697*38fd1498Szrj 	    continue;
4698*38fd1498Szrj 
4699*38fd1498Szrj 	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
4700*38fd1498Szrj 	  for (j = 0; j < NUM_PATTERNS; j++)
4701*38fd1498Szrj 	    if (vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si,
4702*38fd1498Szrj 				      &stmts_to_replace))
4703*38fd1498Szrj 	      break;
4704*38fd1498Szrj 	}
4705*38fd1498Szrj     }
4706*38fd1498Szrj }
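/* Editorial note (a sketch under stated assumptions, not part of this
   file): the driver above is table-driven -- each stmt is offered to the
   NUM_PATTERNS entries of vect_vect_recog_func_ptrs in order, and the
   first recognizer that accepts a stmt wins, as the 'break' shows.  The
   same idiom in miniature, with hypothetical names:

     typedef bool (*recog_fn) (void *stmt);
     struct recog_entry { recog_fn fn; const char *name; };

     static bool
     try_patterns (const struct recog_entry *table, unsigned n, void *stmt)
     {
       for (unsigned j = 0; j < n; j++)
	 if (table[j].fn (stmt))
	   return true;
       return false;
     }
*/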
4707