xref: /netbsd-src/external/gpl3/gcc/dist/gcc/tree-vect-patterns.cc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2022 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
52 
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
55 
56 static bool
vect_get_range_info(tree var,wide_int * min_value,wide_int * max_value)57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
58 {
59   value_range vr;
60   get_range_query (cfun)->range_of_expr (vr, var);
61   if (vr.undefined_p ())
62     vr.set_varying (TREE_TYPE (var));
63   *min_value = wi::to_wide (vr.min ());
64   *max_value = wi::to_wide (vr.max ());
65   value_range_kind vr_type = vr.kind ();
66   wide_int nonzero = get_nonzero_bits (var);
67   signop sgn = TYPE_SIGN (TREE_TYPE (var));
68   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 					 nonzero, sgn) == VR_RANGE)
70     {
71       if (dump_enabled_p ())
72 	{
73 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 	  dump_printf (MSG_NOTE, " has range [");
75 	  dump_hex (MSG_NOTE, *min_value);
76 	  dump_printf (MSG_NOTE, ", ");
77 	  dump_hex (MSG_NOTE, *max_value);
78 	  dump_printf (MSG_NOTE, "]\n");
79 	}
80       return true;
81     }
82   else
83     {
84       if (dump_enabled_p ())
85 	{
86 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 	  dump_printf (MSG_NOTE, " has no range info\n");
88 	}
89       return false;
90     }
91 }
92 
93 /* Report that we've found an instance of pattern PATTERN in
94    statement STMT.  */
95 
96 static void
vect_pattern_detected(const char * name,gimple * stmt)97 vect_pattern_detected (const char *name, gimple *stmt)
98 {
99   if (dump_enabled_p ())
100     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
101 }
102 
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104    return the pattern statement's stmt_vec_info.  Set its vector type to
105    VECTYPE if it doesn't have one already.  */
106 
107 static stmt_vec_info
vect_init_pattern_stmt(vec_info * vinfo,gimple * pattern_stmt,stmt_vec_info orig_stmt_info,tree vectype)108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 			stmt_vec_info orig_stmt_info, tree vectype)
110 {
111   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112   if (pattern_stmt_info == NULL)
113     pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
115 
116   pattern_stmt_info->pattern_stmt_p = true;
117   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
121     {
122       gcc_assert (!vectype
123 		  || (VECTOR_BOOLEAN_TYPE_P (vectype)
124 		      == vect_use_mask_type_p (orig_stmt_info)));
125       STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
126       pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
127     }
128   return pattern_stmt_info;
129 }
130 
131 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
132    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
133    have one already.  */
134 
135 static void
vect_set_pattern_stmt(vec_info * vinfo,gimple * pattern_stmt,stmt_vec_info orig_stmt_info,tree vectype)136 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
137 		       stmt_vec_info orig_stmt_info, tree vectype)
138 {
139   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
140   STMT_VINFO_RELATED_STMT (orig_stmt_info)
141     = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
142 }
143 
144 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
145    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
146    be different from the vector type of the final pattern statement.
147    If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
148    from which it was derived.  */
149 
150 static inline void
append_pattern_def_seq(vec_info * vinfo,stmt_vec_info stmt_info,gimple * new_stmt,tree vectype=NULL_TREE,tree scalar_type_for_mask=NULL_TREE)151 append_pattern_def_seq (vec_info *vinfo,
152 			stmt_vec_info stmt_info, gimple *new_stmt,
153 			tree vectype = NULL_TREE,
154 			tree scalar_type_for_mask = NULL_TREE)
155 {
156   gcc_assert (!scalar_type_for_mask
157 	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
158   if (vectype)
159     {
160       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
161       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
162       if (scalar_type_for_mask)
163 	new_stmt_info->mask_precision
164 	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
165     }
166   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
167 				      new_stmt);
168 }
169 
170 /* The caller wants to perform new operations on vect_external variable
171    VAR, so that the result of the operations would also be vect_external.
172    Return the edge on which the operations can be performed, if one exists.
173    Return null if the operations should instead be treated as part of
174    the pattern that needs them.  */
175 
176 static edge
vect_get_external_def_edge(vec_info * vinfo,tree var)177 vect_get_external_def_edge (vec_info *vinfo, tree var)
178 {
179   edge e = NULL;
180   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
181     {
182       e = loop_preheader_edge (loop_vinfo->loop);
183       if (!SSA_NAME_IS_DEFAULT_DEF (var))
184 	{
185 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
186 	  if (bb == NULL
187 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
188 	    e = NULL;
189 	}
190     }
191   return e;
192 }
193 
194 /* Return true if the target supports a vector version of CODE,
195    where CODE is known to map to a direct optab with the given SUBTYPE.
196    ITYPE specifies the type of (some of) the scalar inputs and OTYPE
197    specifies the type of the scalar result.
198 
199    If CODE allows the inputs and outputs to have different type
200    (such as for WIDEN_SUM_EXPR), it is the input mode rather
201    than the output mode that determines the appropriate target pattern.
202    Operand 0 of the target pattern then specifies the mode that the output
203    must have.
204 
205    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
206    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
207    is nonnull.  */
208 
209 static bool
vect_supportable_direct_optab_p(vec_info * vinfo,tree otype,tree_code code,tree itype,tree * vecotype_out,tree * vecitype_out=NULL,enum optab_subtype subtype=optab_default)210 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
211 				 tree itype, tree *vecotype_out,
212 				 tree *vecitype_out = NULL,
213 				 enum optab_subtype subtype = optab_default)
214 {
215   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
216   if (!vecitype)
217     return false;
218 
219   tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
220   if (!vecotype)
221     return false;
222 
223   optab optab = optab_for_tree_code (code, vecitype, subtype);
224   if (!optab)
225     return false;
226 
227   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
228   if (icode == CODE_FOR_nothing
229       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
230     return false;
231 
232   *vecotype_out = vecotype;
233   if (vecitype_out)
234     *vecitype_out = vecitype;
235   return true;
236 }
237 
238 /* Round bit precision PRECISION up to a full element.  */
239 
240 static unsigned int
vect_element_precision(unsigned int precision)241 vect_element_precision (unsigned int precision)
242 {
243   precision = 1 << ceil_log2 (precision);
244   return MAX (precision, BITS_PER_UNIT);
245 }
246 
247 /* If OP is defined by a statement that's being considered for vectorization,
248    return information about that statement, otherwise return NULL.  */
249 
250 static stmt_vec_info
vect_get_internal_def(vec_info * vinfo,tree op)251 vect_get_internal_def (vec_info *vinfo, tree op)
252 {
253   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
254   if (def_stmt_info
255       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
256     return def_stmt_info;
257   return NULL;
258 }
259 
260 /* Check whether NAME, an ssa-name used in STMT_VINFO,
261    is a result of a type promotion, such that:
262      DEF_STMT: NAME = NOP (name0)
263    If CHECK_SIGN is TRUE, check that either both types are signed or both are
264    unsigned.  */
265 
266 static bool
type_conversion_p(vec_info * vinfo,tree name,bool check_sign,tree * orig_type,gimple ** def_stmt,bool * promotion)267 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
268 		   tree *orig_type, gimple **def_stmt, bool *promotion)
269 {
270   tree type = TREE_TYPE (name);
271   tree oprnd0;
272   enum vect_def_type dt;
273 
274   stmt_vec_info def_stmt_info;
275   if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
276     return false;
277 
278   if (dt != vect_internal_def
279       && dt != vect_external_def && dt != vect_constant_def)
280     return false;
281 
282   if (!*def_stmt)
283     return false;
284 
285   if (!is_gimple_assign (*def_stmt))
286     return false;
287 
288   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
289     return false;
290 
291   oprnd0 = gimple_assign_rhs1 (*def_stmt);
292 
293   *orig_type = TREE_TYPE (oprnd0);
294   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
295       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
296     return false;
297 
298   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
299     *promotion = true;
300   else
301     *promotion = false;
302 
303   if (!vect_is_simple_use (oprnd0, vinfo, &dt))
304     return false;
305 
306   return true;
307 }
308 
309 /* Holds information about an input operand after some sign changes
310    and type promotions have been peeled away.  */
311 class vect_unpromoted_value {
312 public:
313   vect_unpromoted_value ();
314 
315   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
316 
317   /* The value obtained after peeling away zero or more casts.  */
318   tree op;
319 
320   /* The type of OP.  */
321   tree type;
322 
323   /* The definition type of OP.  */
324   vect_def_type dt;
325 
326   /* If OP is the result of peeling at least one cast, and if the cast
327      of OP itself is a vectorizable statement, CASTER identifies that
328      statement, otherwise it is null.  */
329   stmt_vec_info caster;
330 };
331 
vect_unpromoted_value()332 inline vect_unpromoted_value::vect_unpromoted_value ()
333   : op (NULL_TREE),
334     type (NULL_TREE),
335     dt (vect_uninitialized_def),
336     caster (NULL)
337 {
338 }
339 
340 /* Set the operand to OP_IN, its definition type to DT_IN, and the
341    statement that casts it to CASTER_IN.  */
342 
343 inline void
set_op(tree op_in,vect_def_type dt_in,stmt_vec_info caster_in)344 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
345 			       stmt_vec_info caster_in)
346 {
347   op = op_in;
348   type = TREE_TYPE (op);
349   dt = dt_in;
350   caster = caster_in;
351 }
352 
353 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
354    to reach some vectorizable inner operand OP', continuing as long as it
355    is possible to convert OP' back to OP using a possible sign change
356    followed by a possible promotion P.  Return this OP', or null if OP is
357    not a vectorizable SSA name.  If there is a promotion P, describe its
358    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
359    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
360    have more than one user.
361 
362    A successful return means that it is possible to go from OP' to OP
363    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
364    whereas the cast from UNPROM to OP might be a promotion, a sign
365    change, or a nop.
366 
367    E.g. say we have:
368 
369        signed short *ptr = ...;
370        signed short C = *ptr;
371        unsigned short B = (unsigned short) C;    // sign change
372        signed int A = (signed int) B;            // unsigned promotion
373        ...possible other uses of A...
374        unsigned int OP = (unsigned int) A;       // sign change
375 
376    In this case it's possible to go directly from C to OP using:
377 
378        OP = (unsigned int) (unsigned short) C;
379 	    +------------+ +--------------+
380 	       promotion      sign change
381 
382    so OP' would be C.  The input to the promotion is B, so UNPROM
383    would describe B.  */
384 
385 static tree
vect_look_through_possible_promotion(vec_info * vinfo,tree op,vect_unpromoted_value * unprom,bool * single_use_p=NULL)386 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
387 				      vect_unpromoted_value *unprom,
388 				      bool *single_use_p = NULL)
389 {
390   tree res = NULL_TREE;
391   tree op_type = TREE_TYPE (op);
392   unsigned int orig_precision = TYPE_PRECISION (op_type);
393   unsigned int min_precision = orig_precision;
394   stmt_vec_info caster = NULL;
395   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
396     {
397       /* See whether OP is simple enough to vectorize.  */
398       stmt_vec_info def_stmt_info;
399       gimple *def_stmt;
400       vect_def_type dt;
401       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
402 	break;
403 
404       /* If OP is the input of a demotion, skip over it to see whether
405 	 OP is itself the result of a promotion.  If so, the combined
406 	 effect of the promotion and the demotion might fit the required
407 	 pattern, otherwise neither operation fits.
408 
409 	 This copes with cases such as the result of an arithmetic
410 	 operation being truncated before being stored, and where that
411 	 arithmetic operation has been recognized as an over-widened one.  */
412       if (TYPE_PRECISION (op_type) <= min_precision)
413 	{
414 	  /* Use OP as the UNPROM described above if we haven't yet
415 	     found a promotion, or if using the new input preserves the
416 	     sign of the previous promotion.  */
417 	  if (!res
418 	      || TYPE_PRECISION (unprom->type) == orig_precision
419 	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
420 	    {
421 	      unprom->set_op (op, dt, caster);
422 	      min_precision = TYPE_PRECISION (op_type);
423 	    }
424 	  /* Stop if we've already seen a promotion and if this
425 	     conversion does more than change the sign.  */
426 	  else if (TYPE_PRECISION (op_type)
427 		   != TYPE_PRECISION (unprom->type))
428 	    break;
429 
430 	  /* The sequence now extends to OP.  */
431 	  res = op;
432 	}
433 
434       /* See whether OP is defined by a cast.  Record it as CASTER if
435 	 the cast is potentially vectorizable.  */
436       if (!def_stmt)
437 	break;
438       caster = def_stmt_info;
439 
440       /* Ignore pattern statements, since we don't link uses for them.  */
441       if (caster
442 	  && single_use_p
443 	  && !STMT_VINFO_RELATED_STMT (caster)
444 	  && !has_single_use (res))
445 	*single_use_p = false;
446 
447       gassign *assign = dyn_cast <gassign *> (def_stmt);
448       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
449 	break;
450 
451       /* Continue with the input to the cast.  */
452       op = gimple_assign_rhs1 (def_stmt);
453       op_type = TREE_TYPE (op);
454     }
455   return res;
456 }
457 
458 /* OP is an integer operand to an operation that returns TYPE, and we
459    want to treat the operation as a widening one.  So far we can treat
460    it as widening from *COMMON_TYPE.
461 
462    Return true if OP is suitable for such a widening operation,
463    either widening from *COMMON_TYPE or from some supertype of it.
464    Update *COMMON_TYPE to the supertype in the latter case.
465 
466    SHIFT_P is true if OP is a shift amount.  */
467 
468 static bool
vect_joust_widened_integer(tree type,bool shift_p,tree op,tree * common_type)469 vect_joust_widened_integer (tree type, bool shift_p, tree op,
470 			    tree *common_type)
471 {
472   /* Calculate the minimum precision required by OP, without changing
473      the sign of either operand.  */
474   unsigned int precision;
475   if (shift_p)
476     {
477       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
478 	return false;
479       precision = TREE_INT_CST_LOW (op);
480     }
481   else
482     {
483       precision = wi::min_precision (wi::to_widest (op),
484 				     TYPE_SIGN (*common_type));
485       if (precision * 2 > TYPE_PRECISION (type))
486 	return false;
487     }
488 
489   /* If OP requires a wider type, switch to that type.  The checks
490      above ensure that this is still narrower than the result.  */
491   precision = vect_element_precision (precision);
492   if (TYPE_PRECISION (*common_type) < precision)
493     *common_type = build_nonstandard_integer_type
494       (precision, TYPE_UNSIGNED (*common_type));
495   return true;
496 }
497 
498 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
499    is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
500 
501 static bool
vect_joust_widened_type(tree type,tree new_type,tree * common_type)502 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
503 {
504   if (types_compatible_p (*common_type, new_type))
505     return true;
506 
507   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
508   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
509       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
510     return true;
511 
512   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
513   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
514       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
515     {
516       *common_type = new_type;
517       return true;
518     }
519 
520   /* We have mismatched signs, with the signed type being
521      no wider than the unsigned type.  In this case we need
522      a wider signed type.  */
523   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
524 				TYPE_PRECISION (new_type));
525   precision *= 2;
526 
527   if (precision * 2 > TYPE_PRECISION (type))
528     return false;
529 
530   *common_type = build_nonstandard_integer_type (precision, false);
531   return true;
532 }
533 
534 /* Check whether STMT_INFO can be viewed as a tree of integer operations
535    in which each node either performs CODE or WIDENED_CODE, and where
536    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
537    specifies the maximum number of leaf operands.  SHIFT_P says whether
538    CODE and WIDENED_CODE are some sort of shift.
539 
540    If STMT_INFO is such a tree, return the number of leaf operands
541    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
542    to a type that (a) is narrower than the result of STMT_INFO and
543    (b) can hold all leaf operand values.
544 
545    If SUBTYPE then allow that the signs of the operands
546    may differ in signs but not in precision.  SUBTYPE is updated to reflect
547    this.
548 
549    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
550    exists.  */
551 
552 static unsigned int
vect_widened_op_tree(vec_info * vinfo,stmt_vec_info stmt_info,tree_code code,tree_code widened_code,bool shift_p,unsigned int max_nops,vect_unpromoted_value * unprom,tree * common_type,enum optab_subtype * subtype=NULL)553 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
554 		      tree_code widened_code, bool shift_p,
555 		      unsigned int max_nops,
556 		      vect_unpromoted_value *unprom, tree *common_type,
557 		      enum optab_subtype *subtype = NULL)
558 {
559   /* Check for an integer operation with the right code.  */
560   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
561   if (!assign)
562     return 0;
563 
564   tree_code rhs_code = gimple_assign_rhs_code (assign);
565   if (rhs_code != code && rhs_code != widened_code)
566     return 0;
567 
568   tree type = TREE_TYPE (gimple_assign_lhs (assign));
569   if (!INTEGRAL_TYPE_P (type))
570     return 0;
571 
572   /* Assume that both operands will be leaf operands.  */
573   max_nops -= 2;
574 
575   /* Check the operands.  */
576   unsigned int next_op = 0;
577   for (unsigned int i = 0; i < 2; ++i)
578     {
579       vect_unpromoted_value *this_unprom = &unprom[next_op];
580       unsigned int nops = 1;
581       tree op = gimple_op (assign, i + 1);
582       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
583 	{
584 	  /* We already have a common type from earlier operands.
585 	     Update it to account for OP.  */
586 	  this_unprom->set_op (op, vect_constant_def);
587 	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
588 	    return 0;
589 	}
590       else
591 	{
592 	  /* Only allow shifts by constants.  */
593 	  if (shift_p && i == 1)
594 	    return 0;
595 
596 	  if (rhs_code != code)
597 	    {
598 	      /* If rhs_code is widened_code, don't look through further
599 		 possible promotions, there is a promotion already embedded
600 		 in the WIDEN_*_EXPR.  */
601 	      if (TREE_CODE (op) != SSA_NAME
602 		  || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
603 		return 0;
604 
605 	      stmt_vec_info def_stmt_info;
606 	      gimple *def_stmt;
607 	      vect_def_type dt;
608 	      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
609 				       &def_stmt))
610 		return 0;
611 	      this_unprom->set_op (op, dt, NULL);
612 	    }
613 	  else if (!vect_look_through_possible_promotion (vinfo, op,
614 							  this_unprom))
615 	    return 0;
616 
617 	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
618 	    {
619 	      /* The operand isn't widened.  If STMT_INFO has the code
620 		 for an unwidened operation, recursively check whether
621 		 this operand is a node of the tree.  */
622 	      if (rhs_code != code
623 		  || max_nops == 0
624 		  || this_unprom->dt != vect_internal_def)
625 		return 0;
626 
627 	      /* Give back the leaf slot allocated above now that we're
628 		 not treating this as a leaf operand.  */
629 	      max_nops += 1;
630 
631 	      /* Recursively process the definition of the operand.  */
632 	      stmt_vec_info def_stmt_info
633 		= vinfo->lookup_def (this_unprom->op);
634 	      nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
635 					   widened_code, shift_p, max_nops,
636 					   this_unprom, common_type,
637 					   subtype);
638 	      if (nops == 0)
639 		return 0;
640 
641 	      max_nops -= nops;
642 	    }
643 	  else
644 	    {
645 	      /* Make sure that the operand is narrower than the result.  */
646 	      if (TYPE_PRECISION (this_unprom->type) * 2
647 		  > TYPE_PRECISION (type))
648 		return 0;
649 
650 	      /* Update COMMON_TYPE for the new operand.  */
651 	      if (i == 0)
652 		*common_type = this_unprom->type;
653 	      else if (!vect_joust_widened_type (type, this_unprom->type,
654 						 common_type))
655 		{
656 		  if (subtype)
657 		    {
658 		      /* See if we can sign extend the smaller type.  */
659 		      if (TYPE_PRECISION (this_unprom->type)
660 			  > TYPE_PRECISION (*common_type))
661 			*common_type = this_unprom->type;
662 		      *subtype = optab_vector_mixed_sign;
663 		    }
664 		  else
665 		    return 0;
666 		}
667 	    }
668 	}
669       next_op += nops;
670     }
671   return next_op;
672 }
673 
674 /* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
675    is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
676 
677 static tree
vect_recog_temp_ssa_var(tree type,gimple * stmt)678 vect_recog_temp_ssa_var (tree type, gimple *stmt)
679 {
680   return make_temp_ssa_name (type, stmt, "patt");
681 }
682 
683 /* STMT2_INFO describes a type conversion that could be split into STMT1
684    followed by a version of STMT2_INFO that takes NEW_RHS as its first
685    input.  Try to do this using pattern statements, returning true on
686    success.  */
687 
688 static bool
vect_split_statement(vec_info * vinfo,stmt_vec_info stmt2_info,tree new_rhs,gimple * stmt1,tree vectype)689 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
690 		      gimple *stmt1, tree vectype)
691 {
692   if (is_pattern_stmt_p (stmt2_info))
693     {
694       /* STMT2_INFO is part of a pattern.  Get the statement to which
695 	 the pattern is attached.  */
696       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
697       vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
698 
699       if (dump_enabled_p ())
700 	dump_printf_loc (MSG_NOTE, vect_location,
701 			 "Splitting pattern statement: %G", stmt2_info->stmt);
702 
703       /* Since STMT2_INFO is a pattern statement, we can change it
704 	 in-situ without worrying about changing the code for the
705 	 containing block.  */
706       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
707 
708       if (dump_enabled_p ())
709 	{
710 	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
711 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
712 			   stmt2_info->stmt);
713 	}
714 
715       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
716       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
717 	/* STMT2_INFO is the actual pattern statement.  Add STMT1
718 	   to the end of the definition sequence.  */
719 	gimple_seq_add_stmt_without_update (def_seq, stmt1);
720       else
721 	{
722 	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
723 	     before it.  */
724 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
725 	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
726 	}
727       return true;
728     }
729   else
730     {
731       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
732 	 two-statement pattern now.  */
733       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
734       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
735       tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
736       if (!lhs_vectype)
737 	return false;
738 
739       if (dump_enabled_p ())
740 	dump_printf_loc (MSG_NOTE, vect_location,
741 			 "Splitting statement: %G", stmt2_info->stmt);
742 
743       /* Add STMT1 as a singleton pattern definition sequence.  */
744       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
745       vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
746       gimple_seq_add_stmt_without_update (def_seq, stmt1);
747 
748       /* Build the second of the two pattern statements.  */
749       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
750       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
751       vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
752 
753       if (dump_enabled_p ())
754 	{
755 	  dump_printf_loc (MSG_NOTE, vect_location,
756 			   "into pattern statements: %G", stmt1);
757 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
758 	}
759 
760       return true;
761     }
762 }
763 
764 /* Convert UNPROM to TYPE and return the result, adding new statements
765    to STMT_INFO's pattern definition statements if no better way is
766    available.  VECTYPE is the vector form of TYPE.
767 
768    If SUBTYPE then convert the type based on the subtype.  */
769 
770 static tree
vect_convert_input(vec_info * vinfo,stmt_vec_info stmt_info,tree type,vect_unpromoted_value * unprom,tree vectype,enum optab_subtype subtype=optab_default)771 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
772 		    vect_unpromoted_value *unprom, tree vectype,
773 		    enum optab_subtype subtype = optab_default)
774 {
775 
776   /* Update the type if the signs differ.  */
777   if (subtype == optab_vector_mixed_sign
778       && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
779     type = build_nonstandard_integer_type (TYPE_PRECISION (type),
780 					   TYPE_SIGN (unprom->type));
781 
782   /* Check for a no-op conversion.  */
783   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
784     return unprom->op;
785 
786   /* Allow the caller to create constant vect_unpromoted_values.  */
787   if (TREE_CODE (unprom->op) == INTEGER_CST)
788     return wide_int_to_tree (type, wi::to_widest (unprom->op));
789 
790   tree input = unprom->op;
791   if (unprom->caster)
792     {
793       tree lhs = gimple_get_lhs (unprom->caster->stmt);
794       tree lhs_type = TREE_TYPE (lhs);
795 
796       /* If the result of the existing cast is the right width, use it
797 	 instead of the source of the cast.  */
798       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
799 	input = lhs;
800       /* If the precision we want is between the source and result
801 	 precisions of the existing cast, try splitting the cast into
802 	 two and tapping into a mid-way point.  */
803       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
804 	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
805 	{
806 	  /* In order to preserve the semantics of the original cast,
807 	     give the mid-way point the same signedness as the input value.
808 
809 	     It would be possible to use a signed type here instead if
810 	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
811 	     make the sign of the midtype sensitive to the order in
812 	     which we process the statements, since the signedness of
813 	     TYPE is the signedness required by just one of possibly
814 	     many users.  Also, unsigned promotions are usually as cheap
815 	     as or cheaper than signed ones, so it's better to keep an
816 	     unsigned promotion.  */
817 	  tree midtype = build_nonstandard_integer_type
818 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
819 	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
820 	  if (vec_midtype)
821 	    {
822 	      input = vect_recog_temp_ssa_var (midtype, NULL);
823 	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
824 						       unprom->op);
825 	      if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
826 					 vec_midtype))
827 		append_pattern_def_seq (vinfo, stmt_info,
828 					new_stmt, vec_midtype);
829 	    }
830 	}
831 
832       /* See if we can reuse an existing result.  */
833       if (types_compatible_p (type, TREE_TYPE (input)))
834 	return input;
835     }
836 
837   /* We need a new conversion statement.  */
838   tree new_op = vect_recog_temp_ssa_var (type, NULL);
839   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
840 
841   /* If OP is an external value, see if we can insert the new statement
842      on an incoming edge.  */
843   if (input == unprom->op && unprom->dt == vect_external_def)
844     if (edge e = vect_get_external_def_edge (vinfo, input))
845       {
846 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
847 	gcc_assert (!new_bb);
848 	return new_op;
849       }
850 
851   /* As a (common) last resort, add the statement to the pattern itself.  */
852   append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
853   return new_op;
854 }
855 
856 /* Invoke vect_convert_input for N elements of UNPROM and store the
857    result in the corresponding elements of RESULT.
858 
859    If SUBTYPE then convert the type based on the subtype.  */
860 
861 static void
vect_convert_inputs(vec_info * vinfo,stmt_vec_info stmt_info,unsigned int n,tree * result,tree type,vect_unpromoted_value * unprom,tree vectype,enum optab_subtype subtype=optab_default)862 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
863 		     tree *result, tree type, vect_unpromoted_value *unprom,
864 		     tree vectype, enum optab_subtype subtype = optab_default)
865 {
866   for (unsigned int i = 0; i < n; ++i)
867     {
868       unsigned int j;
869       for (j = 0; j < i; ++j)
870 	if (unprom[j].op == unprom[i].op)
871 	  break;
872 
873       if (j < i)
874 	result[i] = result[j];
875       else
876 	result[i] = vect_convert_input (vinfo, stmt_info,
877 					type, &unprom[i], vectype, subtype);
878     }
879 }
880 
881 /* The caller has created a (possibly empty) sequence of pattern definition
882    statements followed by a single statement PATTERN_STMT.  Cast the result
883    of this final statement to TYPE.  If a new statement is needed, add
884    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
885    and return the new statement, otherwise return PATTERN_STMT as-is.
886    VECITYPE is the vector form of PATTERN_STMT's result type.  */
887 
888 static gimple *
vect_convert_output(vec_info * vinfo,stmt_vec_info stmt_info,tree type,gimple * pattern_stmt,tree vecitype)889 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
890 		     gimple *pattern_stmt, tree vecitype)
891 {
892   tree lhs = gimple_get_lhs (pattern_stmt);
893   if (!types_compatible_p (type, TREE_TYPE (lhs)))
894     {
895       append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
896       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
897       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
898     }
899   return pattern_stmt;
900 }
901 
902 /* Return true if STMT_VINFO describes a reduction for which reassociation
903    is allowed.  If STMT_INFO is part of a group, assume that it's part of
904    a reduction chain and optimistically assume that all statements
905    except the last allow reassociation.
906    Also require it to have code CODE and to be a reduction
907    in the outermost loop.  When returning true, store the operands in
908    *OP0_OUT and *OP1_OUT.  */
909 
910 static bool
vect_reassociating_reduction_p(vec_info * vinfo,stmt_vec_info stmt_info,tree_code code,tree * op0_out,tree * op1_out)911 vect_reassociating_reduction_p (vec_info *vinfo,
912 				stmt_vec_info stmt_info, tree_code code,
913 				tree *op0_out, tree *op1_out)
914 {
915   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
916   if (!loop_info)
917     return false;
918 
919   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
920   if (!assign || gimple_assign_rhs_code (assign) != code)
921     return false;
922 
923   /* We don't allow changing the order of the computation in the inner-loop
924      when doing outer-loop vectorization.  */
925   class loop *loop = LOOP_VINFO_LOOP (loop_info);
926   if (loop && nested_in_vect_loop_p (loop, stmt_info))
927     return false;
928 
929   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
930     {
931       if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
932 				       code))
933 	return false;
934     }
935   else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
936     return false;
937 
938   *op0_out = gimple_assign_rhs1 (assign);
939   *op1_out = gimple_assign_rhs2 (assign);
940   if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
941     std::swap (*op0_out, *op1_out);
942   return true;
943 }
944 
945 /* match.pd function to match
946    (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
947    with conditions:
948    1) @1, @2, c, d, a, b are all integral type.
949    2) There's single_use for both @1 and @2.
950    3) a, c have same precision.
951    4) c and @1 have different precision.
952    5) c, d are the same type or they can differ in sign when convert is
953    truncation.
954 
955    record a and c and d and @3.  */
956 
957 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
958 
959 /* Function vect_recog_cond_expr_convert
960 
961    Try to find the following pattern:
962 
963    TYPE_AB A,B;
964    TYPE_CD C,D;
965    TYPE_E E;
966    TYPE_E op_true = (TYPE_E) A;
967    TYPE_E op_false = (TYPE_E) B;
968 
969    E = C cmp D ? op_true : op_false;
970 
971    where
972    TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
973    TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
974    single_use of op_true and op_false.
975    TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
976 
977    Input:
978 
979    * STMT_VINFO: The stmt from which the pattern search begins.
980    here it starts with E = c cmp D ? op_true : op_false;
981 
982    Output:
983 
984    TYPE1 E' = C cmp D ? A : B;
985    TYPE3 E = (TYPE3) E';
986 
987    There may extra nop_convert for A or B to handle different signness.
988 
989    * TYPE_OUT: The vector type of the output of this pattern.
990 
991    * Return value: A new stmt that will be used to replace the sequence of
992    stmts that constitute the pattern. In this case it will be:
993    E = (TYPE3)E';
994    E' = C cmp D ? A : B; is recorded in pattern definition statements;  */
995 
996 static gimple *
vect_recog_cond_expr_convert_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)997 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
998 				      stmt_vec_info stmt_vinfo, tree *type_out)
999 {
1000   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1001   tree lhs, match[4], temp, type, new_lhs, op2;
1002   gimple *cond_stmt;
1003   gimple *pattern_stmt;
1004 
1005   if (!last_stmt)
1006     return NULL;
1007 
1008   lhs = gimple_assign_lhs (last_stmt);
1009 
1010   /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
1011      TYPE_PRECISION (A) == TYPE_PRECISION (C).  */
1012   if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1013     return NULL;
1014 
1015   vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1016 
1017   op2 = match[2];
1018   type = TREE_TYPE (match[1]);
1019   if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1020     {
1021       op2 = vect_recog_temp_ssa_var (type, NULL);
1022       gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1023       append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1024 			      get_vectype_for_scalar_type (vinfo, type));
1025     }
1026 
1027   temp = vect_recog_temp_ssa_var (type, NULL);
1028   cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1029 						 match[1], op2));
1030   append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1031 			  get_vectype_for_scalar_type (vinfo, type));
1032   new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1033   pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1034   *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1035 
1036   if (dump_enabled_p ())
1037     dump_printf_loc (MSG_NOTE, vect_location,
1038 		     "created pattern stmt: %G", pattern_stmt);
1039   return pattern_stmt;
1040 }
1041 
1042 /* Function vect_recog_dot_prod_pattern
1043 
1044    Try to find the following pattern:
1045 
1046      type1a x_t
1047      type1b y_t;
1048      TYPE1 prod;
1049      TYPE2 sum = init;
1050    loop:
1051      sum_0 = phi <init, sum_1>
1052      S1  x_t = ...
1053      S2  y_t = ...
1054      S3  x_T = (TYPE1) x_t;
1055      S4  y_T = (TYPE1) y_t;
1056      S5  prod = x_T * y_T;
1057      [S6  prod = (TYPE2) prod;  #optional]
1058      S7  sum_1 = prod + sum_0;
1059 
1060    where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1061    the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1062    'type1a' and 'type1b' can differ.
1063 
1064    Input:
1065 
1066    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1067    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1068    will be detected.
1069 
1070    Output:
1071 
1072    * TYPE_OUT: The type of the output  of this pattern.
1073 
1074    * Return value: A new stmt that will be used to replace the sequence of
1075    stmts that constitute the pattern. In this case it will be:
1076         WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1077 
1078    Note: The dot-prod idiom is a widening reduction pattern that is
1079          vectorized without preserving all the intermediate results. It
1080          produces only N/2 (widened) results (by summing up pairs of
1081          intermediate results) rather than all N results.  Therefore, we
1082          cannot allow this pattern when we want to get all the results and in
1083          the correct order (as is the case when this computation is in an
1084          inner-loop nested in an outer-loop that us being vectorized).  */
1085 
1086 static gimple *
vect_recog_dot_prod_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1087 vect_recog_dot_prod_pattern (vec_info *vinfo,
1088 			     stmt_vec_info stmt_vinfo, tree *type_out)
1089 {
1090   tree oprnd0, oprnd1;
1091   gimple *last_stmt = stmt_vinfo->stmt;
1092   tree type, half_type;
1093   gimple *pattern_stmt;
1094   tree var;
1095 
1096   /* Look for the following pattern
1097           DX = (TYPE1) X;
1098           DY = (TYPE1) Y;
1099           DPROD = DX * DY;
1100           DDPROD = (TYPE2) DPROD;
1101           sum_1 = DDPROD + sum_0;
1102      In which
1103      - DX is double the size of X
1104      - DY is double the size of Y
1105      - DX, DY, DPROD all have the same type but the sign
1106        between X, Y and DPROD can differ.
1107      - sum is the same size of DPROD or bigger
1108      - sum has been recognized as a reduction variable.
1109 
1110      This is equivalent to:
1111        DPROD = X w* Y;          #widen mult
1112        sum_1 = DPROD w+ sum_0;  #widen summation
1113      or
1114        DPROD = X w* Y;          #widen mult
1115        sum_1 = DPROD + sum_0;   #summation
1116    */
1117 
1118   /* Starting from LAST_STMT, follow the defs of its uses in search
1119      of the above pattern.  */
1120 
1121   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1122 				       &oprnd0, &oprnd1))
1123     return NULL;
1124 
1125   type = TREE_TYPE (gimple_get_lhs (last_stmt));
1126 
1127   vect_unpromoted_value unprom_mult;
1128   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1129 
1130   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1131      we know that oprnd1 is the reduction variable (defined by a loop-header
1132      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1133      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
1134   if (!oprnd0)
1135     return NULL;
1136 
1137   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1138   if (!mult_vinfo)
1139     return NULL;
1140 
1141   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1142      inside the loop (in case we are analyzing an outer-loop).  */
1143   vect_unpromoted_value unprom0[2];
1144   enum optab_subtype subtype = optab_vector;
1145   if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1146 			     false, 2, unprom0, &half_type, &subtype))
1147     return NULL;
1148 
1149   /* If there are two widening operations, make sure they agree on the sign
1150      of the extension.  The result of an optab_vector_mixed_sign operation
1151      is signed; otherwise, the result has the same sign as the operands.  */
1152   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1153       && (subtype == optab_vector_mixed_sign
1154 	? TYPE_UNSIGNED (unprom_mult.type)
1155 	: TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1156     return NULL;
1157 
1158   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1159 
1160   tree half_vectype;
1161   if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1162 					type_out, &half_vectype, subtype))
1163     return NULL;
1164 
1165   /* Get the inputs in the appropriate types.  */
1166   tree mult_oprnd[2];
1167   vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1168 		       unprom0, half_vectype, subtype);
1169 
1170   var = vect_recog_temp_ssa_var (type, NULL);
1171   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1172 				      mult_oprnd[0], mult_oprnd[1], oprnd1);
1173 
1174   return pattern_stmt;
1175 }
1176 
1177 
1178 /* Function vect_recog_sad_pattern
1179 
1180    Try to find the following Sum of Absolute Difference (SAD) pattern:
1181 
1182      type x_t, y_t;
1183      signed TYPE1 diff, abs_diff;
1184      TYPE2 sum = init;
1185    loop:
1186      sum_0 = phi <init, sum_1>
1187      S1  x_t = ...
1188      S2  y_t = ...
1189      S3  x_T = (TYPE1) x_t;
1190      S4  y_T = (TYPE1) y_t;
1191      S5  diff = x_T - y_T;
1192      S6  abs_diff = ABS_EXPR <diff>;
1193      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1194      S8  sum_1 = abs_diff + sum_0;
1195 
1196    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1197    same size of 'TYPE1' or bigger. This is a special case of a reduction
1198    computation.
1199 
1200    Input:
1201 
1202    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1203    example, when this function is called with S8, the pattern
1204    {S3,S4,S5,S6,S7,S8} will be detected.
1205 
1206    Output:
1207 
1208    * TYPE_OUT: The type of the output of this pattern.
1209 
1210    * Return value: A new stmt that will be used to replace the sequence of
1211    stmts that constitute the pattern. In this case it will be:
1212         SAD_EXPR <x_t, y_t, sum_0>
1213   */
1214 
1215 static gimple *
vect_recog_sad_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1216 vect_recog_sad_pattern (vec_info *vinfo,
1217 			stmt_vec_info stmt_vinfo, tree *type_out)
1218 {
1219   gimple *last_stmt = stmt_vinfo->stmt;
1220   tree half_type;
1221 
1222   /* Look for the following pattern
1223           DX = (TYPE1) X;
1224           DY = (TYPE1) Y;
1225           DDIFF = DX - DY;
1226           DAD = ABS_EXPR <DDIFF>;
1227           DDPROD = (TYPE2) DPROD;
1228           sum_1 = DAD + sum_0;
1229      In which
1230      - DX is at least double the size of X
1231      - DY is at least double the size of Y
1232      - DX, DY, DDIFF, DAD all have the same type
1233      - sum is the same size of DAD or bigger
1234      - sum has been recognized as a reduction variable.
1235 
1236      This is equivalent to:
1237        DDIFF = X w- Y;          #widen sub
1238        DAD = ABS_EXPR <DDIFF>;
1239        sum_1 = DAD w+ sum_0;    #widen summation
1240      or
1241        DDIFF = X w- Y;          #widen sub
1242        DAD = ABS_EXPR <DDIFF>;
1243        sum_1 = DAD + sum_0;     #summation
1244    */
1245 
1246   /* Starting from LAST_STMT, follow the defs of its uses in search
1247      of the above pattern.  */
1248 
1249   tree plus_oprnd0, plus_oprnd1;
1250   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1251 				       &plus_oprnd0, &plus_oprnd1))
1252     return NULL;
1253 
1254   tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1255 
1256   /* Any non-truncating sequence of conversions is OK here, since
1257      with a successful match, the result of the ABS(U) is known to fit
1258      within the nonnegative range of the result type.  (It cannot be the
1259      negative of the minimum signed value due to the range of the widening
1260      MINUS_EXPR.)  */
1261   vect_unpromoted_value unprom_abs;
1262   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1263 						      &unprom_abs);
1264 
1265   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1266      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1267      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1268      Then check that plus_oprnd0 is defined by an abs_expr.  */
1269 
1270   if (!plus_oprnd0)
1271     return NULL;
1272 
1273   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1274   if (!abs_stmt_vinfo)
1275     return NULL;
1276 
1277   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1278      inside the loop (in case we are analyzing an outer-loop).  */
1279   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1280   if (!abs_stmt
1281       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1282 	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1283     return NULL;
1284 
1285   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1286   tree abs_type = TREE_TYPE (abs_oprnd);
1287   if (TYPE_UNSIGNED (abs_type))
1288     return NULL;
1289 
1290   /* Peel off conversions from the ABS input.  This can involve sign
1291      changes (e.g. from an unsigned subtraction to a signed ABS input)
1292      or signed promotion, but it can't include unsigned promotion.
1293      (Note that ABS of an unsigned promotion should have been folded
1294      away before now anyway.)  */
1295   vect_unpromoted_value unprom_diff;
1296   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1297 						    &unprom_diff);
1298   if (!abs_oprnd)
1299     return NULL;
1300   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1301       && TYPE_UNSIGNED (unprom_diff.type))
1302     return NULL;
1303 
1304   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1305   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1306   if (!diff_stmt_vinfo)
1307     return NULL;
1308 
1309   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
1310      inside the loop (in case we are analyzing an outer-loop).  */
1311   vect_unpromoted_value unprom[2];
1312   if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1313 			     false, 2, unprom, &half_type))
1314     return NULL;
1315 
1316   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1317 
1318   tree half_vectype;
1319   if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1320 					type_out, &half_vectype))
1321     return NULL;
1322 
1323   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1324   tree sad_oprnd[2];
1325   vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1326 		       unprom, half_vectype);
1327 
1328   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1329   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1330 					      sad_oprnd[1], plus_oprnd1);
1331 
1332   return pattern_stmt;
1333 }
1334 
1335 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1336    so that it can be treated as though it had the form:
1337 
1338       A_TYPE a;
1339       B_TYPE b;
1340       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1341       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1342     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1343     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1344     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1345 
1346    Try to replace the pattern with:
1347 
1348       A_TYPE a;
1349       B_TYPE b;
1350       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1351       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1352     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1353     | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op
1354 
1355    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1356 
1357    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1358    name of the pattern being matched, for dump purposes.  */
1359 
1360 static gimple *
vect_recog_widen_op_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out,tree_code orig_code,tree_code wide_code,bool shift_p,const char * name)1361 vect_recog_widen_op_pattern (vec_info *vinfo,
1362 			     stmt_vec_info last_stmt_info, tree *type_out,
1363 			     tree_code orig_code, tree_code wide_code,
1364 			     bool shift_p, const char *name)
1365 {
1366   gimple *last_stmt = last_stmt_info->stmt;
1367 
1368   vect_unpromoted_value unprom[2];
1369   tree half_type;
1370   if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1371 			     shift_p, 2, unprom, &half_type))
1372     return NULL;
1373 
1374   /* Pattern detected.  */
1375   vect_pattern_detected (name, last_stmt);
1376 
1377   tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1378   tree itype = type;
1379   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1380       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1381     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1382 					    TYPE_UNSIGNED (half_type));
1383 
1384   /* Check target support  */
1385   tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1386   tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1387   tree ctype = itype;
1388   tree vecctype = vecitype;
1389   if (orig_code == MINUS_EXPR
1390       && TYPE_UNSIGNED (itype)
1391       && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1392     {
1393       /* Subtraction is special, even if half_type is unsigned and no matter
1394 	 whether type is signed or unsigned, if type is wider than itype,
1395 	 we need to sign-extend from the widening operation result to the
1396 	 result type.
1397 	 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1398 	 itype unsigned short and type either int or unsigned int.
1399 	 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1400 	 (unsigned short) 0xffff, but for type int we want the result -1
1401 	 and for type unsigned int 0xffffffff rather than 0xffff.  */
1402       ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1403       vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1404     }
1405 
1406   enum tree_code dummy_code;
1407   int dummy_int;
1408   auto_vec<tree> dummy_vec;
1409   if (!vectype
1410       || !vecitype
1411       || !vecctype
1412       || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1413 					  vecitype, vectype,
1414 					  &dummy_code, &dummy_code,
1415 					  &dummy_int, &dummy_vec))
1416     return NULL;
1417 
1418   *type_out = get_vectype_for_scalar_type (vinfo, type);
1419   if (!*type_out)
1420     return NULL;
1421 
1422   tree oprnd[2];
1423   vect_convert_inputs (vinfo, last_stmt_info,
1424 		       2, oprnd, half_type, unprom, vectype);
1425 
1426   tree var = vect_recog_temp_ssa_var (itype, NULL);
1427   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1428 					      oprnd[0], oprnd[1]);
1429 
1430   if (vecctype != vecitype)
1431     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1432 					pattern_stmt, vecitype);
1433 
1434   return vect_convert_output (vinfo, last_stmt_info,
1435 			      type, pattern_stmt, vecctype);
1436 }
1437 
1438 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1439    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1440 
1441 static gimple *
vect_recog_widen_mult_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1442 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1443 			       tree *type_out)
1444 {
1445   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1446 				      MULT_EXPR, WIDEN_MULT_EXPR, false,
1447 				      "vect_recog_widen_mult_pattern");
1448 }
1449 
1450 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1451    to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1452 
1453 static gimple *
vect_recog_widen_plus_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1454 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1455 			       tree *type_out)
1456 {
1457   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1458 				      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1459 				      "vect_recog_widen_plus_pattern");
1460 }
1461 
1462 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1463    to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
1464 static gimple *
vect_recog_widen_minus_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1465 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1466 			       tree *type_out)
1467 {
1468   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1469 				      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1470 				      "vect_recog_widen_minus_pattern");
1471 }
1472 
1473 /* Function vect_recog_popcount_pattern
1474 
1475    Try to find the following pattern:
1476 
1477    UTYPE1 A;
1478    TYPE1 B;
1479    UTYPE2 temp_in;
1480    TYPE3 temp_out;
1481    temp_in = (UTYPE2)A;
1482 
1483    temp_out = __builtin_popcount{,l,ll} (temp_in);
1484    B = (TYPE1) temp_out;
1485 
1486    TYPE2 may or may not be equal to TYPE3.
1487    i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
1488    i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
1489 
1490    Input:
1491 
1492    * STMT_VINFO: The stmt from which the pattern search begins.
1493    here it starts with B = (TYPE1) temp_out;
1494 
1495    Output:
1496 
1497    * TYPE_OUT: The vector type of the output of this pattern.
1498 
1499    * Return value: A new stmt that will be used to replace the sequence of
1500    stmts that constitute the pattern. In this case it will be:
1501    B = .POPCOUNT (A);
1502 */
1503 
1504 static gimple *
vect_recog_popcount_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1505 vect_recog_popcount_pattern (vec_info *vinfo,
1506 			     stmt_vec_info stmt_vinfo, tree *type_out)
1507 {
1508   gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1509   gimple *popcount_stmt, *pattern_stmt;
1510   tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1511   auto_vec<tree> vargs;
1512 
1513   /* Find B = (TYPE1) temp_out. */
1514   if (!last_stmt)
1515     return NULL;
1516   tree_code code = gimple_assign_rhs_code (last_stmt);
1517   if (!CONVERT_EXPR_CODE_P (code))
1518     return NULL;
1519 
1520   lhs_oprnd = gimple_assign_lhs (last_stmt);
1521   lhs_type = TREE_TYPE (lhs_oprnd);
1522   if (!INTEGRAL_TYPE_P (lhs_type))
1523     return NULL;
1524 
1525   rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1526   if (TREE_CODE (rhs_oprnd) != SSA_NAME
1527       || !has_single_use (rhs_oprnd))
1528     return NULL;
1529   popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1530 
1531   /* Find temp_out = __builtin_popcount{,l,ll} (temp_in);  */
1532   if (!is_gimple_call (popcount_stmt))
1533     return NULL;
1534   switch (gimple_call_combined_fn (popcount_stmt))
1535     {
1536     CASE_CFN_POPCOUNT:
1537       break;
1538     default:
1539       return NULL;
1540     }
1541 
1542   if (gimple_call_num_args (popcount_stmt) != 1)
1543     return NULL;
1544 
1545   rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1546   vect_unpromoted_value unprom_diff;
1547   rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1548 						    &unprom_diff);
1549 
1550   if (!rhs_origin)
1551     return NULL;
1552 
1553   /* Input and output of .POPCOUNT should be same-precision integer.
1554      Also A should be unsigned or same precision as temp_in,
1555      otherwise there would be sign_extend from A to temp_in.  */
1556   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1557       || (!TYPE_UNSIGNED (unprom_diff.type)
1558 	  && (TYPE_PRECISION (unprom_diff.type)
1559 	      != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1560     return NULL;
1561   vargs.safe_push (unprom_diff.op);
1562 
1563   vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt);
1564   vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1565   /* Do it only if the backend has popcount<vector_mode>2 pattern.  */
1566   if (!vec_type
1567       || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1568 					  OPTIMIZE_FOR_SPEED))
1569     return NULL;
1570 
1571   /* Create B = .POPCOUNT (A).  */
1572   new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1573   pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1574   gimple_call_set_lhs (pattern_stmt, new_var);
1575   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1576   *type_out = vec_type;
1577 
1578   if (dump_enabled_p ())
1579     dump_printf_loc (MSG_NOTE, vect_location,
1580 		     "created pattern stmt: %G", pattern_stmt);
1581   return pattern_stmt;
1582 }
1583 
1584 /* Function vect_recog_pow_pattern
1585 
1586    Try to find the following pattern:
1587 
1588      x = POW (y, N);
1589 
1590    with POW being one of pow, powf, powi, powif and N being
1591    either 2 or 0.5.
1592 
1593    Input:
1594 
1595    * STMT_VINFO: The stmt from which the pattern search begins.
1596 
1597    Output:
1598 
1599    * TYPE_OUT: The type of the output of this pattern.
1600 
1601    * Return value: A new stmt that will be used to replace the sequence of
1602    stmts that constitute the pattern. In this case it will be:
1603         x = x * x
1604    or
1605 	x = sqrt (x)
1606 */
1607 
1608 static gimple *
vect_recog_pow_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1609 vect_recog_pow_pattern (vec_info *vinfo,
1610 			stmt_vec_info stmt_vinfo, tree *type_out)
1611 {
1612   gimple *last_stmt = stmt_vinfo->stmt;
1613   tree base, exp;
1614   gimple *stmt;
1615   tree var;
1616 
1617   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1618     return NULL;
1619 
1620   switch (gimple_call_combined_fn (last_stmt))
1621     {
1622     CASE_CFN_POW:
1623     CASE_CFN_POWI:
1624       break;
1625 
1626     default:
1627       return NULL;
1628     }
1629 
1630   base = gimple_call_arg (last_stmt, 0);
1631   exp = gimple_call_arg (last_stmt, 1);
1632   if (TREE_CODE (exp) != REAL_CST
1633       && TREE_CODE (exp) != INTEGER_CST)
1634     {
1635       if (flag_unsafe_math_optimizations
1636 	  && TREE_CODE (base) == REAL_CST
1637 	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1638 	{
1639 	  combined_fn log_cfn;
1640 	  built_in_function exp_bfn;
1641 	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1642 	    {
1643 	    case BUILT_IN_POW:
1644 	      log_cfn = CFN_BUILT_IN_LOG;
1645 	      exp_bfn = BUILT_IN_EXP;
1646 	      break;
1647 	    case BUILT_IN_POWF:
1648 	      log_cfn = CFN_BUILT_IN_LOGF;
1649 	      exp_bfn = BUILT_IN_EXPF;
1650 	      break;
1651 	    case BUILT_IN_POWL:
1652 	      log_cfn = CFN_BUILT_IN_LOGL;
1653 	      exp_bfn = BUILT_IN_EXPL;
1654 	      break;
1655 	    default:
1656 	      return NULL;
1657 	    }
1658 	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1659 	  tree exp_decl = builtin_decl_implicit (exp_bfn);
1660 	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1661 	     does that, but if C is a power of 2, we want to use
1662 	     exp2 (log2 (C) * x) in the non-vectorized version, but for
1663 	     vectorization we don't have vectorized exp2.  */
1664 	  if (logc
1665 	      && TREE_CODE (logc) == REAL_CST
1666 	      && exp_decl
1667 	      && lookup_attribute ("omp declare simd",
1668 				   DECL_ATTRIBUTES (exp_decl)))
1669 	    {
1670 	      cgraph_node *node = cgraph_node::get_create (exp_decl);
1671 	      if (node->simd_clones == NULL)
1672 		{
1673 		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1674 		      || node->definition)
1675 		    return NULL;
1676 		  expand_simd_clones (node);
1677 		  if (node->simd_clones == NULL)
1678 		    return NULL;
1679 		}
1680 	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1681 	      if (!*type_out)
1682 		return NULL;
1683 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1684 	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1685 	      append_pattern_def_seq (vinfo, stmt_vinfo, g);
1686 	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1687 	      g = gimple_build_call (exp_decl, 1, def);
1688 	      gimple_call_set_lhs (g, res);
1689 	      return g;
1690 	    }
1691 	}
1692 
1693       return NULL;
1694     }
1695 
1696   /* We now have a pow or powi builtin function call with a constant
1697      exponent.  */
1698 
1699   /* Catch squaring.  */
1700   if ((tree_fits_shwi_p (exp)
1701        && tree_to_shwi (exp) == 2)
1702       || (TREE_CODE (exp) == REAL_CST
1703           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1704     {
1705       if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1706 					    TREE_TYPE (base), type_out))
1707 	return NULL;
1708 
1709       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1710       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1711       return stmt;
1712     }
1713 
1714   /* Catch square root.  */
1715   if (TREE_CODE (exp) == REAL_CST
1716       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1717     {
1718       *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1719       if (*type_out
1720 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1721 					     OPTIMIZE_FOR_SPEED))
1722 	{
1723 	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1724 	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1725 	  gimple_call_set_lhs (stmt, var);
1726 	  gimple_call_set_nothrow (stmt, true);
1727 	  return stmt;
1728 	}
1729     }
1730 
1731   return NULL;
1732 }
1733 
1734 
1735 /* Function vect_recog_widen_sum_pattern
1736 
1737    Try to find the following pattern:
1738 
1739      type x_t;
1740      TYPE x_T, sum = init;
1741    loop:
1742      sum_0 = phi <init, sum_1>
1743      S1  x_t = *p;
1744      S2  x_T = (TYPE) x_t;
1745      S3  sum_1 = x_T + sum_0;
1746 
1747    where type 'TYPE' is at least double the size of type 'type', i.e - we're
1748    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1749    a special case of a reduction computation.
1750 
1751    Input:
1752 
1753    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1754    when this function is called with S3, the pattern {S2,S3} will be detected.
1755 
1756    Output:
1757 
1758    * TYPE_OUT: The type of the output of this pattern.
1759 
1760    * Return value: A new stmt that will be used to replace the sequence of
1761    stmts that constitute the pattern. In this case it will be:
1762         WIDEN_SUM <x_t, sum_0>
1763 
1764    Note: The widening-sum idiom is a widening reduction pattern that is
1765 	 vectorized without preserving all the intermediate results. It
1766          produces only N/2 (widened) results (by summing up pairs of
1767 	 intermediate results) rather than all N results.  Therefore, we
1768 	 cannot allow this pattern when we want to get all the results and in
1769 	 the correct order (as is the case when this computation is in an
1770 	 inner-loop nested in an outer-loop that us being vectorized).  */
1771 
1772 static gimple *
vect_recog_widen_sum_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1773 vect_recog_widen_sum_pattern (vec_info *vinfo,
1774 			      stmt_vec_info stmt_vinfo, tree *type_out)
1775 {
1776   gimple *last_stmt = stmt_vinfo->stmt;
1777   tree oprnd0, oprnd1;
1778   tree type;
1779   gimple *pattern_stmt;
1780   tree var;
1781 
1782   /* Look for the following pattern
1783           DX = (TYPE) X;
1784           sum_1 = DX + sum_0;
1785      In which DX is at least double the size of X, and sum_1 has been
1786      recognized as a reduction variable.
1787    */
1788 
1789   /* Starting from LAST_STMT, follow the defs of its uses in search
1790      of the above pattern.  */
1791 
1792   if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1793 				       &oprnd0, &oprnd1)
1794       || TREE_CODE (oprnd0) != SSA_NAME
1795       || !vinfo->lookup_def (oprnd0))
1796     return NULL;
1797 
1798   type = TREE_TYPE (gimple_get_lhs (last_stmt));
1799 
1800   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1801      we know that oprnd1 is the reduction variable (defined by a loop-header
1802      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1803      Left to check that oprnd0 is defined by a cast from type 'type' to type
1804      'TYPE'.  */
1805 
1806   vect_unpromoted_value unprom0;
1807   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1808       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1809     return NULL;
1810 
1811   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1812 
1813   if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1814 					unprom0.type, type_out))
1815     return NULL;
1816 
1817   var = vect_recog_temp_ssa_var (type, NULL);
1818   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1819 
1820   return pattern_stmt;
1821 }
1822 
1823 /* Recognize cases in which an operation is performed in one type WTYPE
1824    but could be done more efficiently in a narrower type NTYPE.  For example,
1825    if we have:
1826 
1827      ATYPE a;  // narrower than NTYPE
1828      BTYPE b;  // narrower than NTYPE
1829      WTYPE aw = (WTYPE) a;
1830      WTYPE bw = (WTYPE) b;
1831      WTYPE res = aw + bw;  // only uses of aw and bw
1832 
1833    then it would be more efficient to do:
1834 
1835      NTYPE an = (NTYPE) a;
1836      NTYPE bn = (NTYPE) b;
1837      NTYPE resn = an + bn;
1838      WTYPE res = (WTYPE) resn;
1839 
1840    Other situations include things like:
1841 
1842      ATYPE a;  // NTYPE or narrower
1843      WTYPE aw = (WTYPE) a;
1844      WTYPE res = aw + b;
1845 
1846    when only "(NTYPE) res" is significant.  In that case it's more efficient
1847    to truncate "b" and do the operation on NTYPE instead:
1848 
1849      NTYPE an = (NTYPE) a;
1850      NTYPE bn = (NTYPE) b;  // truncation
1851      NTYPE resn = an + bn;
1852      WTYPE res = (WTYPE) resn;
1853 
1854    All users of "res" should then use "resn" instead, making the final
1855    statement dead (not marked as relevant).  The final statement is still
1856    needed to maintain the type correctness of the IR.
1857 
1858    vect_determine_precisions has already determined the minimum
1859    precison of the operation and the minimum precision required
1860    by users of the result.  */
1861 
1862 static gimple *
vect_recog_over_widening_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1863 vect_recog_over_widening_pattern (vec_info *vinfo,
1864 				  stmt_vec_info last_stmt_info, tree *type_out)
1865 {
1866   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1867   if (!last_stmt)
1868     return NULL;
1869 
1870   /* See whether we have found that this operation can be done on a
1871      narrower type without changing its semantics.  */
1872   unsigned int new_precision = last_stmt_info->operation_precision;
1873   if (!new_precision)
1874     return NULL;
1875 
1876   tree lhs = gimple_assign_lhs (last_stmt);
1877   tree type = TREE_TYPE (lhs);
1878   tree_code code = gimple_assign_rhs_code (last_stmt);
1879 
1880   /* Punt for reductions where we don't handle the type conversions.  */
1881   if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1882     return NULL;
1883 
1884   /* Keep the first operand of a COND_EXPR as-is: only the other two
1885      operands are interesting.  */
1886   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1887 
1888   /* Check the operands.  */
1889   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1890   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1891   unprom.quick_grow (nops);
1892   unsigned int min_precision = 0;
1893   bool single_use_p = false;
1894   for (unsigned int i = 0; i < nops; ++i)
1895     {
1896       tree op = gimple_op (last_stmt, first_op + i);
1897       if (TREE_CODE (op) == INTEGER_CST)
1898 	unprom[i].set_op (op, vect_constant_def);
1899       else if (TREE_CODE (op) == SSA_NAME)
1900 	{
1901 	  bool op_single_use_p = true;
1902 	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1903 						     &op_single_use_p))
1904 	    return NULL;
1905 	  /* If:
1906 
1907 	     (1) N bits of the result are needed;
1908 	     (2) all inputs are widened from M<N bits; and
1909 	     (3) one operand OP is a single-use SSA name
1910 
1911 	     we can shift the M->N widening from OP to the output
1912 	     without changing the number or type of extensions involved.
1913 	     This then reduces the number of copies of STMT_INFO.
1914 
1915 	     If instead of (3) more than one operand is a single-use SSA name,
1916 	     shifting the extension to the output is even more of a win.
1917 
1918 	     If instead:
1919 
1920 	     (1) N bits of the result are needed;
1921 	     (2) one operand OP2 is widened from M2<N bits;
1922 	     (3) another operand OP1 is widened from M1<M2 bits; and
1923 	     (4) both OP1 and OP2 are single-use
1924 
1925 	     the choice is between:
1926 
1927 	     (a) truncating OP2 to M1, doing the operation on M1,
1928 		 and then widening the result to N
1929 
1930 	     (b) widening OP1 to M2, doing the operation on M2, and then
1931 		 widening the result to N
1932 
1933 	     Both shift the M2->N widening of the inputs to the output.
1934 	     (a) additionally shifts the M1->M2 widening to the output;
1935 	     it requires fewer copies of STMT_INFO but requires an extra
1936 	     M2->M1 truncation.
1937 
1938 	     Which is better will depend on the complexity and cost of
1939 	     STMT_INFO, which is hard to predict at this stage.  However,
1940 	     a clear tie-breaker in favor of (b) is the fact that the
1941 	     truncation in (a) increases the length of the operation chain.
1942 
1943 	     If instead of (4) only one of OP1 or OP2 is single-use,
1944 	     (b) is still a win over doing the operation in N bits:
1945 	     it still shifts the M2->N widening on the single-use operand
1946 	     to the output and reduces the number of STMT_INFO copies.
1947 
1948 	     If neither operand is single-use then operating on fewer than
1949 	     N bits might lead to more extensions overall.  Whether it does
1950 	     or not depends on global information about the vectorization
1951 	     region, and whether that's a good trade-off would again
1952 	     depend on the complexity and cost of the statements involved,
1953 	     as well as things like register pressure that are not normally
1954 	     modelled at this stage.  We therefore ignore these cases
1955 	     and just optimize the clear single-use wins above.
1956 
1957 	     Thus we take the maximum precision of the unpromoted operands
1958 	     and record whether any operand is single-use.  */
1959 	  if (unprom[i].dt == vect_internal_def)
1960 	    {
1961 	      min_precision = MAX (min_precision,
1962 				   TYPE_PRECISION (unprom[i].type));
1963 	      single_use_p |= op_single_use_p;
1964 	    }
1965 	}
1966       else
1967 	return NULL;
1968     }
1969 
1970   /* Although the operation could be done in operation_precision, we have
1971      to balance that against introducing extra truncations or extensions.
1972      Calculate the minimum precision that can be handled efficiently.
1973 
1974      The loop above determined that the operation could be handled
1975      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1976      extension from the inputs to the output without introducing more
1977      instructions, and would reduce the number of instructions required
1978      for STMT_INFO itself.
1979 
1980      vect_determine_precisions has also determined that the result only
1981      needs min_output_precision bits.  Truncating by a factor of N times
1982      requires a tree of N - 1 instructions, so if TYPE is N times wider
1983      than min_output_precision, doing the operation in TYPE and truncating
1984      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1985      In contrast:
1986 
1987      - truncating the input to a unary operation and doing the operation
1988        in the new type requires at most N - 1 + 1 = N instructions per
1989        output vector
1990 
1991      - doing the same for a binary operation requires at most
1992        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1993 
1994      Both unary and binary operations require fewer instructions than
1995      this if the operands were extended from a suitable truncated form.
1996      Thus there is usually nothing to lose by doing operations in
1997      min_output_precision bits, but there can be something to gain.  */
1998   if (!single_use_p)
1999     min_precision = last_stmt_info->min_output_precision;
2000   else
2001     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2002 
2003   /* Apply the minimum efficient precision we just calculated.  */
2004   if (new_precision < min_precision)
2005     new_precision = min_precision;
2006   new_precision = vect_element_precision (new_precision);
2007   if (new_precision >= TYPE_PRECISION (type))
2008     return NULL;
2009 
2010   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2011 
2012   *type_out = get_vectype_for_scalar_type (vinfo, type);
2013   if (!*type_out)
2014     return NULL;
2015 
2016   /* We've found a viable pattern.  Get the new type of the operation.  */
2017   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2018   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2019 
2020   /* If we're truncating an operation, we need to make sure that we
2021      don't introduce new undefined overflow.  The codes tested here are
2022      a subset of those accepted by vect_truncatable_operation_p.  */
2023   tree op_type = new_type;
2024   if (TYPE_OVERFLOW_UNDEFINED (new_type)
2025       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2026     op_type = build_nonstandard_integer_type (new_precision, true);
2027 
2028   /* We specifically don't check here whether the target supports the
2029      new operation, since it might be something that a later pattern
2030      wants to rewrite anyway.  If targets have a minimum element size
2031      for some optabs, we should pattern-match smaller ops to larger ops
2032      where beneficial.  */
2033   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2034   tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2035   if (!new_vectype || !op_vectype)
2036     return NULL;
2037 
2038   if (dump_enabled_p ())
2039     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2040 		     type, new_type);
2041 
2042   /* Calculate the rhs operands for an operation on OP_TYPE.  */
2043   tree ops[3] = {};
2044   for (unsigned int i = 1; i < first_op; ++i)
2045     ops[i - 1] = gimple_op (last_stmt, i);
2046   vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2047 		       op_type, &unprom[0], op_vectype);
2048 
2049   /* Use the operation to produce a result of type OP_TYPE.  */
2050   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2051   gimple *pattern_stmt = gimple_build_assign (new_var, code,
2052 					      ops[0], ops[1], ops[2]);
2053   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2054 
2055   if (dump_enabled_p ())
2056     dump_printf_loc (MSG_NOTE, vect_location,
2057 		     "created pattern stmt: %G", pattern_stmt);
2058 
2059   /* Convert back to the original signedness, if OP_TYPE is different
2060      from NEW_TYPE.  */
2061   if (op_type != new_type)
2062     pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2063 					pattern_stmt, op_vectype);
2064 
2065   /* Promote the result to the original type.  */
2066   pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2067 				      pattern_stmt, new_vectype);
2068 
2069   return pattern_stmt;
2070 }
2071 
2072 /* Recognize the following patterns:
2073 
2074      ATYPE a;  // narrower than TYPE
2075      BTYPE b;  // narrower than TYPE
2076 
2077    1) Multiply high with scaling
2078      TYPE res = ((TYPE) a * (TYPE) b) >> c;
2079      Here, c is bitsize (TYPE) / 2 - 1.
2080 
2081    2) ... or also with rounding
2082      TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
2083      Here, d is bitsize (TYPE) / 2 - 2.
2084 
2085    3) Normal multiply high
2086      TYPE res = ((TYPE) a * (TYPE) b) >> e;
2087      Here, e is bitsize (TYPE) / 2.
2088 
2089    where only the bottom half of res is used.  */
2090 
2091 static gimple *
vect_recog_mulhs_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)2092 vect_recog_mulhs_pattern (vec_info *vinfo,
2093 			  stmt_vec_info last_stmt_info, tree *type_out)
2094 {
2095   /* Check for a right shift.  */
2096   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2097   if (!last_stmt
2098       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2099     return NULL;
2100 
2101   /* Check that the shift result is wider than the users of the
2102      result need (i.e. that narrowing would be a natural choice).  */
2103   tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2104   unsigned int target_precision
2105     = vect_element_precision (last_stmt_info->min_output_precision);
2106   if (!INTEGRAL_TYPE_P (lhs_type)
2107       || target_precision >= TYPE_PRECISION (lhs_type))
2108     return NULL;
2109 
2110   /* Look through any change in sign on the outer shift input.  */
2111   vect_unpromoted_value unprom_rshift_input;
2112   tree rshift_input = vect_look_through_possible_promotion
2113     (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2114   if (!rshift_input
2115       || TYPE_PRECISION (TREE_TYPE (rshift_input))
2116 	   != TYPE_PRECISION (lhs_type))
2117     return NULL;
2118 
2119   /* Get the definition of the shift input.  */
2120   stmt_vec_info rshift_input_stmt_info
2121     = vect_get_internal_def (vinfo, rshift_input);
2122   if (!rshift_input_stmt_info)
2123     return NULL;
2124   gassign *rshift_input_stmt
2125     = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2126   if (!rshift_input_stmt)
2127     return NULL;
2128 
2129   stmt_vec_info mulh_stmt_info;
2130   tree scale_term;
2131   bool rounding_p = false;
2132 
2133   /* Check for the presence of the rounding term.  */
2134   if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2135     {
2136       /* Check that the outer shift was by 1.  */
2137       if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2138 	return NULL;
2139 
2140       /* Check that the second operand of the PLUS_EXPR is 1.  */
2141       if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2142 	return NULL;
2143 
2144       /* Look through any change in sign on the addition input.  */
2145       vect_unpromoted_value unprom_plus_input;
2146       tree plus_input = vect_look_through_possible_promotion
2147 	(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2148       if (!plus_input
2149 	   || TYPE_PRECISION (TREE_TYPE (plus_input))
2150 		!= TYPE_PRECISION (TREE_TYPE (rshift_input)))
2151 	return NULL;
2152 
2153       /* Get the definition of the multiply-high-scale part.  */
2154       stmt_vec_info plus_input_stmt_info
2155 	= vect_get_internal_def (vinfo, plus_input);
2156       if (!plus_input_stmt_info)
2157 	return NULL;
2158       gassign *plus_input_stmt
2159 	= dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2160       if (!plus_input_stmt
2161 	  || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2162 	return NULL;
2163 
2164       /* Look through any change in sign on the scaling input.  */
2165       vect_unpromoted_value unprom_scale_input;
2166       tree scale_input = vect_look_through_possible_promotion
2167 	(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2168       if (!scale_input
2169 	  || TYPE_PRECISION (TREE_TYPE (scale_input))
2170 	       != TYPE_PRECISION (TREE_TYPE (plus_input)))
2171 	return NULL;
2172 
2173       /* Get the definition of the multiply-high part.  */
2174       mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2175       if (!mulh_stmt_info)
2176 	return NULL;
2177 
2178       /* Get the scaling term.  */
2179       scale_term = gimple_assign_rhs2 (plus_input_stmt);
2180       rounding_p = true;
2181     }
2182   else
2183     {
2184       mulh_stmt_info = rshift_input_stmt_info;
2185       scale_term = gimple_assign_rhs2 (last_stmt);
2186     }
2187 
2188   /* Check that the scaling factor is constant.  */
2189   if (TREE_CODE (scale_term) != INTEGER_CST)
2190     return NULL;
2191 
2192   /* Check whether the scaling input term can be seen as two widened
2193      inputs multiplied together.  */
2194   vect_unpromoted_value unprom_mult[2];
2195   tree new_type;
2196   unsigned int nops
2197     = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2198 			    false, 2, unprom_mult, &new_type);
2199   if (nops != 2)
2200     return NULL;
2201 
2202   /* Adjust output precision.  */
2203   if (TYPE_PRECISION (new_type) < target_precision)
2204     new_type = build_nonstandard_integer_type
2205       (target_precision, TYPE_UNSIGNED (new_type));
2206 
2207   unsigned mult_precision = TYPE_PRECISION (new_type);
2208   internal_fn ifn;
2209   /* Check that the scaling factor is expected.  Instead of
2210      target_precision, we should use the one that we actually
2211      use for internal function.  */
2212   if (rounding_p)
2213     {
2214       /* Check pattern 2).  */
2215       if (wi::to_widest (scale_term) + mult_precision + 2
2216 	  != TYPE_PRECISION (lhs_type))
2217 	return NULL;
2218 
2219       ifn = IFN_MULHRS;
2220     }
2221   else
2222     {
2223       /* Check for pattern 1).  */
2224       if (wi::to_widest (scale_term) + mult_precision + 1
2225 	  == TYPE_PRECISION (lhs_type))
2226 	ifn = IFN_MULHS;
2227       /* Check for pattern 3).  */
2228       else if (wi::to_widest (scale_term) + mult_precision
2229 	       == TYPE_PRECISION (lhs_type))
2230 	ifn = IFN_MULH;
2231       else
2232 	return NULL;
2233     }
2234 
2235   vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2236 
2237   /* Check for target support.  */
2238   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2239   if (!new_vectype
2240       || !direct_internal_fn_supported_p
2241 	    (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2242     return NULL;
2243 
2244   /* The IR requires a valid vector type for the cast result, even though
2245      it's likely to be discarded.  */
2246   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2247   if (!*type_out)
2248     return NULL;
2249 
2250   /* Generate the IFN_MULHRS call.  */
2251   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2252   tree new_ops[2];
2253   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2254 		       unprom_mult, new_vectype);
2255   gcall *mulhrs_stmt
2256     = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2257   gimple_call_set_lhs (mulhrs_stmt, new_var);
2258   gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2259 
2260   if (dump_enabled_p ())
2261     dump_printf_loc (MSG_NOTE, vect_location,
2262 		     "created pattern stmt: %G", mulhrs_stmt);
2263 
2264   return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2265 			      mulhrs_stmt, new_vectype);
2266 }
2267 
2268 /* Recognize the patterns:
2269 
2270 	    ATYPE a;  // narrower than TYPE
2271 	    BTYPE b;  // narrower than TYPE
2272 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2273      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2274 
2275    where only the bottom half of avg is used.  Try to transform them into:
2276 
2277 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2278      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2279 
2280   followed by:
2281 
2282 	    TYPE avg = (TYPE) avg';
2283 
2284   where NTYPE is no wider than half of TYPE.  Since only the bottom half
2285   of avg is used, all or part of the cast of avg' should become redundant.
2286 
2287   If there is no target support available, generate code to distribute rshift
2288   over plus and add a carry.  */
2289 
2290 static gimple *
vect_recog_average_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)2291 vect_recog_average_pattern (vec_info *vinfo,
2292 			    stmt_vec_info last_stmt_info, tree *type_out)
2293 {
2294   /* Check for a shift right by one bit.  */
2295   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2296   if (!last_stmt
2297       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2298       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2299     return NULL;
2300 
2301   /* Check that the shift result is wider than the users of the
2302      result need (i.e. that narrowing would be a natural choice).  */
2303   tree lhs = gimple_assign_lhs (last_stmt);
2304   tree type = TREE_TYPE (lhs);
2305   unsigned int target_precision
2306     = vect_element_precision (last_stmt_info->min_output_precision);
2307   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2308     return NULL;
2309 
2310   /* Look through any change in sign on the shift input.  */
2311   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2312   vect_unpromoted_value unprom_plus;
2313   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2314 						     &unprom_plus);
2315   if (!rshift_rhs
2316       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2317     return NULL;
2318 
2319   /* Get the definition of the shift input.  */
2320   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2321   if (!plus_stmt_info)
2322     return NULL;
2323 
2324   /* Check whether the shift input can be seen as a tree of additions on
2325      2 or 3 widened inputs.
2326 
2327      Note that the pattern should be a win even if the result of one or
2328      more additions is reused elsewhere: if the pattern matches, we'd be
2329      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
2330   internal_fn ifn = IFN_AVG_FLOOR;
2331   vect_unpromoted_value unprom[3];
2332   tree new_type;
2333   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2334 					    WIDEN_PLUS_EXPR, false, 3,
2335 					    unprom, &new_type);
2336   if (nops == 0)
2337     return NULL;
2338   if (nops == 3)
2339     {
2340       /* Check that one operand is 1.  */
2341       unsigned int i;
2342       for (i = 0; i < 3; ++i)
2343 	if (integer_onep (unprom[i].op))
2344 	  break;
2345       if (i == 3)
2346 	return NULL;
2347       /* Throw away the 1 operand and keep the other two.  */
2348       if (i < 2)
2349 	unprom[i] = unprom[2];
2350       ifn = IFN_AVG_CEIL;
2351     }
2352 
2353   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2354 
2355   /* We know that:
2356 
2357      (a) the operation can be viewed as:
2358 
2359 	   TYPE widened0 = (TYPE) UNPROM[0];
2360 	   TYPE widened1 = (TYPE) UNPROM[1];
2361 	   TYPE tmp1 = widened0 + widened1 {+ 1};
2362 	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
2363 
2364      (b) the first two statements are equivalent to:
2365 
2366 	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2367 	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2368 
2369      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2370 	 where sensible;
2371 
2372      (d) all the operations can be performed correctly at twice the width of
2373 	 NEW_TYPE, due to the nature of the average operation; and
2374 
2375      (e) users of the result of the right shift need only TARGET_PRECISION
2376 	 bits, where TARGET_PRECISION is no more than half of TYPE's
2377 	 precision.
2378 
2379      Under these circumstances, the only situation in which NEW_TYPE
2380      could be narrower than TARGET_PRECISION is if widened0, widened1
2381      and an addition result are all used more than once.  Thus we can
2382      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2383      as "free", whereas widening the result of the average instruction
2384      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
2385      therefore better not to go narrower than TARGET_PRECISION.  */
2386   if (TYPE_PRECISION (new_type) < target_precision)
2387     new_type = build_nonstandard_integer_type (target_precision,
2388 					       TYPE_UNSIGNED (new_type));
2389 
2390   /* Check for target support.  */
2391   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2392   if (!new_vectype)
2393     return NULL;
2394 
2395   bool fallback_p = false;
2396 
2397   if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2398     ;
2399   else if (TYPE_UNSIGNED (new_type)
2400 	   && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2401 	   && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2402 	   && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2403 	   && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2404     fallback_p = true;
2405   else
2406     return NULL;
2407 
2408   /* The IR requires a valid vector type for the cast result, even though
2409      it's likely to be discarded.  */
2410   *type_out = get_vectype_for_scalar_type (vinfo, type);
2411   if (!*type_out)
2412     return NULL;
2413 
2414   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2415   tree new_ops[2];
2416   vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2417 		       unprom, new_vectype);
2418 
2419   if (fallback_p)
2420     {
2421       /* As a fallback, generate code for following sequence:
2422 
2423 	 shifted_op0 = new_ops[0] >> 1;
2424 	 shifted_op1 = new_ops[1] >> 1;
2425 	 sum_of_shifted = shifted_op0 + shifted_op1;
2426 	 unmasked_carry = new_ops[0] and/or new_ops[1];
2427 	 carry = unmasked_carry & 1;
2428 	 new_var = sum_of_shifted + carry;
2429       */
2430 
2431       tree one_cst = build_one_cst (new_type);
2432       gassign *g;
2433 
2434       tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2435       g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2436       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2437 
2438       tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2439       g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2440       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2441 
2442       tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2443       g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2444 			       shifted_op0, shifted_op1);
2445       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2446 
2447       tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2448       tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2449       g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2450       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2451 
2452       tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2453       g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2454       append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2455 
2456       g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2457       return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2458     }
2459 
2460   /* Generate the IFN_AVG* call.  */
2461   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2462 						    new_ops[1]);
2463   gimple_call_set_lhs (average_stmt, new_var);
2464   gimple_set_location (average_stmt, gimple_location (last_stmt));
2465 
2466   if (dump_enabled_p ())
2467     dump_printf_loc (MSG_NOTE, vect_location,
2468 		     "created pattern stmt: %G", average_stmt);
2469 
2470   return vect_convert_output (vinfo, last_stmt_info,
2471 			      type, average_stmt, new_vectype);
2472 }
2473 
2474 /* Recognize cases in which the input to a cast is wider than its
2475    output, and the input is fed by a widening operation.  Fold this
2476    by removing the unnecessary intermediate widening.  E.g.:
2477 
2478      unsigned char a;
2479      unsigned int b = (unsigned int) a;
2480      unsigned short c = (unsigned short) b;
2481 
2482    -->
2483 
2484      unsigned short c = (unsigned short) a;
2485 
2486    Although this is rare in input IR, it is an expected side-effect
2487    of the over-widening pattern above.
2488 
2489    This is beneficial also for integer-to-float conversions, if the
2490    widened integer has more bits than the float, and if the unwidened
2491    input doesn't.  */
2492 
2493 static gimple *
vect_recog_cast_forwprop_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)2494 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2495 				  stmt_vec_info last_stmt_info, tree *type_out)
2496 {
2497   /* Check for a cast, including an integer-to-float conversion.  */
2498   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2499   if (!last_stmt)
2500     return NULL;
2501   tree_code code = gimple_assign_rhs_code (last_stmt);
2502   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2503     return NULL;
2504 
2505   /* Make sure that the rhs is a scalar with a natural bitsize.  */
2506   tree lhs = gimple_assign_lhs (last_stmt);
2507   if (!lhs)
2508     return NULL;
2509   tree lhs_type = TREE_TYPE (lhs);
2510   scalar_mode lhs_mode;
2511   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2512       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2513     return NULL;
2514 
2515   /* Check for a narrowing operation (from a vector point of view).  */
2516   tree rhs = gimple_assign_rhs1 (last_stmt);
2517   tree rhs_type = TREE_TYPE (rhs);
2518   if (!INTEGRAL_TYPE_P (rhs_type)
2519       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2520       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2521     return NULL;
2522 
2523   /* Try to find an unpromoted input.  */
2524   vect_unpromoted_value unprom;
2525   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2526       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2527     return NULL;
2528 
2529   /* If the bits above RHS_TYPE matter, make sure that they're the
2530      same when extending from UNPROM as they are when extending from RHS.  */
2531   if (!INTEGRAL_TYPE_P (lhs_type)
2532       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2533     return NULL;
2534 
2535   /* We can get the same result by casting UNPROM directly, to avoid
2536      the unnecessary widening and narrowing.  */
2537   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2538 
2539   *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2540   if (!*type_out)
2541     return NULL;
2542 
2543   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2544   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2545   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2546 
2547   return pattern_stmt;
2548 }
2549 
2550 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2551    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
2552 
2553 static gimple *
vect_recog_widen_shift_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)2554 vect_recog_widen_shift_pattern (vec_info *vinfo,
2555 				stmt_vec_info last_stmt_info, tree *type_out)
2556 {
2557   return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2558 				      LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2559 				      "vect_recog_widen_shift_pattern");
2560 }
2561 
2562 /* Detect a rotate pattern wouldn't be otherwise vectorized:
2563 
2564    type a_t, b_t, c_t;
2565 
2566    S0 a_t = b_t r<< c_t;
2567 
2568   Input/Output:
2569 
2570   * STMT_VINFO: The stmt from which the pattern search begins,
2571     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
2572     with a sequence:
2573 
2574    S1 d_t = -c_t;
2575    S2 e_t = d_t & (B - 1);
2576    S3 f_t = b_t << c_t;
2577    S4 g_t = b_t >> e_t;
2578    S0 a_t = f_t | g_t;
2579 
2580     where B is element bitsize of type.
2581 
2582   Output:
2583 
2584   * TYPE_OUT: The type of the output of this pattern.
2585 
2586   * Return value: A new stmt that will be used to replace the rotate
2587     S0 stmt.  */
2588 
2589 static gimple *
vect_recog_rotate_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)2590 vect_recog_rotate_pattern (vec_info *vinfo,
2591 			   stmt_vec_info stmt_vinfo, tree *type_out)
2592 {
2593   gimple *last_stmt = stmt_vinfo->stmt;
2594   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2595   gimple *pattern_stmt, *def_stmt;
2596   enum tree_code rhs_code;
2597   enum vect_def_type dt;
2598   optab optab1, optab2;
2599   edge ext_def = NULL;
2600   bool bswap16_p = false;
2601 
2602   if (is_gimple_assign (last_stmt))
2603     {
2604       rhs_code = gimple_assign_rhs_code (last_stmt);
2605       switch (rhs_code)
2606 	{
2607 	case LROTATE_EXPR:
2608 	case RROTATE_EXPR:
2609 	  break;
2610 	default:
2611 	  return NULL;
2612 	}
2613 
2614       lhs = gimple_assign_lhs (last_stmt);
2615       oprnd0 = gimple_assign_rhs1 (last_stmt);
2616       type = TREE_TYPE (oprnd0);
2617       oprnd1 = gimple_assign_rhs2 (last_stmt);
2618     }
2619   else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2620     {
2621       /* __builtin_bswap16 (x) is another form of x r>> 8.
2622 	 The vectorizer has bswap support, but only if the argument isn't
2623 	 promoted.  */
2624       lhs = gimple_call_lhs (last_stmt);
2625       oprnd0 = gimple_call_arg (last_stmt, 0);
2626       type = TREE_TYPE (oprnd0);
2627       if (!lhs
2628 	  || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2629 	  || TYPE_PRECISION (type) <= 16
2630 	  || TREE_CODE (oprnd0) != SSA_NAME
2631 	  || BITS_PER_UNIT != 8
2632 	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2633 	return NULL;
2634 
2635       stmt_vec_info def_stmt_info;
2636       if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2637 	return NULL;
2638 
2639       if (dt != vect_internal_def)
2640 	return NULL;
2641 
2642       if (gimple_assign_cast_p (def_stmt))
2643 	{
2644 	  def = gimple_assign_rhs1 (def_stmt);
2645 	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2646 	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2647 	    oprnd0 = def;
2648 	}
2649 
2650       type = TREE_TYPE (lhs);
2651       vectype = get_vectype_for_scalar_type (vinfo, type);
2652       if (vectype == NULL_TREE)
2653 	return NULL;
2654 
2655       if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2656 	{
2657 	  /* The encoding uses one stepped pattern for each byte in the
2658 	     16-bit word.  */
2659 	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2660 	  for (unsigned i = 0; i < 3; ++i)
2661 	    for (unsigned j = 0; j < 2; ++j)
2662 	      elts.quick_push ((i + 1) * 2 - j - 1);
2663 
2664 	  vec_perm_indices indices (elts, 1,
2665 				    TYPE_VECTOR_SUBPARTS (char_vectype));
2666 	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2667 	    {
2668 	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
2669 		 undo the argument promotion.  */
2670 	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2671 		{
2672 		  def = vect_recog_temp_ssa_var (type, NULL);
2673 		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2674 		  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2675 		  oprnd0 = def;
2676 		}
2677 
2678 	      /* Pattern detected.  */
2679 	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2680 
2681 	      *type_out = vectype;
2682 
2683 	      /* Pattern supported.  Create a stmt to be used to replace the
2684 		 pattern, with the unpromoted argument.  */
2685 	      var = vect_recog_temp_ssa_var (type, NULL);
2686 	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2687 						1, oprnd0);
2688 	      gimple_call_set_lhs (pattern_stmt, var);
2689 	      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2690 				      gimple_call_fntype (last_stmt));
2691 	      return pattern_stmt;
2692 	    }
2693 	}
2694 
2695       oprnd1 = build_int_cst (integer_type_node, 8);
2696       rhs_code = LROTATE_EXPR;
2697       bswap16_p = true;
2698     }
2699   else
2700     return NULL;
2701 
2702   if (TREE_CODE (oprnd0) != SSA_NAME
2703       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2704       || !INTEGRAL_TYPE_P (type)
2705       || !TYPE_UNSIGNED (type))
2706     return NULL;
2707 
2708   stmt_vec_info def_stmt_info;
2709   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2710     return NULL;
2711 
2712   if (dt != vect_internal_def
2713       && dt != vect_constant_def
2714       && dt != vect_external_def)
2715     return NULL;
2716 
2717   vectype = get_vectype_for_scalar_type (vinfo, type);
2718   if (vectype == NULL_TREE)
2719     return NULL;
2720 
2721   /* If vector/vector or vector/scalar rotate is supported by the target,
2722      don't do anything here.  */
2723   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2724   if (optab1
2725       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2726     {
2727      use_rotate:
2728       if (bswap16_p)
2729 	{
2730 	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2731 	    {
2732 	      def = vect_recog_temp_ssa_var (type, NULL);
2733 	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2734 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2735 	      oprnd0 = def;
2736 	    }
2737 
2738 	  /* Pattern detected.  */
2739 	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2740 
2741 	  *type_out = vectype;
2742 
2743 	  /* Pattern supported.  Create a stmt to be used to replace the
2744 	     pattern.  */
2745 	  var = vect_recog_temp_ssa_var (type, NULL);
2746 	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2747 					      oprnd1);
2748 	  return pattern_stmt;
2749 	}
2750       return NULL;
2751     }
2752 
2753   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2754     {
2755       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2756       if (optab2
2757 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2758 	goto use_rotate;
2759     }
2760 
2761   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2762      don't do anything here either.  */
2763   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2764   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2765   if (!optab1
2766       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2767       || !optab2
2768       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2769     {
2770       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2771 	return NULL;
2772       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2773       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2774       if (!optab1
2775 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2776 	  || !optab2
2777 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2778 	return NULL;
2779     }
2780 
2781   *type_out = vectype;
2782 
2783   if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2784     {
2785       def = vect_recog_temp_ssa_var (type, NULL);
2786       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2787       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2788       oprnd0 = def;
2789     }
2790 
2791   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2792     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2793 
2794   def = NULL_TREE;
2795   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2796   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2797     def = oprnd1;
2798   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2799     {
2800       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2801       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2802 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2803 	     == TYPE_PRECISION (type))
2804 	def = rhs1;
2805     }
2806 
2807   if (def == NULL_TREE)
2808     {
2809       def = vect_recog_temp_ssa_var (type, NULL);
2810       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2811       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2812     }
2813   stype = TREE_TYPE (def);
2814 
2815   if (TREE_CODE (def) == INTEGER_CST)
2816     {
2817       if (!tree_fits_uhwi_p (def)
2818 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2819 	  || integer_zerop (def))
2820 	return NULL;
2821       def2 = build_int_cst (stype,
2822 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2823     }
2824   else
2825     {
2826       tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2827 
2828       if (vecstype == NULL_TREE)
2829 	return NULL;
2830       def2 = vect_recog_temp_ssa_var (stype, NULL);
2831       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2832       if (ext_def)
2833 	{
2834 	  basic_block new_bb
2835 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2836 	  gcc_assert (!new_bb);
2837 	}
2838       else
2839 	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2840 
2841       def2 = vect_recog_temp_ssa_var (stype, NULL);
2842       tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2843       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2844 				      gimple_assign_lhs (def_stmt), mask);
2845       if (ext_def)
2846 	{
2847 	  basic_block new_bb
2848 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2849 	  gcc_assert (!new_bb);
2850 	}
2851       else
2852 	append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2853     }
2854 
2855   var1 = vect_recog_temp_ssa_var (type, NULL);
2856   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2857 					? LSHIFT_EXPR : RSHIFT_EXPR,
2858 				  oprnd0, def);
2859   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2860 
2861   var2 = vect_recog_temp_ssa_var (type, NULL);
2862   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2863 					? RSHIFT_EXPR : LSHIFT_EXPR,
2864 				  oprnd0, def2);
2865   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2866 
2867   /* Pattern detected.  */
2868   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2869 
2870   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2871   var = vect_recog_temp_ssa_var (type, NULL);
2872   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2873 
2874   return pattern_stmt;
2875 }
2876 
2877 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2878    vectorized:
2879 
2880    type a_t;
2881    TYPE b_T, res_T;
2882 
2883    S1 a_t = ;
2884    S2 b_T = ;
2885    S3 res_T = b_T op a_t;
2886 
2887   where type 'TYPE' is a type with different size than 'type',
2888   and op is <<, >> or rotate.
2889 
2890   Also detect cases:
2891 
2892    type a_t;
2893    TYPE b_T, c_T, res_T;
2894 
2895    S0 c_T = ;
2896    S1 a_t = (type) c_T;
2897    S2 b_T = ;
2898    S3 res_T = b_T op a_t;
2899 
2900   Input/Output:
2901 
2902   * STMT_VINFO: The stmt from which the pattern search begins,
2903     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2904     with a shift/rotate which has same type on both operands, in the
2905     second case just b_T op c_T, in the first case with added cast
2906     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2907 
2908   Output:
2909 
2910   * TYPE_OUT: The type of the output of this pattern.
2911 
2912   * Return value: A new stmt that will be used to replace the shift/rotate
2913     S3 stmt.  */
2914 
2915 static gimple *
vect_recog_vector_vector_shift_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)2916 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2917 					stmt_vec_info stmt_vinfo,
2918 					tree *type_out)
2919 {
2920   gimple *last_stmt = stmt_vinfo->stmt;
2921   tree oprnd0, oprnd1, lhs, var;
2922   gimple *pattern_stmt;
2923   enum tree_code rhs_code;
2924 
2925   if (!is_gimple_assign (last_stmt))
2926     return NULL;
2927 
2928   rhs_code = gimple_assign_rhs_code (last_stmt);
2929   switch (rhs_code)
2930     {
2931     case LSHIFT_EXPR:
2932     case RSHIFT_EXPR:
2933     case LROTATE_EXPR:
2934     case RROTATE_EXPR:
2935       break;
2936     default:
2937       return NULL;
2938     }
2939 
2940   lhs = gimple_assign_lhs (last_stmt);
2941   oprnd0 = gimple_assign_rhs1 (last_stmt);
2942   oprnd1 = gimple_assign_rhs2 (last_stmt);
2943   if (TREE_CODE (oprnd0) != SSA_NAME
2944       || TREE_CODE (oprnd1) != SSA_NAME
2945       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2946       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2947       || TYPE_PRECISION (TREE_TYPE (lhs))
2948 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2949     return NULL;
2950 
2951   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2952   if (!def_vinfo)
2953     return NULL;
2954 
2955   *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2956   if (*type_out == NULL_TREE)
2957     return NULL;
2958 
2959   tree def = NULL_TREE;
2960   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2961   if (def_stmt && gimple_assign_cast_p (def_stmt))
2962     {
2963       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2964       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2965 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2966 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2967 	{
2968 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2969 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2970 	    def = rhs1;
2971 	  else
2972 	    {
2973 	      tree mask
2974 		= build_low_bits_mask (TREE_TYPE (rhs1),
2975 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2976 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2977 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2978 	      tree vecstype = get_vectype_for_scalar_type (vinfo,
2979 							   TREE_TYPE (rhs1));
2980 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2981 	    }
2982 	}
2983     }
2984 
2985   if (def == NULL_TREE)
2986     {
2987       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2988       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2989       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2990     }
2991 
2992   /* Pattern detected.  */
2993   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2994 
2995   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2996   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2997   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2998 
2999   return pattern_stmt;
3000 }
3001 
3002 /* Return true iff the target has a vector optab implementing the operation
3003    CODE on type VECTYPE.  */
3004 
3005 static bool
target_has_vecop_for_code(tree_code code,tree vectype)3006 target_has_vecop_for_code (tree_code code, tree vectype)
3007 {
3008   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3009   return voptab
3010 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3011 }
3012 
3013 /* Verify that the target has optabs of VECTYPE to perform all the steps
3014    needed by the multiplication-by-immediate synthesis algorithm described by
3015    ALG and VAR.  If SYNTH_SHIFT_P is true ensure that vector addition is
3016    present.  Return true iff the target supports all the steps.  */
3017 
3018 static bool
target_supports_mult_synth_alg(struct algorithm * alg,mult_variant var,tree vectype,bool synth_shift_p)3019 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3020 				 tree vectype, bool synth_shift_p)
3021 {
3022   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3023     return false;
3024 
3025   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3026   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3027 
3028   if (var == negate_variant
3029       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3030     return false;
3031 
3032   /* If we must synthesize shifts with additions make sure that vector
3033      addition is available.  */
3034   if ((var == add_variant || synth_shift_p) && !supports_vplus)
3035     return false;
3036 
3037   for (int i = 1; i < alg->ops; i++)
3038     {
3039       switch (alg->op[i])
3040 	{
3041 	case alg_shift:
3042 	  break;
3043 	case alg_add_t_m2:
3044 	case alg_add_t2_m:
3045 	case alg_add_factor:
3046 	  if (!supports_vplus)
3047 	    return false;
3048 	  break;
3049 	case alg_sub_t_m2:
3050 	case alg_sub_t2_m:
3051 	case alg_sub_factor:
3052 	  if (!supports_vminus)
3053 	    return false;
3054 	  break;
3055 	case alg_unknown:
3056 	case alg_m:
3057 	case alg_zero:
3058 	case alg_impossible:
3059 	  return false;
3060 	default:
3061 	  gcc_unreachable ();
3062 	}
3063     }
3064 
3065   return true;
3066 }
3067 
3068 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3069    putting the final result in DEST.  Append all statements but the last into
3070    VINFO.  Return the last statement.  */
3071 
3072 static gimple *
synth_lshift_by_additions(vec_info * vinfo,tree dest,tree op,HOST_WIDE_INT amnt,stmt_vec_info stmt_info)3073 synth_lshift_by_additions (vec_info *vinfo,
3074 			   tree dest, tree op, HOST_WIDE_INT amnt,
3075 			   stmt_vec_info stmt_info)
3076 {
3077   HOST_WIDE_INT i;
3078   tree itype = TREE_TYPE (op);
3079   tree prev_res = op;
3080   gcc_assert (amnt >= 0);
3081   for (i = 0; i < amnt; i++)
3082     {
3083       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3084 		      : dest;
3085       gimple *stmt
3086         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3087       prev_res = tmp_var;
3088       if (i < amnt - 1)
3089 	append_pattern_def_seq (vinfo, stmt_info, stmt);
3090       else
3091 	return stmt;
3092     }
3093   gcc_unreachable ();
3094   return NULL;
3095 }
3096 
3097 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
3098    CODE to operands OP1 and OP2, creating a new temporary SSA var in
3099    the process if necessary.  Append the resulting assignment statements
3100    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
3101    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
3102    left shifts using additions.  */
3103 
3104 static tree
apply_binop_and_append_stmt(vec_info * vinfo,tree_code code,tree op1,tree op2,stmt_vec_info stmt_vinfo,bool synth_shift_p)3105 apply_binop_and_append_stmt (vec_info *vinfo,
3106 			     tree_code code, tree op1, tree op2,
3107 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
3108 {
3109   if (integer_zerop (op2)
3110       && (code == LSHIFT_EXPR
3111 	  || code == PLUS_EXPR))
3112     {
3113       gcc_assert (TREE_CODE (op1) == SSA_NAME);
3114       return op1;
3115     }
3116 
3117   gimple *stmt;
3118   tree itype = TREE_TYPE (op1);
3119   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3120 
3121   if (code == LSHIFT_EXPR
3122       && synth_shift_p)
3123     {
3124       stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3125 					TREE_INT_CST_LOW (op2), stmt_vinfo);
3126       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3127       return tmp_var;
3128     }
3129 
3130   stmt = gimple_build_assign (tmp_var, code, op1, op2);
3131   append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3132   return tmp_var;
3133 }
3134 
3135 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3136    and simple arithmetic operations to be vectorized.  Record the statements
3137    produced in STMT_VINFO and return the last statement in the sequence or
3138    NULL if it's not possible to synthesize such a multiplication.
3139    This function mirrors the behavior of expand_mult_const in expmed.cc but
3140    works on tree-ssa form.  */
3141 
3142 static gimple *
vect_synth_mult_by_constant(vec_info * vinfo,tree op,tree val,stmt_vec_info stmt_vinfo)3143 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3144 			     stmt_vec_info stmt_vinfo)
3145 {
3146   tree itype = TREE_TYPE (op);
3147   machine_mode mode = TYPE_MODE (itype);
3148   struct algorithm alg;
3149   mult_variant variant;
3150   if (!tree_fits_shwi_p (val))
3151     return NULL;
3152 
3153   /* Multiplication synthesis by shifts, adds and subs can introduce
3154      signed overflow where the original operation didn't.  Perform the
3155      operations on an unsigned type and cast back to avoid this.
3156      In the future we may want to relax this for synthesis algorithms
3157      that we can prove do not cause unexpected overflow.  */
3158   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3159 
3160   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3161   tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3162   if (!vectype)
3163     return NULL;
3164 
3165   /* Targets that don't support vector shifts but support vector additions
3166      can synthesize shifts that way.  */
3167   bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3168 
3169   HOST_WIDE_INT hwval = tree_to_shwi (val);
3170   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3171      The vectorizer's benefit analysis will decide whether it's beneficial
3172      to do this.  */
3173   bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3174 				       ? TYPE_MODE (vectype) : mode,
3175 				       hwval, &alg, &variant, MAX_COST);
3176   if (!possible)
3177     return NULL;
3178 
3179   if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3180     return NULL;
3181 
3182   tree accumulator;
3183 
3184   /* Clear out the sequence of statements so we can populate it below.  */
3185   gimple *stmt = NULL;
3186 
3187   if (cast_to_unsigned_p)
3188     {
3189       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3190       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3191       append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3192       op = tmp_op;
3193     }
3194 
3195   if (alg.op[0] == alg_zero)
3196     accumulator = build_int_cst (multtype, 0);
3197   else
3198     accumulator = op;
3199 
3200   bool needs_fixup = (variant == negate_variant)
3201 		      || (variant == add_variant);
3202 
3203   for (int i = 1; i < alg.ops; i++)
3204     {
3205       tree shft_log = build_int_cst (multtype, alg.log[i]);
3206       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3207       tree tmp_var = NULL_TREE;
3208 
3209       switch (alg.op[i])
3210 	{
3211 	case alg_shift:
3212 	  if (synth_shift_p)
3213 	    stmt
3214 	      = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3215 					   alg.log[i], stmt_vinfo);
3216 	  else
3217 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3218 					 shft_log);
3219 	  break;
3220 	case alg_add_t_m2:
3221 	  tmp_var
3222 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3223 					   stmt_vinfo, synth_shift_p);
3224 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3225 				       tmp_var);
3226 	  break;
3227 	case alg_sub_t_m2:
3228 	  tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3229 						 shft_log, stmt_vinfo,
3230 						 synth_shift_p);
3231 	  /* In some algorithms the first step involves zeroing the
3232 	     accumulator.  If subtracting from such an accumulator
3233 	     just emit the negation directly.  */
3234 	  if (integer_zerop (accumulator))
3235 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3236 	  else
3237 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3238 					tmp_var);
3239 	  break;
3240 	case alg_add_t2_m:
3241 	  tmp_var
3242 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3243 					   shft_log, stmt_vinfo, synth_shift_p);
3244 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3245 	  break;
3246 	case alg_sub_t2_m:
3247 	  tmp_var
3248 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3249 					   shft_log, stmt_vinfo, synth_shift_p);
3250 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3251 	  break;
3252 	case alg_add_factor:
3253 	  tmp_var
3254 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3255 					   shft_log, stmt_vinfo, synth_shift_p);
3256 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3257 				       tmp_var);
3258 	  break;
3259 	case alg_sub_factor:
3260 	  tmp_var
3261 	    = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3262 					   shft_log, stmt_vinfo, synth_shift_p);
3263 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3264 				      accumulator);
3265 	  break;
3266 	default:
3267 	  gcc_unreachable ();
3268 	}
3269       /* We don't want to append the last stmt in the sequence to stmt_vinfo
3270 	 but rather return it directly.  */
3271 
3272       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3273 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3274       accumulator = accum_tmp;
3275     }
3276   if (variant == negate_variant)
3277     {
3278       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3279       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3280       accumulator = accum_tmp;
3281       if (cast_to_unsigned_p)
3282 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3283     }
3284   else if (variant == add_variant)
3285     {
3286       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3287       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3288       accumulator = accum_tmp;
3289       if (cast_to_unsigned_p)
3290 	append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3291     }
3292   /* Move back to a signed if needed.  */
3293   if (cast_to_unsigned_p)
3294     {
3295       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3296       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3297     }
3298 
3299   return stmt;
3300 }
3301 
3302 /* Detect multiplication by constant and convert it into a sequence of
3303    shifts and additions, subtractions, negations.  We reuse the
3304    choose_mult_variant algorithms from expmed.cc
3305 
3306    Input/Output:
3307 
3308    STMT_VINFO: The stmt from which the pattern search begins,
3309    i.e. the mult stmt.
3310 
3311  Output:
3312 
3313   * TYPE_OUT: The type of the output of this pattern.
3314 
3315   * Return value: A new stmt that will be used to replace
3316     the multiplication.  */
3317 
3318 static gimple *
vect_recog_mult_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)3319 vect_recog_mult_pattern (vec_info *vinfo,
3320 			 stmt_vec_info stmt_vinfo, tree *type_out)
3321 {
3322   gimple *last_stmt = stmt_vinfo->stmt;
3323   tree oprnd0, oprnd1, vectype, itype;
3324   gimple *pattern_stmt;
3325 
3326   if (!is_gimple_assign (last_stmt))
3327     return NULL;
3328 
3329   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3330     return NULL;
3331 
3332   oprnd0 = gimple_assign_rhs1 (last_stmt);
3333   oprnd1 = gimple_assign_rhs2 (last_stmt);
3334   itype = TREE_TYPE (oprnd0);
3335 
3336   if (TREE_CODE (oprnd0) != SSA_NAME
3337       || TREE_CODE (oprnd1) != INTEGER_CST
3338       || !INTEGRAL_TYPE_P (itype)
3339       || !type_has_mode_precision_p (itype))
3340     return NULL;
3341 
3342   vectype = get_vectype_for_scalar_type (vinfo, itype);
3343   if (vectype == NULL_TREE)
3344     return NULL;
3345 
3346   /* If the target can handle vectorized multiplication natively,
3347      don't attempt to optimize this.  */
3348   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3349   if (mul_optab != unknown_optab)
3350     {
3351       machine_mode vec_mode = TYPE_MODE (vectype);
3352       int icode = (int) optab_handler (mul_optab, vec_mode);
3353       if (icode != CODE_FOR_nothing)
3354        return NULL;
3355     }
3356 
3357   pattern_stmt = vect_synth_mult_by_constant (vinfo,
3358 					      oprnd0, oprnd1, stmt_vinfo);
3359   if (!pattern_stmt)
3360     return NULL;
3361 
3362   /* Pattern detected.  */
3363   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3364 
3365   *type_out = vectype;
3366 
3367   return pattern_stmt;
3368 }
3369 
3370 /* Detect a signed division by a constant that wouldn't be
3371    otherwise vectorized:
3372 
3373    type a_t, b_t;
3374 
3375    S1 a_t = b_t / N;
3376 
3377   where type 'type' is an integral type and N is a constant.
3378 
3379   Similarly handle modulo by a constant:
3380 
3381    S4 a_t = b_t % N;
3382 
3383   Input/Output:
3384 
3385   * STMT_VINFO: The stmt from which the pattern search begins,
3386     i.e. the division stmt.  S1 is replaced by if N is a power
3387     of two constant and type is signed:
3388   S3  y_t = b_t < 0 ? N - 1 : 0;
3389   S2  x_t = b_t + y_t;
3390   S1' a_t = x_t >> log2 (N);
3391 
3392     S4 is replaced if N is a power of two constant and
3393     type is signed by (where *_T temporaries have unsigned type):
3394   S9  y_T = b_t < 0 ? -1U : 0U;
3395   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3396   S7  z_t = (type) z_T;
3397   S6  w_t = b_t + z_t;
3398   S5  x_t = w_t & (N - 1);
3399   S4' a_t = x_t - z_t;
3400 
3401   Output:
3402 
3403   * TYPE_OUT: The type of the output of this pattern.
3404 
3405   * Return value: A new stmt that will be used to replace the division
3406     S1 or modulo S4 stmt.  */
3407 
3408 static gimple *
vect_recog_divmod_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)3409 vect_recog_divmod_pattern (vec_info *vinfo,
3410 			   stmt_vec_info stmt_vinfo, tree *type_out)
3411 {
3412   gimple *last_stmt = stmt_vinfo->stmt;
3413   tree oprnd0, oprnd1, vectype, itype, cond;
3414   gimple *pattern_stmt, *def_stmt;
3415   enum tree_code rhs_code;
3416   optab optab;
3417   tree q;
3418   int dummy_int, prec;
3419 
3420   if (!is_gimple_assign (last_stmt))
3421     return NULL;
3422 
3423   rhs_code = gimple_assign_rhs_code (last_stmt);
3424   switch (rhs_code)
3425     {
3426     case TRUNC_DIV_EXPR:
3427     case EXACT_DIV_EXPR:
3428     case TRUNC_MOD_EXPR:
3429       break;
3430     default:
3431       return NULL;
3432     }
3433 
3434   oprnd0 = gimple_assign_rhs1 (last_stmt);
3435   oprnd1 = gimple_assign_rhs2 (last_stmt);
3436   itype = TREE_TYPE (oprnd0);
3437   if (TREE_CODE (oprnd0) != SSA_NAME
3438       || TREE_CODE (oprnd1) != INTEGER_CST
3439       || TREE_CODE (itype) != INTEGER_TYPE
3440       || !type_has_mode_precision_p (itype))
3441     return NULL;
3442 
3443   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3444   vectype = get_vectype_for_scalar_type (vinfo, itype);
3445   if (vectype == NULL_TREE)
3446     return NULL;
3447 
3448   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3449     {
3450       /* If the target can handle vectorized division or modulo natively,
3451 	 don't attempt to optimize this, since native division is likely
3452 	 to give smaller code.  */
3453       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3454       if (optab != unknown_optab)
3455 	{
3456 	  machine_mode vec_mode = TYPE_MODE (vectype);
3457 	  int icode = (int) optab_handler (optab, vec_mode);
3458 	  if (icode != CODE_FOR_nothing)
3459 	    return NULL;
3460 	}
3461     }
3462 
3463   prec = TYPE_PRECISION (itype);
3464   if (integer_pow2p (oprnd1))
3465     {
3466       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3467 	return NULL;
3468 
3469       /* Pattern detected.  */
3470       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3471 
3472       *type_out = vectype;
3473 
3474       /* Check if the target supports this internal function.  */
3475       internal_fn ifn = IFN_DIV_POW2;
3476       if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3477 	{
3478 	  tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3479 
3480 	  tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3481 	  gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3482 	  gimple_call_set_lhs (div_stmt, var_div);
3483 
3484 	  if (rhs_code == TRUNC_MOD_EXPR)
3485 	    {
3486 	      append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3487 	      def_stmt
3488 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3489 				       LSHIFT_EXPR, var_div, shift);
3490 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3491 	      pattern_stmt
3492 		= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3493 				       MINUS_EXPR, oprnd0,
3494 				       gimple_assign_lhs (def_stmt));
3495 	    }
3496 	  else
3497 	    pattern_stmt = div_stmt;
3498 	  gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3499 
3500 	  return pattern_stmt;
3501 	}
3502 
3503       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3504 		     build_int_cst (itype, 0));
3505       if (rhs_code == TRUNC_DIV_EXPR
3506 	  || rhs_code == EXACT_DIV_EXPR)
3507 	{
3508 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
3509 	  tree shift;
3510 	  def_stmt
3511 	    = gimple_build_assign (var, COND_EXPR, cond,
3512 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3513 						build_int_cst (itype, 1)),
3514 				   build_int_cst (itype, 0));
3515 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3516 	  var = vect_recog_temp_ssa_var (itype, NULL);
3517 	  def_stmt
3518 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3519 				   gimple_assign_lhs (def_stmt));
3520 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3521 
3522 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
3523 	  pattern_stmt
3524 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3525 				   RSHIFT_EXPR, var, shift);
3526 	}
3527       else
3528 	{
3529 	  tree signmask;
3530 	  if (compare_tree_int (oprnd1, 2) == 0)
3531 	    {
3532 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3533 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3534 					      build_int_cst (itype, 1),
3535 					      build_int_cst (itype, 0));
3536 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3537 	    }
3538 	  else
3539 	    {
3540 	      tree utype
3541 		= build_nonstandard_integer_type (prec, 1);
3542 	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3543 	      tree shift
3544 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3545 					- tree_log2 (oprnd1));
3546 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
3547 
3548 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3549 					      build_int_cst (utype, -1),
3550 					      build_int_cst (utype, 0));
3551 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3552 	      var = vect_recog_temp_ssa_var (utype, NULL);
3553 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3554 					      gimple_assign_lhs (def_stmt),
3555 					      shift);
3556 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3557 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
3558 	      def_stmt
3559 		= gimple_build_assign (signmask, NOP_EXPR, var);
3560 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3561 	    }
3562 	  def_stmt
3563 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3564 				   PLUS_EXPR, oprnd0, signmask);
3565 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3566 	  def_stmt
3567 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3568 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3569 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
3570 						build_int_cst (itype, 1)));
3571 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3572 
3573 	  pattern_stmt
3574 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3575 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
3576 				   signmask);
3577 	}
3578 
3579       return pattern_stmt;
3580     }
3581 
3582   if (prec > HOST_BITS_PER_WIDE_INT
3583       || integer_zerop (oprnd1))
3584     return NULL;
3585 
3586   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3587     return NULL;
3588 
3589   if (TYPE_UNSIGNED (itype))
3590     {
3591       unsigned HOST_WIDE_INT mh, ml;
3592       int pre_shift, post_shift;
3593       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3594 				  & GET_MODE_MASK (itype_mode));
3595       tree t1, t2, t3, t4;
3596 
3597       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3598 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
3599 	return NULL;
3600 
3601       /* Find a suitable multiplier and right shift count
3602 	 instead of multiplying with D.  */
3603       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3604 
3605       /* If the suggested multiplier is more than SIZE bits, we can do better
3606 	 for even divisors, using an initial right shift.  */
3607       if (mh != 0 && (d & 1) == 0)
3608 	{
3609 	  pre_shift = ctz_or_zero (d);
3610 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3611 				  &ml, &post_shift, &dummy_int);
3612 	  gcc_assert (!mh);
3613 	}
3614       else
3615 	pre_shift = 0;
3616 
3617       if (mh != 0)
3618 	{
3619 	  if (post_shift - 1 >= prec)
3620 	    return NULL;
3621 
3622 	  /* t1 = oprnd0 h* ml;
3623 	     t2 = oprnd0 - t1;
3624 	     t3 = t2 >> 1;
3625 	     t4 = t1 + t3;
3626 	     q = t4 >> (post_shift - 1);  */
3627 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
3628 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3629 					  build_int_cst (itype, ml));
3630 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3631 
3632 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3633 	  def_stmt
3634 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3635 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3636 
3637 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3638 	  def_stmt
3639 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3640 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3641 
3642 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3643 	  def_stmt
3644 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3645 
3646 	  if (post_shift != 1)
3647 	    {
3648 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3649 
3650 	      q = vect_recog_temp_ssa_var (itype, NULL);
3651 	      pattern_stmt
3652 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
3653 				       build_int_cst (itype, post_shift - 1));
3654 	    }
3655 	  else
3656 	    {
3657 	      q = t4;
3658 	      pattern_stmt = def_stmt;
3659 	    }
3660 	}
3661       else
3662 	{
3663 	  if (pre_shift >= prec || post_shift >= prec)
3664 	    return NULL;
3665 
3666 	  /* t1 = oprnd0 >> pre_shift;
3667 	     t2 = t1 h* ml;
3668 	     q = t2 >> post_shift;  */
3669 	  if (pre_shift)
3670 	    {
3671 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
3672 	      def_stmt
3673 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3674 				       build_int_cst (NULL, pre_shift));
3675 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3676 	    }
3677 	  else
3678 	    t1 = oprnd0;
3679 
3680 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3681 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3682 					  build_int_cst (itype, ml));
3683 
3684 	  if (post_shift)
3685 	    {
3686 	      append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3687 
3688 	      q = vect_recog_temp_ssa_var (itype, NULL);
3689 	      def_stmt
3690 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
3691 				       build_int_cst (itype, post_shift));
3692 	    }
3693 	  else
3694 	    q = t2;
3695 
3696 	  pattern_stmt = def_stmt;
3697 	}
3698     }
3699   else
3700     {
3701       unsigned HOST_WIDE_INT ml;
3702       int post_shift;
3703       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3704       unsigned HOST_WIDE_INT abs_d;
3705       bool add = false;
3706       tree t1, t2, t3, t4;
3707 
3708       /* Give up for -1.  */
3709       if (d == -1)
3710 	return NULL;
3711 
3712       /* Since d might be INT_MIN, we have to cast to
3713 	 unsigned HOST_WIDE_INT before negating to avoid
3714 	 undefined signed overflow.  */
3715       abs_d = (d >= 0
3716 	       ? (unsigned HOST_WIDE_INT) d
3717 	       : - (unsigned HOST_WIDE_INT) d);
3718 
3719       /* n rem d = n rem -d */
3720       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3721 	{
3722 	  d = abs_d;
3723 	  oprnd1 = build_int_cst (itype, abs_d);
3724 	}
3725       if (HOST_BITS_PER_WIDE_INT >= prec
3726 	  && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3727 	/* This case is not handled correctly below.  */
3728 	return NULL;
3729 
3730       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3731       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3732 	{
3733 	  add = true;
3734 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
3735 	}
3736       if (post_shift >= prec)
3737 	return NULL;
3738 
3739       /* t1 = oprnd0 h* ml;  */
3740       t1 = vect_recog_temp_ssa_var (itype, NULL);
3741       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3742 				      build_int_cst (itype, ml));
3743 
3744       if (add)
3745 	{
3746 	  /* t2 = t1 + oprnd0;  */
3747 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3748 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
3749 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3750 	}
3751       else
3752 	t2 = t1;
3753 
3754       if (post_shift)
3755 	{
3756 	  /* t3 = t2 >> post_shift;  */
3757 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3758 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3759 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3760 					  build_int_cst (itype, post_shift));
3761 	}
3762       else
3763 	t3 = t2;
3764 
3765       int msb = 1;
3766       value_range r;
3767       get_range_query (cfun)->range_of_expr (r, oprnd0);
3768       if (r.kind () == VR_RANGE)
3769 	{
3770 	  if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3771 	    msb = 0;
3772 	  else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3773 	    msb = -1;
3774 	}
3775 
3776       if (msb == 0 && d >= 0)
3777 	{
3778 	  /* q = t3;  */
3779 	  q = t3;
3780 	  pattern_stmt = def_stmt;
3781 	}
3782       else
3783 	{
3784 	  /* t4 = oprnd0 >> (prec - 1);
3785 	     or if we know from VRP that oprnd0 >= 0
3786 	     t4 = 0;
3787 	     or if we know from VRP that oprnd0 < 0
3788 	     t4 = -1;  */
3789 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3790 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3791 	  if (msb != 1)
3792 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
3793 					    build_int_cst (itype, msb));
3794 	  else
3795 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3796 					    build_int_cst (itype, prec - 1));
3797 	  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3798 
3799 	  /* q = t3 - t4;  or q = t4 - t3;  */
3800 	  q = vect_recog_temp_ssa_var (itype, NULL);
3801 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3802 					      d < 0 ? t3 : t4);
3803 	}
3804     }
3805 
3806   if (rhs_code == TRUNC_MOD_EXPR)
3807     {
3808       tree r, t1;
3809 
3810       /* We divided.  Now finish by:
3811 	 t1 = q * oprnd1;
3812 	 r = oprnd0 - t1;  */
3813       append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3814 
3815       t1 = vect_recog_temp_ssa_var (itype, NULL);
3816       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3817       append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3818 
3819       r = vect_recog_temp_ssa_var (itype, NULL);
3820       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3821     }
3822 
3823   /* Pattern detected.  */
3824   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3825 
3826   *type_out = vectype;
3827   return pattern_stmt;
3828 }
3829 
3830 /* Function vect_recog_mixed_size_cond_pattern
3831 
3832    Try to find the following pattern:
3833 
3834      type x_t, y_t;
3835      TYPE a_T, b_T, c_T;
3836    loop:
3837      S1  a_T = x_t CMP y_t ? b_T : c_T;
3838 
3839    where type 'TYPE' is an integral type which has different size
3840    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3841    than 'type', the constants need to fit into an integer type
3842    with the same width as 'type') or results of conversion from 'type'.
3843 
3844    Input:
3845 
3846    * STMT_VINFO: The stmt from which the pattern search begins.
3847 
3848    Output:
3849 
3850    * TYPE_OUT: The type of the output of this pattern.
3851 
3852    * Return value: A new stmt that will be used to replace the pattern.
3853 	Additionally a def_stmt is added.
3854 
3855 	a_it = x_t CMP y_t ? b_it : c_it;
3856 	a_T = (TYPE) a_it;  */
3857 
3858 static gimple *
vect_recog_mixed_size_cond_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)3859 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3860 				    stmt_vec_info stmt_vinfo, tree *type_out)
3861 {
3862   gimple *last_stmt = stmt_vinfo->stmt;
3863   tree cond_expr, then_clause, else_clause;
3864   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3865   gimple *pattern_stmt, *def_stmt;
3866   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3867   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3868   bool promotion;
3869   tree comp_scalar_type;
3870 
3871   if (!is_gimple_assign (last_stmt)
3872       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3873       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3874     return NULL;
3875 
3876   cond_expr = gimple_assign_rhs1 (last_stmt);
3877   then_clause = gimple_assign_rhs2 (last_stmt);
3878   else_clause = gimple_assign_rhs3 (last_stmt);
3879 
3880   if (!COMPARISON_CLASS_P (cond_expr))
3881     return NULL;
3882 
3883   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3884   comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3885   if (comp_vectype == NULL_TREE)
3886     return NULL;
3887 
3888   type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3889   if (types_compatible_p (type, comp_scalar_type)
3890       || ((TREE_CODE (then_clause) != INTEGER_CST
3891 	   || TREE_CODE (else_clause) != INTEGER_CST)
3892 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3893       || !INTEGRAL_TYPE_P (type))
3894     return NULL;
3895 
3896   if ((TREE_CODE (then_clause) != INTEGER_CST
3897        && !type_conversion_p (vinfo, then_clause, false,
3898 			      &orig_type0, &def_stmt0, &promotion))
3899       || (TREE_CODE (else_clause) != INTEGER_CST
3900 	  && !type_conversion_p (vinfo, else_clause, false,
3901 				 &orig_type1, &def_stmt1, &promotion)))
3902     return NULL;
3903 
3904   if (orig_type0 && orig_type1
3905       && !types_compatible_p (orig_type0, orig_type1))
3906     return NULL;
3907 
3908   if (orig_type0)
3909     {
3910       if (!types_compatible_p (orig_type0, comp_scalar_type))
3911 	return NULL;
3912       then_clause = gimple_assign_rhs1 (def_stmt0);
3913       itype = orig_type0;
3914     }
3915 
3916   if (orig_type1)
3917     {
3918       if (!types_compatible_p (orig_type1, comp_scalar_type))
3919 	return NULL;
3920       else_clause = gimple_assign_rhs1 (def_stmt1);
3921       itype = orig_type1;
3922     }
3923 
3924 
3925   HOST_WIDE_INT cmp_mode_size
3926     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3927 
3928   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3929   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3930     return NULL;
3931 
3932   vectype = get_vectype_for_scalar_type (vinfo, type);
3933   if (vectype == NULL_TREE)
3934     return NULL;
3935 
3936   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3937     return NULL;
3938 
3939   if (itype == NULL_TREE)
3940     itype = build_nonstandard_integer_type (cmp_mode_size,
3941   					    TYPE_UNSIGNED (type));
3942 
3943   if (itype == NULL_TREE
3944       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3945     return NULL;
3946 
3947   vecitype = get_vectype_for_scalar_type (vinfo, itype);
3948   if (vecitype == NULL_TREE)
3949     return NULL;
3950 
3951   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3952     return NULL;
3953 
3954   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3955     {
3956       if ((TREE_CODE (then_clause) == INTEGER_CST
3957 	   && !int_fits_type_p (then_clause, itype))
3958 	  || (TREE_CODE (else_clause) == INTEGER_CST
3959 	      && !int_fits_type_p (else_clause, itype)))
3960 	return NULL;
3961     }
3962 
3963   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3964 				  COND_EXPR, unshare_expr (cond_expr),
3965 				  fold_convert (itype, then_clause),
3966 				  fold_convert (itype, else_clause));
3967   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3968 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3969 
3970   append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3971   *type_out = vectype;
3972 
3973   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3974 
3975   return pattern_stmt;
3976 }
3977 
3978 
3979 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3980    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3981    in case it's a result of a comparison which can be directly vectorized into
3982    a vector comparison.  Fills in STMTS with all stmts visited during the
3983    walk.  */
3984 
3985 static bool
check_bool_pattern(tree var,vec_info * vinfo,hash_set<gimple * > & stmts)3986 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3987 {
3988   tree rhs1;
3989   enum tree_code rhs_code;
3990 
3991   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3992   if (!def_stmt_info)
3993     return false;
3994 
3995   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3996   if (!def_stmt)
3997     return false;
3998 
3999   if (stmts.contains (def_stmt))
4000     return true;
4001 
4002   rhs1 = gimple_assign_rhs1 (def_stmt);
4003   rhs_code = gimple_assign_rhs_code (def_stmt);
4004   switch (rhs_code)
4005     {
4006     case SSA_NAME:
4007       if (! check_bool_pattern (rhs1, vinfo, stmts))
4008 	return false;
4009       break;
4010 
4011     CASE_CONVERT:
4012       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4013 	return false;
4014       if (! check_bool_pattern (rhs1, vinfo, stmts))
4015 	return false;
4016       break;
4017 
4018     case BIT_NOT_EXPR:
4019       if (! check_bool_pattern (rhs1, vinfo, stmts))
4020 	return false;
4021       break;
4022 
4023     case BIT_AND_EXPR:
4024     case BIT_IOR_EXPR:
4025     case BIT_XOR_EXPR:
4026       if (! check_bool_pattern (rhs1, vinfo, stmts)
4027 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4028 	return false;
4029       break;
4030 
4031     default:
4032       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4033 	{
4034 	  tree vecitype, comp_vectype;
4035 
4036 	  /* If the comparison can throw, then is_gimple_condexpr will be
4037 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
4038 	  if (stmt_could_throw_p (cfun, def_stmt))
4039 	    return false;
4040 
4041 	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4042 	  if (comp_vectype == NULL_TREE)
4043 	    return false;
4044 
4045 	  tree mask_type = get_mask_type_for_scalar_type (vinfo,
4046 							  TREE_TYPE (rhs1));
4047 	  if (mask_type
4048 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4049 	    return false;
4050 
4051 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4052 	    {
4053 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4054 	      tree itype
4055 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4056 	      vecitype = get_vectype_for_scalar_type (vinfo, itype);
4057 	      if (vecitype == NULL_TREE)
4058 		return false;
4059 	    }
4060 	  else
4061 	    vecitype = comp_vectype;
4062 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4063 	    return false;
4064 	}
4065       else
4066 	return false;
4067       break;
4068     }
4069 
4070   bool res = stmts.add (def_stmt);
4071   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
4072   gcc_assert (!res);
4073 
4074   return true;
4075 }
4076 
4077 
4078 /* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
4079    stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
4080    pattern sequence.  */
4081 
4082 static tree
adjust_bool_pattern_cast(vec_info * vinfo,tree type,tree var,stmt_vec_info stmt_info)4083 adjust_bool_pattern_cast (vec_info *vinfo,
4084 			  tree type, tree var, stmt_vec_info stmt_info)
4085 {
4086   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4087 					   NOP_EXPR, var);
4088   append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4089 			  get_vectype_for_scalar_type (vinfo, type));
4090   return gimple_assign_lhs (cast_stmt);
4091 }
4092 
4093 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
4094    VAR is an SSA_NAME that should be transformed from bool to a wider integer
4095    type, OUT_TYPE is the desired final integer type of the whole pattern.
4096    STMT_INFO is the info of the pattern root and is where pattern stmts should
4097    be associated with.  DEFS is a map of pattern defs.  */
4098 
4099 static void
adjust_bool_pattern(vec_info * vinfo,tree var,tree out_type,stmt_vec_info stmt_info,hash_map<tree,tree> & defs)4100 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4101 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4102 {
4103   gimple *stmt = SSA_NAME_DEF_STMT (var);
4104   enum tree_code rhs_code, def_rhs_code;
4105   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4106   location_t loc;
4107   gimple *pattern_stmt, *def_stmt;
4108   tree trueval = NULL_TREE;
4109 
4110   rhs1 = gimple_assign_rhs1 (stmt);
4111   rhs2 = gimple_assign_rhs2 (stmt);
4112   rhs_code = gimple_assign_rhs_code (stmt);
4113   loc = gimple_location (stmt);
4114   switch (rhs_code)
4115     {
4116     case SSA_NAME:
4117     CASE_CONVERT:
4118       irhs1 = *defs.get (rhs1);
4119       itype = TREE_TYPE (irhs1);
4120       pattern_stmt
4121 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4122 			       SSA_NAME, irhs1);
4123       break;
4124 
4125     case BIT_NOT_EXPR:
4126       irhs1 = *defs.get (rhs1);
4127       itype = TREE_TYPE (irhs1);
4128       pattern_stmt
4129 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4130 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4131       break;
4132 
4133     case BIT_AND_EXPR:
4134       /* Try to optimize x = y & (a < b ? 1 : 0); into
4135 	 x = (a < b ? y : 0);
4136 
4137 	 E.g. for:
4138 	   bool a_b, b_b, c_b;
4139 	   TYPE d_T;
4140 
4141 	   S1  a_b = x1 CMP1 y1;
4142 	   S2  b_b = x2 CMP2 y2;
4143 	   S3  c_b = a_b & b_b;
4144 	   S4  d_T = (TYPE) c_b;
4145 
4146 	 we would normally emit:
4147 
4148 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4149 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4150 	   S3'  c_T = a_T & b_T;
4151 	   S4'  d_T = c_T;
4152 
4153 	 but we can save one stmt by using the
4154 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
4155 	 BIT_AND_EXPR stmt out:
4156 
4157 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4158 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4159 	   S4'  f_T = c_T;
4160 
4161 	 At least when VEC_COND_EXPR is implemented using masks
4162 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
4163 	 computes the comparison masks and ands it, in one case with
4164 	 all ones vector, in the other case with a vector register.
4165 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4166 	 often more expensive.  */
4167       def_stmt = SSA_NAME_DEF_STMT (rhs2);
4168       def_rhs_code = gimple_assign_rhs_code (def_stmt);
4169       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4170 	{
4171 	  irhs1 = *defs.get (rhs1);
4172 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4173 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
4174 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4175 	    {
4176 	      rhs_code = def_rhs_code;
4177 	      rhs1 = def_rhs1;
4178 	      rhs2 = gimple_assign_rhs2 (def_stmt);
4179 	      trueval = irhs1;
4180 	      goto do_compare;
4181 	    }
4182 	  else
4183 	    irhs2 = *defs.get (rhs2);
4184 	  goto and_ior_xor;
4185 	}
4186       def_stmt = SSA_NAME_DEF_STMT (rhs1);
4187       def_rhs_code = gimple_assign_rhs_code (def_stmt);
4188       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4189 	{
4190 	  irhs2 = *defs.get (rhs2);
4191 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4192 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
4193 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4194 	    {
4195 	      rhs_code = def_rhs_code;
4196 	      rhs1 = def_rhs1;
4197 	      rhs2 = gimple_assign_rhs2 (def_stmt);
4198 	      trueval = irhs2;
4199 	      goto do_compare;
4200 	    }
4201 	  else
4202 	    irhs1 = *defs.get (rhs1);
4203 	  goto and_ior_xor;
4204 	}
4205       /* FALLTHRU */
4206     case BIT_IOR_EXPR:
4207     case BIT_XOR_EXPR:
4208       irhs1 = *defs.get (rhs1);
4209       irhs2 = *defs.get (rhs2);
4210     and_ior_xor:
4211       if (TYPE_PRECISION (TREE_TYPE (irhs1))
4212 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
4213 	{
4214 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4215 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4216 	  int out_prec = TYPE_PRECISION (out_type);
4217 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4218 	    irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4219 					      stmt_info);
4220 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4221 	    irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4222 					      stmt_info);
4223 	  else
4224 	    {
4225 	      irhs1 = adjust_bool_pattern_cast (vinfo,
4226 						out_type, irhs1, stmt_info);
4227 	      irhs2 = adjust_bool_pattern_cast (vinfo,
4228 						out_type, irhs2, stmt_info);
4229 	    }
4230 	}
4231       itype = TREE_TYPE (irhs1);
4232       pattern_stmt
4233 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4234 			       rhs_code, irhs1, irhs2);
4235       break;
4236 
4237     default:
4238     do_compare:
4239       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4240       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4241 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4242 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4243 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4244 	{
4245 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4246 	  itype
4247 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4248 	}
4249       else
4250 	itype = TREE_TYPE (rhs1);
4251       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4252       if (trueval == NULL_TREE)
4253 	trueval = build_int_cst (itype, 1);
4254       else
4255 	gcc_checking_assert (useless_type_conversion_p (itype,
4256 							TREE_TYPE (trueval)));
4257       pattern_stmt
4258 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4259 			       COND_EXPR, cond_expr, trueval,
4260 			       build_int_cst (itype, 0));
4261       break;
4262     }
4263 
4264   gimple_set_location (pattern_stmt, loc);
4265   append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4266 			  get_vectype_for_scalar_type (vinfo, itype));
4267   defs.put (var, gimple_assign_lhs (pattern_stmt));
4268 }
4269 
4270 /* Comparison function to qsort a vector of gimple stmts after UID.  */
4271 
4272 static int
sort_after_uid(const void * p1,const void * p2)4273 sort_after_uid (const void *p1, const void *p2)
4274 {
4275   const gimple *stmt1 = *(const gimple * const *)p1;
4276   const gimple *stmt2 = *(const gimple * const *)p2;
4277   return gimple_uid (stmt1) - gimple_uid (stmt2);
4278 }
4279 
4280 /* Create pattern stmts for all stmts participating in the bool pattern
4281    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4282    OUT_TYPE.  Return the def of the pattern root.  */
4283 
4284 static tree
adjust_bool_stmts(vec_info * vinfo,hash_set<gimple * > & bool_stmt_set,tree out_type,stmt_vec_info stmt_info)4285 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4286 		   tree out_type, stmt_vec_info stmt_info)
4287 {
4288   /* Gather original stmts in the bool pattern in their order of appearance
4289      in the IL.  */
4290   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4291   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4292        i != bool_stmt_set.end (); ++i)
4293     bool_stmts.quick_push (*i);
4294   bool_stmts.qsort (sort_after_uid);
4295 
4296   /* Now process them in that order, producing pattern stmts.  */
4297   hash_map <tree, tree> defs;
4298   for (unsigned i = 0; i < bool_stmts.length (); ++i)
4299     adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4300 			 out_type, stmt_info, defs);
4301 
4302   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
4303   gimple *pattern_stmt
4304     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4305   return gimple_assign_lhs (pattern_stmt);
4306 }
4307 
4308 /* Return the proper type for converting bool VAR into
4309    an integer value or NULL_TREE if no such type exists.
4310    The type is chosen so that the converted value has the
4311    same number of elements as VAR's vector type.  */
4312 
4313 static tree
integer_type_for_mask(tree var,vec_info * vinfo)4314 integer_type_for_mask (tree var, vec_info *vinfo)
4315 {
4316   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4317     return NULL_TREE;
4318 
4319   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4320   if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4321     return NULL_TREE;
4322 
4323   return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4324 }
4325 
4326 /* Function vect_recog_bool_pattern
4327 
4328    Try to find pattern like following:
4329 
4330      bool a_b, b_b, c_b, d_b, e_b;
4331      TYPE f_T;
4332    loop:
4333      S1  a_b = x1 CMP1 y1;
4334      S2  b_b = x2 CMP2 y2;
4335      S3  c_b = a_b & b_b;
4336      S4  d_b = x3 CMP3 y3;
4337      S5  e_b = c_b | d_b;
4338      S6  f_T = (TYPE) e_b;
4339 
4340    where type 'TYPE' is an integral type.  Or a similar pattern
4341    ending in
4342 
4343      S6  f_Y = e_b ? r_Y : s_Y;
4344 
4345    as results from if-conversion of a complex condition.
4346 
4347    Input:
4348 
4349    * STMT_VINFO: The stmt at the end from which the pattern
4350 		 search begins, i.e. cast of a bool to
4351 		 an integer type.
4352 
4353    Output:
4354 
4355    * TYPE_OUT: The type of the output of this pattern.
4356 
4357    * Return value: A new stmt that will be used to replace the pattern.
4358 
4359 	Assuming size of TYPE is the same as size of all comparisons
4360 	(otherwise some casts would be added where needed), the above
4361 	sequence we create related pattern stmts:
4362 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
4363 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
4364 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
4365 	S5'  e_T = c_T | d_T;
4366 	S6'  f_T = e_T;
4367 
4368 	Instead of the above S3' we could emit:
4369 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
4370 	S3'  c_T = a_T | b_T;
4371 	but the above is more efficient.  */
4372 
4373 static gimple *
vect_recog_bool_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)4374 vect_recog_bool_pattern (vec_info *vinfo,
4375 			 stmt_vec_info stmt_vinfo, tree *type_out)
4376 {
4377   gimple *last_stmt = stmt_vinfo->stmt;
4378   enum tree_code rhs_code;
4379   tree var, lhs, rhs, vectype;
4380   gimple *pattern_stmt;
4381 
4382   if (!is_gimple_assign (last_stmt))
4383     return NULL;
4384 
4385   var = gimple_assign_rhs1 (last_stmt);
4386   lhs = gimple_assign_lhs (last_stmt);
4387   rhs_code = gimple_assign_rhs_code (last_stmt);
4388 
4389   if (rhs_code == VIEW_CONVERT_EXPR)
4390     var = TREE_OPERAND (var, 0);
4391 
4392   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4393     return NULL;
4394 
4395   hash_set<gimple *> bool_stmts;
4396 
4397   if (CONVERT_EXPR_CODE_P (rhs_code)
4398       || rhs_code == VIEW_CONVERT_EXPR)
4399     {
4400       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4401 	  || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4402 	return NULL;
4403       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4404 
4405       if (check_bool_pattern (var, vinfo, bool_stmts))
4406 	{
4407 	  rhs = adjust_bool_stmts (vinfo, bool_stmts,
4408 				   TREE_TYPE (lhs), stmt_vinfo);
4409 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4410 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4411 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4412 	  else
4413 	    pattern_stmt
4414 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
4415 	}
4416       else
4417 	{
4418 	  tree type = integer_type_for_mask (var, vinfo);
4419 	  tree cst0, cst1, tmp;
4420 
4421 	  if (!type)
4422 	    return NULL;
4423 
4424 	  /* We may directly use cond with narrowed type to avoid
4425 	     multiple cond exprs with following result packing and
4426 	     perform single cond with packed mask instead.  In case
4427 	     of widening we better make cond first and then extract
4428 	     results.  */
4429 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4430 	    type = TREE_TYPE (lhs);
4431 
4432 	  cst0 = build_int_cst (type, 0);
4433 	  cst1 = build_int_cst (type, 1);
4434 	  tmp = vect_recog_temp_ssa_var (type, NULL);
4435 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4436 
4437 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4438 	    {
4439 	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4440 	      append_pattern_def_seq (vinfo, stmt_vinfo,
4441 				      pattern_stmt, new_vectype);
4442 
4443 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4444 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4445 	    }
4446 	}
4447 
4448       *type_out = vectype;
4449       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4450 
4451       return pattern_stmt;
4452     }
4453   else if (rhs_code == COND_EXPR
4454 	   && TREE_CODE (var) == SSA_NAME)
4455     {
4456       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4457       if (vectype == NULL_TREE)
4458 	return NULL;
4459 
4460       /* Build a scalar type for the boolean result that when
4461          vectorized matches the vector type of the result in
4462 	 size and number of elements.  */
4463       unsigned prec
4464 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4465 			       TYPE_VECTOR_SUBPARTS (vectype));
4466 
4467       tree type
4468 	= build_nonstandard_integer_type (prec,
4469 					  TYPE_UNSIGNED (TREE_TYPE (var)));
4470       if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4471 	return NULL;
4472 
4473       if (!check_bool_pattern (var, vinfo, bool_stmts))
4474 	return NULL;
4475 
4476       rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4477 
4478       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4479       pattern_stmt
4480 	  = gimple_build_assign (lhs, COND_EXPR,
4481 				 build2 (NE_EXPR, boolean_type_node,
4482 					 rhs, build_int_cst (type, 0)),
4483 				 gimple_assign_rhs2 (last_stmt),
4484 				 gimple_assign_rhs3 (last_stmt));
4485       *type_out = vectype;
4486       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4487 
4488       return pattern_stmt;
4489     }
4490   else if (rhs_code == SSA_NAME
4491 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
4492     {
4493       stmt_vec_info pattern_stmt_info;
4494       tree nunits_vectype;
4495       if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4496 					   &nunits_vectype)
4497 	  || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4498 	return NULL;
4499 
4500       if (check_bool_pattern (var, vinfo, bool_stmts))
4501 	rhs = adjust_bool_stmts (vinfo, bool_stmts,
4502 				 TREE_TYPE (vectype), stmt_vinfo);
4503       else
4504 	{
4505 	  tree type = integer_type_for_mask (var, vinfo);
4506 	  tree cst0, cst1, new_vectype;
4507 
4508 	  if (!type)
4509 	    return NULL;
4510 
4511 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4512 	    type = TREE_TYPE (vectype);
4513 
4514 	  cst0 = build_int_cst (type, 0);
4515 	  cst1 = build_int_cst (type, 1);
4516 	  new_vectype = get_vectype_for_scalar_type (vinfo, type);
4517 
4518 	  rhs = vect_recog_temp_ssa_var (type, NULL);
4519 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4520 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4521 	}
4522 
4523       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4524       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4525 	{
4526 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4527 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4528 	  append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4529 	  rhs = rhs2;
4530 	}
4531       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4532       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4533       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4534       *type_out = vectype;
4535       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4536 
4537       return pattern_stmt;
4538     }
4539   else
4540     return NULL;
4541 }
4542 
4543 
4544 /* A helper for vect_recog_mask_conversion_pattern.  Build
4545    conversion of MASK to a type suitable for masking VECTYPE.
4546    Built statement gets required vectype and is appended to
4547    a pattern sequence of STMT_VINFO.
4548 
4549    Return converted mask.  */
4550 
4551 static tree
build_mask_conversion(vec_info * vinfo,tree mask,tree vectype,stmt_vec_info stmt_vinfo)4552 build_mask_conversion (vec_info *vinfo,
4553 		       tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4554 {
4555   gimple *stmt;
4556   tree masktype, tmp;
4557 
4558   masktype = truth_type_for (vectype);
4559   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4560   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4561   append_pattern_def_seq (vinfo, stmt_vinfo,
4562 			  stmt, masktype, TREE_TYPE (vectype));
4563 
4564   return tmp;
4565 }
4566 
4567 
4568 /* Function vect_recog_mask_conversion_pattern
4569 
4570    Try to find statements which require boolean type
4571    converison.  Additional conversion statements are
4572    added to handle such cases.  For example:
4573 
4574    bool m_1, m_2, m_3;
4575    int i_4, i_5;
4576    double d_6, d_7;
4577    char c_1, c_2, c_3;
4578 
4579    S1   m_1 = i_4 > i_5;
4580    S2   m_2 = d_6 < d_7;
4581    S3   m_3 = m_1 & m_2;
4582    S4   c_1 = m_3 ? c_2 : c_3;
4583 
4584    Will be transformed into:
4585 
4586    S1   m_1 = i_4 > i_5;
4587    S2   m_2 = d_6 < d_7;
4588    S3'' m_2' = (_Bool[bitsize=32])m_2
4589    S3'  m_3' = m_1 & m_2';
4590    S4'' m_3'' = (_Bool[bitsize=8])m_3'
4591    S4'  c_1' = m_3'' ? c_2 : c_3;  */
4592 
4593 static gimple *
vect_recog_mask_conversion_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)4594 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4595 				    stmt_vec_info stmt_vinfo, tree *type_out)
4596 {
4597   gimple *last_stmt = stmt_vinfo->stmt;
4598   enum tree_code rhs_code;
4599   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4600   tree vectype1, vectype2;
4601   stmt_vec_info pattern_stmt_info;
4602   tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4603   tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4604 
4605   /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion.  */
4606   if (is_gimple_call (last_stmt)
4607       && gimple_call_internal_p (last_stmt))
4608     {
4609       gcall *pattern_stmt;
4610 
4611       internal_fn ifn = gimple_call_internal_fn (last_stmt);
4612       int mask_argno = internal_fn_mask_index (ifn);
4613       if (mask_argno < 0)
4614 	return NULL;
4615 
4616       bool store_p = internal_store_fn_p (ifn);
4617       if (store_p)
4618 	{
4619 	  int rhs_index = internal_fn_stored_value_index (ifn);
4620 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
4621 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4622 	}
4623       else
4624 	{
4625 	  lhs = gimple_call_lhs (last_stmt);
4626 	  if (!lhs)
4627 	    return NULL;
4628 	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4629 	}
4630 
4631       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4632       tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4633       if (!mask_arg_type)
4634 	return NULL;
4635       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4636 
4637       if (!vectype1 || !vectype2
4638 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4639 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4640 	return NULL;
4641 
4642       tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4643 
4644       auto_vec<tree, 8> args;
4645       unsigned int nargs = gimple_call_num_args (last_stmt);
4646       args.safe_grow (nargs, true);
4647       for (unsigned int i = 0; i < nargs; ++i)
4648 	args[i] = ((int) i == mask_argno
4649 		   ? tmp
4650 		   : gimple_call_arg (last_stmt, i));
4651       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4652 
4653       if (!store_p)
4654 	{
4655 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4656 	  gimple_call_set_lhs (pattern_stmt, lhs);
4657 	}
4658       gimple_call_set_nothrow (pattern_stmt, true);
4659 
4660       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4661       if (STMT_VINFO_DATA_REF (stmt_vinfo))
4662 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4663 
4664       *type_out = vectype1;
4665       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4666 
4667       return pattern_stmt;
4668     }
4669 
4670   if (!is_gimple_assign (last_stmt))
4671     return NULL;
4672 
4673   gimple *pattern_stmt;
4674   lhs = gimple_assign_lhs (last_stmt);
4675   rhs1 = gimple_assign_rhs1 (last_stmt);
4676   rhs_code = gimple_assign_rhs_code (last_stmt);
4677 
4678   /* Check for cond expression requiring mask conversion.  */
4679   if (rhs_code == COND_EXPR)
4680     {
4681       vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4682 
4683       if (TREE_CODE (rhs1) == SSA_NAME)
4684 	{
4685 	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
4686 	  if (!rhs1_type)
4687 	    return NULL;
4688 	}
4689       else if (COMPARISON_CLASS_P (rhs1))
4690 	{
4691 	  /* Check whether we're comparing scalar booleans and (if so)
4692 	     whether a better mask type exists than the mask associated
4693 	     with boolean-sized elements.  This avoids unnecessary packs
4694 	     and unpacks if the booleans are set from comparisons of
4695 	     wider types.  E.g. in:
4696 
4697 	       int x1, x2, x3, x4, y1, y1;
4698 	       ...
4699 	       bool b1 = (x1 == x2);
4700 	       bool b2 = (x3 == x4);
4701 	       ... = b1 == b2 ? y1 : y2;
4702 
4703 	     it is better for b1 and b2 to use the mask type associated
4704 	     with int elements rather bool (byte) elements.  */
4705 	  rhs1_op0 = TREE_OPERAND (rhs1, 0);
4706 	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
4707 	  if (!rhs1_op0 || !rhs1_op1)
4708 	    return NULL;
4709 	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4710 	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4711 
4712 	  if (!rhs1_op0_type)
4713 	    rhs1_type = TREE_TYPE (rhs1_op0);
4714 	  else if (!rhs1_op1_type)
4715 	    rhs1_type = TREE_TYPE (rhs1_op1);
4716 	  else if (TYPE_PRECISION (rhs1_op0_type)
4717 		   != TYPE_PRECISION (rhs1_op1_type))
4718 	    {
4719 	      int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4720 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4721 	      int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4722 			 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4723 	      if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4724 		{
4725 		  if (abs (tmp0) > abs (tmp1))
4726 		    rhs1_type = rhs1_op1_type;
4727 		  else
4728 		    rhs1_type = rhs1_op0_type;
4729 		}
4730 	      else
4731 		rhs1_type = build_nonstandard_integer_type
4732 		  (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4733 	    }
4734 	  else
4735 	    rhs1_type = rhs1_op0_type;
4736 	}
4737       else
4738 	return NULL;
4739 
4740       vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4741 
4742       if (!vectype1 || !vectype2)
4743 	return NULL;
4744 
4745       /* Continue if a conversion is needed.  Also continue if we have
4746 	 a comparison whose vector type would normally be different from
4747 	 VECTYPE2 when considered in isolation.  In that case we'll
4748 	 replace the comparison with an SSA name (so that we can record
4749 	 its vector type) and behave as though the comparison was an SSA
4750 	 name from the outset.  */
4751       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4752 		    TYPE_VECTOR_SUBPARTS (vectype2))
4753 	  && !rhs1_op0_type
4754 	  && !rhs1_op1_type)
4755 	return NULL;
4756 
4757       /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4758          in place, we can handle it in vectorizable_condition.  This avoids
4759 	 unnecessary promotion stmts and increased vectorization factor.  */
4760       if (COMPARISON_CLASS_P (rhs1)
4761 	  && INTEGRAL_TYPE_P (rhs1_type)
4762 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4763 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4764 	{
4765 	  enum vect_def_type dt;
4766 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4767 	      && dt == vect_external_def
4768 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4769 	      && (dt == vect_external_def
4770 		  || dt == vect_constant_def))
4771 	    {
4772 	      tree wide_scalar_type = build_nonstandard_integer_type
4773 		(vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4774 	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
4775 							   wide_scalar_type);
4776 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4777 		return NULL;
4778 	    }
4779 	}
4780 
4781       /* If rhs1 is a comparison we need to move it into a
4782 	 separate statement.  */
4783       if (TREE_CODE (rhs1) != SSA_NAME)
4784 	{
4785 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4786 	  if (rhs1_op0_type
4787 	      && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4788 	    rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4789 					      vectype2, stmt_vinfo);
4790 	  if (rhs1_op1_type
4791 	      && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4792 	    rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4793 				      vectype2, stmt_vinfo);
4794 	  pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4795 					      rhs1_op0, rhs1_op1);
4796 	  rhs1 = tmp;
4797 	  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4798 				  rhs1_type);
4799 	}
4800 
4801       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4802 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4803 	tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4804       else
4805 	tmp = rhs1;
4806 
4807       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4808       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4809 					  gimple_assign_rhs2 (last_stmt),
4810 					  gimple_assign_rhs3 (last_stmt));
4811 
4812       *type_out = vectype1;
4813       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4814 
4815       return pattern_stmt;
4816     }
4817 
4818   /* Now check for binary boolean operations requiring conversion for
4819      one of operands.  */
4820   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4821     return NULL;
4822 
4823   if (rhs_code != BIT_IOR_EXPR
4824       && rhs_code != BIT_XOR_EXPR
4825       && rhs_code != BIT_AND_EXPR
4826       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4827     return NULL;
4828 
4829   rhs2 = gimple_assign_rhs2 (last_stmt);
4830 
4831   rhs1_type = integer_type_for_mask (rhs1, vinfo);
4832   rhs2_type = integer_type_for_mask (rhs2, vinfo);
4833 
4834   if (!rhs1_type || !rhs2_type
4835       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4836     return NULL;
4837 
4838   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4839     {
4840       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4841       if (!vectype1)
4842 	return NULL;
4843       rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4844     }
4845   else
4846     {
4847       vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4848       if (!vectype1)
4849 	return NULL;
4850       rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4851     }
4852 
4853   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4854   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4855 
4856   *type_out = vectype1;
4857   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4858 
4859   return pattern_stmt;
4860 }
4861 
4862 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4863    the boolean condition under which it occurs, otherwise return null.  */
4864 
4865 static tree
vect_get_load_store_mask(stmt_vec_info stmt_info)4866 vect_get_load_store_mask (stmt_vec_info stmt_info)
4867 {
4868   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4869     {
4870       gcc_assert (gimple_assign_single_p (def_assign));
4871       return NULL_TREE;
4872     }
4873 
4874   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4875     {
4876       internal_fn ifn = gimple_call_internal_fn (def_call);
4877       int mask_index = internal_fn_mask_index (ifn);
4878       return gimple_call_arg (def_call, mask_index);
4879     }
4880 
4881   gcc_unreachable ();
4882 }
4883 
4884 /* Return MASK if MASK is suitable for masking an operation on vectors
4885    of type VECTYPE, otherwise convert it into such a form and return
4886    the result.  Associate any conversion statements with STMT_INFO's
4887    pattern.  */
4888 
4889 static tree
vect_convert_mask_for_vectype(tree mask,tree vectype,stmt_vec_info stmt_info,vec_info * vinfo)4890 vect_convert_mask_for_vectype (tree mask, tree vectype,
4891 			       stmt_vec_info stmt_info, vec_info *vinfo)
4892 {
4893   tree mask_type = integer_type_for_mask (mask, vinfo);
4894   if (mask_type)
4895     {
4896       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4897       if (mask_vectype
4898 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4899 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4900 	mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4901     }
4902   return mask;
4903 }
4904 
4905 /* Return the equivalent of:
4906 
4907      fold_convert (TYPE, VALUE)
4908 
4909    with the expectation that the operation will be vectorized.
4910    If new statements are needed, add them as pattern statements
4911    to STMT_INFO.  */
4912 
4913 static tree
vect_add_conversion_to_pattern(vec_info * vinfo,tree type,tree value,stmt_vec_info stmt_info)4914 vect_add_conversion_to_pattern (vec_info *vinfo,
4915 				tree type, tree value, stmt_vec_info stmt_info)
4916 {
4917   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4918     return value;
4919 
4920   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4921   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4922   append_pattern_def_seq (vinfo, stmt_info, conversion,
4923 			  get_vectype_for_scalar_type (vinfo, type));
4924   return new_value;
4925 }
4926 
4927 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4928    internal function.  Return the final statement on success and set
4929    *TYPE_OUT to the vector type being loaded or stored.
4930 
4931    This function only handles gathers and scatters that were recognized
4932    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
4933 
4934 static gimple *
vect_recog_gather_scatter_pattern(vec_info * vinfo,stmt_vec_info stmt_info,tree * type_out)4935 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4936 				   stmt_vec_info stmt_info, tree *type_out)
4937 {
4938   /* Currently we only support this for loop vectorization.  */
4939   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4940   if (!loop_vinfo)
4941     return NULL;
4942 
4943   /* Make sure that we're looking at a gather load or scatter store.  */
4944   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4945   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4946     return NULL;
4947 
4948   /* Get the boolean that controls whether the load or store happens.
4949      This is null if the operation is unconditional.  */
4950   tree mask = vect_get_load_store_mask (stmt_info);
4951 
4952   /* Make sure that the target supports an appropriate internal
4953      function for the gather/scatter operation.  */
4954   gather_scatter_info gs_info;
4955   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4956       || gs_info.ifn == IFN_LAST)
4957     return NULL;
4958 
4959   /* Convert the mask to the right form.  */
4960   tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4961 						 gs_info.element_type);
4962   if (mask)
4963     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4964 					  loop_vinfo);
4965   else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4966 	   || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4967     mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4968 
4969   /* Get the invariant base and non-invariant offset, converting the
4970      latter to the same width as the vector elements.  */
4971   tree base = gs_info.base;
4972   tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4973   tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4974 						gs_info.offset, stmt_info);
4975 
4976   /* Build the new pattern statement.  */
4977   tree scale = size_int (gs_info.scale);
4978   gcall *pattern_stmt;
4979   if (DR_IS_READ (dr))
4980     {
4981       tree zero = build_zero_cst (gs_info.element_type);
4982       if (mask != NULL)
4983 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4984 						   offset, scale, zero, mask);
4985       else
4986 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4987 						   offset, scale, zero);
4988       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4989       gimple_call_set_lhs (pattern_stmt, load_lhs);
4990     }
4991   else
4992     {
4993       tree rhs = vect_get_store_rhs (stmt_info);
4994       if (mask != NULL)
4995 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
4996 						   base, offset, scale, rhs,
4997 						   mask);
4998       else
4999 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5000 						   base, offset, scale, rhs);
5001     }
5002   gimple_call_set_nothrow (pattern_stmt, true);
5003 
5004   /* Copy across relevant vectorization info and associate DR with the
5005      new pattern statement instead of the original statement.  */
5006   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5007   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5008 
5009   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5010   *type_out = vectype;
5011   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5012 
5013   return pattern_stmt;
5014 }
5015 
5016 /* Return true if TYPE is a non-boolean integer type.  These are the types
5017    that we want to consider for narrowing.  */
5018 
5019 static bool
vect_narrowable_type_p(tree type)5020 vect_narrowable_type_p (tree type)
5021 {
5022   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5023 }
5024 
5025 /* Return true if the operation given by CODE can be truncated to N bits
5026    when only N bits of the output are needed.  This is only true if bit N+1
5027    of the inputs has no effect on the low N bits of the result.  */
5028 
5029 static bool
vect_truncatable_operation_p(tree_code code)5030 vect_truncatable_operation_p (tree_code code)
5031 {
5032   switch (code)
5033     {
5034     case NEGATE_EXPR:
5035     case PLUS_EXPR:
5036     case MINUS_EXPR:
5037     case MULT_EXPR:
5038     case BIT_NOT_EXPR:
5039     case BIT_AND_EXPR:
5040     case BIT_IOR_EXPR:
5041     case BIT_XOR_EXPR:
5042     case COND_EXPR:
5043       return true;
5044 
5045     default:
5046       return false;
5047     }
5048 }
5049 
5050 /* Record that STMT_INFO could be changed from operating on TYPE to
5051    operating on a type with the precision and sign given by PRECISION
5052    and SIGN respectively.  PRECISION is an arbitrary bit precision;
5053    it might not be a whole number of bytes.  */
5054 
5055 static void
vect_set_operation_type(stmt_vec_info stmt_info,tree type,unsigned int precision,signop sign)5056 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5057 			 unsigned int precision, signop sign)
5058 {
5059   /* Round the precision up to a whole number of bytes.  */
5060   precision = vect_element_precision (precision);
5061   if (precision < TYPE_PRECISION (type)
5062       && (!stmt_info->operation_precision
5063 	  || stmt_info->operation_precision > precision))
5064     {
5065       stmt_info->operation_precision = precision;
5066       stmt_info->operation_sign = sign;
5067     }
5068 }
5069 
5070 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5071    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
5072    is an arbitrary bit precision; it might not be a whole number of bytes.  */
5073 
5074 static void
vect_set_min_input_precision(stmt_vec_info stmt_info,tree type,unsigned int min_input_precision)5075 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5076 			      unsigned int min_input_precision)
5077 {
5078   /* This operation in isolation only requires the inputs to have
5079      MIN_INPUT_PRECISION of precision,  However, that doesn't mean
5080      that MIN_INPUT_PRECISION is a natural precision for the chain
5081      as a whole.  E.g. consider something like:
5082 
5083 	 unsigned short *x, *y;
5084 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5085 
5086      The right shift can be done on unsigned chars, and only requires the
5087      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
5088      approach would mean turning a natural chain of single-vector unsigned
5089      short operations into one that truncates "*x" and then extends
5090      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5091      operation and one vector for each unsigned char operation.
5092      This would be a significant pessimization.
5093 
5094      Instead only propagate the maximum of this precision and the precision
5095      required by the users of the result.  This means that we don't pessimize
5096      the case above but continue to optimize things like:
5097 
5098 	 unsigned char *y;
5099 	 unsigned short *x;
5100 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5101 
5102      Here we would truncate two vectors of *x to a single vector of
5103      unsigned chars and use single-vector unsigned char operations for
5104      everything else, rather than doing two unsigned short copies of
5105      "(*x & 0xf0) >> 4" and then truncating the result.  */
5106   min_input_precision = MAX (min_input_precision,
5107 			     stmt_info->min_output_precision);
5108 
5109   if (min_input_precision < TYPE_PRECISION (type)
5110       && (!stmt_info->min_input_precision
5111 	  || stmt_info->min_input_precision > min_input_precision))
5112     stmt_info->min_input_precision = min_input_precision;
5113 }
5114 
5115 /* Subroutine of vect_determine_min_output_precision.  Return true if
5116    we can calculate a reduced number of output bits for STMT_INFO,
5117    whose result is LHS.  */
5118 
5119 static bool
vect_determine_min_output_precision_1(vec_info * vinfo,stmt_vec_info stmt_info,tree lhs)5120 vect_determine_min_output_precision_1 (vec_info *vinfo,
5121 				       stmt_vec_info stmt_info, tree lhs)
5122 {
5123   /* Take the maximum precision required by users of the result.  */
5124   unsigned int precision = 0;
5125   imm_use_iterator iter;
5126   use_operand_p use;
5127   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5128     {
5129       gimple *use_stmt = USE_STMT (use);
5130       if (is_gimple_debug (use_stmt))
5131 	continue;
5132       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5133       if (!use_stmt_info || !use_stmt_info->min_input_precision)
5134 	return false;
5135       /* The input precision recorded for COND_EXPRs applies only to the
5136 	 "then" and "else" values.  */
5137       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5138       if (assign
5139 	  && gimple_assign_rhs_code (assign) == COND_EXPR
5140 	  && use->use != gimple_assign_rhs2_ptr (assign)
5141 	  && use->use != gimple_assign_rhs3_ptr (assign))
5142 	return false;
5143       precision = MAX (precision, use_stmt_info->min_input_precision);
5144     }
5145 
5146   if (dump_enabled_p ())
5147     dump_printf_loc (MSG_NOTE, vect_location,
5148 		     "only the low %d bits of %T are significant\n",
5149 		     precision, lhs);
5150   stmt_info->min_output_precision = precision;
5151   return true;
5152 }
5153 
5154 /* Calculate min_output_precision for STMT_INFO.  */
5155 
5156 static void
vect_determine_min_output_precision(vec_info * vinfo,stmt_vec_info stmt_info)5157 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5158 {
5159   /* We're only interested in statements with a narrowable result.  */
5160   tree lhs = gimple_get_lhs (stmt_info->stmt);
5161   if (!lhs
5162       || TREE_CODE (lhs) != SSA_NAME
5163       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5164     return;
5165 
5166   if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5167     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5168 }
5169 
5170 /* Use range information to decide whether STMT (described by STMT_INFO)
5171    could be done in a narrower type.  This is effectively a forward
5172    propagation, since it uses context-independent information that applies
5173    to all users of an SSA name.  */
5174 
5175 static void
vect_determine_precisions_from_range(stmt_vec_info stmt_info,gassign * stmt)5176 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5177 {
5178   tree lhs = gimple_assign_lhs (stmt);
5179   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5180     return;
5181 
5182   tree type = TREE_TYPE (lhs);
5183   if (!vect_narrowable_type_p (type))
5184     return;
5185 
5186   /* First see whether we have any useful range information for the result.  */
5187   unsigned int precision = TYPE_PRECISION (type);
5188   signop sign = TYPE_SIGN (type);
5189   wide_int min_value, max_value;
5190   if (!vect_get_range_info (lhs, &min_value, &max_value))
5191     return;
5192 
5193   tree_code code = gimple_assign_rhs_code (stmt);
5194   unsigned int nops = gimple_num_ops (stmt);
5195 
5196   if (!vect_truncatable_operation_p (code))
5197     {
5198       /* Handle operations that can be computed in type T if all inputs
5199 	 and outputs can be represented in type T.  Also handle left and
5200 	 right shifts, where (in addition) the maximum shift amount must
5201 	 be less than the number of bits in T.  */
5202       bool is_shift;
5203       switch (code)
5204 	{
5205 	case LSHIFT_EXPR:
5206 	case RSHIFT_EXPR:
5207 	  is_shift = true;
5208 	  break;
5209 
5210 	case ABS_EXPR:
5211 	case MIN_EXPR:
5212 	case MAX_EXPR:
5213 	case TRUNC_DIV_EXPR:
5214 	case CEIL_DIV_EXPR:
5215 	case FLOOR_DIV_EXPR:
5216 	case ROUND_DIV_EXPR:
5217 	case EXACT_DIV_EXPR:
5218 	  /* Modulus is excluded because it is typically calculated by doing
5219 	     a division, for which minimum signed / -1 isn't representable in
5220 	     the original signed type.  We could take the division range into
5221 	     account instead, if handling modulus ever becomes important.  */
5222 	  is_shift = false;
5223 	  break;
5224 
5225 	default:
5226 	  return;
5227 	}
5228       for (unsigned int i = 1; i < nops; ++i)
5229 	{
5230 	  tree op = gimple_op (stmt, i);
5231 	  wide_int op_min_value, op_max_value;
5232 	  if (TREE_CODE (op) == INTEGER_CST)
5233 	    {
5234 	      unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
5235 	      op_min_value = op_max_value = wi::to_wide (op, op_precision);
5236 	    }
5237 	  else if (TREE_CODE (op) == SSA_NAME)
5238 	    {
5239 	      if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5240 		return;
5241 	    }
5242 	  else
5243 	    return;
5244 
5245 	  if (is_shift && i == 2)
5246 	    {
5247 	      /* There needs to be one more bit than the maximum shift amount.
5248 
5249 		 If the maximum shift amount is already 1 less than PRECISION
5250 		 then we can't narrow the shift further.  Dealing with that
5251 		 case first ensures that we can safely use an unsigned range
5252 		 below.
5253 
5254 		 op_min_value isn't relevant, since shifts by negative amounts
5255 		 are UB.  */
5256 	      if (wi::geu_p (op_max_value, precision - 1))
5257 		return;
5258 	      unsigned int min_bits = op_max_value.to_uhwi () + 1;
5259 
5260 	      /* As explained below, we can convert a signed shift into an
5261 		 unsigned shift if the sign bit is always clear.  At this
5262 		 point we've already processed the ranges of the output and
5263 		 the first input.  */
5264 	      auto op_sign = sign;
5265 	      if (sign == SIGNED && !wi::neg_p (min_value))
5266 		op_sign = UNSIGNED;
5267 	      op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
5268 					     precision, op_sign);
5269 	      op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
5270 					     precision, op_sign);
5271 	    }
5272 	  min_value = wi::min (min_value, op_min_value, sign);
5273 	  max_value = wi::max (max_value, op_max_value, sign);
5274 	}
5275     }
5276 
5277   /* Try to switch signed types for unsigned types if we can.
5278      This is better for two reasons.  First, unsigned ops tend
5279      to be cheaper than signed ops.  Second, it means that we can
5280      handle things like:
5281 
5282 	signed char c;
5283 	int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5284 
5285      as:
5286 
5287 	signed char c;
5288 	unsigned short res_1 = (unsigned short) c & 0xff00;
5289 	int res = (int) res_1;
5290 
5291      where the intermediate result res_1 has unsigned rather than
5292      signed type.  */
5293   if (sign == SIGNED && !wi::neg_p (min_value))
5294     sign = UNSIGNED;
5295 
5296   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
5297   unsigned int precision1 = wi::min_precision (min_value, sign);
5298   unsigned int precision2 = wi::min_precision (max_value, sign);
5299   unsigned int value_precision = MAX (precision1, precision2);
5300   if (value_precision >= precision)
5301     return;
5302 
5303   if (dump_enabled_p ())
5304     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5305 		     " without loss of precision: %G",
5306 		     sign == SIGNED ? "signed" : "unsigned",
5307 		     value_precision, stmt);
5308 
5309   vect_set_operation_type (stmt_info, type, value_precision, sign);
5310   vect_set_min_input_precision (stmt_info, type, value_precision);
5311 }
5312 
5313 /* Use information about the users of STMT's result to decide whether
5314    STMT (described by STMT_INFO) could be done in a narrower type.
5315    This is effectively a backward propagation.  */
5316 
5317 static void
vect_determine_precisions_from_users(stmt_vec_info stmt_info,gassign * stmt)5318 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5319 {
5320   tree_code code = gimple_assign_rhs_code (stmt);
5321   unsigned int opno = (code == COND_EXPR ? 2 : 1);
5322   tree type = TREE_TYPE (gimple_op (stmt, opno));
5323   if (!vect_narrowable_type_p (type))
5324     return;
5325 
5326   unsigned int precision = TYPE_PRECISION (type);
5327   unsigned int operation_precision, min_input_precision;
5328   switch (code)
5329     {
5330     CASE_CONVERT:
5331       /* Only the bits that contribute to the output matter.  Don't change
5332 	 the precision of the operation itself.  */
5333       operation_precision = precision;
5334       min_input_precision = stmt_info->min_output_precision;
5335       break;
5336 
5337     case LSHIFT_EXPR:
5338     case RSHIFT_EXPR:
5339       {
5340 	tree shift = gimple_assign_rhs2 (stmt);
5341 	if (TREE_CODE (shift) != INTEGER_CST
5342 	    || !wi::ltu_p (wi::to_widest (shift), precision))
5343 	  return;
5344 	unsigned int const_shift = TREE_INT_CST_LOW (shift);
5345 	if (code == LSHIFT_EXPR)
5346 	  {
5347 	    /* Avoid creating an undefined shift.
5348 
5349 	       ??? We could instead use min_output_precision as-is and
5350 	       optimize out-of-range shifts to zero.  However, only
5351 	       degenerate testcases shift away all their useful input data,
5352 	       and it isn't natural to drop input operations in the middle
5353 	       of vectorization.  This sort of thing should really be
5354 	       handled before vectorization.  */
5355 	    operation_precision = MAX (stmt_info->min_output_precision,
5356 				       const_shift + 1);
5357 	    /* We need CONST_SHIFT fewer bits of the input.  */
5358 	    min_input_precision = (MAX (operation_precision, const_shift)
5359 				   - const_shift);
5360 	  }
5361 	else
5362 	  {
5363 	    /* We need CONST_SHIFT extra bits to do the operation.  */
5364 	    operation_precision = (stmt_info->min_output_precision
5365 				   + const_shift);
5366 	    min_input_precision = operation_precision;
5367 	  }
5368 	break;
5369       }
5370 
5371     default:
5372       if (vect_truncatable_operation_p (code))
5373 	{
5374 	  /* Input bit N has no effect on output bits N-1 and lower.  */
5375 	  operation_precision = stmt_info->min_output_precision;
5376 	  min_input_precision = operation_precision;
5377 	  break;
5378 	}
5379       return;
5380     }
5381 
5382   if (operation_precision < precision)
5383     {
5384       if (dump_enabled_p ())
5385 	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5386 			 " without affecting users: %G",
5387 			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5388 			 operation_precision, stmt);
5389       vect_set_operation_type (stmt_info, type, operation_precision,
5390 			       TYPE_SIGN (type));
5391     }
5392   vect_set_min_input_precision (stmt_info, type, min_input_precision);
5393 }
5394 
5395 /* Return true if the statement described by STMT_INFO sets a boolean
5396    SSA_NAME and if we know how to vectorize this kind of statement using
5397    vector mask types.  */
5398 
5399 static bool
possible_vector_mask_operation_p(stmt_vec_info stmt_info)5400 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5401 {
5402   tree lhs = gimple_get_lhs (stmt_info->stmt);
5403   if (!lhs
5404       || TREE_CODE (lhs) != SSA_NAME
5405       || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5406     return false;
5407 
5408   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5409     {
5410       tree_code rhs_code = gimple_assign_rhs_code (assign);
5411       switch (rhs_code)
5412 	{
5413 	CASE_CONVERT:
5414 	case SSA_NAME:
5415 	case BIT_NOT_EXPR:
5416 	case BIT_IOR_EXPR:
5417 	case BIT_XOR_EXPR:
5418 	case BIT_AND_EXPR:
5419 	  return true;
5420 
5421 	default:
5422 	  return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5423 	}
5424     }
5425   else if (is_a <gphi *> (stmt_info->stmt))
5426     return true;
5427   return false;
5428 }
5429 
5430 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5431    a vector mask type instead of a normal vector type.  Record the
5432    result in STMT_INFO->mask_precision.  */
5433 
5434 static void
vect_determine_mask_precision(vec_info * vinfo,stmt_vec_info stmt_info)5435 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5436 {
5437   if (!possible_vector_mask_operation_p (stmt_info))
5438     return;
5439 
5440   /* If at least one boolean input uses a vector mask type,
5441      pick the mask type with the narrowest elements.
5442 
5443      ??? This is the traditional behavior.  It should always produce
5444      the smallest number of operations, but isn't necessarily the
5445      optimal choice.  For example, if we have:
5446 
5447        a = b & c
5448 
5449      where:
5450 
5451        - the user of a wants it to have a mask type for 16-bit elements (M16)
5452        - b also uses M16
5453        - c uses a mask type for 8-bit elements (M8)
5454 
5455      then picking M8 gives:
5456 
5457        - 1 M16->M8 pack for b
5458        - 1 M8 AND for a
5459        - 2 M8->M16 unpacks for the user of a
5460 
5461      whereas picking M16 would have given:
5462 
5463        - 2 M8->M16 unpacks for c
5464        - 2 M16 ANDs for a
5465 
5466      The number of operations are equal, but M16 would have given
5467      a shorter dependency chain and allowed more ILP.  */
5468   unsigned int precision = ~0U;
5469   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5470     {
5471       unsigned int nops = gimple_num_ops (assign);
5472       for (unsigned int i = 1; i < nops; ++i)
5473 	{
5474 	  tree rhs = gimple_op (assign, i);
5475 	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5476 	    continue;
5477 
5478 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5479 	  if (!def_stmt_info)
5480 	    /* Don't let external or constant operands influence the choice.
5481 	       We can convert them to whichever vector type we pick.  */
5482 	    continue;
5483 
5484 	  if (def_stmt_info->mask_precision)
5485 	    {
5486 	      if (precision > def_stmt_info->mask_precision)
5487 		precision = def_stmt_info->mask_precision;
5488 	    }
5489 	}
5490 
5491       /* If the statement compares two values that shouldn't use vector masks,
5492 	 try comparing the values as normal scalars instead.  */
5493       tree_code rhs_code = gimple_assign_rhs_code (assign);
5494       if (precision == ~0U
5495 	  && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5496 	{
5497 	  tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5498 	  scalar_mode mode;
5499 	  tree vectype, mask_type;
5500 	  if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5501 	      && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5502 	      && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5503 	      && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5504 	    precision = GET_MODE_BITSIZE (mode);
5505 	}
5506     }
5507   else
5508     {
5509       gphi *phi = as_a <gphi *> (stmt_info->stmt);
5510       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5511 	{
5512 	  tree rhs = gimple_phi_arg_def (phi, i);
5513 
5514 	  stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5515 	  if (!def_stmt_info)
5516 	    /* Don't let external or constant operands influence the choice.
5517 	       We can convert them to whichever vector type we pick.  */
5518 	    continue;
5519 
5520 	  if (def_stmt_info->mask_precision)
5521 	    {
5522 	      if (precision > def_stmt_info->mask_precision)
5523 		precision = def_stmt_info->mask_precision;
5524 	    }
5525 	}
5526     }
5527 
5528   if (dump_enabled_p ())
5529     {
5530       if (precision == ~0U)
5531 	dump_printf_loc (MSG_NOTE, vect_location,
5532 			 "using normal nonmask vectors for %G",
5533 			 stmt_info->stmt);
5534       else
5535 	dump_printf_loc (MSG_NOTE, vect_location,
5536 			 "using boolean precision %d for %G",
5537 			 precision, stmt_info->stmt);
5538     }
5539 
5540   stmt_info->mask_precision = precision;
5541 }
5542 
5543 /* Handle vect_determine_precisions for STMT_INFO, given that we
5544    have already done so for the users of its result.  */
5545 
5546 void
vect_determine_stmt_precisions(vec_info * vinfo,stmt_vec_info stmt_info)5547 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5548 {
5549   vect_determine_min_output_precision (vinfo, stmt_info);
5550   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5551     {
5552       vect_determine_precisions_from_range (stmt_info, stmt);
5553       vect_determine_precisions_from_users (stmt_info, stmt);
5554     }
5555 }
5556 
5557 /* Walk backwards through the vectorizable region to determine the
5558    values of these fields:
5559 
5560    - min_output_precision
5561    - min_input_precision
5562    - operation_precision
5563    - operation_sign.  */
5564 
5565 void
vect_determine_precisions(vec_info * vinfo)5566 vect_determine_precisions (vec_info *vinfo)
5567 {
5568   DUMP_VECT_SCOPE ("vect_determine_precisions");
5569 
5570   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5571     {
5572       class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5573       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5574       unsigned int nbbs = loop->num_nodes;
5575 
5576       for (unsigned int i = 0; i < nbbs; i++)
5577 	{
5578 	  basic_block bb = bbs[i];
5579 	  for (auto gsi = gsi_start_phis (bb);
5580 	       !gsi_end_p (gsi); gsi_next (&gsi))
5581 	    {
5582 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5583 	      if (stmt_info)
5584 		vect_determine_mask_precision (vinfo, stmt_info);
5585 	    }
5586 	  for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5587 	    if (!is_gimple_debug (gsi_stmt (si)))
5588 	      vect_determine_mask_precision
5589 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5590 	}
5591       for (unsigned int i = 0; i < nbbs; i++)
5592 	{
5593 	  basic_block bb = bbs[nbbs - i - 1];
5594 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
5595 	       !gsi_end_p (si); gsi_prev (&si))
5596 	    if (!is_gimple_debug (gsi_stmt (si)))
5597 	      vect_determine_stmt_precisions
5598 		(vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5599 	  for (auto gsi = gsi_start_phis (bb);
5600 	       !gsi_end_p (gsi); gsi_next (&gsi))
5601 	    {
5602 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5603 	      if (stmt_info)
5604 		vect_determine_stmt_precisions (vinfo, stmt_info);
5605 	    }
5606 	}
5607     }
5608   else
5609     {
5610       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5611       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5612 	{
5613 	  basic_block bb = bb_vinfo->bbs[i];
5614 	  for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5615 	    {
5616 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5617 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5618 		vect_determine_mask_precision (vinfo, stmt_info);
5619 	    }
5620 	  for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5621 	    {
5622 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5623 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5624 		vect_determine_mask_precision (vinfo, stmt_info);
5625 	    }
5626 	}
5627       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5628 	{
5629 	  for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5630 	       !gsi_end_p (gsi); gsi_prev (&gsi))
5631 	    {
5632 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5633 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5634 		vect_determine_stmt_precisions (vinfo, stmt_info);
5635 	    }
5636 	  for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5637 	       !gsi_end_p (gsi); gsi_next (&gsi))
5638 	    {
5639 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5640 	      if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5641 		vect_determine_stmt_precisions (vinfo, stmt_info);
5642 	    }
5643 	}
5644     }
5645 }
5646 
5647 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5648 
5649 struct vect_recog_func
5650 {
5651   vect_recog_func_ptr fn;
5652   const char *name;
5653 };
5654 
5655 /* Note that ordering matters - the first pattern matching on a stmt is
5656    taken which means usually the more complex one needs to preceed the
5657    less comples onex (widen_sum only after dot_prod or sad for example).  */
5658 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5659   { vect_recog_over_widening_pattern, "over_widening" },
5660   /* Must come after over_widening, which narrows the shift as much as
5661      possible beforehand.  */
5662   { vect_recog_average_pattern, "average" },
5663   { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5664   { vect_recog_mulhs_pattern, "mult_high" },
5665   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5666   { vect_recog_widen_mult_pattern, "widen_mult" },
5667   { vect_recog_dot_prod_pattern, "dot_prod" },
5668   { vect_recog_sad_pattern, "sad" },
5669   { vect_recog_widen_sum_pattern, "widen_sum" },
5670   { vect_recog_pow_pattern, "pow" },
5671   { vect_recog_popcount_pattern, "popcount" },
5672   { vect_recog_widen_shift_pattern, "widen_shift" },
5673   { vect_recog_rotate_pattern, "rotate" },
5674   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5675   { vect_recog_divmod_pattern, "divmod" },
5676   { vect_recog_mult_pattern, "mult" },
5677   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5678   { vect_recog_bool_pattern, "bool" },
5679   /* This must come before mask conversion, and includes the parts
5680      of mask conversion that are needed for gather and scatter
5681      internal functions.  */
5682   { vect_recog_gather_scatter_pattern, "gather_scatter" },
5683   { vect_recog_mask_conversion_pattern, "mask_conversion" },
5684   { vect_recog_widen_plus_pattern, "widen_plus" },
5685   { vect_recog_widen_minus_pattern, "widen_minus" },
5686 };
5687 
5688 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
5689 
5690 /* Mark statements that are involved in a pattern.  */
5691 
5692 void
vect_mark_pattern_stmts(vec_info * vinfo,stmt_vec_info orig_stmt_info,gimple * pattern_stmt,tree pattern_vectype)5693 vect_mark_pattern_stmts (vec_info *vinfo,
5694 			 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5695                          tree pattern_vectype)
5696 {
5697   stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5698   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5699 
5700   gimple *orig_pattern_stmt = NULL;
5701   if (is_pattern_stmt_p (orig_stmt_info))
5702     {
5703       /* We're replacing a statement in an existing pattern definition
5704 	 sequence.  */
5705       orig_pattern_stmt = orig_stmt_info->stmt;
5706       if (dump_enabled_p ())
5707 	dump_printf_loc (MSG_NOTE, vect_location,
5708 			 "replacing earlier pattern %G", orig_pattern_stmt);
5709 
5710       /* To keep the book-keeping simple, just swap the lhs of the
5711 	 old and new statements, so that the old one has a valid but
5712 	 unused lhs.  */
5713       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5714       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5715       gimple_set_lhs (pattern_stmt, old_lhs);
5716 
5717       if (dump_enabled_p ())
5718 	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5719 
5720       /* Switch to the statement that ORIG replaces.  */
5721       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5722 
5723       /* We shouldn't be replacing the main pattern statement.  */
5724       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5725 		  != orig_pattern_stmt);
5726     }
5727 
5728   if (def_seq)
5729     for (gimple_stmt_iterator si = gsi_start (def_seq);
5730 	 !gsi_end_p (si); gsi_next (&si))
5731       {
5732 	if (dump_enabled_p ())
5733 	  dump_printf_loc (MSG_NOTE, vect_location,
5734 			   "extra pattern stmt: %G", gsi_stmt (si));
5735 	stmt_vec_info pattern_stmt_info
5736 	  = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5737 				    orig_stmt_info, pattern_vectype);
5738 	/* Stmts in the def sequence are not vectorizable cycle or
5739 	   induction defs, instead they should all be vect_internal_def
5740 	   feeding the main pattern stmt which retains this def type.  */
5741 	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5742       }
5743 
5744   if (orig_pattern_stmt)
5745     {
5746       vect_init_pattern_stmt (vinfo, pattern_stmt,
5747 			      orig_stmt_info, pattern_vectype);
5748 
5749       /* Insert all the new pattern statements before the original one.  */
5750       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5751       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5752 					       orig_def_seq);
5753       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5754       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5755 
5756       /* Remove the pattern statement that this new pattern replaces.  */
5757       gsi_remove (&gsi, false);
5758     }
5759   else
5760     vect_set_pattern_stmt (vinfo,
5761 			   pattern_stmt, orig_stmt_info, pattern_vectype);
5762 
5763   /* Transfer reduction path info to the pattern.  */
5764   if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5765     {
5766       gimple_match_op op;
5767       if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
5768 	gcc_unreachable ();
5769       tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
5770       /* Search the pattern def sequence and the main pattern stmt.  Note
5771          we may have inserted all into a containing pattern def sequence
5772 	 so the following is a bit awkward.  */
5773       gimple_stmt_iterator si;
5774       gimple *s;
5775       if (def_seq)
5776 	{
5777 	  si = gsi_start (def_seq);
5778 	  s = gsi_stmt (si);
5779 	  gsi_next (&si);
5780 	}
5781       else
5782 	{
5783 	  si = gsi_none ();
5784 	  s = pattern_stmt;
5785 	}
5786       do
5787 	{
5788 	  bool found = false;
5789 	  if (gimple_extract_op (s, &op))
5790 	    for (unsigned i = 0; i < op.num_ops; ++i)
5791 	      if (op.ops[i] == lookfor)
5792 		{
5793 		  STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
5794 		  lookfor = gimple_get_lhs (s);
5795 		  found = true;
5796 		  break;
5797 		}
5798 	  if (s == pattern_stmt)
5799 	    {
5800 	      if (!found && dump_enabled_p ())
5801 		dump_printf_loc (MSG_NOTE, vect_location,
5802 				 "failed to update reduction index.\n");
5803 	      break;
5804 	    }
5805 	  if (gsi_end_p (si))
5806 	    s = pattern_stmt;
5807 	  else
5808 	    {
5809 	      s = gsi_stmt (si);
5810 	      if (s == pattern_stmt)
5811 		/* Found the end inside a bigger pattern def seq.  */
5812 		si = gsi_none ();
5813 	      else
5814 		gsi_next (&si);
5815 	    }
5816 	} while (1);
5817     }
5818 }
5819 
5820 /* Function vect_pattern_recog_1
5821 
5822    Input:
5823    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5824         computation pattern.
5825    STMT_INFO: A stmt from which the pattern search should start.
5826 
5827    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5828    a sequence of statements that has the same functionality and can be
5829    used to replace STMT_INFO.  It returns the last statement in the sequence
5830    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5831    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5832    statement, having first checked that the target supports the new operation
5833    in that type.
5834 
5835    This function also does some bookkeeping, as explained in the documentation
5836    for vect_recog_pattern.  */
5837 
5838 static void
vect_pattern_recog_1(vec_info * vinfo,vect_recog_func * recog_func,stmt_vec_info stmt_info)5839 vect_pattern_recog_1 (vec_info *vinfo,
5840 		      vect_recog_func *recog_func, stmt_vec_info stmt_info)
5841 {
5842   gimple *pattern_stmt;
5843   loop_vec_info loop_vinfo;
5844   tree pattern_vectype;
5845 
5846   /* If this statement has already been replaced with pattern statements,
5847      leave the original statement alone, since the first match wins.
5848      Instead try to match against the definition statements that feed
5849      the main pattern statement.  */
5850   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5851     {
5852       gimple_stmt_iterator gsi;
5853       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5854 	   !gsi_end_p (gsi); gsi_next (&gsi))
5855 	vect_pattern_recog_1 (vinfo, recog_func,
5856 			      vinfo->lookup_stmt (gsi_stmt (gsi)));
5857       return;
5858     }
5859 
5860   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5861   pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5862   if (!pattern_stmt)
5863     {
5864       /* Clear any half-formed pattern definition sequence.  */
5865       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5866       return;
5867     }
5868 
5869   loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5870 
5871   /* Found a vectorizable pattern.  */
5872   if (dump_enabled_p ())
5873     dump_printf_loc (MSG_NOTE, vect_location,
5874 		     "%s pattern recognized: %G",
5875 		     recog_func->name, pattern_stmt);
5876 
5877   /* Mark the stmts that are involved in the pattern. */
5878   vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5879 
5880   /* Patterns cannot be vectorized using SLP, because they change the order of
5881      computation.  */
5882   if (loop_vinfo)
5883     {
5884       unsigned ix, ix2;
5885       stmt_vec_info *elem_ptr;
5886       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5887 			     elem_ptr, *elem_ptr == stmt_info);
5888     }
5889 }
5890 
5891 
5892 /* Function vect_pattern_recog
5893 
5894    Input:
5895    LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5896         computation idioms.
5897 
5898    Output - for each computation idiom that is detected we create a new stmt
5899         that provides the same functionality and that can be vectorized.  We
5900         also record some information in the struct_stmt_info of the relevant
5901         stmts, as explained below:
5902 
5903    At the entry to this function we have the following stmts, with the
5904    following initial value in the STMT_VINFO fields:
5905 
5906          stmt                     in_pattern_p  related_stmt    vec_stmt
5907          S1: a_i = ....                 -       -               -
5908          S2: a_2 = ..use(a_i)..         -       -               -
5909          S3: a_1 = ..use(a_2)..         -       -               -
5910          S4: a_0 = ..use(a_1)..         -       -               -
5911          S5: ... = ..use(a_0)..         -       -               -
5912 
5913    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5914    represented by a single stmt.  We then:
5915    - create a new stmt S6 equivalent to the pattern (the stmt is not
5916      inserted into the code)
5917    - fill in the STMT_VINFO fields as follows:
5918 
5919                                   in_pattern_p  related_stmt    vec_stmt
5920          S1: a_i = ....                 -       -               -
5921          S2: a_2 = ..use(a_i)..         -       -               -
5922          S3: a_1 = ..use(a_2)..         -       -               -
5923          S4: a_0 = ..use(a_1)..         true    S6              -
5924           '---> S6: a_new = ....        -       S4              -
5925          S5: ... = ..use(a_0)..         -       -               -
5926 
5927    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5928    to each other through the RELATED_STMT field).
5929 
5930    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5931    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
5932    remain irrelevant unless used by stmts other than S4.
5933 
5934    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5935    (because they are marked as irrelevant).  It will vectorize S6, and record
5936    a pointer to the new vector stmt VS6 from S6 (as usual).
5937    S4 will be skipped, and S5 will be vectorized as usual:
5938 
5939                                   in_pattern_p  related_stmt    vec_stmt
5940          S1: a_i = ....                 -       -               -
5941          S2: a_2 = ..use(a_i)..         -       -               -
5942          S3: a_1 = ..use(a_2)..         -       -               -
5943        > VS6: va_new = ....             -       -               -
5944          S4: a_0 = ..use(a_1)..         true    S6              VS6
5945           '---> S6: a_new = ....        -       S4              VS6
5946        > VS5: ... = ..vuse(va_new)..    -       -               -
5947          S5: ... = ..use(a_0)..         -       -               -
5948 
5949    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5950    elsewhere), and we'll end up with:
5951 
5952         VS6: va_new = ....
5953         VS5: ... = ..vuse(va_new)..
5954 
5955    In case of more than one pattern statements, e.g., widen-mult with
5956    intermediate type:
5957 
5958      S1  a_t = ;
5959      S2  a_T = (TYPE) a_t;
5960            '--> S3: a_it = (interm_type) a_t;
5961      S4  prod_T = a_T * CONST;
5962            '--> S5: prod_T' = a_it w* CONST;
5963 
5964    there may be other users of a_T outside the pattern.  In that case S2 will
5965    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5966    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
5967    be recorded in S3.  */
5968 
5969 void
vect_pattern_recog(vec_info * vinfo)5970 vect_pattern_recog (vec_info *vinfo)
5971 {
5972   class loop *loop;
5973   basic_block *bbs;
5974   unsigned int nbbs;
5975   gimple_stmt_iterator si;
5976   unsigned int i, j;
5977 
5978   vect_determine_precisions (vinfo);
5979 
5980   DUMP_VECT_SCOPE ("vect_pattern_recog");
5981 
5982   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5983     {
5984       loop = LOOP_VINFO_LOOP (loop_vinfo);
5985       bbs = LOOP_VINFO_BBS (loop_vinfo);
5986       nbbs = loop->num_nodes;
5987 
5988       /* Scan through the loop stmts, applying the pattern recognition
5989 	 functions starting at each stmt visited:  */
5990       for (i = 0; i < nbbs; i++)
5991 	{
5992 	  basic_block bb = bbs[i];
5993 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5994 	    {
5995 	      if (is_gimple_debug (gsi_stmt (si)))
5996 		continue;
5997 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5998 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
5999 	      for (j = 0; j < NUM_PATTERNS; j++)
6000 		vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
6001 				      stmt_info);
6002 	    }
6003 	}
6004     }
6005   else
6006     {
6007       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6008       for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6009 	for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
6010 	     !gsi_end_p (gsi); gsi_next (&gsi))
6011 	  {
6012 	    stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
6013 	    if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
6014 	      continue;
6015 
6016 	    /* Scan over all generic vect_recog_xxx_pattern functions.  */
6017 	    for (j = 0; j < NUM_PATTERNS; j++)
6018 	      vect_pattern_recog_1 (vinfo,
6019 				    &vect_vect_recog_func_ptrs[j], stmt_info);
6020 	  }
6021     }
6022 
6023   /* After this no more add_stmt calls are allowed.  */
6024   vinfo->stmt_vec_info_ro = true;
6025 }
6026