1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2006-2019 Free Software Foundation, Inc.
3    Contributed by Dorit Nuzman <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 
50 /* Return true if we have a useful VR_RANGE range for VAR, storing it
51    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
52 
53 static bool
54 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
55 {
56   value_range_kind vr_type = get_range_info (var, min_value, max_value);
57   wide_int nonzero = get_nonzero_bits (var);
58   signop sgn = TYPE_SIGN (TREE_TYPE (var));
59   if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 					 nonzero, sgn) == VR_RANGE)
61     {
62       if (dump_enabled_p ())
63 	{
64 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 	  dump_printf (MSG_NOTE, " has range [");
66 	  dump_hex (MSG_NOTE, *min_value);
67 	  dump_printf (MSG_NOTE, ", ");
68 	  dump_hex (MSG_NOTE, *max_value);
69 	  dump_printf (MSG_NOTE, "]\n");
70 	}
71       return true;
72     }
73   else
74     {
75       if (dump_enabled_p ())
76 	{
77 	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 	  dump_printf (MSG_NOTE, " has no range info\n");
79 	}
80       return false;
81     }
82 }
83 
84 /* Report that we've found an instance of the pattern named NAME in
85    statement STMT.  */
86 
87 static void
88 vect_pattern_detected (const char *name, gimple *stmt)
89 {
90   if (dump_enabled_p ())
91     dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
92 }
93 
94 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95    return the pattern statement's stmt_vec_info.  Set its vector type to
96    VECTYPE if it doesn't have one already.  */
97 
98 static stmt_vec_info
99 vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
100 			tree vectype)
101 {
102   vec_info *vinfo = orig_stmt_info->vinfo;
103   stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
104   if (pattern_stmt_info == NULL)
105     pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
106   gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
107 
108   pattern_stmt_info->pattern_stmt_p = true;
109   STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
110   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
111     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
112   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
113     STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
114   return pattern_stmt_info;
115 }
116 
117 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118    Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
119    have one already.  */
120 
121 static void
122 vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
123 		       tree vectype)
124 {
125   STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
126   STMT_VINFO_RELATED_STMT (orig_stmt_info)
127     = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
128 }
129 
130 /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
131    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132    be different from the vector type of the final pattern statement.  */
133 
134 static inline void
135 append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
136 			tree vectype = NULL_TREE)
137 {
138   vec_info *vinfo = stmt_info->vinfo;
139   if (vectype)
140     {
141       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
142       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
143     }
144   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
145 				      new_stmt);
146 }
147 
148 /* The caller wants to perform new operations on vect_external variable
149    VAR, so that the result of the operations would also be vect_external.
150    Return the edge on which the operations can be performed, if one exists.
151    Return null if the operations should instead be treated as part of
152    the pattern that needs them.  */
153 
154 static edge
155 vect_get_external_def_edge (vec_info *vinfo, tree var)
156 {
157   edge e = NULL;
158   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
159     {
160       e = loop_preheader_edge (loop_vinfo->loop);
161       if (!SSA_NAME_IS_DEFAULT_DEF (var))
162 	{
163 	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
164 	  if (bb == NULL
165 	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
166 	    e = NULL;
167 	}
168     }
169   return e;
170 }
171 
172 /* Return true if the target supports a vector version of CODE,
173    where CODE is known to map to a direct optab.  ITYPE specifies
174    the type of (some of) the scalar inputs and OTYPE specifies the
175    type of the scalar result.
176 
177    If CODE allows the inputs and outputs to have different type
178    (such as for WIDEN_SUM_EXPR), it is the input mode rather
179    than the output mode that determines the appropriate target pattern.
180    Operand 0 of the target pattern then specifies the mode that the output
181    must have.
182 
183    When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184    Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
185    is nonnull.  */
186 
187 static bool
188 vect_supportable_direct_optab_p (tree otype, tree_code code,
189 				 tree itype, tree *vecotype_out,
190 				 tree *vecitype_out = NULL)
191 {
192   tree vecitype = get_vectype_for_scalar_type (itype);
193   if (!vecitype)
194     return false;
195 
196   tree vecotype = get_vectype_for_scalar_type (otype);
197   if (!vecotype)
198     return false;
199 
200   optab optab = optab_for_tree_code (code, vecitype, optab_default);
201   if (!optab)
202     return false;
203 
204   insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
205   if (icode == CODE_FOR_nothing
206       || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
207     return false;
208 
209   *vecotype_out = vecotype;
210   if (vecitype_out)
211     *vecitype_out = vecitype;
212   return true;
213 }
214 
215 /* Round bit precision PRECISION up to a full element.  */
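
/* For example, a PRECISION of 12 rounds up to 16, and a PRECISION of 3
   rounds up to BITS_PER_UNIT (8 bits on typical targets).  */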
216 
217 static unsigned int
218 vect_element_precision (unsigned int precision)
219 {
220   precision = 1 << ceil_log2 (precision);
221   return MAX (precision, BITS_PER_UNIT);
222 }
223 
224 /* If OP is defined by a statement that's being considered for vectorization,
225    return information about that statement, otherwise return NULL.  */
226 
227 static stmt_vec_info
228 vect_get_internal_def (vec_info *vinfo, tree op)
229 {
230   stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
231   if (def_stmt_info
232       && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
233     return def_stmt_info;
234   return NULL;
235 }
236 
237 /* Check whether NAME, an ssa-name used in STMT_VINFO, is the result of a
238    type conversion:  DEF_STMT: NAME = NOP (name0).  Store name0's type in
239    *ORIG_TYPE, the conversion statement in *DEF_STMT, and set *PROMOTION
240    according to whether the conversion is a promotion.  If CHECK_SIGN is
241    TRUE, require that both types have the same signedness.  */
242 
243 static bool
244 type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
245 		   tree *orig_type, gimple **def_stmt, bool *promotion)
246 {
247   tree type = TREE_TYPE (name);
248   tree oprnd0;
249   enum vect_def_type dt;
250 
251   stmt_vec_info def_stmt_info;
252   if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
253 			   def_stmt))
254     return false;
255 
256   if (dt != vect_internal_def
257       && dt != vect_external_def && dt != vect_constant_def)
258     return false;
259 
260   if (!*def_stmt)
261     return false;
262 
263   if (!is_gimple_assign (*def_stmt))
264     return false;
265 
266   if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
267     return false;
268 
269   oprnd0 = gimple_assign_rhs1 (*def_stmt);
270 
271   *orig_type = TREE_TYPE (oprnd0);
272   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
273       || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
274     return false;
275 
276   if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
277     *promotion = true;
278   else
279     *promotion = false;
280 
281   if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
282     return false;
283 
284   return true;
285 }
286 
287 /* Holds information about an input operand after some sign changes
288    and type promotions have been peeled away.  */
289 struct vect_unpromoted_value {
290   vect_unpromoted_value ();
291 
292   void set_op (tree, vect_def_type, stmt_vec_info = NULL);
293 
294   /* The value obtained after peeling away zero or more casts.  */
295   tree op;
296 
297   /* The type of OP.  */
298   tree type;
299 
300   /* The definition type of OP.  */
301   vect_def_type dt;
302 
303   /* If OP is the result of peeling at least one cast, and if the cast
304      of OP itself is a vectorizable statement, CASTER identifies that
305      statement, otherwise it is null.  */
306   stmt_vec_info caster;
307 };
308 
309 inline vect_unpromoted_value::vect_unpromoted_value ()
310   : op (NULL_TREE),
311     type (NULL_TREE),
312     dt (vect_uninitialized_def),
313     caster (NULL)
314 {
315 }
316 
317 /* Set the operand to OP_IN, its definition type to DT_IN, and the
318    statement that casts it to CASTER_IN.  */
319 
320 inline void
321 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
322 			       stmt_vec_info caster_in)
323 {
324   op = op_in;
325   type = TREE_TYPE (op);
326   dt = dt_in;
327   caster = caster_in;
328 }
329 
330 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331    to reach some vectorizable inner operand OP', continuing as long as it
332    is possible to convert OP' back to OP using a possible sign change
333    followed by a possible promotion P.  Return this OP', or null if OP is
334    not a vectorizable SSA name.  If there is a promotion P, describe its
335    input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
336    is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337    have more than one user.
338 
339    A successful return means that it is possible to go from OP' to OP
340    via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
341    whereas the cast from UNPROM to OP might be a promotion, a sign
342    change, or a nop.
343 
344    E.g. say we have:
345 
346        signed short *ptr = ...;
347        signed short C = *ptr;
348        unsigned short B = (unsigned short) C;    // sign change
349        signed int A = (signed int) B;            // unsigned promotion
350        ...possible other uses of A...
351        unsigned int OP = (unsigned int) A;       // sign change
352 
353    In this case it's possible to go directly from C to OP using:
354 
355        OP = (unsigned int) (unsigned short) C;
356 	    +------------+ +--------------+
357 	       promotion      sign change
358 
359    so OP' would be C.  The input to the promotion is B, so UNPROM
360    would describe B.  */
361 
362 static tree
363 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
364 				      vect_unpromoted_value *unprom,
365 				      bool *single_use_p = NULL)
366 {
367   tree res = NULL_TREE;
368   tree op_type = TREE_TYPE (op);
369   unsigned int orig_precision = TYPE_PRECISION (op_type);
370   unsigned int min_precision = orig_precision;
371   stmt_vec_info caster = NULL;
372   while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
373     {
374       /* See whether OP is simple enough to vectorize.  */
375       stmt_vec_info def_stmt_info;
376       gimple *def_stmt;
377       vect_def_type dt;
378       if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
379 	break;
380 
381       /* If OP is the input of a demotion, skip over it to see whether
382 	 OP is itself the result of a promotion.  If so, the combined
383 	 effect of the promotion and the demotion might fit the required
384 	 pattern, otherwise neither operation fits.
385 
386 	 This copes with cases such as the result of an arithmetic
387 	 operation being truncated before being stored, and where that
388 	 arithmetic operation has been recognized as an over-widened one.  */
389       if (TYPE_PRECISION (op_type) <= min_precision)
390 	{
391 	  /* Use OP as the UNPROM described above if we haven't yet
392 	     found a promotion, or if using the new input preserves the
393 	     sign of the previous promotion.  */
394 	  if (!res
395 	      || TYPE_PRECISION (unprom->type) == orig_precision
396 	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
397 	    {
398 	      unprom->set_op (op, dt, caster);
399 	      min_precision = TYPE_PRECISION (op_type);
400 	    }
401 	  /* Stop if we've already seen a promotion and if this
402 	     conversion does more than change the sign.  */
403 	  else if (TYPE_PRECISION (op_type)
404 		   != TYPE_PRECISION (unprom->type))
405 	    break;
406 
407 	  /* The sequence now extends to OP.  */
408 	  res = op;
409 	}
410 
411       /* See whether OP is defined by a cast.  Record it as CASTER if
412 	 the cast is potentially vectorizable.  */
413       if (!def_stmt)
414 	break;
415       caster = def_stmt_info;
416 
417       /* Ignore pattern statements, since we don't link uses for them.  */
418       if (caster
419 	  && single_use_p
420 	  && !STMT_VINFO_RELATED_STMT (caster)
421 	  && !has_single_use (res))
422 	*single_use_p = false;
423 
424       gassign *assign = dyn_cast <gassign *> (def_stmt);
425       if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
426 	break;
427 
428       /* Continue with the input to the cast.  */
429       op = gimple_assign_rhs1 (def_stmt);
430       op_type = TREE_TYPE (op);
431     }
432   return res;
433 }
434 
435 /* OP is an integer operand to an operation that returns TYPE, and we
436    want to treat the operation as a widening one.  So far we can treat
437    it as widening from *COMMON_TYPE.
438 
439    Return true if OP is suitable for such a widening operation,
440    either widening from *COMMON_TYPE or from some supertype of it.
441    Update *COMMON_TYPE to the supertype in the latter case.
442 
443    SHIFT_P is true if OP is a shift amount.  */
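
/* For example (illustrative values only): with a 32-bit result TYPE, a
   constant shift amount OP of up to 16 is acceptable, while a non-shift
   constant OP of 1000 rounds up to a 16-bit element precision and so
   widens *COMMON_TYPE to at least a 16-bit integer type.  */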
444 
445 static bool
446 vect_joust_widened_integer (tree type, bool shift_p, tree op,
447 			    tree *common_type)
448 {
449   /* Calculate the minimum precision required by OP, without changing
450      the sign of either operand.  */
451   unsigned int precision;
452   if (shift_p)
453     {
454       if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
455 	return false;
456       precision = TREE_INT_CST_LOW (op);
457     }
458   else
459     {
460       precision = wi::min_precision (wi::to_widest (op),
461 				     TYPE_SIGN (*common_type));
462       if (precision * 2 > TYPE_PRECISION (type))
463 	return false;
464     }
465 
466   /* If OP requires a wider type, switch to that type.  The checks
467      above ensure that this is still narrower than the result.  */
468   precision = vect_element_precision (precision);
469   if (TYPE_PRECISION (*common_type) < precision)
470     *common_type = build_nonstandard_integer_type
471       (precision, TYPE_UNSIGNED (*common_type));
472   return true;
473 }
474 
475 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
476    is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */
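
/* For example (illustrative only): if *COMMON_TYPE is an unsigned 8-bit
   type and NEW_TYPE is a signed 8-bit type, neither can represent all
   values of the other, so the fallback below picks a signed 16-bit
   supertype, provided TYPE is at least 32 bits wide.  */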
477 
478 static bool
479 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
480 {
481   if (types_compatible_p (*common_type, new_type))
482     return true;
483 
484   /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
485   if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
486       && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
487     return true;
488 
489   /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
490   if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
491       && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
492     {
493       *common_type = new_type;
494       return true;
495     }
496 
497   /* We have mismatched signs, with the signed type being
498      no wider than the unsigned type.  In this case we need
499      a wider signed type.  */
500   unsigned int precision = MAX (TYPE_PRECISION (*common_type),
501 				TYPE_PRECISION (new_type));
502   precision *= 2;
503   if (precision * 2 > TYPE_PRECISION (type))
504     return false;
505 
506   *common_type = build_nonstandard_integer_type (precision, false);
507   return true;
508 }
509 
510 /* Check whether STMT_INFO can be viewed as a tree of integer operations
511    in which each node either performs CODE or WIDENED_CODE, and where
512    each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
513    specifies the maximum number of leaf operands.  SHIFT_P says whether
514    CODE and WIDENED_CODE are some sort of shift.
515 
516    If STMT_INFO is such a tree, return the number of leaf operands
517    and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
518    to a type that (a) is narrower than the result of STMT_INFO and
519    (b) can hold all leaf operand values.
520 
521    Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
522    exists.  */
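
/* Illustrative example (not part of the original sources): given

     signed char a, b;
     int res = (int) a * (int) b;

   a call with CODE == MULT_EXPR and WIDENED_CODE == WIDEN_MULT_EXPR
   returns 2, describes "a" and "b" in UNPROM[0] and UNPROM[1], and sets
   *COMMON_TYPE to "signed char".  */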
523 
524 static unsigned int
525 vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
526 		      tree_code widened_code, bool shift_p,
527 		      unsigned int max_nops,
528 		      vect_unpromoted_value *unprom, tree *common_type)
529 {
530   /* Check for an integer operation with the right code.  */
531   vec_info *vinfo = stmt_info->vinfo;
532   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
533   if (!assign)
534     return 0;
535 
536   tree_code rhs_code = gimple_assign_rhs_code (assign);
537   if (rhs_code != code && rhs_code != widened_code)
538     return 0;
539 
540   tree type = gimple_expr_type (assign);
541   if (!INTEGRAL_TYPE_P (type))
542     return 0;
543 
544   /* Assume that both operands will be leaf operands.  */
545   max_nops -= 2;
546 
547   /* Check the operands.  */
548   unsigned int next_op = 0;
549   for (unsigned int i = 0; i < 2; ++i)
550     {
551       vect_unpromoted_value *this_unprom = &unprom[next_op];
552       unsigned int nops = 1;
553       tree op = gimple_op (assign, i + 1);
554       if (i == 1 && TREE_CODE (op) == INTEGER_CST)
555 	{
556 	  /* We already have a common type from earlier operands.
557 	     Update it to account for OP.  */
558 	  this_unprom->set_op (op, vect_constant_def);
559 	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
560 	    return 0;
561 	}
562       else
563 	{
564 	  /* Only allow shifts by constants.  */
565 	  if (shift_p && i == 1)
566 	    return 0;
567 
568 	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
569 						     this_unprom))
570 	    return 0;
571 
572 	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
573 	    {
574 	      /* The operand isn't widened.  If STMT_INFO has the code
575 		 for an unwidened operation, recursively check whether
576 		 this operand is a node of the tree.  */
577 	      if (rhs_code != code
578 		  || max_nops == 0
579 		  || this_unprom->dt != vect_internal_def)
580 		return 0;
581 
582 	      /* Give back the leaf slot allocated above now that we're
583 		 not treating this as a leaf operand.  */
584 	      max_nops += 1;
585 
586 	      /* Recursively process the definition of the operand.  */
587 	      stmt_vec_info def_stmt_info
588 		= vinfo->lookup_def (this_unprom->op);
589 	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
590 					   shift_p, max_nops, this_unprom,
591 					   common_type);
592 	      if (nops == 0)
593 		return 0;
594 
595 	      max_nops -= nops;
596 	    }
597 	  else
598 	    {
599 	      /* Make sure that the operand is narrower than the result.  */
600 	      if (TYPE_PRECISION (this_unprom->type) * 2
601 		  > TYPE_PRECISION (type))
602 		return 0;
603 
604 	      /* Update COMMON_TYPE for the new operand.  */
605 	      if (i == 0)
606 		*common_type = this_unprom->type;
607 	      else if (!vect_joust_widened_type (type, this_unprom->type,
608 						 common_type))
609 		return 0;
610 	    }
611 	}
612       next_op += nops;
613     }
614   return next_op;
615 }
616 
617 /* Return a new temporary SSA name of TYPE for use in a pattern, defined
618    by STMT.  If STMT is NULL, the caller must set SSA_NAME_DEF_STMT for it.  */
619 
620 static tree
621 vect_recog_temp_ssa_var (tree type, gimple *stmt)
622 {
623   return make_temp_ssa_name (type, stmt, "patt");
624 }
625 
626 /* STMT2_INFO describes a type conversion that could be split into STMT1
627    followed by a version of STMT2_INFO that takes NEW_RHS as its first
628    input.  Try to do this using pattern statements, returning true on
629    success.  */
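
/* Illustrative example (names hypothetical, not part of the original
   sources): if STMT2_INFO is the cast

     int y = (int) c;        // c of type signed char

   and the caller wants to tap a 16-bit midpoint, STMT1 could be
   "short t = (short) c" with NEW_RHS == t, after which STMT2_INFO is
   rewritten as "int y = (int) t".  */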
630 
631 static bool
632 vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
633 		      gimple *stmt1, tree vectype)
634 {
635   if (is_pattern_stmt_p (stmt2_info))
636     {
637       /* STMT2_INFO is part of a pattern.  Get the statement to which
638 	 the pattern is attached.  */
639       stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
640       vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
641 
642       if (dump_enabled_p ())
643 	dump_printf_loc (MSG_NOTE, vect_location,
644 			 "Splitting pattern statement: %G", stmt2_info->stmt);
645 
646       /* Since STMT2_INFO is a pattern statement, we can change it
647 	 in-situ without worrying about changing the code for the
648 	 containing block.  */
649       gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
650 
651       if (dump_enabled_p ())
652 	{
653 	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
654 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
655 			   stmt2_info->stmt);
656 	}
657 
658       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
659       if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
660 	/* STMT2_INFO is the actual pattern statement.  Add STMT1
661 	   to the end of the definition sequence.  */
662 	gimple_seq_add_stmt_without_update (def_seq, stmt1);
663       else
664 	{
665 	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
666 	     before it.  */
667 	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
668 	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
669 	}
670       return true;
671     }
672   else
673     {
674       /* STMT2_INFO doesn't yet have a pattern.  Try to create a
675 	 two-statement pattern now.  */
676       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
677       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
678       tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
679       if (!lhs_vectype)
680 	return false;
681 
682       if (dump_enabled_p ())
683 	dump_printf_loc (MSG_NOTE, vect_location,
684 			 "Splitting statement: %G", stmt2_info->stmt);
685 
686       /* Add STMT1 as a singleton pattern definition sequence.  */
687       gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
688       vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
689       gimple_seq_add_stmt_without_update (def_seq, stmt1);
690 
691       /* Build the second of the two pattern statements.  */
692       tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
693       gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
694       vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
695 
696       if (dump_enabled_p ())
697 	{
698 	  dump_printf_loc (MSG_NOTE, vect_location,
699 			   "into pattern statements: %G", stmt1);
700 	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
701 	}
702 
703       return true;
704     }
705 }
706 
707 /* Convert UNPROM to TYPE and return the result, adding new statements
708    to STMT_INFO's pattern definition statements if no better way is
709    available.  VECTYPE is the vector form of TYPE.  */
710 
711 static tree
712 vect_convert_input (stmt_vec_info stmt_info, tree type,
713 		    vect_unpromoted_value *unprom, tree vectype)
714 {
715   /* Check for a no-op conversion.  */
716   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
717     return unprom->op;
718 
719   /* Allow the caller to create constant vect_unpromoted_values.  */
720   if (TREE_CODE (unprom->op) == INTEGER_CST)
721     return wide_int_to_tree (type, wi::to_widest (unprom->op));
722 
723   tree input = unprom->op;
724   if (unprom->caster)
725     {
726       tree lhs = gimple_get_lhs (unprom->caster->stmt);
727       tree lhs_type = TREE_TYPE (lhs);
728 
729       /* If the result of the existing cast is the right width, use it
730 	 instead of the source of the cast.  */
731       if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
732 	input = lhs;
733       /* If the precision we want is between the source and result
734 	 precisions of the existing cast, try splitting the cast into
735 	 two and tapping into a mid-way point.  */
736       else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
737 	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
738 	{
739 	  /* In order to preserve the semantics of the original cast,
740 	     give the mid-way point the same signedness as the input value.
741 
742 	     It would be possible to use a signed type here instead if
743 	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
744 	     make the sign of the midtype sensitive to the order in
745 	     which we process the statements, since the signedness of
746 	     TYPE is the signedness required by just one of possibly
747 	     many users.  Also, unsigned promotions are usually as cheap
748 	     as or cheaper than signed ones, so it's better to keep an
749 	     unsigned promotion.  */
750 	  tree midtype = build_nonstandard_integer_type
751 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
752 	  tree vec_midtype = get_vectype_for_scalar_type (midtype);
753 	  if (vec_midtype)
754 	    {
755 	      input = vect_recog_temp_ssa_var (midtype, NULL);
756 	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
757 						       unprom->op);
758 	      if (!vect_split_statement (unprom->caster, input, new_stmt,
759 					 vec_midtype))
760 		append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
761 	    }
762 	}
763 
764       /* See if we can reuse an existing result.  */
765       if (types_compatible_p (type, TREE_TYPE (input)))
766 	return input;
767     }
768 
769   /* We need a new conversion statement.  */
770   tree new_op = vect_recog_temp_ssa_var (type, NULL);
771   gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
772 
773   /* If OP is an external value, see if we can insert the new statement
774      on an incoming edge.  */
775   if (input == unprom->op && unprom->dt == vect_external_def)
776     if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
777       {
778 	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
779 	gcc_assert (!new_bb);
780 	return new_op;
781       }
782 
783   /* As a (common) last resort, add the statement to the pattern itself.  */
784   append_pattern_def_seq (stmt_info, new_stmt, vectype);
785   return new_op;
786 }
787 
788 /* Invoke vect_convert_input for N elements of UNPROM and store the
789    result in the corresponding elements of RESULT.  */
790 
791 static void
792 vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
793 		     tree *result, tree type, vect_unpromoted_value *unprom,
794 		     tree vectype)
795 {
796   for (unsigned int i = 0; i < n; ++i)
797     {
798       unsigned int j;
799       for (j = 0; j < i; ++j)
800 	if (unprom[j].op == unprom[i].op)
801 	  break;
802       if (j < i)
803 	result[i] = result[j];
804       else
805 	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
806     }
807 }
808 
809 /* The caller has created a (possibly empty) sequence of pattern definition
810    statements followed by a single statement PATTERN_STMT.  Cast the result
811    of this final statement to TYPE.  If a new statement is needed, add
812    PATTERN_STMT to the end of STMT_INFO's pattern definition statements
813    and return the new statement, otherwise return PATTERN_STMT as-is.
814    VECITYPE is the vector form of PATTERN_STMT's result type.  */
815 
816 static gimple *
817 vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
818 		     tree vecitype)
819 {
820   tree lhs = gimple_get_lhs (pattern_stmt);
821   if (!types_compatible_p (type, TREE_TYPE (lhs)))
822     {
823       append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
824       tree cast_var = vect_recog_temp_ssa_var (type, NULL);
825       pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
826     }
827   return pattern_stmt;
828 }
829 
830 /* Return true if STMT_VINFO describes a reduction for which reassociation
831    is allowed.  If STMT_VINFO is part of a group, assume that it's part of
832    a reduction chain and optimistically assume that all statements
833    except the last allow reassociation.  */
834 
835 static bool
836 vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
837 {
838   return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
839 	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
840 	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
841 }
842 
843 /* As above, but also require it to have code CODE and to be a reduction
844    in the outermost loop.  When returning true, store the operands in
845    *OP0_OUT and *OP1_OUT.  */
846 
847 static bool
848 vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
849 				tree *op0_out, tree *op1_out)
850 {
851   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
852   if (!loop_info)
853     return false;
854 
855   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
856   if (!assign || gimple_assign_rhs_code (assign) != code)
857     return false;
858 
859   /* We don't allow changing the order of the computation in the inner-loop
860      when doing outer-loop vectorization.  */
861   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
862   if (loop && nested_in_vect_loop_p (loop, stmt_info))
863     return false;
864 
865   if (!vect_reassociating_reduction_p (stmt_info))
866     return false;
867 
868   *op0_out = gimple_assign_rhs1 (assign);
869   *op1_out = gimple_assign_rhs2 (assign);
870   return true;
871 }
872 
873 /* Function vect_recog_dot_prod_pattern
874 
875    Try to find the following pattern:
876 
877      type x_t, y_t;
878      TYPE1 prod;
879      TYPE2 sum = init;
880    loop:
881      sum_0 = phi <init, sum_1>
882      S1  x_t = ...
883      S2  y_t = ...
884      S3  x_T = (TYPE1) x_t;
885      S4  y_T = (TYPE1) y_t;
886      S5  prod = x_T * y_T;
887      [S6  prod = (TYPE2) prod;  #optional]
888      S7  sum_1 = prod + sum_0;
889 
890    where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
891    same size as 'TYPE1' or bigger.  This is a special case of a reduction
892    computation.
893 
894    Input:
895 
896    * STMT_VINFO: The stmt from which the pattern search begins.  In the
897    example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
898    will be detected.
899 
900    Output:
901 
902    * TYPE_OUT: The type of the output  of this pattern.
903 
904    * Return value: A new stmt that will be used to replace the sequence of
905    stmts that constitute the pattern. In this case it will be:
906         DOT_PROD_EXPR <x_t, y_t, sum_0>
907 
908    Note: The dot-prod idiom is a widening reduction pattern that is
909          vectorized without preserving all the intermediate results. It
910          produces only N/2 (widened) results (by summing up pairs of
911          intermediate results) rather than all N results.  Therefore, we
912          cannot allow this pattern when we want to get all the results and in
913          the correct order (as is the case when this computation is in an
914          inner-loop nested in an outer-loop that is being vectorized).  */
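
/* Illustrative example (not part of the original sources): assuming
   "signed char" elements and an "int" accumulator, the C loop

     signed char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += (int) x[i] * (int) y[i];

   has this shape and can be rewritten to use DOT_PROD_EXPR when the
   target supports it.  */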
915 
916 static gimple *
917 vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
918 {
919   tree oprnd0, oprnd1;
920   gimple *last_stmt = stmt_vinfo->stmt;
921   vec_info *vinfo = stmt_vinfo->vinfo;
922   tree type, half_type;
923   gimple *pattern_stmt;
924   tree var;
925 
926   /* Look for the following pattern
927           DX = (TYPE1) X;
928           DY = (TYPE1) Y;
929           DPROD = DX * DY;
930           DDPROD = (TYPE2) DPROD;
931           sum_1 = DDPROD + sum_0;
932      In which
933      - DX is double the size of X
934      - DY is double the size of Y
935      - DX, DY, DPROD all have the same type
936      - sum is the same size as DPROD or bigger
937      - sum has been recognized as a reduction variable.
938 
939      This is equivalent to:
940        DPROD = X w* Y;          #widen mult
941        sum_1 = DPROD w+ sum_0;  #widen summation
942      or
943        DPROD = X w* Y;          #widen mult
944        sum_1 = DPROD + sum_0;   #summation
945    */
946 
947   /* Starting from LAST_STMT, follow the defs of its uses in search
948      of the above pattern.  */
949 
950   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
951 				       &oprnd0, &oprnd1))
952     return NULL;
953 
954   type = gimple_expr_type (last_stmt);
955 
956   vect_unpromoted_value unprom_mult;
957   oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
958 
959   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
960      we know that oprnd1 is the reduction variable (defined by a loop-header
961      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
962      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
963   if (!oprnd0)
964     return NULL;
965 
966   stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
967   if (!mult_vinfo)
968     return NULL;
969 
970   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
971      inside the loop (in case we are analyzing an outer-loop).  */
972   vect_unpromoted_value unprom0[2];
973   if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
974 			     false, 2, unprom0, &half_type))
975     return NULL;
976 
977   /* If there are two widening operations, make sure they agree on
978      the sign of the extension.  */
979   if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
980       && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
981     return NULL;
982 
983   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
984 
985   tree half_vectype;
986   if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
987 					type_out, &half_vectype))
988     return NULL;
989 
990   /* Get the inputs in the appropriate types.  */
991   tree mult_oprnd[2];
992   vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
993 		       unprom0, half_vectype);
994 
995   var = vect_recog_temp_ssa_var (type, NULL);
996   pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
997 				      mult_oprnd[0], mult_oprnd[1], oprnd1);
998 
999   return pattern_stmt;
1000 }
1001 
1002 
1003 /* Function vect_recog_sad_pattern
1004 
1005    Try to find the following Sum of Absolute Difference (SAD) pattern:
1006 
1007      type x_t, y_t;
1008      signed TYPE1 diff, abs_diff;
1009      TYPE2 sum = init;
1010    loop:
1011      sum_0 = phi <init, sum_1>
1012      S1  x_t = ...
1013      S2  y_t = ...
1014      S3  x_T = (TYPE1) x_t;
1015      S4  y_T = (TYPE1) y_t;
1016      S5  diff = x_T - y_T;
1017      S6  abs_diff = ABS_EXPR <diff>;
1018      [S7  abs_diff = (TYPE2) abs_diff;  #optional]
1019      S8  sum_1 = abs_diff + sum_0;
1020 
1021    where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1022    same size as 'TYPE1' or bigger.  This is a special case of a reduction
1023    computation.
1024 
1025    Input:
1026 
1027    * STMT_VINFO: The stmt from which the pattern search begins.  In the
1028    example, when this function is called with S8, the pattern
1029    {S3,S4,S5,S6,S7,S8} will be detected.
1030 
1031    Output:
1032 
1033    * TYPE_OUT: The type of the output of this pattern.
1034 
1035    * Return value: A new stmt that will be used to replace the sequence of
1036    stmts that constitute the pattern. In this case it will be:
1037         SAD_EXPR <x_t, y_t, sum_0>
1038   */
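
/* Illustrative example (not part of the original sources): assuming
   "unsigned char" elements and an "int" accumulator, the C loop

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += abs ((int) x[i] - (int) y[i]);   // abs from <stdlib.h>

   follows the shape above and can be rewritten to use SAD_EXPR when the
   target supports it.  */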
1039 
1040 static gimple *
1041 vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1042 {
1043   gimple *last_stmt = stmt_vinfo->stmt;
1044   vec_info *vinfo = stmt_vinfo->vinfo;
1045   tree half_type;
1046 
1047   /* Look for the following pattern
1048           DX = (TYPE1) X;
1049           DY = (TYPE1) Y;
1050           DDIFF = DX - DY;
1051           DAD = ABS_EXPR <DDIFF>;
1052           DAD = (TYPE2) DAD;    #optional
1053           sum_1 = DAD + sum_0;
1054      In which
1055      - DX is at least double the size of X
1056      - DY is at least double the size of Y
1057      - DX, DY, DDIFF, DAD all have the same type
1058      - sum is the same size as DAD or bigger
1059      - sum has been recognized as a reduction variable.
1060 
1061      This is equivalent to:
1062        DDIFF = X w- Y;          #widen sub
1063        DAD = ABS_EXPR <DDIFF>;
1064        sum_1 = DAD w+ sum_0;    #widen summation
1065      or
1066        DDIFF = X w- Y;          #widen sub
1067        DAD = ABS_EXPR <DDIFF>;
1068        sum_1 = DAD + sum_0;     #summation
1069    */
1070 
1071   /* Starting from LAST_STMT, follow the defs of its uses in search
1072      of the above pattern.  */
1073 
1074   tree plus_oprnd0, plus_oprnd1;
1075   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1076 				       &plus_oprnd0, &plus_oprnd1))
1077     return NULL;
1078 
1079   tree sum_type = gimple_expr_type (last_stmt);
1080 
1081   /* Any non-truncating sequence of conversions is OK here, since
1082      with a successful match, the result of the ABS(U) is known to fit
1083      within the nonnegative range of the result type.  (It cannot be the
1084      negative of the minimum signed value due to the range of the widening
1085      MINUS_EXPR.)  */
1086   vect_unpromoted_value unprom_abs;
1087   plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1088 						      &unprom_abs);
1089 
1090   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1091      we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1092      phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1093      Then check that plus_oprnd0 is defined by an abs_expr.  */
1094 
1095   if (!plus_oprnd0)
1096     return NULL;
1097 
1098   stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1099   if (!abs_stmt_vinfo)
1100     return NULL;
1101 
1102   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1103      inside the loop (in case we are analyzing an outer-loop).  */
1104   gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1105   if (!abs_stmt
1106       || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1107 	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1108     return NULL;
1109 
1110   tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1111   tree abs_type = TREE_TYPE (abs_oprnd);
1112   if (TYPE_UNSIGNED (abs_type))
1113     return NULL;
1114 
1115   /* Peel off conversions from the ABS input.  This can involve sign
1116      changes (e.g. from an unsigned subtraction to a signed ABS input)
1117      or signed promotion, but it can't include unsigned promotion.
1118      (Note that ABS of an unsigned promotion should have been folded
1119      away before now anyway.)  */
1120   vect_unpromoted_value unprom_diff;
1121   abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1122 						    &unprom_diff);
1123   if (!abs_oprnd)
1124     return NULL;
1125   if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1126       && TYPE_UNSIGNED (unprom_diff.type))
1127     return NULL;
1128 
1129   /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
1130   stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1131   if (!diff_stmt_vinfo)
1132     return NULL;
1133 
1134   /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a phi
1135      inside the loop (in case we are analyzing an outer-loop).  */
1136   vect_unpromoted_value unprom[2];
1137   if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1138 			     false, 2, unprom, &half_type))
1139     return NULL;
1140 
1141   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1142 
1143   tree half_vectype;
1144   if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1145 					type_out, &half_vectype))
1146     return NULL;
1147 
1148   /* Get the inputs to the SAD_EXPR in the appropriate types.  */
1149   tree sad_oprnd[2];
1150   vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1151 		       unprom, half_vectype);
1152 
1153   tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1154   gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1155 					      sad_oprnd[1], plus_oprnd1);
1156 
1157   return pattern_stmt;
1158 }
1159 
1160 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1161    so that it can be treated as though it had the form:
1162 
1163       A_TYPE a;
1164       B_TYPE b;
1165       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1166       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1167     | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
1168     | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
1169     | RES_TYPE res = a_extend ORIG_CODE b_extend;
1170 
1171    Try to replace the pattern with:
1172 
1173       A_TYPE a;
1174       B_TYPE b;
1175       HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
1176       HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
1177     | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1178     | RES_TYPE res = (RES_TYPE) ext;  // possible no-op
1179 
1180    where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1181 
1182    SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
1183    name of the pattern being matched, for dump purposes.  */
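
/* Illustrative example (not part of the original sources): with
   ORIG_CODE == MULT_EXPR and WIDE_CODE == WIDEN_MULT_EXPR, the statement

     short a, b;
     int res = (int) a * (int) b;

   can be rewritten as a WIDEN_MULT_EXPR on the "short" operands, whose
   result is already of type "int".  */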
1184 
1185 static gimple *
1186 vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
1187 			     tree_code orig_code, tree_code wide_code,
1188 			     bool shift_p, const char *name)
1189 {
1190   gimple *last_stmt = last_stmt_info->stmt;
1191 
1192   vect_unpromoted_value unprom[2];
1193   tree half_type;
1194   if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1195 			     shift_p, 2, unprom, &half_type))
1196     return NULL;
1197 
1198   /* Pattern detected.  */
1199   vect_pattern_detected (name, last_stmt);
1200 
1201   tree type = gimple_expr_type (last_stmt);
1202   tree itype = type;
1203   if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1204       || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1205     itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1206 					    TYPE_UNSIGNED (half_type));
1207 
1208   /* Check target support  */
1209   tree vectype = get_vectype_for_scalar_type (half_type);
1210   tree vecitype = get_vectype_for_scalar_type (itype);
1211   enum tree_code dummy_code;
1212   int dummy_int;
1213   auto_vec<tree> dummy_vec;
1214   if (!vectype
1215       || !vecitype
1216       || !supportable_widening_operation (wide_code, last_stmt_info,
1217 					  vecitype, vectype,
1218 					  &dummy_code, &dummy_code,
1219 					  &dummy_int, &dummy_vec))
1220     return NULL;
1221 
1222   *type_out = get_vectype_for_scalar_type (type);
1223   if (!*type_out)
1224     return NULL;
1225 
1226   tree oprnd[2];
1227   vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
1228 
1229   tree var = vect_recog_temp_ssa_var (itype, NULL);
1230   gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1231 					      oprnd[0], oprnd[1]);
1232 
1233   return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
1234 }
1235 
1236 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1237    to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1238 
1239 static gimple *
1240 vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1241 {
1242   return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
1243 				      WIDEN_MULT_EXPR, false,
1244 				      "vect_recog_widen_mult_pattern");
1245 }
1246 
1247 /* Function vect_recog_pow_pattern
1248 
1249    Try to find the following pattern:
1250 
1251      x = POW (y, N);
1252 
1253    with POW being one of pow, powf, powi, powif and N being
1254    either 2 or 0.5.
1255 
1256    Input:
1257 
1258    * STMT_VINFO: The stmt from which the pattern search begins.
1259 
1260    Output:
1261 
1262    * TYPE_OUT: The type of the output of this pattern.
1263 
1264    * Return value: A new stmt that will be used to replace the sequence of
1265    stmts that constitute the pattern. In this case it will be:
1266         x = x * x
1267    or
1268 	x = sqrt (x)
1269 */
1270 
1271 static gimple *
1272 vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1273 {
1274   gimple *last_stmt = stmt_vinfo->stmt;
1275   tree base, exp;
1276   gimple *stmt;
1277   tree var;
1278 
1279   if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1280     return NULL;
1281 
1282   switch (gimple_call_combined_fn (last_stmt))
1283     {
1284     CASE_CFN_POW:
1285     CASE_CFN_POWI:
1286       break;
1287 
1288     default:
1289       return NULL;
1290     }
1291 
1292   base = gimple_call_arg (last_stmt, 0);
1293   exp = gimple_call_arg (last_stmt, 1);
1294   if (TREE_CODE (exp) != REAL_CST
1295       && TREE_CODE (exp) != INTEGER_CST)
1296     {
1297       if (flag_unsafe_math_optimizations
1298 	  && TREE_CODE (base) == REAL_CST
1299 	  && !gimple_call_internal_p (last_stmt))
1300 	{
1301 	  combined_fn log_cfn;
1302 	  built_in_function exp_bfn;
1303 	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1304 	    {
1305 	    case BUILT_IN_POW:
1306 	      log_cfn = CFN_BUILT_IN_LOG;
1307 	      exp_bfn = BUILT_IN_EXP;
1308 	      break;
1309 	    case BUILT_IN_POWF:
1310 	      log_cfn = CFN_BUILT_IN_LOGF;
1311 	      exp_bfn = BUILT_IN_EXPF;
1312 	      break;
1313 	    case BUILT_IN_POWL:
1314 	      log_cfn = CFN_BUILT_IN_LOGL;
1315 	      exp_bfn = BUILT_IN_EXPL;
1316 	      break;
1317 	    default:
1318 	      return NULL;
1319 	    }
1320 	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1321 	  tree exp_decl = builtin_decl_implicit (exp_bfn);
1322 	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
1323 	     does that, but if C is a power of 2, we want to use
1324 	     exp2 (log2 (C) * x) in the non-vectorized version, but for
1325 	     vectorization we don't have vectorized exp2.  */
1326 	  if (logc
1327 	      && TREE_CODE (logc) == REAL_CST
1328 	      && exp_decl
1329 	      && lookup_attribute ("omp declare simd",
1330 				   DECL_ATTRIBUTES (exp_decl)))
1331 	    {
1332 	      cgraph_node *node = cgraph_node::get_create (exp_decl);
1333 	      if (node->simd_clones == NULL)
1334 		{
1335 		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1336 		      || node->definition)
1337 		    return NULL;
1338 		  expand_simd_clones (node);
1339 		  if (node->simd_clones == NULL)
1340 		    return NULL;
1341 		}
1342 	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1343 	      if (!*type_out)
1344 		return NULL;
1345 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1346 	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1347 	      append_pattern_def_seq (stmt_vinfo, g);
1348 	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1349 	      g = gimple_build_call (exp_decl, 1, def);
1350 	      gimple_call_set_lhs (g, res);
1351 	      return g;
1352 	    }
1353 	}
1354 
1355       return NULL;
1356     }
1357 
1358   /* We now have a pow or powi builtin function call with a constant
1359      exponent.  */
1360 
1361   /* Catch squaring.  */
1362   if ((tree_fits_shwi_p (exp)
1363        && tree_to_shwi (exp) == 2)
1364       || (TREE_CODE (exp) == REAL_CST
1365           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1366     {
1367       if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1368 					    TREE_TYPE (base), type_out))
1369 	return NULL;
1370 
1371       var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1372       stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1373       return stmt;
1374     }
1375 
1376   /* Catch square root.  */
1377   if (TREE_CODE (exp) == REAL_CST
1378       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1379     {
1380       *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1381       if (*type_out
1382 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1383 					     OPTIMIZE_FOR_SPEED))
1384 	{
1385 	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1386 	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1387 	  gimple_call_set_lhs (stmt, var);
1388 	  gimple_call_set_nothrow (stmt, true);
1389 	  return stmt;
1390 	}
1391     }
1392 
1393   return NULL;
1394 }
1395 
1396 
1397 /* Function vect_recog_widen_sum_pattern
1398 
1399    Try to find the following pattern:
1400 
1401      type x_t;
1402      TYPE x_T, sum = init;
1403    loop:
1404      sum_0 = phi <init, sum_1>
1405      S1  x_t = *p;
1406      S2  x_T = (TYPE) x_t;
1407      S3  sum_1 = x_T + sum_0;
1408 
1409    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1410    summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1411    a special case of a reduction computation.
1412 
1413    Input:
1414 
1415    * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1416    when this function is called with S3, the pattern {S2,S3} will be detected.
1417 
1418    Output:
1419 
1420    * TYPE_OUT: The type of the output of this pattern.
1421 
1422    * Return value: A new stmt that will be used to replace the sequence of
1423    stmts that constitute the pattern. In this case it will be:
1424         WIDEN_SUM_EXPR <x_t, sum_0>
1425 
1426    Note: The widening-sum idiom is a widening reduction pattern that is
1427 	 vectorized without preserving all the intermediate results. It
1428          produces only N/2 (widened) results (by summing up pairs of
1429 	 intermediate results) rather than all N results.  Therefore, we
1430 	 cannot allow this pattern when we want to get all the results and in
1431 	 the correct order (as is the case when this computation is in an
1432 	 inner-loop nested in an outer-loop that is being vectorized).  */
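
/* Illustrative example (not part of the original sources): assuming
   "unsigned char" elements and an "int" accumulator, the C loop

     unsigned char x[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += x[i];

   has this shape and can be rewritten to use WIDEN_SUM_EXPR when the
   target supports it.  */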
1433 
1434 static gimple *
1435 vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1436 {
1437   gimple *last_stmt = stmt_vinfo->stmt;
1438   tree oprnd0, oprnd1;
1439   vec_info *vinfo = stmt_vinfo->vinfo;
1440   tree type;
1441   gimple *pattern_stmt;
1442   tree var;
1443 
1444   /* Look for the following pattern
1445           DX = (TYPE) X;
1446           sum_1 = DX + sum_0;
1447      In which DX is at least double the size of X, and sum_1 has been
1448      recognized as a reduction variable.
1449    */
1450 
1451   /* Starting from LAST_STMT, follow the defs of its uses in search
1452      of the above pattern.  */
1453 
1454   if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1455 				       &oprnd0, &oprnd1))
1456     return NULL;
1457 
1458   type = gimple_expr_type (last_stmt);
1459 
1460   /* So far so good.  Since last_stmt was detected as a (summation) reduction,
1461      we know that oprnd1 is the reduction variable (defined by a loop-header
1462      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1463      Left to check that oprnd0 is defined by a cast from type 'type' to type
1464      'TYPE'.  */
1465 
1466   vect_unpromoted_value unprom0;
1467   if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1468       || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1469     return NULL;
1470 
1471   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1472 
1473   if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1474 					type_out))
1475     return NULL;
1476 
1477   var = vect_recog_temp_ssa_var (type, NULL);
1478   pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1479 
1480   return pattern_stmt;
1481 }
1482 
1483 /* Recognize cases in which an operation is performed in one type WTYPE
1484    but could be done more efficiently in a narrower type NTYPE.  For example,
1485    if we have:
1486 
1487      ATYPE a;  // narrower than NTYPE
1488      BTYPE b;  // narrower than NTYPE
1489      WTYPE aw = (WTYPE) a;
1490      WTYPE bw = (WTYPE) b;
1491      WTYPE res = aw + bw;  // only uses of aw and bw
1492 
1493    then it would be more efficient to do:
1494 
1495      NTYPE an = (NTYPE) a;
1496      NTYPE bn = (NTYPE) b;
1497      NTYPE resn = an + bn;
1498      WTYPE res = (WTYPE) resn;
1499 
1500    Other situations include things like:
1501 
1502      ATYPE a;  // NTYPE or narrower
1503      WTYPE aw = (WTYPE) a;
1504      WTYPE res = aw + b;
1505 
1506    when only "(NTYPE) res" is significant.  In that case it's more efficient
1507    to truncate "b" and do the operation on NTYPE instead:
1508 
1509      NTYPE an = (NTYPE) a;
1510      NTYPE bn = (NTYPE) b;  // truncation
1511      NTYPE resn = an + bn;
1512      WTYPE res = (WTYPE) resn;
1513 
1514    All users of "res" should then use "resn" instead, making the final
1515    statement dead (not marked as relevant).  The final statement is still
1516    needed to maintain the type correctness of the IR.
1517 
1518    vect_determine_precisions has already determined the minimum
1519    precision of the operation and the minimum precision required
1520    by users of the result.  */
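
/* Illustrative example (not part of the original sources): in

     unsigned char a[N], b[N], c[N];
     for (int i = 0; i < N; i++)
       c[i] = (a[i] + b[i]) >> 1;    // C promotes the operands to int

   only 9 bits of the intermediate result are ever needed, so the addition
   and shift can be done in a 16-bit type rather than in "int", letting
   each vector hold twice as many elements.  */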
1521 
1522 static gimple *
1523 vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1524 {
1525   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1526   if (!last_stmt)
1527     return NULL;
1528 
1529   /* See whether we have found that this operation can be done on a
1530      narrower type without changing its semantics.  */
1531   unsigned int new_precision = last_stmt_info->operation_precision;
1532   if (!new_precision)
1533     return NULL;
1534 
1535   vec_info *vinfo = last_stmt_info->vinfo;
1536   tree lhs = gimple_assign_lhs (last_stmt);
1537   tree type = TREE_TYPE (lhs);
1538   tree_code code = gimple_assign_rhs_code (last_stmt);
1539 
1540   /* Keep the first operand of a COND_EXPR as-is: only the other two
1541      operands are interesting.  */
1542   unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1543 
1544   /* Check the operands.  */
1545   unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1546   auto_vec <vect_unpromoted_value, 3> unprom (nops);
1547   unprom.quick_grow (nops);
1548   unsigned int min_precision = 0;
1549   bool single_use_p = false;
1550   for (unsigned int i = 0; i < nops; ++i)
1551     {
1552       tree op = gimple_op (last_stmt, first_op + i);
1553       if (TREE_CODE (op) == INTEGER_CST)
1554 	unprom[i].set_op (op, vect_constant_def);
1555       else if (TREE_CODE (op) == SSA_NAME)
1556 	{
1557 	  bool op_single_use_p = true;
1558 	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1559 						     &op_single_use_p))
1560 	    return NULL;
1561 	  /* If:
1562 
1563 	     (1) N bits of the result are needed;
1564 	     (2) all inputs are widened from M<N bits; and
1565 	     (3) one operand OP is a single-use SSA name
1566 
1567 	     we can shift the M->N widening from OP to the output
1568 	     without changing the number or type of extensions involved.
1569 	     This then reduces the number of copies of STMT_INFO.
1570 
1571 	     If instead of (3) more than one operand is a single-use SSA name,
1572 	     shifting the extension to the output is even more of a win.
1573 
1574 	     If instead:
1575 
1576 	     (1) N bits of the result are needed;
1577 	     (2) one operand OP2 is widened from M2<N bits;
1578 	     (3) another operand OP1 is widened from M1<M2 bits; and
1579 	     (4) both OP1 and OP2 are single-use
1580 
1581 	     the choice is between:
1582 
1583 	     (a) truncating OP2 to M1, doing the operation on M1,
1584 		 and then widening the result to N
1585 
1586 	     (b) widening OP1 to M2, doing the operation on M2, and then
1587 		 widening the result to N
1588 
1589 	     Both shift the M2->N widening of the inputs to the output.
1590 	     (a) additionally shifts the M1->M2 widening to the output;
1591 	     it requires fewer copies of STMT_INFO but requires an extra
1592 	     M2->M1 truncation.
1593 
1594 	     Which is better will depend on the complexity and cost of
1595 	     STMT_INFO, which is hard to predict at this stage.  However,
1596 	     a clear tie-breaker in favor of (b) is the fact that the
1597 	     truncation in (a) increases the length of the operation chain.
1598 
1599 	     If instead of (4) only one of OP1 or OP2 is single-use,
1600 	     (b) is still a win over doing the operation in N bits:
1601 	     it still shifts the M2->N widening on the single-use operand
1602 	     to the output and reduces the number of STMT_INFO copies.
1603 
1604 	     If neither operand is single-use then operating on fewer than
1605 	     N bits might lead to more extensions overall.  Whether it does
1606 	     or not depends on global information about the vectorization
1607 	     region, and whether that's a good trade-off would again
1608 	     depend on the complexity and cost of the statements involved,
1609 	     as well as things like register pressure that are not normally
1610 	     modelled at this stage.  We therefore ignore these cases
1611 	     and just optimize the clear single-use wins above.
1612 
1613 	     Thus we take the maximum precision of the unpromoted operands
1614 	     and record whether any operand is single-use.  */
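	  /* A hypothetical instance of the (a)/(b) choice: if OP1 is
	     extended from 8 bits, OP2 from 16 bits and N is 32, then
	     (a) truncates OP2 to 8 bits and operates there, while (b)
	     extends OP1 to 16 bits; both defer the 16->32 extension to
	     the output, but (a) adds a 16->8 truncation to the chain.  */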
1615 	  if (unprom[i].dt == vect_internal_def)
1616 	    {
1617 	      min_precision = MAX (min_precision,
1618 				   TYPE_PRECISION (unprom[i].type));
1619 	      single_use_p |= op_single_use_p;
1620 	    }
1621 	}
1622     }
1623 
1624   /* Although the operation could be done in operation_precision, we have
1625      to balance that against introducing extra truncations or extensions.
1626      Calculate the minimum precision that can be handled efficiently.
1627 
1628      The loop above determined that the operation could be handled
1629      efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1630      extension from the inputs to the output without introducing more
1631      instructions, and would reduce the number of instructions required
1632      for STMT_INFO itself.
1633 
1634      vect_determine_precisions has also determined that the result only
1635      needs min_output_precision bits.  Truncating by a factor of N
1636      requires a tree of N - 1 instructions, so if TYPE is N times wider
1637      than min_output_precision, doing the operation in TYPE and truncating
1638      the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1639      In contrast:
1640 
1641      - truncating the input to a unary operation and doing the operation
1642        in the new type requires at most N - 1 + 1 = N instructions per
1643        output vector
1644 
1645      - doing the same for a binary operation requires at most
1646        (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1647 
1648      Both unary and binary operations require fewer instructions than
1649      this if the operands were extended from a suitable truncated form.
1650      Thus there is usually nothing to lose by doing operations in
1651      min_output_precision bits, but there can be something to gain.  */
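  /* For instance (widths assumed for illustration): if TYPE is 32 bits
     and min_output_precision is 8, then N = 4, so operating in TYPE and
     truncating costs 4 + 3 = 7 statements per output vector, while
     truncating the inputs first costs at most 3 + 1 = 4 for a unary
     operation and 3 * 2 + 1 = 7 for a binary one, and usually fewer when
     the inputs were themselves extended from a narrower form.  */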
1652   if (!single_use_p)
1653     min_precision = last_stmt_info->min_output_precision;
1654   else
1655     min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1656 
1657   /* Apply the minimum efficient precision we just calculated.  */
1658   if (new_precision < min_precision)
1659     new_precision = min_precision;
1660   if (new_precision >= TYPE_PRECISION (type))
1661     return NULL;
1662 
1663   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1664 
1665   *type_out = get_vectype_for_scalar_type (type);
1666   if (!*type_out)
1667     return NULL;
1668 
1669   /* We've found a viable pattern.  Get the new type of the operation.  */
1670   bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1671   tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1672 
1673   /* If we're truncating an operation, we need to make sure that we
1674      don't introduce new undefined overflow.  The codes tested here are
1675      a subset of those accepted by vect_truncatable_operation_p.  */
1676   tree op_type = new_type;
1677   if (TYPE_OVERFLOW_UNDEFINED (new_type)
1678       && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1679     op_type = build_nonstandard_integer_type (new_precision, true);
1680 
1681   /* We specifically don't check here whether the target supports the
1682      new operation, since it might be something that a later pattern
1683      wants to rewrite anyway.  If targets have a minimum element size
1684      for some optabs, we should pattern-match smaller ops to larger ops
1685      where beneficial.  */
1686   tree new_vectype = get_vectype_for_scalar_type (new_type);
1687   tree op_vectype = get_vectype_for_scalar_type (op_type);
1688   if (!new_vectype || !op_vectype)
1689     return NULL;
1690 
1691   if (dump_enabled_p ())
1692     dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1693 		     type, new_type);
1694 
1695   /* Calculate the rhs operands for an operation on OP_TYPE.  */
1696   tree ops[3] = {};
1697   for (unsigned int i = 1; i < first_op; ++i)
1698     ops[i - 1] = gimple_op (last_stmt, i);
1699   vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1700 		       op_type, &unprom[0], op_vectype);
1701 
1702   /* Use the operation to produce a result of type OP_TYPE.  */
1703   tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
1704   gimple *pattern_stmt = gimple_build_assign (new_var, code,
1705 					      ops[0], ops[1], ops[2]);
1706   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1707 
1708   if (dump_enabled_p ())
1709     dump_printf_loc (MSG_NOTE, vect_location,
1710 		     "created pattern stmt: %G", pattern_stmt);
1711 
1712   /* Convert back to the original signedness, if OP_TYPE is different
1713      from NEW_TYPE.  */
1714   if (op_type != new_type)
1715     pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1716 					pattern_stmt, op_vectype);
1717 
1718   /* Promote the result to the original type.  */
1719   pattern_stmt = vect_convert_output (last_stmt_info, type,
1720 				      pattern_stmt, new_vectype);
1721 
1722   return pattern_stmt;
1723 }
1724 
1725 /* Recognize the patterns:
1726 
1727 	    ATYPE a;  // narrower than TYPE
1728 	    BTYPE b;  // narrower than TYPE
1729 	(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1730      or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1731 
1732    where only the bottom half of avg is used.  Try to transform them into:
1733 
1734 	(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1735      or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1736 
1737   followed by:
1738 
1739 	    TYPE avg = (TYPE) avg';
1740 
1741   where NTYPE is no wider than half of TYPE.  Since only the bottom half
1742   of avg is used, all or part of the cast of avg' should become redundant.  */
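/* A typical C-level instance (types assumed for illustration):

     unsigned char a, b;
     unsigned char avg = (unsigned char) (((int) a + (int) b + 1) >> 1);

   matches form (2) and can become IFN_AVG_CEIL on unsigned char elements,
   with the trailing widening cast expected to become (partly) dead.  */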
1743 
1744 static gimple *
1745 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1746 {
1747   /* Check for a shift right by one bit.  */
1748   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1749   vec_info *vinfo = last_stmt_info->vinfo;
1750   if (!last_stmt
1751       || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1752       || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1753     return NULL;
1754 
1755   /* Check that the shift result is wider than the users of the
1756      result need (i.e. that narrowing would be a natural choice).  */
1757   tree lhs = gimple_assign_lhs (last_stmt);
1758   tree type = TREE_TYPE (lhs);
1759   unsigned int target_precision
1760     = vect_element_precision (last_stmt_info->min_output_precision);
1761   if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1762     return NULL;
1763 
1764   /* Look through any change in sign on the shift input.  */
1765   tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1766   vect_unpromoted_value unprom_plus;
1767   rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1768 						     &unprom_plus);
1769   if (!rshift_rhs
1770       || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1771     return NULL;
1772 
1773   /* Get the definition of the shift input.  */
1774   stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1775   if (!plus_stmt_info)
1776     return NULL;
1777 
1778   /* Check whether the shift input can be seen as a tree of additions on
1779      2 or 3 widened inputs.
1780 
1781      Note that the pattern should be a win even if the result of one or
1782      more additions is reused elsewhere: if the pattern matches, we'd be
1783      replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s.  */
1784   internal_fn ifn = IFN_AVG_FLOOR;
1785   vect_unpromoted_value unprom[3];
1786   tree new_type;
1787   unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1788 					    PLUS_EXPR, false, 3,
1789 					    unprom, &new_type);
1790   if (nops == 0)
1791     return NULL;
1792   if (nops == 3)
1793     {
1794       /* Check that one operand is 1.  */
1795       unsigned int i;
1796       for (i = 0; i < 3; ++i)
1797 	if (integer_onep (unprom[i].op))
1798 	  break;
1799       if (i == 3)
1800 	return NULL;
1801       /* Throw away the 1 operand and keep the other two.  */
1802       if (i < 2)
1803 	unprom[i] = unprom[2];
1804       ifn = IFN_AVG_CEIL;
1805     }
1806 
1807   vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1808 
1809   /* We know that:
1810 
1811      (a) the operation can be viewed as:
1812 
1813 	   TYPE widened0 = (TYPE) UNPROM[0];
1814 	   TYPE widened1 = (TYPE) UNPROM[1];
1815 	   TYPE tmp1 = widened0 + widened1 {+ 1};
1816 	   TYPE tmp2 = tmp1 >> 1;   // LAST_STMT_INFO
1817 
1818      (b) the first two statements are equivalent to:
1819 
1820 	   TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1821 	   TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1822 
1823      (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1824 	 where sensible;
1825 
1826      (d) all the operations can be performed correctly at twice the width of
1827 	 NEW_TYPE, due to the nature of the average operation; and
1828 
1829      (e) users of the result of the right shift need only TARGET_PRECISION
1830 	 bits, where TARGET_PRECISION is no more than half of TYPE's
1831 	 precision.
1832 
1833      Under these circumstances, the only situation in which NEW_TYPE
1834      could be narrower than TARGET_PRECISION is if widened0, widened1
1835      and an addition result are all used more than once.  Thus we can
1836      treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1837      as "free", whereas widening the result of the average instruction
1838      from NEW_TYPE to TARGET_PRECISION would be a new operation.  It's
1839      therefore better not to go narrower than TARGET_PRECISION.  */
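  /* E.g. (widths assumed): if UNPROM[] are 8-bit values but the users of
     the shift need 16 bits, use a 16-bit NEW_TYPE here; the widening of
     the inputs stays free, and we avoid having to widen the IFN_AVG
     result afterwards.  */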
1840   if (TYPE_PRECISION (new_type) < target_precision)
1841     new_type = build_nonstandard_integer_type (target_precision,
1842 					       TYPE_UNSIGNED (new_type));
1843 
1844   /* Check for target support.  */
1845   tree new_vectype = get_vectype_for_scalar_type (new_type);
1846   if (!new_vectype
1847       || !direct_internal_fn_supported_p (ifn, new_vectype,
1848 					  OPTIMIZE_FOR_SPEED))
1849     return NULL;
1850 
1851   /* The IR requires a valid vector type for the cast result, even though
1852      it's likely to be discarded.  */
1853   *type_out = get_vectype_for_scalar_type (type);
1854   if (!*type_out)
1855     return NULL;
1856 
1857   /* Generate the IFN_AVG* call.  */
1858   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1859   tree new_ops[2];
1860   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1861 		       unprom, new_vectype);
1862   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1863 						    new_ops[1]);
1864   gimple_call_set_lhs (average_stmt, new_var);
1865   gimple_set_location (average_stmt, gimple_location (last_stmt));
1866 
1867   if (dump_enabled_p ())
1868     dump_printf_loc (MSG_NOTE, vect_location,
1869 		     "created pattern stmt: %G", average_stmt);
1870 
1871   return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1872 }
1873 
1874 /* Recognize cases in which the input to a cast is wider than its
1875    output, and the input is fed by a widening operation.  Fold this
1876    by removing the unnecessary intermediate widening.  E.g.:
1877 
1878      unsigned char a;
1879      unsigned int b = (unsigned int) a;
1880      unsigned short c = (unsigned short) b;
1881 
1882    -->
1883 
1884      unsigned short c = (unsigned short) a;
1885 
1886    Although this is rare in input IR, it is an expected side-effect
1887    of the over-widening pattern above.
1888 
1889    This is also beneficial for integer-to-float conversions, if the
1890    widened integer has more bits than the float, and if the unwidened
1891    input doesn't.  */
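/* A sketch of the conversion case (assuming a 32-bit float):

     unsigned char a;
     unsigned long long b = a;
     float c = (float) b;

   would otherwise need 64-bit vector lanes for b, whereas converting a
   directly yields the same value of c from a much narrower input.  */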
1892 
1893 static gimple *
1894 vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1895 {
1896   /* Check for a cast, including an integer-to-float conversion.  */
1897   gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1898   if (!last_stmt)
1899     return NULL;
1900   tree_code code = gimple_assign_rhs_code (last_stmt);
1901   if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1902     return NULL;
1903 
1904   /* Make sure that the lhs is a scalar with a natural bitsize.  */
1905   tree lhs = gimple_assign_lhs (last_stmt);
1906   if (!lhs)
1907     return NULL;
1908   tree lhs_type = TREE_TYPE (lhs);
1909   scalar_mode lhs_mode;
1910   if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1911       || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1912     return NULL;
1913 
1914   /* Check for a narrowing operation (from a vector point of view).  */
1915   tree rhs = gimple_assign_rhs1 (last_stmt);
1916   tree rhs_type = TREE_TYPE (rhs);
1917   if (!INTEGRAL_TYPE_P (rhs_type)
1918       || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1919       || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1920     return NULL;
1921 
1922   /* Try to find an unpromoted input.  */
1923   vec_info *vinfo = last_stmt_info->vinfo;
1924   vect_unpromoted_value unprom;
1925   if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1926       || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1927     return NULL;
1928 
1929   /* If the bits above RHS_TYPE matter, make sure that they're the
1930      same when extending from UNPROM as they are when extending from RHS.  */
1931   if (!INTEGRAL_TYPE_P (lhs_type)
1932       && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1933     return NULL;
1934 
1935   /* We can get the same result by casting UNPROM directly, to avoid
1936      the unnecessary widening and narrowing.  */
1937   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1938 
1939   *type_out = get_vectype_for_scalar_type (lhs_type);
1940   if (!*type_out)
1941     return NULL;
1942 
1943   tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1944   gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1945   gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1946 
1947   return pattern_stmt;
1948 }
1949 
1950 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1951    to WIDEN_LSHIFT_EXPR.  See vect_recog_widen_op_pattern for details.  */
1952 
1953 static gimple *
1954 vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1955 {
1956   return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
1957 				      WIDEN_LSHIFT_EXPR, true,
1958 				      "vect_recog_widen_shift_pattern");
1959 }
1960 
1961 /* Detect a rotate pattern that wouldn't be otherwise vectorized:
1962 
1963    type a_t, b_t, c_t;
1964 
1965    S0 a_t = b_t r<< c_t;
1966 
1967   Input/Output:
1968 
1969   * STMT_VINFO: The stmt from which the pattern search begins,
1970     i.e. the shift/rotate stmt.  The original stmt (S0) is replaced
1971     with a sequence:
1972 
1973    S1 d_t = -c_t;
1974    S2 e_t = d_t & (B - 1);
1975    S3 f_t = b_t << c_t;
1976    S4 g_t = b_t >> e_t;
1977    S0 a_t = f_t | g_t;
1978 
1979     where B is element bitsize of type.
1980 
1981   Output:
1982 
1983   * TYPE_OUT: The type of the output of this pattern.
1984 
1985   * Return value: A new stmt that will be used to replace the rotate
1986     S0 stmt.  */
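/* For example, with an assumed 8-bit element type, a_t = b_t r<< c_t is
   emitted as:

     d_t = -c_t;
     e_t = d_t & 7;
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;

   which stays well defined for c_t == 0, unlike the more obvious
   b_t >> (8 - c_t).  */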
1987 
1988 static gimple *
1989 vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
1990 {
1991   gimple *last_stmt = stmt_vinfo->stmt;
1992   tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1993   gimple *pattern_stmt, *def_stmt;
1994   enum tree_code rhs_code;
1995   vec_info *vinfo = stmt_vinfo->vinfo;
1996   enum vect_def_type dt;
1997   optab optab1, optab2;
1998   edge ext_def = NULL;
1999 
2000   if (!is_gimple_assign (last_stmt))
2001     return NULL;
2002 
2003   rhs_code = gimple_assign_rhs_code (last_stmt);
2004   switch (rhs_code)
2005     {
2006     case LROTATE_EXPR:
2007     case RROTATE_EXPR:
2008       break;
2009     default:
2010       return NULL;
2011     }
2012 
2013   lhs = gimple_assign_lhs (last_stmt);
2014   oprnd0 = gimple_assign_rhs1 (last_stmt);
2015   type = TREE_TYPE (oprnd0);
2016   oprnd1 = gimple_assign_rhs2 (last_stmt);
2017   if (TREE_CODE (oprnd0) != SSA_NAME
2018       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2019       || !INTEGRAL_TYPE_P (type)
2020       || !TYPE_UNSIGNED (type))
2021     return NULL;
2022 
2023   stmt_vec_info def_stmt_info;
2024   if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2025     return NULL;
2026 
2027   if (dt != vect_internal_def
2028       && dt != vect_constant_def
2029       && dt != vect_external_def)
2030     return NULL;
2031 
2032   vectype = get_vectype_for_scalar_type (type);
2033   if (vectype == NULL_TREE)
2034     return NULL;
2035 
2036   /* If vector/vector or vector/scalar rotate is supported by the target,
2037      don't do anything here.  */
2038   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2039   if (optab1
2040       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2041     return NULL;
2042 
2043   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2044     {
2045       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2046       if (optab2
2047 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2048 	return NULL;
2049     }
2050 
2051   /* If vector/vector or vector/scalar shifts aren't supported by the target,
2052      don't do anything here either.  */
2053   optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2054   optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2055   if (!optab1
2056       || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2057       || !optab2
2058       || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2059     {
2060       if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2061 	return NULL;
2062       optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2063       optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2064       if (!optab1
2065 	  || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2066 	  || !optab2
2067 	  || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2068 	return NULL;
2069     }
2070 
2071   *type_out = vectype;
2072 
2073   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2074     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2075 
2076   def = NULL_TREE;
2077   scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2078   if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2079     def = oprnd1;
2080   else if (def_stmt && gimple_assign_cast_p (def_stmt))
2081     {
2082       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2083       if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2084 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2085 	     == TYPE_PRECISION (type))
2086 	def = rhs1;
2087     }
2088 
2089   if (def == NULL_TREE)
2090     {
2091       def = vect_recog_temp_ssa_var (type, NULL);
2092       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2093       append_pattern_def_seq (stmt_vinfo, def_stmt);
2094     }
2095   stype = TREE_TYPE (def);
2096   scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
2097 
2098   if (TREE_CODE (def) == INTEGER_CST)
2099     {
2100       if (!tree_fits_uhwi_p (def)
2101 	  || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2102 	  || integer_zerop (def))
2103 	return NULL;
2104       def2 = build_int_cst (stype,
2105 			    GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2106     }
2107   else
2108     {
2109       tree vecstype = get_vectype_for_scalar_type (stype);
2110 
2111       if (vecstype == NULL_TREE)
2112 	return NULL;
2113       def2 = vect_recog_temp_ssa_var (stype, NULL);
2114       def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2115       if (ext_def)
2116 	{
2117 	  basic_block new_bb
2118 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2119 	  gcc_assert (!new_bb);
2120 	}
2121       else
2122 	append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2123 
2124       def2 = vect_recog_temp_ssa_var (stype, NULL);
2125       tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
2126       def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2127 				      gimple_assign_lhs (def_stmt), mask);
2128       if (ext_def)
2129 	{
2130 	  basic_block new_bb
2131 	    = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2132 	  gcc_assert (!new_bb);
2133 	}
2134       else
2135 	append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2136     }
2137 
2138   var1 = vect_recog_temp_ssa_var (type, NULL);
2139   def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2140 					? LSHIFT_EXPR : RSHIFT_EXPR,
2141 				  oprnd0, def);
2142   append_pattern_def_seq (stmt_vinfo, def_stmt);
2143 
2144   var2 = vect_recog_temp_ssa_var (type, NULL);
2145   def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2146 					? RSHIFT_EXPR : LSHIFT_EXPR,
2147 				  oprnd0, def2);
2148   append_pattern_def_seq (stmt_vinfo, def_stmt);
2149 
2150   /* Pattern detected.  */
2151   vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2152 
2153   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2154   var = vect_recog_temp_ssa_var (type, NULL);
2155   pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2156 
2157   return pattern_stmt;
2158 }
2159 
2160 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2161    vectorized:
2162 
2163    type a_t;
2164    TYPE b_T, res_T;
2165 
2166    S1 a_t = ;
2167    S2 b_T = ;
2168    S3 res_T = b_T op a_t;
2169 
2170  where type 'TYPE' is a type with a different size from 'type',
2171   and op is <<, >> or rotate.
2172 
2173   Also detect cases:
2174 
2175    type a_t;
2176    TYPE b_T, c_T, res_T;
2177 
2178    S0 c_T = ;
2179    S1 a_t = (type) c_T;
2180    S2 b_T = ;
2181    S3 res_T = b_T op a_t;
2182 
2183   Input/Output:
2184 
2185   * STMT_VINFO: The stmt from which the pattern search begins,
2186     i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
2187     with a shift/rotate which has the same type on both operands; in the
2188     second case just b_T op c_T, in the first case with an added cast
2189     from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2190 
2191   Output:
2192 
2193   * TYPE_OUT: The type of the output of this pattern.
2194 
2195   * Return value: A new stmt that will be used to replace the shift/rotate
2196     S3 stmt.  */
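/* For example (types assumed for illustration):

     int b;
     unsigned char a;
     int res = b << a;

   Here the shift amount has a different mode from the shifted value, so a
   cast of a_t to the type of b_T (or a reuse of c_T, when a_t came from
   such a cast) is added so that both vector operands share one vector
   type.  */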
2197 
2198 static gimple *
2199 vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2200 					tree *type_out)
2201 {
2202   gimple *last_stmt = stmt_vinfo->stmt;
2203   tree oprnd0, oprnd1, lhs, var;
2204   gimple *pattern_stmt;
2205   enum tree_code rhs_code;
2206   vec_info *vinfo = stmt_vinfo->vinfo;
2207 
2208   if (!is_gimple_assign (last_stmt))
2209     return NULL;
2210 
2211   rhs_code = gimple_assign_rhs_code (last_stmt);
2212   switch (rhs_code)
2213     {
2214     case LSHIFT_EXPR:
2215     case RSHIFT_EXPR:
2216     case LROTATE_EXPR:
2217     case RROTATE_EXPR:
2218       break;
2219     default:
2220       return NULL;
2221     }
2222 
2223   lhs = gimple_assign_lhs (last_stmt);
2224   oprnd0 = gimple_assign_rhs1 (last_stmt);
2225   oprnd1 = gimple_assign_rhs2 (last_stmt);
2226   if (TREE_CODE (oprnd0) != SSA_NAME
2227       || TREE_CODE (oprnd1) != SSA_NAME
2228       || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2229       || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2230       || TYPE_PRECISION (TREE_TYPE (lhs))
2231 	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2232     return NULL;
2233 
2234   stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2235   if (!def_vinfo)
2236     return NULL;
2237 
2238   *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2239   if (*type_out == NULL_TREE)
2240     return NULL;
2241 
2242   tree def = NULL_TREE;
2243   gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2244   if (def_stmt && gimple_assign_cast_p (def_stmt))
2245     {
2246       tree rhs1 = gimple_assign_rhs1 (def_stmt);
2247       if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2248 	  && TYPE_PRECISION (TREE_TYPE (rhs1))
2249 	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2250 	{
2251 	  if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2252 	      >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2253 	    def = rhs1;
2254 	  else
2255 	    {
2256 	      tree mask
2257 		= build_low_bits_mask (TREE_TYPE (rhs1),
2258 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
2259 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2260 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2261 	      tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2262 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
2263 	    }
2264 	}
2265     }
2266 
2267   if (def == NULL_TREE)
2268     {
2269       def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2270       def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2271       append_pattern_def_seq (stmt_vinfo, def_stmt);
2272     }
2273 
2274   /* Pattern detected.  */
2275   vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2276 
2277   /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
2278   var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2279   pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2280 
2281   return pattern_stmt;
2282 }
2283 
2284 /* Return true iff the target has a vector optab implementing the operation
2285    CODE on type VECTYPE.  */
2286 
2287 static bool
2288 target_has_vecop_for_code (tree_code code, tree vectype)
2289 {
2290   optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2291   return voptab
2292 	 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2293 }
2294 
2295 /* Verify that the target has optabs for VECTYPE to perform all the steps
2296    needed by the multiplication-by-immediate synthesis algorithm described by
2297    ALG and VAR.  If SYNTH_SHIFT_P is true, also ensure that vector addition is
2298    available.  Return true iff the target supports all the steps.  */
2299 
2300 static bool
2301 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2302 				 tree vectype, bool synth_shift_p)
2303 {
2304   if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2305     return false;
2306 
2307   bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2308   bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2309 
2310   if (var == negate_variant
2311       && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2312     return false;
2313 
2314   /* If we must synthesize shifts with additions make sure that vector
2315      addition is available.  */
2316   if ((var == add_variant || synth_shift_p) && !supports_vplus)
2317     return false;
2318 
2319   for (int i = 1; i < alg->ops; i++)
2320     {
2321       switch (alg->op[i])
2322 	{
2323 	case alg_shift:
2324 	  break;
2325 	case alg_add_t_m2:
2326 	case alg_add_t2_m:
2327 	case alg_add_factor:
2328 	  if (!supports_vplus)
2329 	    return false;
2330 	  break;
2331 	case alg_sub_t_m2:
2332 	case alg_sub_t2_m:
2333 	case alg_sub_factor:
2334 	  if (!supports_vminus)
2335 	    return false;
2336 	  break;
2337 	case alg_unknown:
2338 	case alg_m:
2339 	case alg_zero:
2340 	case alg_impossible:
2341 	  return false;
2342 	default:
2343 	  gcc_unreachable ();
2344 	}
2345     }
2346 
2347   return true;
2348 }
2349 
2350 /* Synthesize a left shift of OP by AMNT bits using a series of additions,
2351    putting the final result in DEST.  Append all statements but the last into
2352    VINFO.  Return the last statement.  */
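/* For instance, a shift by 3 becomes three doublings (a sketch of the
   generated sequence; the temporary names are invented):

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   with the first two statements appended to the pattern sequence and the
   last one returned to the caller.  */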
2353 
2354 static gimple *
2355 synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2356 			   stmt_vec_info vinfo)
2357 {
2358   HOST_WIDE_INT i;
2359   tree itype = TREE_TYPE (op);
2360   tree prev_res = op;
2361   gcc_assert (amnt >= 0);
2362   for (i = 0; i < amnt; i++)
2363     {
2364       tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2365 		      : dest;
2366       gimple *stmt
2367         = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2368       prev_res = tmp_var;
2369       if (i < amnt - 1)
2370 	append_pattern_def_seq (vinfo, stmt);
2371       else
2372 	return stmt;
2373     }
2374   gcc_unreachable ();
2375   return NULL;
2376 }
2377 
2378 /* Helper for vect_synth_mult_by_constant.  Apply a binary operation
2379    CODE to operands OP1 and OP2, creating a new temporary SSA var in
2380    the process if necessary.  Append the resulting assignment statements
2381    to the sequence in STMT_VINFO.  Return the SSA variable that holds the
2382    result of the binary operation.  If SYNTH_SHIFT_P is true synthesize
2383    left shifts using additions.  */
2384 
2385 static tree
2386 apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2387 			     stmt_vec_info stmt_vinfo, bool synth_shift_p)
2388 {
2389   if (integer_zerop (op2)
2390       && (code == LSHIFT_EXPR
2391 	  || code == PLUS_EXPR))
2392     {
2393       gcc_assert (TREE_CODE (op1) == SSA_NAME);
2394       return op1;
2395     }
2396 
2397   gimple *stmt;
2398   tree itype = TREE_TYPE (op1);
2399   tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2400 
2401   if (code == LSHIFT_EXPR
2402       && synth_shift_p)
2403     {
2404       stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2405 					 stmt_vinfo);
2406       append_pattern_def_seq (stmt_vinfo, stmt);
2407       return tmp_var;
2408     }
2409 
2410   stmt = gimple_build_assign (tmp_var, code, op1, op2);
2411   append_pattern_def_seq (stmt_vinfo, stmt);
2412   return tmp_var;
2413 }
2414 
2415 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2416    and simple arithmetic operations to be vectorized.  Record the statements
2417    produced in STMT_VINFO and return the last statement in the sequence or
2418    NULL if it's not possible to synthesize such a multiplication.
2419    This function mirrors the behavior of expand_mult_const in expmed.c but
2420    works on tree-ssa form.  */
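/* For example, depending on what choose_mult_variant picks for the target,
   a multiplication by 9 might be synthesized as:

     t1 = op << 3;
     res = t1 + op;

   with the shift itself expanded into additions when vector shifts are
   unavailable (SYNTH_SHIFT_P).  */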
2421 
2422 static gimple *
2423 vect_synth_mult_by_constant (tree op, tree val,
2424 			     stmt_vec_info stmt_vinfo)
2425 {
2426   tree itype = TREE_TYPE (op);
2427   machine_mode mode = TYPE_MODE (itype);
2428   struct algorithm alg;
2429   mult_variant variant;
2430   if (!tree_fits_shwi_p (val))
2431     return NULL;
2432 
2433   /* Multiplication synthesis by shifts, adds and subs can introduce
2434      signed overflow where the original operation didn't.  Perform the
2435      operations on an unsigned type and cast back to avoid this.
2436      In the future we may want to relax this for synthesis algorithms
2437      that we can prove do not cause unexpected overflow.  */
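  /* For example, x * 7 synthesized as (x << 3) - x computes x * 8 as an
     intermediate value, which can overflow a signed type even when x * 7
     itself would not; hence the detour through the unsigned type.  */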
2438   bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2439 
2440   tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
2441 
2442   /* Targets that don't support vector shifts but support vector additions
2443      can synthesize shifts that way.  */
2444   bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2445 
2446   HOST_WIDE_INT hwval = tree_to_shwi (val);
2447   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2448      The vectorizer's benefit analysis will decide whether it's beneficial
2449      to do this.  */
2450   bool possible = choose_mult_variant (mode, hwval, &alg,
2451 					&variant, MAX_COST);
2452   if (!possible)
2453     return NULL;
2454 
2455   tree vectype = get_vectype_for_scalar_type (multtype);
2456 
2457   if (!vectype
2458       || !target_supports_mult_synth_alg (&alg, variant,
2459 					   vectype, synth_shift_p))
2460     return NULL;
2461 
2462   tree accumulator;
2463 
2464   /* Clear out the sequence of statements so we can populate it below.  */
2465   gimple *stmt = NULL;
2466 
2467   if (cast_to_unsigned_p)
2468     {
2469       tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2470       stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2471       append_pattern_def_seq (stmt_vinfo, stmt);
2472       op = tmp_op;
2473     }
2474 
2475   if (alg.op[0] == alg_zero)
2476     accumulator = build_int_cst (multtype, 0);
2477   else
2478     accumulator = op;
2479 
2480   bool needs_fixup = (variant == negate_variant)
2481 		      || (variant == add_variant);
2482 
2483   for (int i = 1; i < alg.ops; i++)
2484     {
2485       tree shft_log = build_int_cst (multtype, alg.log[i]);
2486       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2487       tree tmp_var = NULL_TREE;
2488 
2489       switch (alg.op[i])
2490 	{
2491 	case alg_shift:
2492 	  if (synth_shift_p)
2493 	    stmt
2494 	      = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2495 					    stmt_vinfo);
2496 	  else
2497 	    stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2498 					 shft_log);
2499 	  break;
2500 	case alg_add_t_m2:
2501 	  tmp_var
2502 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2503 					    stmt_vinfo, synth_shift_p);
2504 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2505 				       tmp_var);
2506 	  break;
2507 	case alg_sub_t_m2:
2508 	  tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2509 						  shft_log, stmt_vinfo,
2510 						  synth_shift_p);
2511 	  /* In some algorithms the first step involves zeroing the
2512 	     accumulator.  If subtracting from such an accumulator
2513 	     just emit the negation directly.  */
2514 	  if (integer_zerop (accumulator))
2515 	    stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2516 	  else
2517 	    stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2518 					tmp_var);
2519 	  break;
2520 	case alg_add_t2_m:
2521 	  tmp_var
2522 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2523 					   stmt_vinfo, synth_shift_p);
2524 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2525 	  break;
2526 	case alg_sub_t2_m:
2527 	  tmp_var
2528 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2529 					   stmt_vinfo, synth_shift_p);
2530 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2531 	  break;
2532 	case alg_add_factor:
2533 	  tmp_var
2534 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2535 					    stmt_vinfo, synth_shift_p);
2536 	  stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2537 				       tmp_var);
2538 	  break;
2539 	case alg_sub_factor:
2540 	  tmp_var
2541 	    = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2542 					   stmt_vinfo, synth_shift_p);
2543 	  stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2544 				      accumulator);
2545 	  break;
2546 	default:
2547 	  gcc_unreachable ();
2548 	}
2549       /* We don't want to append the last stmt in the sequence to stmt_vinfo
2550 	 but rather return it directly.  */
2551 
2552       if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2553 	append_pattern_def_seq (stmt_vinfo, stmt);
2554       accumulator = accum_tmp;
2555     }
2556   if (variant == negate_variant)
2557     {
2558       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2559       stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2560       accumulator = accum_tmp;
2561       if (cast_to_unsigned_p)
2562 	append_pattern_def_seq (stmt_vinfo, stmt);
2563     }
2564   else if (variant == add_variant)
2565     {
2566       tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2567       stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2568       accumulator = accum_tmp;
2569       if (cast_to_unsigned_p)
2570 	append_pattern_def_seq (stmt_vinfo, stmt);
2571     }
2572   /* Move back to a signed type if needed.  */
2573   if (cast_to_unsigned_p)
2574     {
2575       tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2576       stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2577     }
2578 
2579   return stmt;
2580 }
2581 
2582 /* Detect multiplication by constant and convert it into a sequence of
2583    shifts, additions, subtractions and negations.  We reuse the
2584    choose_mult_variant algorithms from expmed.c.
2585 
2586    Input/Output:
2587 
2588    STMT_VINFO: The stmt from which the pattern search begins,
2589    i.e. the mult stmt.
2590 
2591  Output:
2592 
2593   * TYPE_OUT: The type of the output of this pattern.
2594 
2595   * Return value: A new stmt that will be used to replace
2596     the multiplication.  */
2597 
2598 static gimple *
2599 vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2600 {
2601   gimple *last_stmt = stmt_vinfo->stmt;
2602   tree oprnd0, oprnd1, vectype, itype;
2603   gimple *pattern_stmt;
2604 
2605   if (!is_gimple_assign (last_stmt))
2606     return NULL;
2607 
2608   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2609     return NULL;
2610 
2611   oprnd0 = gimple_assign_rhs1 (last_stmt);
2612   oprnd1 = gimple_assign_rhs2 (last_stmt);
2613   itype = TREE_TYPE (oprnd0);
2614 
2615   if (TREE_CODE (oprnd0) != SSA_NAME
2616       || TREE_CODE (oprnd1) != INTEGER_CST
2617       || !INTEGRAL_TYPE_P (itype)
2618       || !type_has_mode_precision_p (itype))
2619     return NULL;
2620 
2621   vectype = get_vectype_for_scalar_type (itype);
2622   if (vectype == NULL_TREE)
2623     return NULL;
2624 
2625   /* If the target can handle vectorized multiplication natively,
2626      don't attempt to optimize this.  */
2627   optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2628   if (mul_optab != unknown_optab)
2629     {
2630       machine_mode vec_mode = TYPE_MODE (vectype);
2631       int icode = (int) optab_handler (mul_optab, vec_mode);
2632       if (icode != CODE_FOR_nothing)
2633        return NULL;
2634     }
2635 
2636   pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2637   if (!pattern_stmt)
2638     return NULL;
2639 
2640   /* Pattern detected.  */
2641   vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
2642 
2643   *type_out = vectype;
2644 
2645   return pattern_stmt;
2646 }
2647 
2648 /* Detect a signed division by a constant that wouldn't be
2649    otherwise vectorized:
2650 
2651    type a_t, b_t;
2652 
2653    S1 a_t = b_t / N;
2654 
2655   where type 'type' is an integral type and N is a constant.
2656 
2657   Similarly handle modulo by a constant:
2658 
2659    S4 a_t = b_t % N;
2660 
2661   Input/Output:
2662 
2663   * STMT_VINFO: The stmt from which the pattern search begins,
2664     i.e. the division stmt.  S1 is replaced by if N is a power
2665     i.e. the division stmt.  If N is a power of two constant and the type
2666     is signed, S1 is replaced by:
2667   S2  x_t = b_t + y_t;
2668   S1' a_t = x_t >> log2 (N);
2669 
2670     Similarly, if N is a power of two constant and the type is signed,
2671     S4 is replaced by (where *_T temporaries have unsigned type):
2672   S9  y_T = b_t < 0 ? -1U : 0U;
2673   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2674   S7  z_t = (type) z_T;
2675   S6  w_t = b_t + z_t;
2676   S5  x_t = w_t & (N - 1);
2677   S4' a_t = x_t - z_t;
2678 
2679   Output:
2680 
2681   * TYPE_OUT: The type of the output of this pattern.
2682 
2683   * Return value: A new stmt that will be used to replace the division
2684     S1 or modulo S4 stmt.  */
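/* For example, for signed b_t and N == 4, S1 becomes:

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   which rounds towards zero as C requires, whereas a plain arithmetic
   shift of b_t would round towards -infinity for negative values.  */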
2685 
2686 static gimple *
2687 vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
2688 {
2689   gimple *last_stmt = stmt_vinfo->stmt;
2690   tree oprnd0, oprnd1, vectype, itype, cond;
2691   gimple *pattern_stmt, *def_stmt;
2692   enum tree_code rhs_code;
2693   optab optab;
2694   tree q;
2695   int dummy_int, prec;
2696 
2697   if (!is_gimple_assign (last_stmt))
2698     return NULL;
2699 
2700   rhs_code = gimple_assign_rhs_code (last_stmt);
2701   switch (rhs_code)
2702     {
2703     case TRUNC_DIV_EXPR:
2704     case EXACT_DIV_EXPR:
2705     case TRUNC_MOD_EXPR:
2706       break;
2707     default:
2708       return NULL;
2709     }
2710 
2711   oprnd0 = gimple_assign_rhs1 (last_stmt);
2712   oprnd1 = gimple_assign_rhs2 (last_stmt);
2713   itype = TREE_TYPE (oprnd0);
2714   if (TREE_CODE (oprnd0) != SSA_NAME
2715       || TREE_CODE (oprnd1) != INTEGER_CST
2716       || TREE_CODE (itype) != INTEGER_TYPE
2717       || !type_has_mode_precision_p (itype))
2718     return NULL;
2719 
2720   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
2721   vectype = get_vectype_for_scalar_type (itype);
2722   if (vectype == NULL_TREE)
2723     return NULL;
2724 
2725   if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
2726     {
2727       /* If the target can handle vectorized division or modulo natively,
2728 	 don't attempt to optimize this, since native division is likely
2729 	 to give smaller code.  */
2730       optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2731       if (optab != unknown_optab)
2732 	{
2733 	  machine_mode vec_mode = TYPE_MODE (vectype);
2734 	  int icode = (int) optab_handler (optab, vec_mode);
2735 	  if (icode != CODE_FOR_nothing)
2736 	    return NULL;
2737 	}
2738     }
2739 
2740   prec = TYPE_PRECISION (itype);
2741   if (integer_pow2p (oprnd1))
2742     {
2743       if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2744 	return NULL;
2745 
2746       /* Pattern detected.  */
2747       vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
2748 
2749       cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2750 		     build_int_cst (itype, 0));
2751       if (rhs_code == TRUNC_DIV_EXPR
2752 	  || rhs_code == EXACT_DIV_EXPR)
2753 	{
2754 	  tree var = vect_recog_temp_ssa_var (itype, NULL);
2755 	  tree shift;
2756 	  def_stmt
2757 	    = gimple_build_assign (var, COND_EXPR, cond,
2758 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2759 						build_int_cst (itype, 1)),
2760 				   build_int_cst (itype, 0));
2761 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2762 	  var = vect_recog_temp_ssa_var (itype, NULL);
2763 	  def_stmt
2764 	    = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2765 				   gimple_assign_lhs (def_stmt));
2766 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2767 
2768 	  shift = build_int_cst (itype, tree_log2 (oprnd1));
2769 	  pattern_stmt
2770 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2771 				   RSHIFT_EXPR, var, shift);
2772 	}
2773       else
2774 	{
2775 	  tree signmask;
2776 	  if (compare_tree_int (oprnd1, 2) == 0)
2777 	    {
2778 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2779 	      def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2780 					      build_int_cst (itype, 1),
2781 					      build_int_cst (itype, 0));
2782 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2783 	    }
2784 	  else
2785 	    {
2786 	      tree utype
2787 		= build_nonstandard_integer_type (prec, 1);
2788 	      tree vecutype = get_vectype_for_scalar_type (utype);
2789 	      tree shift
2790 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
2791 					- tree_log2 (oprnd1));
2792 	      tree var = vect_recog_temp_ssa_var (utype, NULL);
2793 
2794 	      def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2795 					      build_int_cst (utype, -1),
2796 					      build_int_cst (utype, 0));
2797 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2798 	      var = vect_recog_temp_ssa_var (utype, NULL);
2799 	      def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2800 					      gimple_assign_lhs (def_stmt),
2801 					      shift);
2802 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
2803 	      signmask = vect_recog_temp_ssa_var (itype, NULL);
2804 	      def_stmt
2805 		= gimple_build_assign (signmask, NOP_EXPR, var);
2806 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2807 	    }
2808 	  def_stmt
2809 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2810 				   PLUS_EXPR, oprnd0, signmask);
2811 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2812 	  def_stmt
2813 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2814 				   BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2815 				   fold_build2 (MINUS_EXPR, itype, oprnd1,
2816 						build_int_cst (itype, 1)));
2817 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2818 
2819 	  pattern_stmt
2820 	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2821 				   MINUS_EXPR, gimple_assign_lhs (def_stmt),
2822 				   signmask);
2823 	}
2824 
2825       *type_out = vectype;
2826       return pattern_stmt;
2827     }
2828 
2829   if (prec > HOST_BITS_PER_WIDE_INT
2830       || integer_zerop (oprnd1))
2831     return NULL;
2832 
2833   if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2834     return NULL;
2835 
2836   if (TYPE_UNSIGNED (itype))
2837     {
2838       unsigned HOST_WIDE_INT mh, ml;
2839       int pre_shift, post_shift;
2840       unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2841 				  & GET_MODE_MASK (itype_mode));
2842       tree t1, t2, t3, t4;
2843 
2844       if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
2845 	/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
2846 	return NULL;
2847 
2848       /* Find a suitable multiplier and right shift count
2849 	 instead of multiplying with D.  */
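      /* For instance, for a 32-bit unsigned division by 3 this is expected
	 to give ml = 0xAAAAAAAB with mh == 0 and post_shift == 1, so the
	 quotient becomes (oprnd0 h* 0xAAAAAAAB) >> 1.  */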
2850       mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2851 
2852       /* If the suggested multiplier is more than PREC bits, we can do better
2853 	 for even divisors, using an initial right shift.  */
2854       if (mh != 0 && (d & 1) == 0)
2855 	{
2856 	  pre_shift = ctz_or_zero (d);
2857 	  mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2858 				  &ml, &post_shift, &dummy_int);
2859 	  gcc_assert (!mh);
2860 	}
2861       else
2862 	pre_shift = 0;
2863 
2864       if (mh != 0)
2865 	{
2866 	  if (post_shift - 1 >= prec)
2867 	    return NULL;
2868 
2869 	  /* t1 = oprnd0 h* ml;
2870 	     t2 = oprnd0 - t1;
2871 	     t3 = t2 >> 1;
2872 	     t4 = t1 + t3;
2873 	     q = t4 >> (post_shift - 1);  */
2874 	  t1 = vect_recog_temp_ssa_var (itype, NULL);
2875 	  def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2876 					  build_int_cst (itype, ml));
2877 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2878 
2879 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2880 	  def_stmt
2881 	    = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
2882 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2883 
2884 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
2885 	  def_stmt
2886 	    = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
2887 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2888 
2889 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
2890 	  def_stmt
2891 	    = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
2892 
2893 	  if (post_shift != 1)
2894 	    {
2895 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2896 
2897 	      q = vect_recog_temp_ssa_var (itype, NULL);
2898 	      pattern_stmt
2899 		= gimple_build_assign (q, RSHIFT_EXPR, t4,
2900 				       build_int_cst (itype, post_shift - 1));
2901 	    }
2902 	  else
2903 	    {
2904 	      q = t4;
2905 	      pattern_stmt = def_stmt;
2906 	    }
2907 	}
2908       else
2909 	{
2910 	  if (pre_shift >= prec || post_shift >= prec)
2911 	    return NULL;
2912 
2913 	  /* t1 = oprnd0 >> pre_shift;
2914 	     t2 = t1 h* ml;
2915 	     q = t2 >> post_shift;  */
2916 	  if (pre_shift)
2917 	    {
2918 	      t1 = vect_recog_temp_ssa_var (itype, NULL);
2919 	      def_stmt
2920 		= gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2921 				       build_int_cst (NULL, pre_shift));
2922 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2923 	    }
2924 	  else
2925 	    t1 = oprnd0;
2926 
2927 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2928 	  def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2929 					  build_int_cst (itype, ml));
2930 
2931 	  if (post_shift)
2932 	    {
2933 	      append_pattern_def_seq (stmt_vinfo, def_stmt);
2934 
2935 	      q = vect_recog_temp_ssa_var (itype, NULL);
2936 	      def_stmt
2937 		= gimple_build_assign (q, RSHIFT_EXPR, t2,
2938 				       build_int_cst (itype, post_shift));
2939 	    }
2940 	  else
2941 	    q = t2;
2942 
2943 	  pattern_stmt = def_stmt;
2944 	}
2945     }
2946   else
2947     {
2948       unsigned HOST_WIDE_INT ml;
2949       int post_shift;
2950       HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
2951       unsigned HOST_WIDE_INT abs_d;
2952       bool add = false;
2953       tree t1, t2, t3, t4;
2954 
2955       /* Give up for -1.  */
2956       if (d == -1)
2957 	return NULL;
2958 
2959       /* Since d might be INT_MIN, we have to cast to
2960 	 unsigned HOST_WIDE_INT before negating to avoid
2961 	 undefined signed overflow.  */
2962       abs_d = (d >= 0
2963 	       ? (unsigned HOST_WIDE_INT) d
2964 	       : - (unsigned HOST_WIDE_INT) d);
2965 
2966       /* n rem d = n rem -d */
2967       if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2968 	{
2969 	  d = abs_d;
2970 	  oprnd1 = build_int_cst (itype, abs_d);
2971 	}
2972       else if (HOST_BITS_PER_WIDE_INT >= prec
2973 	       && abs_d == HOST_WIDE_INT_1U << (prec - 1))
2974 	/* This case is not handled correctly below.  */
2975 	return NULL;
2976 
2977       choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
2978       if (ml >= HOST_WIDE_INT_1U << (prec - 1))
2979 	{
2980 	  add = true;
2981 	  ml |= HOST_WIDE_INT_M1U << (prec - 1);
2982 	}
2983       if (post_shift >= prec)
2984 	return NULL;
2985 
2986       /* t1 = oprnd0 h* ml;  */
2987       t1 = vect_recog_temp_ssa_var (itype, NULL);
2988       def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2989 				      build_int_cst (itype, ml));
2990 
2991       if (add)
2992 	{
2993 	  /* t2 = t1 + oprnd0;  */
2994 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
2995 	  t2 = vect_recog_temp_ssa_var (itype, NULL);
2996 	  def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
2997 	}
2998       else
2999 	t2 = t1;
3000 
3001       if (post_shift)
3002 	{
3003 	  /* t3 = t2 >> post_shift;  */
3004 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3005 	  t3 = vect_recog_temp_ssa_var (itype, NULL);
3006 	  def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3007 					  build_int_cst (itype, post_shift));
3008 	}
3009       else
3010 	t3 = t2;
3011 
3012       wide_int oprnd0_min, oprnd0_max;
3013       int msb = 1;
3014       if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3015 	{
3016 	  if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
3017 	    msb = 0;
3018 	  else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
3019 	    msb = -1;
3020 	}
3021 
3022       if (msb == 0 && d >= 0)
3023 	{
3024 	  /* q = t3;  */
3025 	  q = t3;
3026 	  pattern_stmt = def_stmt;
3027 	}
3028       else
3029 	{
3030 	  /* t4 = oprnd0 >> (prec - 1);
3031 	     or if we know from VRP that oprnd0 >= 0
3032 	     t4 = 0;
3033 	     or if we know from VRP that oprnd0 < 0
3034 	     t4 = -1;  */
3035 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3036 	  t4 = vect_recog_temp_ssa_var (itype, NULL);
3037 	  if (msb != 1)
3038 	    def_stmt = gimple_build_assign (t4, INTEGER_CST,
3039 					    build_int_cst (itype, msb));
3040 	  else
3041 	    def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3042 					    build_int_cst (itype, prec - 1));
3043 	  append_pattern_def_seq (stmt_vinfo, def_stmt);
3044 
3045 	  /* q = t3 - t4;  or q = t4 - t3;  */
3046 	  q = vect_recog_temp_ssa_var (itype, NULL);
3047 	  pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3048 					      d < 0 ? t3 : t4);
3049 	}
3050     }
3051 
3052   if (rhs_code == TRUNC_MOD_EXPR)
3053     {
3054       tree r, t1;
3055 
3056       /* We divided.  Now finish by:
3057 	 t1 = q * oprnd1;
3058 	 r = oprnd0 - t1;  */
3059       append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3060 
3061       t1 = vect_recog_temp_ssa_var (itype, NULL);
3062       def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3063       append_pattern_def_seq (stmt_vinfo, def_stmt);
3064 
3065       r = vect_recog_temp_ssa_var (itype, NULL);
3066       pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3067     }
3068 
3069   /* Pattern detected.  */
3070   vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3071 
3072   *type_out = vectype;
3073   return pattern_stmt;
3074 }
3075 
3076 /* Function vect_recog_mixed_size_cond_pattern
3077 
3078    Try to find the following pattern:
3079 
3080      type x_t, y_t;
3081      TYPE a_T, b_T, c_T;
3082    loop:
3083      S1  a_T = x_t CMP y_t ? b_T : c_T;
3084 
3085    where type 'TYPE' is an integral type which has different size
3086    from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
3087    than 'type', the constants need to fit into an integer type
3088    with the same width as 'type') or results of conversion from 'type'.
3089 
3090    Input:
3091 
3092    * STMT_VINFO: The stmt from which the pattern search begins.
3093 
3094    Output:
3095 
3096    * TYPE_OUT: The type of the output of this pattern.
3097 
3098    * Return value: A new stmt that will be used to replace the pattern.
3099 	Additionally a def_stmt is added.
3100 
3101 	a_it = x_t CMP y_t ? b_it : c_it;
3102 	a_T = (TYPE) a_it;  */
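
/* For instance (purely illustrative), with 'type' being short and 'TYPE'
   being int,

     S1  a_T = x_t < y_t ? 3 : 5;

   would be rewritten as

     a_it = x_t < y_t ? 3 : 5;
     a_T = (int) a_it;

   where a_it has type short, so that the COND_EXPR is carried out on
   vectors with as many elements as the comparison of the two short
   operands, and only the final conversion widens to int.  */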
3103 
3104 static gimple *
3105 vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3106 {
3107   gimple *last_stmt = stmt_vinfo->stmt;
3108   tree cond_expr, then_clause, else_clause;
3109   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3110   gimple *pattern_stmt, *def_stmt;
3111   tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3112   gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3113   bool promotion;
3114   tree comp_scalar_type;
3115 
3116   if (!is_gimple_assign (last_stmt)
3117       || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3118       || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3119     return NULL;
3120 
3121   cond_expr = gimple_assign_rhs1 (last_stmt);
3122   then_clause = gimple_assign_rhs2 (last_stmt);
3123   else_clause = gimple_assign_rhs3 (last_stmt);
3124 
3125   if (!COMPARISON_CLASS_P (cond_expr))
3126     return NULL;
3127 
3128   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3129   comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
3130   if (comp_vectype == NULL_TREE)
3131     return NULL;
3132 
3133   type = gimple_expr_type (last_stmt);
3134   if (types_compatible_p (type, comp_scalar_type)
3135       || ((TREE_CODE (then_clause) != INTEGER_CST
3136 	   || TREE_CODE (else_clause) != INTEGER_CST)
3137 	  && !INTEGRAL_TYPE_P (comp_scalar_type))
3138       || !INTEGRAL_TYPE_P (type))
3139     return NULL;
3140 
3141   if ((TREE_CODE (then_clause) != INTEGER_CST
3142        && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3143 			      &def_stmt0, &promotion))
3144       || (TREE_CODE (else_clause) != INTEGER_CST
3145 	  && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3146 				 &def_stmt1, &promotion)))
3147     return NULL;
3148 
3149   if (orig_type0 && orig_type1
3150       && !types_compatible_p (orig_type0, orig_type1))
3151     return NULL;
3152 
3153   if (orig_type0)
3154     {
3155       if (!types_compatible_p (orig_type0, comp_scalar_type))
3156 	return NULL;
3157       then_clause = gimple_assign_rhs1 (def_stmt0);
3158       itype = orig_type0;
3159     }
3160 
3161   if (orig_type1)
3162     {
3163       if (!types_compatible_p (orig_type1, comp_scalar_type))
3164 	return NULL;
3165       else_clause = gimple_assign_rhs1 (def_stmt1);
3166       itype = orig_type1;
3167     }
3168 
3169 
3170   HOST_WIDE_INT cmp_mode_size
3171     = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3172 
3173   scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3174   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3175     return NULL;
3176 
3177   vectype = get_vectype_for_scalar_type (type);
3178   if (vectype == NULL_TREE)
3179     return NULL;
3180 
3181   if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3182     return NULL;
3183 
3184   if (itype == NULL_TREE)
3185     itype = build_nonstandard_integer_type (cmp_mode_size,
3186   					    TYPE_UNSIGNED (type));
3187 
3188   if (itype == NULL_TREE
3189       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3190     return NULL;
3191 
3192   vecitype = get_vectype_for_scalar_type (itype);
3193   if (vecitype == NULL_TREE)
3194     return NULL;
3195 
3196   if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3197     return NULL;
3198 
3199   if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3200     {
3201       if ((TREE_CODE (then_clause) == INTEGER_CST
3202 	   && !int_fits_type_p (then_clause, itype))
3203 	  || (TREE_CODE (else_clause) == INTEGER_CST
3204 	      && !int_fits_type_p (else_clause, itype)))
3205 	return NULL;
3206     }
3207 
3208   def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3209 				  COND_EXPR, unshare_expr (cond_expr),
3210 				  fold_convert (itype, then_clause),
3211 				  fold_convert (itype, else_clause));
3212   pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3213 				      NOP_EXPR, gimple_assign_lhs (def_stmt));
3214 
3215   append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
3216   *type_out = vectype;
3217 
3218   vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3219 
3220   return pattern_stmt;
3221 }
3222 
3223 
3224 /* Helper function of vect_recog_bool_pattern.  Called recursively, return
3225    true if bool VAR can and should be optimized that way.  Assume it shouldn't
3226    in case it's a result of a comparison which can be directly vectorized into
3227    a vector comparison.  Fills in STMTS with all stmts visited during the
3228    walk.  */
3229 
3230 static bool
3231 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3232 {
3233   tree rhs1;
3234   enum tree_code rhs_code;
3235 
3236   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3237   if (!def_stmt_info)
3238     return false;
3239 
3240   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3241   if (!def_stmt)
3242     return false;
3243 
3244   if (stmts.contains (def_stmt))
3245     return true;
3246 
3247   rhs1 = gimple_assign_rhs1 (def_stmt);
3248   rhs_code = gimple_assign_rhs_code (def_stmt);
3249   switch (rhs_code)
3250     {
3251     case SSA_NAME:
3252       if (! check_bool_pattern (rhs1, vinfo, stmts))
3253 	return false;
3254       break;
3255 
3256     CASE_CONVERT:
3257       if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3258 	return false;
3259       if (! check_bool_pattern (rhs1, vinfo, stmts))
3260 	return false;
3261       break;
3262 
3263     case BIT_NOT_EXPR:
3264       if (! check_bool_pattern (rhs1, vinfo, stmts))
3265 	return false;
3266       break;
3267 
3268     case BIT_AND_EXPR:
3269     case BIT_IOR_EXPR:
3270     case BIT_XOR_EXPR:
3271       if (! check_bool_pattern (rhs1, vinfo, stmts)
3272 	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
3273 	return false;
3274       break;
3275 
3276     default:
3277       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3278 	{
3279 	  tree vecitype, comp_vectype;
3280 
3281 	  /* If the comparison can throw, then is_gimple_condexpr will be
3282 	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
3283 	  if (stmt_could_throw_p (cfun, def_stmt))
3284 	    return false;
3285 
3286 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3287 	  if (comp_vectype == NULL_TREE)
3288 	    return false;
3289 
3290 	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3291 	  if (mask_type
3292 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3293 	    return false;
3294 
3295 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3296 	    {
3297 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3298 	      tree itype
3299 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3300 	      vecitype = get_vectype_for_scalar_type (itype);
3301 	      if (vecitype == NULL_TREE)
3302 		return false;
3303 	    }
3304 	  else
3305 	    vecitype = comp_vectype;
3306 	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
3307 	    return false;
3308 	}
3309       else
3310 	return false;
3311       break;
3312     }
3313 
3314   bool res = stmts.add (def_stmt);
3315   /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
3316   gcc_assert (!res);
3317 
3318   return true;
3319 }
3320 
3321 
3322 /* Helper function of adjust_bool_pattern.  Cast VAR (the result of a
3323    previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast
3324    stmt to STMT_INFO's pattern sequence, and return the cast's result.  */
3325 
3326 static tree
3327 adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
3328 {
3329   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3330 					   NOP_EXPR, var);
3331   append_pattern_def_seq (stmt_info, cast_stmt,
3332 			  get_vectype_for_scalar_type (type));
3333   return gimple_assign_lhs (cast_stmt);
3334 }
3335 
3336 /* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
3337    VAR is an SSA_NAME that should be transformed from bool to a wider integer
3338    type, OUT_TYPE is the desired final integer type of the whole pattern.
3339    STMT_INFO is the info of the pattern root, i.e. the stmt with which the
3340    pattern stmts should be associated.  DEFS is a map of pattern defs.  */
3341 
3342 static void
3343 adjust_bool_pattern (tree var, tree out_type,
3344 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
3345 {
3346   gimple *stmt = SSA_NAME_DEF_STMT (var);
3347   enum tree_code rhs_code, def_rhs_code;
3348   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3349   location_t loc;
3350   gimple *pattern_stmt, *def_stmt;
3351   tree trueval = NULL_TREE;
3352 
3353   rhs1 = gimple_assign_rhs1 (stmt);
3354   rhs2 = gimple_assign_rhs2 (stmt);
3355   rhs_code = gimple_assign_rhs_code (stmt);
3356   loc = gimple_location (stmt);
3357   switch (rhs_code)
3358     {
3359     case SSA_NAME:
3360     CASE_CONVERT:
3361       irhs1 = *defs.get (rhs1);
3362       itype = TREE_TYPE (irhs1);
3363       pattern_stmt
3364 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3365 			       SSA_NAME, irhs1);
3366       break;
3367 
3368     case BIT_NOT_EXPR:
3369       irhs1 = *defs.get (rhs1);
3370       itype = TREE_TYPE (irhs1);
3371       pattern_stmt
3372 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3373 			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
3374       break;
3375 
3376     case BIT_AND_EXPR:
3377       /* Try to optimize x = y & (a < b ? 1 : 0); into
3378 	 x = (a < b ? y : 0);
3379 
3380 	 E.g. for:
3381 	   bool a_b, b_b, c_b;
3382 	   TYPE d_T;
3383 
3384 	   S1  a_b = x1 CMP1 y1;
3385 	   S2  b_b = x2 CMP2 y2;
3386 	   S3  c_b = a_b & b_b;
3387 	   S4  d_T = (TYPE) c_b;
3388 
3389 	 we would normally emit:
3390 
3391 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3392 	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3393 	   S3'  c_T = a_T & b_T;
3394 	   S4'  d_T = c_T;
3395 
3396 	 but we can save one stmt by using the
3397 	 result of one of the COND_EXPRs in the other COND_EXPR and leave
3398 	 BIT_AND_EXPR stmt out:
3399 
3400 	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3401 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3402 	   S4'  d_T = c_T;
3403 
3404 	 At least when VEC_COND_EXPR is implemented using masks,
3405 	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3406 	 computes the comparison mask and ANDs it, in one case with an
3407 	 all-ones vector, in the other case with a vector register.
3408 	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3409 	 often more expensive.  */
3410       def_stmt = SSA_NAME_DEF_STMT (rhs2);
3411       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3412       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3413 	{
3414 	  irhs1 = *defs.get (rhs1);
3415 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3416 	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
3417 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3418 	    {
3419 	      rhs_code = def_rhs_code;
3420 	      rhs1 = def_rhs1;
3421 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3422 	      trueval = irhs1;
3423 	      goto do_compare;
3424 	    }
3425 	  else
3426 	    irhs2 = *defs.get (rhs2);
3427 	  goto and_ior_xor;
3428 	}
3429       def_stmt = SSA_NAME_DEF_STMT (rhs1);
3430       def_rhs_code = gimple_assign_rhs_code (def_stmt);
3431       if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3432 	{
3433 	  irhs2 = *defs.get (rhs2);
3434 	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3435 	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
3436 	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
3437 	    {
3438 	      rhs_code = def_rhs_code;
3439 	      rhs1 = def_rhs1;
3440 	      rhs2 = gimple_assign_rhs2 (def_stmt);
3441 	      trueval = irhs2;
3442 	      goto do_compare;
3443 	    }
3444 	  else
3445 	    irhs1 = *defs.get (rhs1);
3446 	  goto and_ior_xor;
3447 	}
3448       /* FALLTHRU */
3449     case BIT_IOR_EXPR:
3450     case BIT_XOR_EXPR:
3451       irhs1 = *defs.get (rhs1);
3452       irhs2 = *defs.get (rhs2);
3453     and_ior_xor:
3454       if (TYPE_PRECISION (TREE_TYPE (irhs1))
3455 	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
3456 	{
3457 	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3458 	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3459 	  int out_prec = TYPE_PRECISION (out_type);
3460 	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3461 	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3462 					      stmt_info);
3463 	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3464 	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3465 					      stmt_info);
3466 	  else
3467 	    {
3468 	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3469 	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
3470 	    }
3471 	}
3472       itype = TREE_TYPE (irhs1);
3473       pattern_stmt
3474 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3475 			       rhs_code, irhs1, irhs2);
3476       break;
3477 
3478     default:
3479     do_compare:
3480       gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3481       if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3482 	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3483 	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3484 		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
3485 	{
3486 	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3487 	  itype
3488 	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3489 	}
3490       else
3491 	itype = TREE_TYPE (rhs1);
3492       cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3493       if (trueval == NULL_TREE)
3494 	trueval = build_int_cst (itype, 1);
3495       else
3496 	gcc_checking_assert (useless_type_conversion_p (itype,
3497 							TREE_TYPE (trueval)));
3498       pattern_stmt
3499 	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3500 			       COND_EXPR, cond_expr, trueval,
3501 			       build_int_cst (itype, 0));
3502       break;
3503     }
3504 
3505   gimple_set_location (pattern_stmt, loc);
3506   append_pattern_def_seq (stmt_info, pattern_stmt,
3507 			  get_vectype_for_scalar_type (itype));
3508   defs.put (var, gimple_assign_lhs (pattern_stmt));
3509 }
3510 
3511 /* Comparison function to qsort a vector of gimple stmts by UID.  */
3512 
3513 static int
3514 sort_after_uid (const void *p1, const void *p2)
3515 {
3516   const gimple *stmt1 = *(const gimple * const *)p1;
3517   const gimple *stmt2 = *(const gimple * const *)p2;
3518   return gimple_uid (stmt1) - gimple_uid (stmt2);
3519 }
3520 
3521 /* Create pattern stmts for all stmts participating in the bool pattern
3522    specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
3523    OUT_TYPE.  Return the def of the pattern root.  */
3524 
3525 static tree
3526 adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
3527 		   tree out_type, stmt_vec_info stmt_info)
3528 {
3529   /* Gather original stmts in the bool pattern in their order of appearance
3530      in the IL.  */
3531   auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3532   for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3533        i != bool_stmt_set.end (); ++i)
3534     bool_stmts.quick_push (*i);
3535   bool_stmts.qsort (sort_after_uid);
3536 
3537   /* Now process them in that order, producing pattern stmts.  */
3538   hash_map <tree, tree> defs;
3539   for (unsigned i = 0; i < bool_stmts.length (); ++i)
3540     adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
3541 			 out_type, stmt_info, defs);
3542 
3543   /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
3544   gimple *pattern_stmt
3545     = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
3546   return gimple_assign_lhs (pattern_stmt);
3547 }
3548 
3549 /* Helper for search_type_for_mask.  */
3550 
3551 static tree
3552 search_type_for_mask_1 (tree var, vec_info *vinfo,
3553 			hash_map<gimple *, tree> &cache)
3554 {
3555   tree rhs1;
3556   enum tree_code rhs_code;
3557   tree res = NULL_TREE, res2;
3558 
3559   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3560     return NULL_TREE;
3561 
3562   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3563   if (!def_stmt_info)
3564     return NULL_TREE;
3565 
3566   gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3567   if (!def_stmt)
3568     return NULL_TREE;
3569 
3570   tree *c = cache.get (def_stmt);
3571   if (c)
3572     return *c;
3573 
3574   rhs_code = gimple_assign_rhs_code (def_stmt);
3575   rhs1 = gimple_assign_rhs1 (def_stmt);
3576 
3577   switch (rhs_code)
3578     {
3579     case SSA_NAME:
3580     case BIT_NOT_EXPR:
3581     CASE_CONVERT:
3582       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3583       break;
3584 
3585     case BIT_AND_EXPR:
3586     case BIT_IOR_EXPR:
3587     case BIT_XOR_EXPR:
3588       res = search_type_for_mask_1 (rhs1, vinfo, cache);
3589       res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3590 				     cache);
3591       if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3592 	res = res2;
3593       break;
3594 
3595     default:
3596       if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3597 	{
3598 	  tree comp_vectype, mask_type;
3599 
3600 	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
3601 	    {
3602 	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
3603 	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3604 					     vinfo, cache);
3605 	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3606 		res = res2;
3607 	      break;
3608 	    }
3609 
3610 	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3611 	  if (comp_vectype == NULL_TREE)
3612 	    {
3613 	      res = NULL_TREE;
3614 	      break;
3615 	    }
3616 
3617 	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3618 	  if (!mask_type
3619 	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
3620 	    {
3621 	      res = NULL_TREE;
3622 	      break;
3623 	    }
3624 
3625 	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3626 	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3627 	    {
3628 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
3629 	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3630 	    }
3631 	  else
3632 	    res = TREE_TYPE (rhs1);
3633 	}
3634     }
3635 
3636   cache.put (def_stmt, res);
3637   return res;
3638 }
3639 
3640 /* Return the proper type for converting bool VAR into
3641    an integer value or NULL_TREE if no such type exists.
3642    The type is chosen so that the converted value has the
3643    same number of elements as VAR's vector type.  */
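
/* For example (an illustration rather than an exhaustive rule), if VAR is
   ultimately defined by a comparison of two 32-bit ints, the type returned
   is a 32-bit unsigned integer type, so that the converted 0/1 values
   occupy one int-sized lane per compared element.  */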
3644 
3645 static tree
3646 search_type_for_mask (tree var, vec_info *vinfo)
3647 {
3648   hash_map<gimple *, tree> cache;
3649   return search_type_for_mask_1 (var, vinfo, cache);
3650 }
3651 
3652 /* Function vect_recog_bool_pattern
3653 
3654    Try to find a pattern like the following:
3655 
3656      bool a_b, b_b, c_b, d_b, e_b;
3657      TYPE f_T;
3658    loop:
3659      S1  a_b = x1 CMP1 y1;
3660      S2  b_b = x2 CMP2 y2;
3661      S3  c_b = a_b & b_b;
3662      S4  d_b = x3 CMP3 y3;
3663      S5  e_b = c_b | d_b;
3664      S6  f_T = (TYPE) e_b;
3665 
3666    where type 'TYPE' is an integral type.  Or a similar pattern
3667    ending in
3668 
3669      S6  f_Y = e_b ? r_Y : s_Y;
3670 
3671    as results from if-conversion of a complex condition.
3672 
3673    Input:
3674 
3675    * STMT_VINFO: The stmt at the end from which the pattern
3676 		 search begins, i.e. cast of a bool to
3677 		 an integer type.
3678 
3679    Output:
3680 
3681    * TYPE_OUT: The type of the output of this pattern.
3682 
3683    * Return value: A new stmt that will be used to replace the pattern.
3684 
3685 	Assuming the size of TYPE is the same as the size of all comparisons
3686 	(otherwise some casts would be added where needed), for the above
3687 	sequence we create related pattern stmts:
3688 	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
3689 	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
3690 	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
3691 	S5'  e_T = c_T | d_T;
3692 	S6'  f_T = e_T;
3693 
3694 	Instead of the above S3' we could emit:
3695 	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
3696 	S3'  c_T = a_T | b_T;
3697 	but the above is more efficient.  */
3698 
3699 static gimple *
3700 vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3701 {
3702   gimple *last_stmt = stmt_vinfo->stmt;
3703   enum tree_code rhs_code;
3704   tree var, lhs, rhs, vectype;
3705   vec_info *vinfo = stmt_vinfo->vinfo;
3706   gimple *pattern_stmt;
3707 
3708   if (!is_gimple_assign (last_stmt))
3709     return NULL;
3710 
3711   var = gimple_assign_rhs1 (last_stmt);
3712   lhs = gimple_assign_lhs (last_stmt);
3713 
3714   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
3715     return NULL;
3716 
3717   hash_set<gimple *> bool_stmts;
3718 
3719   rhs_code = gimple_assign_rhs_code (last_stmt);
3720   if (CONVERT_EXPR_CODE_P (rhs_code))
3721     {
3722       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3723 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
3724 	return NULL;
3725       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3726       if (vectype == NULL_TREE)
3727 	return NULL;
3728 
3729       if (check_bool_pattern (var, vinfo, bool_stmts))
3730 	{
3731 	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
3732 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3733 	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3734 	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3735 	  else
3736 	    pattern_stmt
3737 	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
3738 	}
3739       else
3740 	{
3741 	  tree type = search_type_for_mask (var, vinfo);
3742 	  tree cst0, cst1, tmp;
3743 
3744 	  if (!type)
3745 	    return NULL;
3746 
3747 	  /* We may directly use the cond with the narrowed type, avoiding
3748 	     multiple cond exprs followed by result packing, and instead
3749 	     perform a single cond with a packed mask.  In the widening
3750 	     case it is better to do the cond first and then extract the
3751 	     results.  */
3752 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3753 	    type = TREE_TYPE (lhs);
3754 
3755 	  cst0 = build_int_cst (type, 0);
3756 	  cst1 = build_int_cst (type, 1);
3757 	  tmp = vect_recog_temp_ssa_var (type, NULL);
3758 	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
3759 
3760 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3761 	    {
3762 	      tree new_vectype = get_vectype_for_scalar_type (type);
3763 	      append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3764 
3765 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3766 	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3767 	    }
3768 	}
3769 
3770       *type_out = vectype;
3771       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3772 
3773       return pattern_stmt;
3774     }
3775   else if (rhs_code == COND_EXPR
3776 	   && TREE_CODE (var) == SSA_NAME)
3777     {
3778       vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3779       if (vectype == NULL_TREE)
3780 	return NULL;
3781 
3782       /* Build a scalar type for the boolean result that when
3783          vectorized matches the vector type of the result in
3784 	 size and number of elements.  */
3785       unsigned prec
3786 	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3787 			       TYPE_VECTOR_SUBPARTS (vectype));
3788 
3789       tree type
3790 	= build_nonstandard_integer_type (prec,
3791 					  TYPE_UNSIGNED (TREE_TYPE (var)));
3792       if (get_vectype_for_scalar_type (type) == NULL_TREE)
3793 	return NULL;
3794 
3795       if (!check_bool_pattern (var, vinfo, bool_stmts))
3796 	return NULL;
3797 
3798       rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
3799 
3800       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3801       pattern_stmt
3802 	  = gimple_build_assign (lhs, COND_EXPR,
3803 				 build2 (NE_EXPR, boolean_type_node,
3804 					 rhs, build_int_cst (type, 0)),
3805 				 gimple_assign_rhs2 (last_stmt),
3806 				 gimple_assign_rhs3 (last_stmt));
3807       *type_out = vectype;
3808       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3809 
3810       return pattern_stmt;
3811     }
3812   else if (rhs_code == SSA_NAME
3813 	   && STMT_VINFO_DATA_REF (stmt_vinfo))
3814     {
3815       stmt_vec_info pattern_stmt_info;
3816       vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3817       gcc_assert (vectype != NULL_TREE);
3818       if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3819 	return NULL;
3820 
3821       if (check_bool_pattern (var, vinfo, bool_stmts))
3822 	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
3823       else
3824 	{
3825 	  tree type = search_type_for_mask (var, vinfo);
3826 	  tree cst0, cst1, new_vectype;
3827 
3828 	  if (!type)
3829 	    return NULL;
3830 
3831 	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3832 	    type = TREE_TYPE (vectype);
3833 
3834 	  cst0 = build_int_cst (type, 0);
3835 	  cst1 = build_int_cst (type, 1);
3836 	  new_vectype = get_vectype_for_scalar_type (type);
3837 
3838 	  rhs = vect_recog_temp_ssa_var (type, NULL);
3839 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
3840 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
3841 	}
3842 
3843       lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3844       if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3845 	{
3846 	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3847 	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
3848 	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
3849 	  rhs = rhs2;
3850 	}
3851       pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3852       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3853       vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3854       *type_out = vectype;
3855       vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3856 
3857       return pattern_stmt;
3858     }
3859   else
3860     return NULL;
3861 }
3862 
3863 
3864 /* A helper for vect_recog_mask_conversion_pattern.  Build
3865    conversion of MASK to a type suitable for masking VECTYPE.
3866    The built statement gets the required vectype and is appended to
3867    STMT_VINFO's pattern sequence.
3868 
3869    Return the converted mask.  */
3870 
3871 static tree
3872 build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
3873 {
3874   gimple *stmt;
3875   tree masktype, tmp;
3876 
3877   masktype = build_same_sized_truth_vector_type (vectype);
3878   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3879   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3880   append_pattern_def_seq (stmt_vinfo, stmt, masktype);
3881 
3882   return tmp;
3883 }
3884 
3885 
3886 /* Function vect_recog_mask_conversion_pattern
3887 
3888    Try to find statements which require boolean type
3889    conversion.  Additional conversion statements are
3890    added to handle such cases.  For example:
3891 
3892    bool m_1, m_2, m_3;
3893    int i_4, i_5;
3894    double d_6, d_7;
3895    char c_1, c_2, c_3;
3896 
3897    S1   m_1 = i_4 > i_5;
3898    S2   m_2 = d_6 < d_7;
3899    S3   m_3 = m_1 & m_2;
3900    S4   c_1 = m_3 ? c_2 : c_3;
3901 
3902    Will be transformed into:
3903 
3904    S1   m_1 = i_4 > i_5;
3905    S2   m_2 = d_6 < d_7;
3906    S3'' m_2' = (_Bool[bitsize=32])m_2
3907    S3'  m_3' = m_1 & m_2';
3908    S4'' m_3'' = (_Bool[bitsize=8])m_3'
3909    S4'  c_1' = m_3'' ? c_2 : c_3;  */
3910 
3911 static gimple *
3912 vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
3913 {
3914   gimple *last_stmt = stmt_vinfo->stmt;
3915   enum tree_code rhs_code;
3916   tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3917   tree vectype1, vectype2;
3918   stmt_vec_info pattern_stmt_info;
3919   vec_info *vinfo = stmt_vinfo->vinfo;
3920 
3921   /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
3922   if (is_gimple_call (last_stmt)
3923       && gimple_call_internal_p (last_stmt))
3924     {
3925       gcall *pattern_stmt;
3926 
3927       internal_fn ifn = gimple_call_internal_fn (last_stmt);
3928       int mask_argno = internal_fn_mask_index (ifn);
3929       if (mask_argno < 0)
3930 	return NULL;
3931 
3932       bool store_p = internal_store_fn_p (ifn);
3933       if (store_p)
3934 	{
3935 	  int rhs_index = internal_fn_stored_value_index (ifn);
3936 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
3937 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
3938 	}
3939       else
3940 	{
3941 	  lhs = gimple_call_lhs (last_stmt);
3942 	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3943 	}
3944 
3945       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3946       tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3947       if (!mask_arg_type)
3948 	return NULL;
3949       vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
3950 
3951       if (!vectype1 || !vectype2
3952 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3953 		       TYPE_VECTOR_SUBPARTS (vectype2)))
3954 	return NULL;
3955 
3956       tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3957 
3958       auto_vec<tree, 8> args;
3959       unsigned int nargs = gimple_call_num_args (last_stmt);
3960       args.safe_grow (nargs);
3961       for (unsigned int i = 0; i < nargs; ++i)
3962 	args[i] = ((int) i == mask_argno
3963 		   ? tmp
3964 		   : gimple_call_arg (last_stmt, i));
3965       pattern_stmt = gimple_build_call_internal_vec (ifn, args);
3966 
3967       if (!store_p)
3968 	{
3969 	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3970 	  gimple_call_set_lhs (pattern_stmt, lhs);
3971 	}
3972       gimple_call_set_nothrow (pattern_stmt, true);
3973 
3974       pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
3975       if (STMT_VINFO_DATA_REF (stmt_vinfo))
3976 	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
3977 
3978       *type_out = vectype1;
3979       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
3980 
3981       return pattern_stmt;
3982     }
3983 
3984   if (!is_gimple_assign (last_stmt))
3985     return NULL;
3986 
3987   gimple *pattern_stmt;
3988   lhs = gimple_assign_lhs (last_stmt);
3989   rhs1 = gimple_assign_rhs1 (last_stmt);
3990   rhs_code = gimple_assign_rhs_code (last_stmt);
3991 
3992   /* Check for cond expression requiring mask conversion.  */
3993   if (rhs_code == COND_EXPR)
3994     {
3995       vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3996 
3997       if (TREE_CODE (rhs1) == SSA_NAME)
3998 	{
3999 	  rhs1_type = search_type_for_mask (rhs1, vinfo);
4000 	  if (!rhs1_type)
4001 	    return NULL;
4002 	}
4003       else if (COMPARISON_CLASS_P (rhs1))
4004 	{
4005 	  /* Check whether we're comparing scalar booleans and (if so)
4006 	     whether a better mask type exists than the mask associated
4007 	     with boolean-sized elements.  This avoids unnecessary packs
4008 	     and unpacks if the booleans are set from comparisons of
4009 	     wider types.  E.g. in:
4010 
4011 	       int x1, x2, x3, x4, y1, y2;
4012 	       ...
4013 	       bool b1 = (x1 == x2);
4014 	       bool b2 = (x3 == x4);
4015 	       ... = b1 == b2 ? y1 : y2;
4016 
4017 	     it is better for b1 and b2 to use the mask type associated
4018 	     with int elements rather than bool (byte) elements.  */
4019 	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4020 	  if (!rhs1_type)
4021 	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4022 	}
4023       else
4024 	return NULL;
4025 
4026       vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4027 
4028       if (!vectype1 || !vectype2)
4029 	return NULL;
4030 
4031       /* Continue if a conversion is needed.  Also continue if we have
4032 	 a comparison whose vector type would normally be different from
4033 	 VECTYPE2 when considered in isolation.  In that case we'll
4034 	 replace the comparison with an SSA name (so that we can record
4035 	 its vector type) and behave as though the comparison was an SSA
4036 	 name from the outset.  */
4037       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4038 		    TYPE_VECTOR_SUBPARTS (vectype2))
4039 	  && (TREE_CODE (rhs1) == SSA_NAME
4040 	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
4041 	return NULL;
4042 
4043       /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
4044          in place; we can handle it in vectorizable_condition.  This avoids
4045 	 unnecessary promotion stmts and an increased vectorization factor.  */
4046       if (COMPARISON_CLASS_P (rhs1)
4047 	  && INTEGRAL_TYPE_P (rhs1_type)
4048 	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4049 		       TYPE_VECTOR_SUBPARTS (vectype2)))
4050 	{
4051 	  enum vect_def_type dt;
4052 	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4053 	      && dt == vect_external_def
4054 	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4055 	      && (dt == vect_external_def
4056 		  || dt == vect_constant_def))
4057 	    {
4058 	      tree wide_scalar_type = build_nonstandard_integer_type
4059 		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4060 		 TYPE_UNSIGNED (rhs1_type));
4061 	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4062 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4063 		return NULL;
4064 	    }
4065 	}
4066 
4067       /* If rhs1 is a comparison we need to move it into a
4068 	 separate statement.  */
4069       if (TREE_CODE (rhs1) != SSA_NAME)
4070 	{
4071 	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4072 	  pattern_stmt = gimple_build_assign (tmp, rhs1);
4073 	  rhs1 = tmp;
4074 	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
4075 	}
4076 
4077       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4078 		    TYPE_VECTOR_SUBPARTS (vectype2)))
4079 	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4080       else
4081 	tmp = rhs1;
4082 
4083       lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4084       pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4085 					  gimple_assign_rhs2 (last_stmt),
4086 					  gimple_assign_rhs3 (last_stmt));
4087 
4088       *type_out = vectype1;
4089       vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4090 
4091       return pattern_stmt;
4092     }
4093 
4094   /* Now check for binary boolean operations requiring conversion for
4095      one of the operands.  */
4096   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4097     return NULL;
4098 
4099   if (rhs_code != BIT_IOR_EXPR
4100       && rhs_code != BIT_XOR_EXPR
4101       && rhs_code != BIT_AND_EXPR
4102       && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4103     return NULL;
4104 
4105   rhs2 = gimple_assign_rhs2 (last_stmt);
4106 
4107   rhs1_type = search_type_for_mask (rhs1, vinfo);
4108   rhs2_type = search_type_for_mask (rhs2, vinfo);
4109 
4110   if (!rhs1_type || !rhs2_type
4111       || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4112     return NULL;
4113 
4114   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4115     {
4116       vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4117       if (!vectype1)
4118 	return NULL;
4119       rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
4120     }
4121   else
4122     {
4123       vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4124       if (!vectype1)
4125 	return NULL;
4126       rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
4127     }
4128 
4129   lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4130   pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4131 
4132   *type_out = vectype1;
4133   vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4134 
4135   return pattern_stmt;
4136 }
4137 
4138 /* STMT_INFO is a load or store.  If the load or store is conditional, return
4139    the boolean condition under which it occurs, otherwise return null.  */
4140 
4141 static tree
4142 vect_get_load_store_mask (stmt_vec_info stmt_info)
4143 {
4144   if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4145     {
4146       gcc_assert (gimple_assign_single_p (def_assign));
4147       return NULL_TREE;
4148     }
4149 
4150   if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4151     {
4152       internal_fn ifn = gimple_call_internal_fn (def_call);
4153       int mask_index = internal_fn_mask_index (ifn);
4154       return gimple_call_arg (def_call, mask_index);
4155     }
4156 
4157   gcc_unreachable ();
4158 }
4159 
4160 /* Return the scalar offset type that an internal gather/scatter function
4161    should use.  GS_INFO describes the gather/scatter operation.  */
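
/* For example, a gather of 64-bit elements indexed by a 32-bit offset would
   use a 64-bit offset type, extended according to the signedness of the
   original offset (an illustration of the rule implemented below rather
   than a statement about any particular target).  */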
4162 
4163 static tree
4164 vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4165 {
4166   tree offset_type = TREE_TYPE (gs_info->offset);
4167   unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4168 
4169   /* Enforced by vect_check_gather_scatter.  */
4170   unsigned int offset_bits = TYPE_PRECISION (offset_type);
4171   gcc_assert (element_bits >= offset_bits);
4172 
4173   /* If the offset is narrower than the elements, extend it according
4174      to its sign.  */
4175   if (element_bits > offset_bits)
4176     return build_nonstandard_integer_type (element_bits,
4177 					   TYPE_UNSIGNED (offset_type));
4178 
4179   return offset_type;
4180 }
4181 
4182 /* Return MASK if MASK is suitable for masking an operation on vectors
4183    of type VECTYPE, otherwise convert it into such a form and return
4184    the result.  Associate any conversion statements with STMT_INFO's
4185    pattern.  */
4186 
4187 static tree
4188 vect_convert_mask_for_vectype (tree mask, tree vectype,
4189 			       stmt_vec_info stmt_info, vec_info *vinfo)
4190 {
4191   tree mask_type = search_type_for_mask (mask, vinfo);
4192   if (mask_type)
4193     {
4194       tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4195       if (mask_vectype
4196 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4197 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
4198 	mask = build_mask_conversion (mask, vectype, stmt_info);
4199     }
4200   return mask;
4201 }
4202 
4203 /* Return the equivalent of:
4204 
4205      fold_convert (TYPE, VALUE)
4206 
4207    with the expectation that the operation will be vectorized.
4208    If new statements are needed, add them as pattern statements
4209    to STMT_INFO.  */
4210 
4211 static tree
4212 vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
4213 {
4214   if (useless_type_conversion_p (type, TREE_TYPE (value)))
4215     return value;
4216 
4217   tree new_value = vect_recog_temp_ssa_var (type, NULL);
4218   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4219   append_pattern_def_seq (stmt_info, conversion,
4220 			  get_vectype_for_scalar_type (type));
4221   return new_value;
4222 }
4223 
4224 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4225    internal function.  Return the final statement on success and set
4226    *TYPE_OUT to the vector type being loaded or stored.
4227 
4228    This function only handles gathers and scatters that were recognized
4229    as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
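
/* As an illustrative sketch (not a dump from any particular target), a
   gather load recognized this way is replaced by an internal function call
   of the form

     lhs_1 = .GATHER_LOAD (base, offset, scale);

   or, if the load is conditional,

     lhs_1 = .MASK_GATHER_LOAD (base, offset, scale, mask);

   where OFFSET has been converted to the element width and MASK to a form
   suitable for masking the loaded vector type.  */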
4230 
4231 static gimple *
4232 vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
4233 {
4234   /* Currently we only support this for loop vectorization.  */
4235   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4236   if (!loop_vinfo)
4237     return NULL;
4238 
4239   /* Make sure that we're looking at a gather load or scatter store.  */
4240   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4241   if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4242     return NULL;
4243 
4244   /* Get the boolean that controls whether the load or store happens.
4245      This is null if the operation is unconditional.  */
4246   tree mask = vect_get_load_store_mask (stmt_info);
4247 
4248   /* Make sure that the target supports an appropriate internal
4249      function for the gather/scatter operation.  */
4250   gather_scatter_info gs_info;
4251   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4252       || gs_info.decl)
4253     return NULL;
4254 
4255   /* Convert the mask to the right form.  */
4256   tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4257   if (mask)
4258     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4259 					  loop_vinfo);
4260 
4261   /* Get the invariant base and non-invariant offset, converting the
4262      latter to the same width as the vector elements.  */
4263   tree base = gs_info.base;
4264   tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
4265   tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4266 						stmt_info);
4267 
4268   /* Build the new pattern statement.  */
4269   tree scale = size_int (gs_info.scale);
4270   gcall *pattern_stmt;
4271   if (DR_IS_READ (dr))
4272     {
4273       if (mask != NULL)
4274 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4275 						   offset, scale, mask);
4276       else
4277 	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4278 						   offset, scale);
4279       tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4280       gimple_call_set_lhs (pattern_stmt, load_lhs);
4281     }
4282   else
4283     {
4284       tree rhs = vect_get_store_rhs (stmt_info);
4285       if (mask != NULL)
4286 	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4287 						   base, offset, scale, rhs,
4288 						   mask);
4289       else
4290 	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4291 						   base, offset, scale, rhs);
4292     }
4293   gimple_call_set_nothrow (pattern_stmt, true);
4294 
4295   /* Copy across relevant vectorization info and associate DR with the
4296      new pattern statement instead of the original statement.  */
4297   stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
4298   loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
4299 
4300   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4301   *type_out = vectype;
4302   vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
4303 
4304   return pattern_stmt;
4305 }
4306 
4307 /* Return true if TYPE is a non-boolean integer type.  These are the types
4308    that we want to consider for narrowing.  */
4309 
4310 static bool
4311 vect_narrowable_type_p (tree type)
4312 {
4313   return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4314 }
4315 
4316 /* Return true if the operation given by CODE can be truncated to N bits
4317    when only N bits of the output are needed.  This is only true if bit N+1
4318    of the inputs has no effect on the low N bits of the result.  */
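
/* For example, the low 8 bits of a 32-bit PLUS_EXPR or MULT_EXPR depend only
   on the low 8 bits of the operands, so those operations can be truncated,
   whereas the low 8 bits of a TRUNC_DIV_EXPR or RSHIFT_EXPR result can
   depend on higher input bits, so those cannot.  */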
4319 
4320 static bool
4321 vect_truncatable_operation_p (tree_code code)
4322 {
4323   switch (code)
4324     {
4325     case PLUS_EXPR:
4326     case MINUS_EXPR:
4327     case MULT_EXPR:
4328     case BIT_AND_EXPR:
4329     case BIT_IOR_EXPR:
4330     case BIT_XOR_EXPR:
4331     case COND_EXPR:
4332       return true;
4333 
4334     default:
4335       return false;
4336     }
4337 }
4338 
4339 /* Record that STMT_INFO could be changed from operating on TYPE to
4340    operating on a type with the precision and sign given by PRECISION
4341    and SIGN respectively.  PRECISION is an arbitrary bit precision;
4342    it might not be a whole number of bytes.  */
4343 
4344 static void
4345 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4346 			 unsigned int precision, signop sign)
4347 {
4348   /* Round the precision up to a whole number of bytes.  */
4349   precision = vect_element_precision (precision);
4350   if (precision < TYPE_PRECISION (type)
4351       && (!stmt_info->operation_precision
4352 	  || stmt_info->operation_precision > precision))
4353     {
4354       stmt_info->operation_precision = precision;
4355       stmt_info->operation_sign = sign;
4356     }
4357 }
4358 
4359 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4360    non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
4361    is an arbitrary bit precision; it might not be a whole number of bytes.  */
4362 
4363 static void
4364 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4365 			      unsigned int min_input_precision)
4366 {
4367   /* This operation in isolation only requires the inputs to have
4368      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
4369      that MIN_INPUT_PRECISION is a natural precision for the chain
4370      as a whole.  E.g. consider something like:
4371 
4372 	 unsigned short *x, *y;
4373 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4374 
4375      The right shift can be done on unsigned chars, and only requires the
4376      result of "*x & 0xf0" to be done on unsigned chars.  But taking that
4377      approach would mean turning a natural chain of single-vector unsigned
4378      short operations into one that truncates "*x" and then extends
4379      "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4380      operation and one vector for each unsigned char operation.
4381      This would be a significant pessimization.
4382 
4383      Instead only propagate the maximum of this precision and the precision
4384      required by the users of the result.  This means that we don't pessimize
4385      the case above but continue to optimize things like:
4386 
4387 	 unsigned char *y;
4388 	 unsigned short *x;
4389 	 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4390 
4391      Here we would truncate two vectors of *x to a single vector of
4392      unsigned chars and use single-vector unsigned char operations for
4393      everything else, rather than doing two unsigned short copies of
4394      "(*x & 0xf0) >> 4" and then truncating the result.  */
4395   min_input_precision = MAX (min_input_precision,
4396 			     stmt_info->min_output_precision);
4397 
4398   if (min_input_precision < TYPE_PRECISION (type)
4399       && (!stmt_info->min_input_precision
4400 	  || stmt_info->min_input_precision > min_input_precision))
4401     stmt_info->min_input_precision = min_input_precision;
4402 }
4403 
4404 /* Subroutine of vect_determine_min_output_precision.  Return true if
4405    we can calculate a reduced number of output bits for STMT_INFO,
4406    whose result is LHS.  */
4407 
4408 static bool
4409 vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4410 {
4411   /* Take the maximum precision required by users of the result.  */
4412   vec_info *vinfo = stmt_info->vinfo;
4413   unsigned int precision = 0;
4414   imm_use_iterator iter;
4415   use_operand_p use;
4416   FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4417     {
4418       gimple *use_stmt = USE_STMT (use);
4419       if (is_gimple_debug (use_stmt))
4420 	continue;
4421       stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4422       if (!use_stmt_info || !use_stmt_info->min_input_precision)
4423 	return false;
4424       /* The input precision recorded for COND_EXPRs applies only to the
4425 	 "then" and "else" values.  */
4426       gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4427       if (assign
4428 	  && gimple_assign_rhs_code (assign) == COND_EXPR
4429 	  && use->use != gimple_assign_rhs2_ptr (assign)
4430 	  && use->use != gimple_assign_rhs3_ptr (assign))
4431 	return false;
4432       precision = MAX (precision, use_stmt_info->min_input_precision);
4433     }
4434 
4435   if (dump_enabled_p ())
4436     dump_printf_loc (MSG_NOTE, vect_location,
4437 		     "only the low %d bits of %T are significant\n",
4438 		     precision, lhs);
4439   stmt_info->min_output_precision = precision;
4440   return true;
4441 }
4442 
4443 /* Calculate min_output_precision for STMT_INFO.  */
4444 
4445 static void
4446 vect_determine_min_output_precision (stmt_vec_info stmt_info)
4447 {
4448   /* We're only interested in statements with a narrowable result.  */
4449   tree lhs = gimple_get_lhs (stmt_info->stmt);
4450   if (!lhs
4451       || TREE_CODE (lhs) != SSA_NAME
4452       || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4453     return;
4454 
4455   if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4456     stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4457 }
4458 
4459 /* Use range information to decide whether STMT (described by STMT_INFO)
4460    could be done in a narrower type.  This is effectively a forward
4461    propagation, since it uses context-independent information that applies
4462    to all users of an SSA name.  */
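
/* For example (illustrative), if range information shows that the result of
   a 32-bit addition is always within [0, 200], the addition can be recorded
   as being performable on unsigned 8-bit values.  */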
4463 
4464 static void
4465 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4466 {
4467   tree lhs = gimple_assign_lhs (stmt);
4468   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4469     return;
4470 
4471   tree type = TREE_TYPE (lhs);
4472   if (!vect_narrowable_type_p (type))
4473     return;
4474 
4475   /* First see whether we have any useful range information for the result.  */
4476   unsigned int precision = TYPE_PRECISION (type);
4477   signop sign = TYPE_SIGN (type);
4478   wide_int min_value, max_value;
4479   if (!vect_get_range_info (lhs, &min_value, &max_value))
4480     return;
4481 
4482   tree_code code = gimple_assign_rhs_code (stmt);
4483   unsigned int nops = gimple_num_ops (stmt);
4484 
4485   if (!vect_truncatable_operation_p (code))
4486     /* Check that all relevant input operands are compatible, and update
4487        [MIN_VALUE, MAX_VALUE] to include their ranges.  */
4488     for (unsigned int i = 1; i < nops; ++i)
4489       {
4490 	tree op = gimple_op (stmt, i);
4491 	if (TREE_CODE (op) == INTEGER_CST)
4492 	  {
4493 	    /* Don't require the integer to have TYPE (which it might
4494 	       not for things like shift amounts, etc.), but do require it
4495 	       to fit the type.  */
4496 	    if (!int_fits_type_p (op, type))
4497 	      return;
4498 
4499 	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4500 	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4501 	  }
4502 	else if (TREE_CODE (op) == SSA_NAME)
4503 	  {
4504 	    /* Ignore codes that don't take uniform arguments.  */
4505 	    if (!types_compatible_p (TREE_TYPE (op), type))
4506 	      return;
4507 
4508 	    wide_int op_min_value, op_max_value;
4509 	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4510 	      return;
4511 
4512 	    min_value = wi::min (min_value, op_min_value, sign);
4513 	    max_value = wi::max (max_value, op_max_value, sign);
4514 	  }
4515 	else
4516 	  return;
4517       }
4518 
4519   /* Try to switch signed types for unsigned types if we can.
4520      This is better for two reasons.  First, unsigned ops tend
4521      to be cheaper than signed ops.  Second, it means that we can
4522      handle things like:
4523 
4524 	signed char c;
4525 	int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4526 
4527      as:
4528 
4529 	signed char c;
4530 	unsigned short res_1 = (unsigned short) c & 0xff00;
4531 	int res = (int) res_1;
4532 
4533      where the intermediate result res_1 has unsigned rather than
4534      signed type.  */
4535   if (sign == SIGNED && !wi::neg_p (min_value))
4536     sign = UNSIGNED;
4537 
4538   /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
4539   unsigned int precision1 = wi::min_precision (min_value, sign);
4540   unsigned int precision2 = wi::min_precision (max_value, sign);
4541   unsigned int value_precision = MAX (precision1, precision2);
4542   if (value_precision >= precision)
4543     return;
4544 
4545   if (dump_enabled_p ())
4546     dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4547 		     " without loss of precision: %G",
4548 		     sign == SIGNED ? "signed" : "unsigned",
4549 		     value_precision, stmt);
4550 
4551   vect_set_operation_type (stmt_info, type, value_precision, sign);
4552   vect_set_min_input_precision (stmt_info, type, value_precision);
4553 }
4554 
4555 /* Use information about the users of STMT's result to decide whether
4556    STMT (described by STMT_INFO) could be done in a narrower type.
4557    This is effectively a backward propagation.  */
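
/* For example (illustrative), if the users of X << 3 need only the low 8
   bits of the result, only the low 5 bits of X are required, whereas if the
   users of X >> 3 need only the low 8 bits, the shift itself has to be
   performed on at least 11 bits.  */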
4558 
4559 static void
4560 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4561 {
4562   tree_code code = gimple_assign_rhs_code (stmt);
4563   unsigned int opno = (code == COND_EXPR ? 2 : 1);
4564   tree type = TREE_TYPE (gimple_op (stmt, opno));
4565   if (!vect_narrowable_type_p (type))
4566     return;
4567 
4568   unsigned int precision = TYPE_PRECISION (type);
4569   unsigned int operation_precision, min_input_precision;
4570   switch (code)
4571     {
4572     CASE_CONVERT:
4573       /* Only the bits that contribute to the output matter.  Don't change
4574 	 the precision of the operation itself.  */
4575       operation_precision = precision;
4576       min_input_precision = stmt_info->min_output_precision;
4577       break;
4578 
4579     case LSHIFT_EXPR:
4580     case RSHIFT_EXPR:
4581       {
4582 	tree shift = gimple_assign_rhs2 (stmt);
4583 	if (TREE_CODE (shift) != INTEGER_CST
4584 	    || !wi::ltu_p (wi::to_widest (shift), precision))
4585 	  return;
4586 	unsigned int const_shift = TREE_INT_CST_LOW (shift);
4587 	if (code == LSHIFT_EXPR)
4588 	  {
4589 	    /* We need CONST_SHIFT fewer bits of the input.  */
4590 	    operation_precision = stmt_info->min_output_precision;
4591 	    min_input_precision = (MAX (operation_precision, const_shift)
4592 				    - const_shift);
4593 	  }
4594 	else
4595 	  {
4596 	    /* We need CONST_SHIFT extra bits to do the operation.  */
4597 	    operation_precision = (stmt_info->min_output_precision
4598 				   + const_shift);
4599 	    min_input_precision = operation_precision;
4600 	  }
4601 	break;
4602       }
4603 
4604     default:
4605       if (vect_truncatable_operation_p (code))
4606 	{
4607 	  /* Input bit N has no effect on output bits N-1 and lower.  */
4608 	  operation_precision = stmt_info->min_output_precision;
4609 	  min_input_precision = operation_precision;
4610 	  break;
4611 	}
4612       return;
4613     }
4614 
4615   if (operation_precision < precision)
4616     {
4617       if (dump_enabled_p ())
4618 	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4619 			 " without affecting users: %G",
4620 			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4621 			 operation_precision, stmt);
4622       vect_set_operation_type (stmt_info, type, operation_precision,
4623 			       TYPE_SIGN (type));
4624     }
4625   vect_set_min_input_precision (stmt_info, type, min_input_precision);
4626 }
4627 
4628 /* Handle vect_determine_precisions for STMT_INFO, given that we
4629    have already done so for the users of its result.  */
4630 
4631 void
4632 vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4633 {
4634   vect_determine_min_output_precision (stmt_info);
4635   if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4636     {
4637       vect_determine_precisions_from_range (stmt_info, stmt);
4638       vect_determine_precisions_from_users (stmt_info, stmt);
4639     }
4640 }
4641 
4642 /* Walk backwards through the vectorizable region to determine the
4643    values of these fields:
4644 
4645    - min_output_precision
4646    - min_input_precision
4647    - operation_precision
4648    - operation_sign.  */
4649 
4650 void
4651 vect_determine_precisions (vec_info *vinfo)
4652 {
4653   DUMP_VECT_SCOPE ("vect_determine_precisions");
4654 
4655   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4656     {
4657       struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4658       basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4659       unsigned int nbbs = loop->num_nodes;
4660 
4661       for (unsigned int i = 0; i < nbbs; i++)
4662 	{
4663 	  basic_block bb = bbs[nbbs - i - 1];
4664 	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
4665 	       !gsi_end_p (si); gsi_prev (&si))
4666 	    vect_determine_stmt_precisions
4667 	      (vinfo->lookup_stmt (gsi_stmt (si)));
4668 	}
4669     }
4670   else
4671     {
4672       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4673       gimple_stmt_iterator si = bb_vinfo->region_end;
4674       gimple *stmt;
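      /* Walk backwards over the region: the first statement processed is
	 the one just before region_end (or the last statement in the block
	 if region_end is the end-of-block iterator), and the walk stops
	 after processing the statement at region_begin.  */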
4675       do
4676 	{
4677 	  if (!gsi_stmt (si))
4678 	    si = gsi_last_bb (bb_vinfo->bb);
4679 	  else
4680 	    gsi_prev (&si);
4681 	  stmt = gsi_stmt (si);
4682 	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
4683 	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4684 	    vect_determine_stmt_precisions (stmt_info);
4685 	}
4686       while (stmt != gsi_stmt (bb_vinfo->region_begin));
4687     }
4688 }
4689 
4690 typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
4691 
4692 struct vect_recog_func
4693 {
4694   vect_recog_func_ptr fn;
4695   const char *name;
4696 };
4697 
4698 /* Note that ordering matters - the first pattern that matches on a stmt
4699    is taken, which means the more complex patterns usually need to precede
4700    the less complex ones (widen_sum only after dot_prod or sad, for example).  */
4701 static vect_recog_func vect_vect_recog_func_ptrs[] = {
4702   { vect_recog_over_widening_pattern, "over_widening" },
4703   /* Must come after over_widening, which narrows the shift as much as
4704      possible beforehand.  */
4705   { vect_recog_average_pattern, "average" },
4706   { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
4707   { vect_recog_widen_mult_pattern, "widen_mult" },
4708   { vect_recog_dot_prod_pattern, "dot_prod" },
4709   { vect_recog_sad_pattern, "sad" },
4710   { vect_recog_widen_sum_pattern, "widen_sum" },
4711   { vect_recog_pow_pattern, "pow" },
4712   { vect_recog_widen_shift_pattern, "widen_shift" },
4713   { vect_recog_rotate_pattern, "rotate" },
4714   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4715   { vect_recog_divmod_pattern, "divmod" },
4716   { vect_recog_mult_pattern, "mult" },
4717   { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4718   { vect_recog_bool_pattern, "bool" },
4719   /* This must come before mask conversion, and includes the parts
4720      of mask conversion that are needed for gather and scatter
4721      internal functions.  */
4722   { vect_recog_gather_scatter_pattern, "gather_scatter" },
4723   { vect_recog_mask_conversion_pattern, "mask_conversion" }
4724 };
4725 
4726 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
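/* As a purely illustrative sketch of how a new recognizer would be
   registered (vect_recog_foo_pattern is a hypothetical name, not an
   existing function): add { vect_recog_foo_pattern, "foo" } to the table
   above, placing it before any simpler pattern that could otherwise
   match the same statements first.  */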
4727 
4728 /* Mark statements that are involved in a pattern.  */
4729 
4730 static inline void
4731 vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
4732                          tree pattern_vectype)
4733 {
4734   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4735 
4736   gimple *orig_pattern_stmt = NULL;
4737   if (is_pattern_stmt_p (orig_stmt_info))
4738     {
4739       /* We're replacing a statement in an existing pattern definition
4740 	 sequence.  */
4741       orig_pattern_stmt = orig_stmt_info->stmt;
4742       if (dump_enabled_p ())
4743 	dump_printf_loc (MSG_NOTE, vect_location,
4744 			 "replacing earlier pattern %G", orig_pattern_stmt);
4745 
4746       /* To keep the book-keeping simple, just swap the lhs of the
4747 	 old and new statements, so that the old one has a valid but
4748 	 unused lhs.  */
4749       tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4750       gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
4751       gimple_set_lhs (pattern_stmt, old_lhs);
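      /* For instance, if the earlier pattern statement was "patt_a = ..."
	 and its replacement initially defines "patt_b", the old statement
	 now defines the unused patt_b while the replacement defines patt_a,
	 so existing references to patt_a automatically refer to the new
	 statement (patt_a and patt_b are illustrative names).  */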
4752 
4753       if (dump_enabled_p ())
4754 	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
4755 
4756       /* Switch to the statement that ORIG replaces.  */
4757       orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
4758 
4759       /* We shouldn't be replacing the main pattern statement.  */
4760       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4761 		  != orig_pattern_stmt);
4762     }
4763 
4764   if (def_seq)
4765     for (gimple_stmt_iterator si = gsi_start (def_seq);
4766 	 !gsi_end_p (si); gsi_next (&si))
4767       {
4768 	stmt_vec_info pattern_stmt_info
4769 	  = vect_init_pattern_stmt (gsi_stmt (si),
4770 				    orig_stmt_info, pattern_vectype);
4771 	/* Stmts in the def sequence are not vectorizable cycle or
4772 	   induction defs; instead they should all be vect_internal_def
4773 	   feeding the main pattern stmt, which retains this def type.  */
4774 	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
4775       }
4776 
4777   if (orig_pattern_stmt)
4778     {
4779       vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4780 
4781       /* Insert all the new pattern statements before the original one.  */
4782       gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4783       gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4784 					       orig_def_seq);
4785       gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4786       gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
4787 
4788       /* Remove the pattern statement that this new pattern replaces.  */
4789       gsi_remove (&gsi, false);
4790     }
4791   else
4792     vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4793 }
4794 
4795 /* Function vect_pattern_recog_1
4796 
4797    Input:
4798    PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4799         computation pattern.
4800    STMT_INFO: A stmt from which the pattern search should start.
4801 
4802    If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4803    a sequence of statements that has the same functionality and can be
4804    used to replace STMT_INFO.  It returns the last statement in the sequence
4805    and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
4806    PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4807    statement, having first checked that the target supports the new operation
4808    in that type.
4809 
4810    This function also does some bookkeeping, as explained in the documentation
4811    for vect_pattern_recog.  */
4812 
4813 static void
4814 vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4815 {
4816   vec_info *vinfo = stmt_info->vinfo;
4817   gimple *pattern_stmt;
4818   loop_vec_info loop_vinfo;
4819   tree pattern_vectype;
4820 
4821   /* If this statement has already been replaced with pattern statements,
4822      leave the original statement alone, since the first match wins.
4823      Instead try to match against the definition statements that feed
4824      the main pattern statement.  */
4825   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4826     {
4827       gimple_stmt_iterator gsi;
4828       for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4829 	   !gsi_end_p (gsi); gsi_next (&gsi))
4830 	vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
4831       return;
4832     }
4833 
4834   gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4835   pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
4836   if (!pattern_stmt)
4837     {
4838       /* Clear any half-formed pattern definition sequence.  */
4839       STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
4840       return;
4841     }
4842 
4843   loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4844   gcc_assert (pattern_vectype);
4845 
4846   /* Found a vectorizable pattern.  */
4847   if (dump_enabled_p ())
4848     dump_printf_loc (MSG_NOTE, vect_location,
4849 		     "%s pattern recognized: %G",
4850 		     recog_func->name, pattern_stmt);
4851 
4852   /* Mark the stmts that are involved in the pattern. */
4853   vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4854 
4855   /* Patterns cannot be vectorized using SLP, because they change the order of
4856      computation.  */
4857   if (loop_vinfo)
4858     {
4859       unsigned ix, ix2;
4860       stmt_vec_info *elem_ptr;
4861       VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
4862 			     elem_ptr, *elem_ptr == stmt_info);
4863     }
4864 }
4865 
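/* A minimal sketch of the shape of a recognizer under the interface
   described above (vect_recog_foo_pattern and the names inside it are
   purely illustrative, not functions or variables from this file):

     static gimple *
     vect_recog_foo_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
     {
       ... try to match the idiom rooted at stmt_vinfo->stmt and check
	   that the target supports the replacement operation ...
       if (!matched)
	 return NULL;
       ... record any earlier replacement statements in
	   STMT_VINFO_PATTERN_DEF_SEQ ...
       *type_out = vectype_of_last_stmt;
       return last_pattern_stmt;
     }  */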
4866 
4867 /* Function vect_pattern_recog
4868 
4869    Input:
4870    VINFO - a vec_info describing the loop or basic-block region in which
4871         we want to look for computation idioms.
4872 
4873    Output - for each computation idiom that is detected we create a new stmt
4874         that provides the same functionality and that can be vectorized.  We
4875         also record some information in the stmt_vec_info of the relevant
4876         stmts, as explained below:
4877 
4878    At the entry to this function we have the following stmts, with the
4879    following initial value in the STMT_VINFO fields:
4880 
4881          stmt                     in_pattern_p  related_stmt    vec_stmt
4882          S1: a_i = ....                 -       -               -
4883          S2: a_2 = ..use(a_i)..         -       -               -
4884          S3: a_1 = ..use(a_2)..         -       -               -
4885          S4: a_0 = ..use(a_1)..         -       -               -
4886          S5: ... = ..use(a_0)..         -       -               -
4887 
4888    Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
4889    represented by a single stmt.  We then:
4890    - create a new stmt S6 equivalent to the pattern (the stmt is not
4891      inserted into the code)
4892    - fill in the STMT_VINFO fields as follows:
4893 
4894                                   in_pattern_p  related_stmt    vec_stmt
4895          S1: a_i = ....                 -       -               -
4896          S2: a_2 = ..use(a_i)..         -       -               -
4897          S3: a_1 = ..use(a_2)..         -       -               -
4898          S4: a_0 = ..use(a_1)..         true    S6              -
4899           '---> S6: a_new = ....        -       S4              -
4900          S5: ... = ..use(a_0)..         -       -               -
4901 
4902    (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
4903    to each other through the RELATED_STMT field).
4904 
4905    S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4906    of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
4907    remain irrelevant unless used by stmts other than S4.
4908 
4909    If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
4910    (because they are marked as irrelevant).  It will vectorize S6, and record
4911    a pointer to the new vector stmt VS6 from S6 (as usual).
4912    S4 will be skipped, and S5 will be vectorized as usual:
4913 
4914                                   in_pattern_p  related_stmt    vec_stmt
4915          S1: a_i = ....                 -       -               -
4916          S2: a_2 = ..use(a_i)..         -       -               -
4917          S3: a_1 = ..use(a_2)..         -       -               -
4918        > VS6: va_new = ....             -       -               -
4919          S4: a_0 = ..use(a_1)..         true    S6              VS6
4920           '---> S6: a_new = ....        -       S4              VS6
4921        > VS5: ... = ..vuse(va_new)..    -       -               -
4922          S5: ... = ..use(a_0)..         -       -               -
4923 
4924    DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4925    elsewhere), and we'll end up with:
4926 
4927         VS6: va_new = ....
4928         VS5: ... = ..vuse(va_new)..
4929 
4930    In case of more than one pattern statements, e.g., widen-mult with
4931    intermediate type:
4932 
4933      S1  a_t = ;
4934      S2  a_T = (TYPE) a_t;
4935            '--> S3: a_it = (interm_type) a_t;
4936      S4  prod_T = a_T * CONST;
4937            '--> S5: prod_T' = a_it w* CONST;
4938 
4939    there may be other users of a_T outside the pattern.  In that case S2 will
4940    be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4941    and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
4942    be recorded in S3.  */
4943 
4944 void
4945 vect_pattern_recog (vec_info *vinfo)
4946 {
4947   struct loop *loop;
4948   basic_block *bbs;
4949   unsigned int nbbs;
4950   gimple_stmt_iterator si;
4951   unsigned int i, j;
4952 
4953   vect_determine_precisions (vinfo);
4954 
4955   DUMP_VECT_SCOPE ("vect_pattern_recog");
4956 
4957   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4958     {
4959       loop = LOOP_VINFO_LOOP (loop_vinfo);
4960       bbs = LOOP_VINFO_BBS (loop_vinfo);
4961       nbbs = loop->num_nodes;
4962 
4963       /* Scan through the loop stmts, applying the pattern recognition
4964 	 functions starting at each stmt visited:  */
4965       for (i = 0; i < nbbs; i++)
4966 	{
4967 	  basic_block bb = bbs[i];
4968 	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4969 	    {
4970 	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4971 	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
4972 	      for (j = 0; j < NUM_PATTERNS; j++)
4973 		vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4974 				      stmt_info);
4975 	    }
4976 	}
4977     }
4978   else
4979     {
4980       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4981       for (si = bb_vinfo->region_begin;
4982 	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4983 	{
4984 	  gimple *stmt = gsi_stmt (si);
4985 	  stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
4986 	  if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4987 	    continue;
4988 
4989 	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
4990 	  for (j = 0; j < NUM_PATTERNS; j++)
4991 	    vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
4992 	}
4993     }
4994 }
4995