/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimplify.h"
#include "tree-cfg.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);


/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_to_uhwi (TYPE_SIZE (inner_type));
  int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
    / HOST_BITS_PER_WIDE_INT;
  unsigned HOST_WIDE_INT low, mask;
  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
  int i;

  gcc_assert (n && n <= WIDE_INT_MAX_ELTS);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  for (i = 0; i < n; i++)
    a[i] = low;

  gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
  return wide_int_to_tree
    (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
}
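
/* As an illustration (values assumed, not from any particular target):
   for a 32-bit TYPE, an 8-bit INNER_TYPE and VALUE 0x7f, mask is 0xff,
   so with a 64-bit HOST_WIDE_INT

     low = 0xffffffffffffffff / 0xff * 0x7f = 0x7f7f7f7f7f7f7f7f

   and wide_int::from_array truncates that to TYPE's 32-bit precision,
   yielding the replicated constant 0x7f7f7f7f.  */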

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of SUBPARTS units, each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  vector_last_nunits = nunits;
  vector_last_type = build_vector_type (vector_inner_type, nunits);
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code,
			      tree);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (TREE_CODE (t) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (t);
      if (is_gimple_assign (def_stmt)
	  && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
	      || (bitpos
		  && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
	t = gimple_assign_rhs1 (def_stmt);
    }
  if (bitpos)
    {
      if (TREE_CODE (type) == BOOLEAN_TYPE)
	{
	  tree itype
	    = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
	  tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
					bitsize, bitpos);
	  return gimplify_build2 (gsi, NE_EXPR, type, field,
				  build_zero_cst (itype));
	}
      else
	return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
    }
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code,
	  tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements; the returned
   expression is of a signed integer type with size equal to the
   size of INNER_TYPE.  */
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	    tree bitpos, tree bitsize, enum tree_code code, tree type)
{
  tree stype = TREE_TYPE (type);
  tree cst_false = build_zero_cst (stype);
  tree cst_true = build_all_ones_cst (stype);
  tree cmp;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  cmp = build2 (code, boolean_type_node, a, b);
  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
            (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}
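
/* Worked example for the PLUS_EXPR path (values chosen purely for
   illustration): with two 8-bit lanes packed into a 16-bit word,
   a = 0xff01, b = 0x0102, low_bits = 0x7f7f and high_bits = 0x8080:

     a_low      = 0xff01 & 0x7f7f  = 0x7f01
     b_low      = 0x0102 & 0x7f7f  = 0x0102
     result_low = 0x7f01 + 0x0102  = 0x8003   (no carry escapes a lane)
     signs      = (a ^ b) & 0x8080 = 0x8000
     result     = 0x8003 ^ 0x8000  = 0x0003

   i.e. the high byte wraps to 0x00 (0xff + 0x01) and the low byte is
   0x03, exactly what a per-element byte addition would produce.  */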

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
	   tree bitsize ATTRIBUTE_UNUSED,
	   enum tree_code code ATTRIBUTE_UNUSED,
	   tree type ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_to_uhwi (part_width)
	      / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}
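
/* For example (sketch only, SSA names invented for illustration),
   lowering a V4SI addition with SImode as INNER_TYPE produces a
   sequence along the lines of:

     _3 = BIT_FIELD_REF <a_1, 32, 0>;
     _4 = BIT_FIELD_REF <b_2, 32, 0>;
     _5 = _3 + _4;
     ...				(likewise at bit positions
					 32, 64 and 96)
     lhs = {_5, _8, _11, _14};  */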

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  machine_mode mode;
  int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
					 GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
      warning_at (loc, OPT_Wvector_operation_performance,
		  "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}
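
/* Illustration of the strategy choice above (numbers assume an 8-byte
   UNITS_PER_WORD, not any particular target): a bitwise AND on a
   16-byte vector has n_words == 2 and is done two word_mode pieces at
   a time, while the same operation on a 4-byte vector has
   n_words == 0 and is done as a single 32-bit scalar operation.  */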

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
		       / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}
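
/* For instance (assuming an 8-byte word): a V8QI addition has
   parts_per_word == 8 and eight subparts, so it qualifies for the
   word-at-a-time bit twiddling, whereas a V2SI addition has
   parts_per_word == 2 and is expanded piecewise instead.  */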

/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab to see whether the expression
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
			  tree op1, enum tree_code code)
{
  tree t;
  if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
      && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
   of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
   the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, vec));
    }

  return NULL_TREE;
}
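
/* E.g. for SHIFTCNTS {3, 3, 3, 3} this first tries a vector shift by
   the scalar count 3 (optab_scalar); mixed counts such as {1, 2, 3, 4}
   can only use a shift by a whole vector of counts (optab_vector).
   All-zero counts simply return OP0 unchanged.  */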

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i;
  signop sign_p = TYPE_SIGN (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine whether all op1 elements are powers of
     two and whether it is possible to expand the operation using shifts
     (or, for remainder, using masking).  Additionally compute the
     multiplicative constants and pre and post shifts if the division is
     to be expanded using widening or high part multiplication plus
     shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (sign_p == UNSIGNED)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;

	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = ctz_or_zero (d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!tree_fits_uhwi_p (cst2))
			return NULL_TREE;
		      d2 = tree_to_uhwi (cst2) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		  ? (unsigned HOST_WIDE_INT) d
		  : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (sign_p == SIGNED)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be computed either as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
	    {
	      tree zero, cst, cond, mask_type;
	      gimple *stmt;

	      mask_type = build_same_sized_truth_vector_type (type);
	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, mask_type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					(HOST_WIDE_INT_1U
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type);
	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
					  cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (sign_p == UNSIGNED)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    (HOST_WIDE_INT_1U
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (sign_p == UNSIGNED)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (sign_p == UNSIGNED);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (sign_p == UNSIGNED);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (sign_p == SIGNED);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}
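
/* Worked example for the unsigned mode 0 path (for illustration):
   dividing a V4SI vector by {3, 3, 3, 3} with prec == 32,
   choose_multiplier yields ml == 0xaaaaaaab with post_shift == 1 and
   no pre-shift, so each element is computed as

     t1 = x h* 0xaaaaaaab;	(high half of the 32x32 -> 64 product)
     q  = t1 >> 1;

   which equals x / 3 for every 32-bit unsigned x.  */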

/* Expand a vector condition to scalars, by using many conditions
   on the vector's elements.  */
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2 = NULL_TREE;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  tree comp_width = width;
  tree comp_index = index;
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
      comp_width = TYPE_SIZE (comp_inner_type);
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
    return;

  /* Handle vector boolean types with bitmasks.  If there is a comparison
     and we can expand the comparison into the vector boolean bitmask,
     or otherwise if it is compatible with type, we can transform
      vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
     into
      tmp_6 = x_2 < y_3;
      tmp_7 = tmp_6 & vbfld_4;
      tmp_8 = ~tmp_6;
      tmp_9 = tmp_8 & vbfld_5;
      vbfld_1 = tmp_7 | tmp_9;
     Similarly for vbfld_10 instead of x_2 < y_3.  */
  if (VECTOR_BOOLEAN_TYPE_P (type)
      && SCALAR_INT_MODE_P (TYPE_MODE (type))
      && (GET_MODE_BITSIZE (TYPE_MODE (type))
	  < (TYPE_VECTOR_SUBPARTS (type)
	     * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type)))))
      && (a_is_comparison
	  ? useless_type_conversion_p (type, TREE_TYPE (a))
	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
    {
      if (a_is_comparison)
	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
      a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
      a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
      a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
      a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
      gimple_assign_set_rhs_from_tree (gsi, a);
      update_stmt (gsi_stmt (*gsi));
      return;
    }

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits; i++)
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
				       comp_width, comp_index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
				       comp_width, comp_index);
	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
      index = int_const_binop (PLUS_EXPR, index, width);
      if (width == comp_width)
	comp_index = index;
      else
	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
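
/* In the piecewise case, a V4SI statement lhs = x_2 < y_3 ? b_4 : c_5
   is thus lowered roughly to (sketch, SSA names invented for
   illustration):

     _10 = BIT_FIELD_REF <b_4, 32, 0>;
     _11 = BIT_FIELD_REF <c_5, 32, 0>;
     _12 = BIT_FIELD_REF <x_2, 32, 0>;
     _13 = BIT_FIELD_REF <y_3, 32, 0>;
     _14 = _12 < _13 ? _10 : _11;
     ...				(three more lanes)
     lhs = {_14, ...};  */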

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gassign *assign, enum tree_code code)
{
  machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
	if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_unop, do_negate, type,
					 gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	return expand_vector_parallel (gsi, do_binop, type,
				       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
	return expand_vector_parallel (gsi, do_unop, type,
				       gimple_assign_rhs1 (assign),
				       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Try to optimize
   a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
   style stmts into:
   _9 = { b_7, b_7, b_7, b_7 };
   a_5 = _9 + { 0, 3, 6, 9 };
   because vector splat operation is usually more efficient
   than piecewise initialization of the vector.  */

static void
optimize_vector_constructor (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  tree type = TREE_TYPE (rhs);
  unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
  bool all_same = true;
  constructor_elt *elt;
  tree *cst;
  gimple *g;
  tree base = NULL_TREE;
  optab op;

  if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
    return;
  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return;
  FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
    if (TREE_CODE (elt->value) != SSA_NAME
	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
      return;
    else
      {
	tree this_base = elt->value;
	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
	  all_same = false;
	for (j = 0; j < nelts + 1; j++)
	  {
	    g = SSA_NAME_DEF_STMT (this_base);
	    if (is_gimple_assign (g)
		&& gimple_assign_rhs_code (g) == PLUS_EXPR
		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	      this_base = gimple_assign_rhs1 (g);
	    else
	      break;
	  }
	if (i == 0)
	  base = this_base;
	else if (this_base != base)
	  return;
      }
  if (all_same)
    return;
  cst = XALLOCAVEC (tree, nelts);
  for (i = 0; i < nelts; i++)
    {
      tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
      cst[i] = build_zero_cst (TREE_TYPE (base));
      while (this_base != base)
	{
	  g = SSA_NAME_DEF_STMT (this_base);
	  cst[i] = fold_binary (PLUS_EXPR, TREE_TYPE (base),
				cst[i], gimple_assign_rhs2 (g));
	  if (cst[i] == NULL_TREE
	      || TREE_CODE (cst[i]) != INTEGER_CST
	      || TREE_OVERFLOW (cst[i]))
	    return;
	  this_base = gimple_assign_rhs1 (g);
	}
    }
  for (i = 0; i < nelts; i++)
    CONSTRUCTOR_ELT (rhs, i)->value = base;
  g = gimple_build_assign (make_ssa_name (type), rhs);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
			   build_vector (type, cst));
  gsi_replace (gsi, g, false);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  machine_mode inner_mode = TYPE_MODE (type);
  machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
	&& GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}
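
/* For example, if TYPE is the 32-bit integer type and OP is
   implemented for both V4SImode and V8SImode on the target (a
   hypothetical configuration), the loop above settles on the wider
   V8SImode and returns the corresponding vector type.  */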


/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to the temporary variable for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable
   will be created.  */
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple *asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!tree_fits_uhwi_p (idx) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
	 simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
	{
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
	  return build_zero_cst (vect_elt_type);
	}
      else
	{
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
	}
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
		 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
		 idx, NULL_TREE, NULL_TREE);
}
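
/* Sketch of the possible result shapes (V4SI used for illustration):

     v[2], v a VECTOR_CST   -> the constant element itself
     v[2], v not constant   -> BIT_FIELD_REF <v, 32, 64>
     v[i], i not constant   -> VIEW_CONVERT_EXPR<int[4]>(vectmp)[i]

   where vectmp is the addressable temporary created above.  */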

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands:
   VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
   V0 and V1 must have the same type.  MASK, V0 and V1 must have the
   same number of elements.  */

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
	 vector as VEC1 and a right element shift MASK.  */
      if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
	  != CODE_FOR_nothing
	  && TREE_CODE (vec1) == VECTOR_CST
	  && initializer_zerop (vec1)
	  && sel_int[0]
	  && sel_int[0] < elements)
	{
	  for (i = 1; i < elements; ++i)
	    {
	      unsigned int expected = i + sel_int[0];
	      /* Indices into the second vector are all equivalent.  */
	      if (MIN (elements, (unsigned) sel_int[i])
		  != MIN (elements, expected))
		break;
	    }
	  if (i == elements)
	    {
	      gimple_assign_set_rhs3 (stmt, mask);
	      update_stmt (stmt);
	      return;
	    }
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
	{
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

	  if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
	}
      else
	{
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
				  build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
	      t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
	    }
	  else
	    t = v0_val;
	}

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}
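
/* Concrete example of the lowering (illustration only):

     VEC_PERM_EXPR <{10, 11, 12, 13}, {20, 21, 22, 23}, {0, 5, 2, 7}>

   selects v0[0], v1[1], v0[2] and v1[3], i.e. builds the constructor
   {10, 21, 12, 23}.  */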

/* If OP is a uniform vector return the element it is a splat from.  */

static tree
ssa_uniform_vector_p (tree op)
{
  if (TREE_CODE (op) == VECTOR_CST
      || TREE_CODE (op) == CONSTRUCTOR)
    return uniform_vector_p (op);
  if (TREE_CODE (op) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (gimple_assign_single_p (def_stmt))
	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
    }
  return NULL_TREE;
}

/* Return a type in which the operation CODE with optab OP can be
   computed.  */

static tree
get_compute_type (enum tree_code code, optab op, tree type)
{
  /* For very wide vectors, try using a smaller vector mode.  */
  tree compute_type = type;
  if (op
      && (!VECTOR_MODE_P (TYPE_MODE (type))
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
    {
      tree vector_compute_type
	= type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      machine_mode compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return compute_type;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return compute_type;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  return compute_type;
}
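
/* E.g. on a hypothetical target where the optab for a V8SI operation
   is only implemented for V4SImode, this returns the V4SI type and the
   caller later performs the operation in two halves; if no vector mode
   supports the operation at all, COMPUTE_TYPE degenerates to the
   scalar element type.  */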

/* Helper function of expand_vector_operations_1.  Return number of
   vector elements for vector types or 1 for other types.  */

static inline int
count_type_subparts (tree type)
{
  return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
}

static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	 tree bitpos, tree bitsize, enum tree_code code,
	 tree type ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
  return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
}

/* Expand a vector COND_EXPR to scalars, piecewise.  */
static void
expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
{
  gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
  tree type = gimple_expr_type (stmt);
  tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
  machine_mode compute_mode = TYPE_MODE (compute_type);
  gcc_assert (compute_mode != BLKmode);
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree rhs3 = gimple_assign_rhs3 (stmt);
  tree new_rhs;

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
				      COND_EXPR);
  else
    new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
				       rhs2, rhs3, COND_EXPR);
  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
			       new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
  enum tree_code code;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  /* Only consider code == GIMPLE_ASSIGN. */
  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
  if (!stmt)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }

  if (code == COND_EXPR
      && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
      && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
    {
      expand_vector_scalar_condition (gsi);
      return;
    }

  if (code == CONSTRUCTOR
      && TREE_CODE (lhs) == SSA_NAME
      && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
      && !gimple_clobber_p (stmt)
      && optimize)
    {
      optimize_vector_constructor (gsi);
      return;
    }

  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;
1595 
1596   /* If the vector operation operates on elements that are all the
1597      same, implement it with a scalar operation and a splat if the
1598      target supports the scalar operation.  */
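  /* Roughly, in illustrative GIMPLE (hypothetical SSA names), assuming
     the target has the scalar operation at the element mode:

       _1 = {x_5, x_5, x_5, x_5};
       _2 = {y_6, y_6, y_6, y_6};
       _3 = _1 + _2;

     becomes

       s_7 = x_5 + y_6;
       _3 = {s_7, s_7, s_7, s_7};  */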
1599   tree srhs1, srhs2 = NULL_TREE;
1600   if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
1601       && (rhs2 == NULL_TREE
1602 	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
1603 	      && (srhs2 = rhs2))
1604 	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1605       /* As we query direct optabs, restrict this to non-conversion operations.  */
1606       && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
1607     {
1608       op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
1609       if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
1610 	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
1611 	{
1612 	  tree slhs = make_ssa_name (TREE_TYPE (srhs1));
1613 	  gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
1614 	  gsi_insert_before (gsi, repl, GSI_SAME_STMT);
1615 	  gimple_assign_set_rhs_from_tree (gsi,
1616 					   build_vector_from_val (type, slhs));
1617 	  update_stmt (stmt);
1618 	  return;
1619 	}
1620     }
1621 
1622   /* A scalar operation pretending to be a vector one.  */
1623   if (VECTOR_BOOLEAN_TYPE_P (type)
1624       && !VECTOR_MODE_P (TYPE_MODE (type))
1625       && TYPE_MODE (type) != BLKmode)
1626     return;
1627 
1628   if (CONVERT_EXPR_CODE_P (code)
1629       || code == FLOAT_EXPR
1630       || code == FIX_TRUNC_EXPR
1631       || code == VIEW_CONVERT_EXPR)
1632     return;
1633 
1634   /* The signedness is determined from the input argument.  */
1635   if (code == VEC_UNPACK_FLOAT_HI_EXPR
1636       || code == VEC_UNPACK_FLOAT_LO_EXPR)
1637     type = TREE_TYPE (rhs1);
1638 
1639   /* For widening/narrowing vector operations, the relevant type is that
1640      of the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR
1641      is handled the same way above.  */
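  /* E.g. a VEC_UNPACK_HI_EXPR might take a V8HI argument and produce a
     V4SI result (an illustrative mode pairing); the optab and compute
     type must then be looked up with the V8HI argument type.  */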
1642   if (code == WIDEN_SUM_EXPR
1643       || code == VEC_WIDEN_MULT_HI_EXPR
1644       || code == VEC_WIDEN_MULT_LO_EXPR
1645       || code == VEC_WIDEN_MULT_EVEN_EXPR
1646       || code == VEC_WIDEN_MULT_ODD_EXPR
1647       || code == VEC_UNPACK_HI_EXPR
1648       || code == VEC_UNPACK_LO_EXPR
1649       || code == VEC_PACK_TRUNC_EXPR
1650       || code == VEC_PACK_SAT_EXPR
1651       || code == VEC_PACK_FIX_TRUNC_EXPR
1652       || code == VEC_WIDEN_LSHIFT_HI_EXPR
1653       || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1654     type = TREE_TYPE (rhs1);
1655 
1656   /* Choose between vector shift/rotate by vector and vector shift/rotate
1657      by scalar.  */
1658   if (code == LSHIFT_EXPR
1659       || code == RSHIFT_EXPR
1660       || code == LROTATE_EXPR
1661       || code == RROTATE_EXPR)
1662     {
1663       optab opv;
1664 
1665       /* Check whether we have vector <op> {x,x,x,x} where x
1666          could be a scalar variable or a constant.  Transform
1667          vector <op> {x,x,x,x} ==> vector <op> scalar.  */
1668       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1669         {
1670           tree first;
1671 
1672           if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1673             {
1674               gimple_assign_set_rhs2 (stmt, first);
1675               update_stmt (stmt);
1676               rhs2 = first;
1677             }
1678         }
1679 
1680       opv = optab_for_tree_code (code, type, optab_vector);
1681       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1682 	op = opv;
1683       else
1684 	{
1685           op = optab_for_tree_code (code, type, optab_scalar);
1686 
1687 	  compute_type = get_compute_type (code, op, type);
1688 	  if (compute_type == type)
1689 	    return;
1690 	  /* The rtl expander will expand vector/scalar as vector/vector
1691 	     if necessary.  Pick the one with the wider vector type.  */
1692 	  tree compute_vtype = get_compute_type (code, opv, type);
1693 	  if (count_type_subparts (compute_vtype)
1694 	      > count_type_subparts (compute_type))
1695 	    {
1696 	      compute_type = compute_vtype;
1697 	      op = opv;
1698 	    }
1699 	}
1700 
1701       if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1702 	{
1703 	  if (compute_type == NULL_TREE)
1704 	    compute_type = get_compute_type (code, op, type);
1705 	  if (compute_type == type)
1706 	    return;
1707 	  /* Before splitting vector rotates into scalar rotates,
1708 	     see if we can't use vector shifts and BIT_IOR_EXPR
1709 	     instead.  For vector-by-vector rotates we'd also need to
1710 	     check BIT_AND_EXPR and NEGATE_EXPR; punt on those for now,
1711 	     as fold doesn't seem to create such rotates anyway.  */
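	  /* That is, try to rely on the usual rotate identity (shown for
	     rotate-left, with PREC the element precision and 0 < n < PREC):

	       x r<< n  ==  (x << n) | (x >> (PREC - n))

	     which needs only the two shifts and a BIT_IOR_EXPR.  */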
1712 	  if (compute_type == TREE_TYPE (type)
1713 	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1714 	    {
1715 	      optab oplv = vashl_optab, opl = ashl_optab;
1716 	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1717 	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1718 	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1719 	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1720 	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1721 	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1722 	      /* The rtl expander will expand vector/scalar as vector/vector
1723 		 if necessary.  Pick the one with the wider vector type.  */
1724 	      if (count_type_subparts (compute_lvtype)
1725 		  > count_type_subparts (compute_ltype))
1726 		{
1727 		  compute_ltype = compute_lvtype;
1728 		  opl = oplv;
1729 		}
1730 	      if (count_type_subparts (compute_rvtype)
1731 		  > count_type_subparts (compute_rtype))
1732 		{
1733 		  compute_rtype = compute_rvtype;
1734 		  opr = oprv;
1735 		}
1736 	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1737 		 BIT_IOR_EXPR.  */
1738 	      compute_type = compute_ltype;
1739 	      if (count_type_subparts (compute_type)
1740 		  > count_type_subparts (compute_rtype))
1741 		compute_type = compute_rtype;
1742 	      if (count_type_subparts (compute_type)
1743 		  > count_type_subparts (compute_otype))
1744 		compute_type = compute_otype;
1745 	      /* Verify all 3 operations can be performed in that type.  */
1746 	      if (compute_type != TREE_TYPE (type))
1747 		{
1748 		  if (optab_handler (opl, TYPE_MODE (compute_type))
1749 		      == CODE_FOR_nothing
1750 		      || optab_handler (opr, TYPE_MODE (compute_type))
1751 			 == CODE_FOR_nothing
1752 		      || optab_handler (opo, TYPE_MODE (compute_type))
1753 			 == CODE_FOR_nothing)
1754 		    compute_type = TREE_TYPE (type);
1755 		}
1756 	    }
1757 	}
1758     }
1759   else
1760     op = optab_for_tree_code (code, type, optab_default);
1761 
1762   /* Optabs will try converting a negation into a subtraction, so
1763      look for it as well.  TODO: negation of floating-point vectors
1764      might be turned into an exclusive OR toggling the sign bit.  */
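  /* (A sketch of the fallback identity:  -x == {0, ..., 0} - x, so a
     target providing only a vector subtraction pattern can still handle
     vector negation.)  */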
1765   if (op == unknown_optab
1766       && code == NEGATE_EXPR
1767       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
1768     op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1769 
1770   if (compute_type == NULL_TREE)
1771     compute_type = get_compute_type (code, op, type);
1772   if (compute_type == type)
1773     return;
1774 
1775   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1776 
1777   /* Leave expression untouched for later expansion.  */
1778   if (new_rhs == NULL_TREE)
1779     return;
1780 
1781   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1782     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1783                                new_rhs);
1784 
1785   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
1786      way to do it is to change expand_vector_operation and its callees
1787      to return a tree_code, RHS1 and RHS2 instead of a tree.  */
1788   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1789   update_stmt (gsi_stmt (*gsi));
1790 }
1791 
1792 /* Use this to lower vector operations introduced by the vectorizer,
1793    if it may need the bit-twiddling tricks implemented in this file.  */
1794 
1795 static unsigned int
1796 expand_vector_operations (void)
1797 {
1798   gimple_stmt_iterator gsi;
1799   basic_block bb;
1800   bool cfg_changed = false;
1801 
1802   FOR_EACH_BB_FN (bb, cfun)
1803     {
1804       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1805 	{
1806 	  expand_vector_operations_1 (&gsi);
1807 	  /* ???  If we do not clean up EH then we will ICE during
1808 	     verification.  But in reality we have generated wrong code,
1809 	     as we did not properly transfer EH info and edges to the
1810 	     piecewise computations.  */
1811 	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
1812 	      && gimple_purge_dead_eh_edges (bb))
1813 	    cfg_changed = true;
1814 	}
1815     }
1816 
1817   return cfg_changed ? TODO_cleanup_cfg : 0;
1818 }
1819 
1820 namespace {
1821 
1822 const pass_data pass_data_lower_vector =
1823 {
1824   GIMPLE_PASS, /* type */
1825   "veclower", /* name */
1826   OPTGROUP_VEC, /* optinfo_flags */
1827   TV_NONE, /* tv_id */
1828   PROP_cfg, /* properties_required */
1829   PROP_gimple_lvec, /* properties_provided */
1830   0, /* properties_destroyed */
1831   0, /* todo_flags_start */
1832   TODO_update_ssa, /* todo_flags_finish */
1833 };
1834 
1835 class pass_lower_vector : public gimple_opt_pass
1836 {
1837 public:
1838   pass_lower_vector (gcc::context *ctxt)
1839     : gimple_opt_pass (pass_data_lower_vector, ctxt)
1840   {}
1841 
1842   /* opt_pass methods: */
1843   virtual bool gate (function *fun)
1844     {
1845       return !(fun->curr_properties & PROP_gimple_lvec);
1846     }
1847 
1848   virtual unsigned int execute (function *)
1849     {
1850       return expand_vector_operations ();
1851     }
1852 
1853 }; // class pass_lower_vector
1854 
1855 } // anon namespace
1856 
1857 gimple_opt_pass *
1858 make_pass_lower_vector (gcc::context *ctxt)
1859 {
1860   return new pass_lower_vector (ctxt);
1861 }
1862 
1863 namespace {
1864 
1865 const pass_data pass_data_lower_vector_ssa =
1866 {
1867   GIMPLE_PASS, /* type */
1868   "veclower2", /* name */
1869   OPTGROUP_VEC, /* optinfo_flags */
1870   TV_NONE, /* tv_id */
1871   PROP_cfg, /* properties_required */
1872   PROP_gimple_lvec, /* properties_provided */
1873   0, /* properties_destroyed */
1874   0, /* todo_flags_start */
1875   ( TODO_update_ssa
1876     | TODO_cleanup_cfg ), /* todo_flags_finish */
1877 };
1878 
1879 class pass_lower_vector_ssa : public gimple_opt_pass
1880 {
1881 public:
1882   pass_lower_vector_ssa (gcc::context *ctxt)
1883     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1884   {}
1885 
1886   /* opt_pass methods: */
1887   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1888   virtual unsigned int execute (function *)
1889     {
1890       return expand_vector_operations ();
1891     }
1892 
1893 }; // class pass_lower_vector_ssa
1894 
1895 } // anon namespace
1896 
1897 gimple_opt_pass *
1898 make_pass_lower_vector_ssa (gcc::context *ctxt)
1899 {
1900   return new pass_lower_vector_ssa (ctxt);
1901 }
1902 
1903 #include "gt-tree-vect-generic.h"
1904