/* Lower vector operations to scalar operations.
   Copyright (C) 2004-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tm.h"
#include "langhooks.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "flags.h"
#include "ggc.h"
#include "diagnostic.h"
#include "target.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"


static void expand_vector_operations_1 (gimple_stmt_iterator *);


/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
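/* For example (illustrative arithmetic only): replicating VALUE 0x7f
   with an 8-bit INNER_TYPE into a 32-bit TYPE yields the constant
   0x7f7f7f7f.  */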
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_DOUBLE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}

static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;

/* Return a suitable vector type made of NUNITS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}

typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);

static inline tree
tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
		  tree t, tree bitsize, tree bitpos)
{
  if (bitpos)
    return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
  else
    return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
}

static tree
do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	 enum tree_code code)
{
  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  return gimplify_build1 (gsi, code, inner_type, a);
}

static tree
do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
    a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
    b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
  return gimplify_build2 (gsi, code, inner_type, a, b);
}

/* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.

   INNER_TYPE is the type of A's and B's elements; the returned
   expression is of a signed integer type with the same size as
   INNER_TYPE.  */
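/* For instance (a sketch), with a 32-bit INNER_TYPE the comp_type
   built below is a 32-bit signed integer type, so each expanded
   element is -1 when the comparison holds and 0 otherwise.  */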
static tree
do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
	  tree bitpos, tree bitsize, enum tree_code code)
{
  tree comp_type;

  a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);

  comp_type = build_nonstandard_integer_type
		      (GET_MODE_BITSIZE (TYPE_MODE (inner_type)), 0);

  return gimplify_build3 (gsi, COND_EXPR, comp_type,
			  fold_build2 (code, boolean_type_node, a, b),
			  build_int_cst (comp_type, -1),
			  build_int_cst (comp_type, 0));
}

/* Expand vector addition to scalars.  This does bit twiddling
   in order to increase parallelism:

   a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
           (a ^ b) & 0x80808080

   a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
            (a ^ ~b) & 0x80808080

   -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)

   This optimization should be done only if 4 vector items or more
   fit into a word.  */
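/* Worked example (a sketch, assuming 8-bit elements in a 32-bit word):
   with a = 0x80 and b = 0x80 in every byte, (a & 0x7f) + (b & 0x7f) is
   0x00 per byte, so no carry crosses a byte boundary, and XORing with
   (a ^ b) & 0x80 = 0x00 restores the discarded sign bits, giving 0x00
   per byte -- the correct modular sum of 0x80 + 0x80.  */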
static tree
do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
	       tree bitpos, tree bitsize, enum tree_code code)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (a));
  unsigned HOST_WIDE_INT max;
  tree low_bits, high_bits, a_low, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  if (code == PLUS_EXPR)
    a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
  else
    {
      a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
      signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
    }

  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

static tree
do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
	   tree unused ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
	   enum tree_code code ATTRIBUTE_UNUSED)
{
  tree inner_type = TREE_TYPE (TREE_TYPE (b));
  HOST_WIDE_INT max;
  tree low_bits, high_bits, b_low, result_low, signs;

  max = GET_MODE_MASK (TYPE_MODE (inner_type));
  low_bits = build_replicated_const (word_type, inner_type, max >> 1);
  high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));

  b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);

  b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
  signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
  signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
  result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
  return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
}

/* Expand a vector operation to scalars by using many operations
   whose type is the vector type's inner type.  */
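/* For example (a sketch), a V4SI addition handled this way becomes
   four scalar additions, one per element, whose results are collected
   into a CONSTRUCTOR of the original vector type.  */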
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  vec<constructor_elt, va_gc> *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int delta = tree_low_cst (part_width, 1)
	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded piecewise");
  else
    warning_at (loc, OPT_Wvector_operation_performance,
		"vector operation will be expanded in parallel");

  vec_alloc (v, (nunits + delta - 1) / delta);
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  return build_constructor (type, v);
}

/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
				        word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
      warning_at (loc, OPT_Wvector_operation_performance,
	          "vector operation will be expanded with a "
		  "single scalar operation");
    }

  return result;
}

/* Expand a vector operation to scalars; for integer types we can use
   special bit twiddling tricks to do the sums a word at a time, using
   function F_PARALLEL instead of F.  These tricks are done only if
   they can process at least four items, that is, only if the vector
   holds at least four items and if a word can hold four items.  */
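/* For instance (a sketch), with 8-bit elements and a 64-bit word,
   parts_per_word below is 8, so a V16QI addition qualifies for the
   word-at-a-time path, while V2DI falls back to the piecewise
   expansion.  */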
static tree
expand_vector_addition (gimple_stmt_iterator *gsi,
			elem_op_func f, elem_op_func f_parallel,
			tree type, tree a, tree b, enum tree_code code)
{
  int parts_per_word = UNITS_PER_WORD
	  	       / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);

  if (INTEGRAL_TYPE_P (TREE_TYPE (type))
      && parts_per_word >= 4
      && TYPE_VECTOR_SUBPARTS (type) >= 4)
    return expand_vector_parallel (gsi, f_parallel,
				   type, a, b, code);
  else
    return expand_vector_piecewise (gsi, f,
				    type, TREE_TYPE (type),
				    a, b, code);
}

/* Check whether vector VEC consists entirely of equal elements and
   whether the number of elements corresponds to the type of VEC.
   The function returns the first element of the vector,
   or NULL_TREE if the vector is not uniform.  */
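/* For example (illustrative values), the VECTOR_CST {4, 4, 4, 4}
   yields 4, while {4, 4, 4, 5} yields NULL_TREE.  */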
static tree
uniform_vector_p (tree vec)
{
  tree first, t;
  unsigned i;

  if (vec == NULL_TREE)
    return NULL_TREE;

  if (TREE_CODE (vec) == VECTOR_CST)
    {
      first = VECTOR_CST_ELT (vec, 0);
      for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
	if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
	  return NULL_TREE;

      return first;
    }

  else if (TREE_CODE (vec) == CONSTRUCTOR)
    {
      first = error_mark_node;

      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
        {
          if (i == 0)
            {
              first = t;
              continue;
            }
	  if (!operand_equal_p (first, t, 0))
	    return NULL_TREE;
        }
      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
	return NULL_TREE;

      return first;
    }

  return NULL_TREE;
}

/* Try to expand the vector comparison expression OP0 CODE OP1 by
   querying the optab to check whether the expression
	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
   can be expanded; if it cannot, lower the comparison piecewise.  */
static tree
expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                          tree op1, enum tree_code code)
{
  tree t;
  if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
    t = expand_vector_piecewise (gsi, do_compare, type,
				 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
  else
    t = NULL_TREE;

  return t;
}

/* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in the
   type of OP0 with shift counts in the SHIFTCNTS array, and return the
   temporary holding the result if successful, otherwise return NULL_TREE.  */
static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
  optab op;
  unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
  bool scalar_shift = true;

  for (i = 1; i < nunits; i++)
    {
      if (shiftcnts[i] != shiftcnts[0])
	scalar_shift = false;
    }

  if (scalar_shift && shiftcnts[0] == 0)
    return op0;

  if (scalar_shift)
    {
      op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
      if (op != unknown_optab
	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
				build_int_cst (NULL_TREE, shiftcnts[0]));
    }

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op != unknown_optab
      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
    {
      tree *vec = XALLOCAVEC (tree, nunits);
      for (i = 0; i < nunits; i++)
	vec[i] = build_int_cst (TREE_TYPE (type), shiftcnts[i]);
      return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
			      build_vector (type, vec));
    }

  return NULL_TREE;
}

/* Try to expand integer vector division by constant using
   widening multiply, shifts and additions.  */
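/* For instance (a well-known case, sketched here), unsigned 32-bit
   division by 3 becomes a high-part multiply by the magic constant
   0xAAAAAAAB followed by a right shift by 1; choose_multiplier below
   computes such constants for each element.  */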
static tree
expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
		      tree op1, enum tree_code code)
{
  bool use_pow2 = true;
  bool has_vector_shift = true;
  int mode = -1, this_mode;
  int pre_shift = -1, post_shift;
  unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
  int *shifts = XALLOCAVEC (int, nunits * 4);
  int *pre_shifts = shifts + nunits;
  int *post_shifts = pre_shifts + nunits;
  int *shift_temps = post_shifts + nunits;
  unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
  int prec = TYPE_PRECISION (TREE_TYPE (type));
  int dummy_int;
  unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
  unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
  tree *vec;
  tree cur_op, mulcst, tem;
  optab op;

  if (prec > HOST_BITS_PER_WIDE_INT)
    return NULL_TREE;

  op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    has_vector_shift = false;

  /* Analysis phase.  Determine whether all op1 elements are powers
     of two, in which case the operation can be expanded using shifts
     (or, for remainder, masking).  Additionally compute the
     multiplicative constants and pre- and post-shifts if the division
     is to be expanded using widening or high part multiplication
     plus shifts.  */
  for (i = 0; i < nunits; i++)
    {
      tree cst = VECTOR_CST_ELT (op1, i);
      unsigned HOST_WIDE_INT ml;

      if (!host_integerp (cst, unsignedp) || integer_zerop (cst))
	return NULL_TREE;
      pre_shifts[i] = 0;
      post_shifts[i] = 0;
      mulc[i] = 0;
      if (use_pow2
	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
	use_pow2 = false;
      if (use_pow2)
	{
	  shifts[i] = tree_log2 (cst);
	  if (shifts[i] != shifts[0]
	      && code == TRUNC_DIV_EXPR
	      && !has_vector_shift)
	    use_pow2 = false;
	}
      if (mode == -2)
	continue;
      if (unsignedp)
	{
	  unsigned HOST_WIDE_INT mh;
	  unsigned HOST_WIDE_INT d = tree_low_cst (cst, 1) & mask;

	  if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
	    return NULL_TREE;

	  if (d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  /* Find a suitable multiplier and right shift count
	     instead of multiplying with D.  */
	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);

	  /* If the suggested multiplier is more than SIZE bits, we can
	     do better for even divisors, using an initial right shift.  */
	  if ((mh != 0 && (d & 1) == 0)
	      || (!has_vector_shift && pre_shift != -1))
	    {
	      if (has_vector_shift)
		pre_shift = floor_log2 (d & -d);
	      else if (pre_shift == -1)
		{
		  unsigned int j;
		  for (j = 0; j < nunits; j++)
		    {
		      tree cst2 = VECTOR_CST_ELT (op1, j);
		      unsigned HOST_WIDE_INT d2;
		      int this_pre_shift;

		      if (!host_integerp (cst2, 1))
			return NULL_TREE;
		      d2 = tree_low_cst (cst2, 1) & mask;
		      if (d2 == 0)
			return NULL_TREE;
		      this_pre_shift = floor_log2 (d2 & -d2);
		      if (pre_shift == -1 || this_pre_shift < pre_shift)
			pre_shift = this_pre_shift;
		    }
		  if (i != 0 && pre_shift != 0)
		    {
		      /* Restart.  */
		      i = -1U;
		      mode = -1;
		      continue;
		    }
		}
	      if (pre_shift != 0)
		{
		  if ((d >> pre_shift) <= 1)
		    {
		      mode = -2;
		      continue;
		    }
		  mh = choose_multiplier (d >> pre_shift, prec,
					  prec - pre_shift,
					  &ml, &post_shift, &dummy_int);
		  gcc_assert (!mh);
		  pre_shifts[i] = pre_shift;
		}
	    }
	  if (!mh)
	    this_mode = 0;
	  else
	    this_mode = 1;
	}
      else
	{
	  HOST_WIDE_INT d = tree_low_cst (cst, 0);
	  unsigned HOST_WIDE_INT abs_d;

	  if (d == -1)
	    return NULL_TREE;

	  /* Since d might be INT_MIN, we have to cast to
	     unsigned HOST_WIDE_INT before negating to avoid
	     undefined signed overflow.  */
	  abs_d = (d >= 0
		  ? (unsigned HOST_WIDE_INT) d
		  : - (unsigned HOST_WIDE_INT) d);

	  /* n rem d = n rem -d */
	  if (code == TRUNC_MOD_EXPR && d < 0)
	    d = abs_d;
	  else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      /* This case is not handled correctly below.  */
	      mode = -2;
	      continue;
	    }
	  if (abs_d <= 1)
	    {
	      mode = -2;
	      continue;
	    }

	  choose_multiplier (abs_d, prec, prec - 1, &ml,
			     &post_shift, &dummy_int);
	  if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
	    {
	      this_mode = 4 + (d < 0);
	      ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
	    }
	  else
	    this_mode = 2 + (d < 0);
	}
      mulc[i] = ml;
      post_shifts[i] = post_shift;
      if ((i && !has_vector_shift && post_shifts[0] != post_shift)
	  || post_shift >= prec
	  || pre_shifts[i] >= prec)
	this_mode = -2;

      if (i == 0)
	mode = this_mode;
      else if (mode != this_mode)
	mode = -2;
    }

  vec = XALLOCAVEC (tree, nunits);

  if (use_pow2)
    {
      tree addend = NULL_TREE;
      if (!unsignedp)
	{
	  tree uns_type;

	  /* Both division and remainder sequences need
	     op0 < 0 ? mask : 0 computed.  It can be either computed as
	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
	     if none of the shifts is 0, or as the conditional.  */
	  for (i = 0; i < nunits; i++)
	    if (shifts[i] == 0)
	      break;
	  uns_type
	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
				 nunits);
	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
	    {
	      for (i = 0; i < nunits; i++)
		shift_temps[i] = prec - 1;
	      cur_op = add_rshift (gsi, type, op0, shift_temps);
	      if (cur_op != NULL_TREE)
		{
		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					    uns_type, cur_op);
		  for (i = 0; i < nunits; i++)
		    shift_temps[i] = prec - shifts[i];
		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
		  if (cur_op != NULL_TREE)
		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
					      type, cur_op);
		}
	    }
	  if (addend == NULL_TREE
	      && expand_vec_cond_expr_p (type, type))
	    {
	      tree zero, cst, cond;
	      gimple stmt;

	      zero = build_zero_cst (type);
	      cond = build2 (LT_EXPR, type, op0, zero);
	      for (i = 0; i < nunits; i++)
		vec[i] = build_int_cst (TREE_TYPE (type),
					((unsigned HOST_WIDE_INT) 1
					 << shifts[i]) - 1);
	      cst = build_vector (type, vec);
	      addend = make_ssa_name (type, NULL);
	      stmt = gimple_build_assign_with_ops (VEC_COND_EXPR, addend,
						   cond, cst, zero);
	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
	    }
	}
      if (code == TRUNC_DIV_EXPR)
	{
	  if (unsignedp)
	    {
	      /* q = op0 >> shift;  */
	      cur_op = add_rshift (gsi, type, op0, shifts);
	      if (cur_op != NULL_TREE)
		return cur_op;
	    }
	  else if (addend != NULL_TREE)
	    {
	      /* t1 = op0 + addend;
		 q = t1 >> shift;  */
	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	      if (op != unknown_optab
		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
		{
		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
		  cur_op = add_rshift (gsi, type, cur_op, shifts);
		  if (cur_op != NULL_TREE)
		    return cur_op;
		}
	    }
	}
      else
	{
	  tree mask;
	  for (i = 0; i < nunits; i++)
	    vec[i] = build_int_cst (TREE_TYPE (type),
				    ((unsigned HOST_WIDE_INT) 1
				     << shifts[i]) - 1);
	  mask = build_vector (type, vec);
	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
	  if (op != unknown_optab
	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
	    {
	      if (unsignedp)
		/* r = op0 & mask;  */
		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
	      else if (addend != NULL_TREE)
		{
		  /* t1 = op0 + addend;
		     t2 = t1 & mask;
		     r = t2 - addend;  */
		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
		  if (op != unknown_optab
		      && optab_handler (op, TYPE_MODE (type))
			 != CODE_FOR_nothing)
		    {
		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
						addend);
		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
						cur_op, mask);
		      op = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
		      if (op != unknown_optab
			  && optab_handler (op, TYPE_MODE (type))
			     != CODE_FOR_nothing)
			return gimplify_build2 (gsi, MINUS_EXPR, type,
						cur_op, addend);
		    }
		}
	    }
	}
    }

  if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return NULL_TREE;

  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
    return NULL_TREE;

  cur_op = op0;

  switch (mode)
    {
    case 0:
      gcc_assert (unsignedp);
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 1:
      gcc_assert (unsignedp);
      for (i = 0; i < nunits; i++)
	{
	  shift_temps[i] = 1;
	  post_shifts[i]--;
	}
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      gcc_assert (!unsignedp);
      for (i = 0; i < nunits; i++)
	shift_temps[i] = prec - 1;
      break;
    default:
      return NULL_TREE;
    }

  for (i = 0; i < nunits; i++)
    vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
  mulcst = build_vector (type, vec);

  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

  switch (mode)
    {
    case 0:
      /* t1 = oprnd0 >> pre_shift;
	 t2 = t1 h* ml;
	 q = t2 >> post_shift;  */
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      break;
    case 1:
      /* t1 = oprnd0 h* ml;
	 t2 = oprnd0 - t1;
	 t3 = t2 >> 1;
	 t4 = t1 + t3;
	 q = t4 >> (post_shift - 1);  */
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
      tem = add_rshift (gsi, type, tem, shift_temps);
      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
      cur_op = add_rshift (gsi, type, tem, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      break;
    case 2:
    case 3:
    case 4:
    case 5:
      /* t1 = oprnd0 h* ml;
	 t2 = t1; [ iff (mode & 2) != 0 ]
	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
	 t3 = t2 >> post_shift;
	 t4 = oprnd0 >> (prec - 1);
	 q = t3 - t4; [ iff (mode & 1) == 0 ]
	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
      if ((mode & 2) == 0)
	{
	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
	  if (op == unknown_optab
	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	    return NULL_TREE;
	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
	}
      cur_op = add_rshift (gsi, type, cur_op, post_shifts);
      if (cur_op == NULL_TREE)
	return NULL_TREE;
      tem = add_rshift (gsi, type, op0, shift_temps);
      if (tem == NULL_TREE)
	return NULL_TREE;
      op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
      if (op == unknown_optab
	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
	return NULL_TREE;
      if ((mode & 1) == 0)
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
      else
	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
      break;
    default:
      gcc_unreachable ();
    }

  if (code == TRUNC_DIV_EXPR)
    return cur_op;

  /* We divided.  Now finish by:
     t1 = q * oprnd1;
     r = oprnd0 - t1;  */
  op = optab_for_tree_code (MULT_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
  op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
  if (op == unknown_optab
      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
    return NULL_TREE;
  return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
}

/* Expand a vector condition to scalars by using many conditions
   on the vector's elements.  */
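/* For example (a sketch), VEC_COND_EXPR <a < b, c, d> on four-element
   vectors becomes four scalar COND_EXPRs, one per element, collected
   into a CONSTRUCTOR.  */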
static void
expand_vector_condition (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree type = gimple_expr_type (stmt);
  tree a = gimple_assign_rhs1 (stmt);
  tree a1 = a;
  tree a2;
  bool a_is_comparison = false;
  tree b = gimple_assign_rhs2 (stmt);
  tree c = gimple_assign_rhs3 (stmt);
  vec<constructor_elt, va_gc> *v;
  tree constr;
  tree inner_type = TREE_TYPE (type);
  tree cond_type = TREE_TYPE (TREE_TYPE (a));
  tree comp_inner_type = cond_type;
  tree width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  int i;
  location_t loc = gimple_location (gsi_stmt (*gsi));

  if (!is_gimple_val (a))
    {
      gcc_assert (COMPARISON_CLASS_P (a));
      a_is_comparison = true;
      a1 = TREE_OPERAND (a, 0);
      a2 = TREE_OPERAND (a, 1);
      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
    }

  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1)))
    return;

  /* TODO: try and find a smaller vector type.  */

  warning_at (loc, OPT_Wvector_operation_performance,
	      "vector condition will be expanded piecewise");

  vec_alloc (v, nunits);
  for (i = 0; i < nunits;
       i++, index = int_const_binop (PLUS_EXPR, index, width))
    {
      tree aa, result;
      tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
      tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
      if (a_is_comparison)
	{
	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, width, index);
	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, width, index);
	  aa = build2 (TREE_CODE (a), cond_type, aa1, aa2);
	}
      else
	aa = tree_vec_extract (gsi, cond_type, a, width, index);
      result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
      constructor_elt ce = {NULL_TREE, result};
      v->quick_push (ce);
    }

  constr = build_constructor (type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
					 gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
		      		         gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
        return expand_vector_parallel (gsi, do_binop, type,
		      		       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
		      		       gimple_assign_rhs1 (assign),
        			       NULL_TREE, code);
      case EQ_EXPR:
      case NE_EXPR:
      case GT_EXPR:
      case LT_EXPR:
      case GE_EXPR:
      case LE_EXPR:
      case UNEQ_EXPR:
      case UNGT_EXPR:
      case UNLT_EXPR:
      case UNGE_EXPR:
      case UNLE_EXPR:
      case LTGT_EXPR:
      case ORDERED_EXPR:
      case UNORDERED_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);

	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
	}

      case TRUNC_DIV_EXPR:
      case TRUNC_MOD_EXPR:
	{
	  tree rhs1 = gimple_assign_rhs1 (assign);
	  tree rhs2 = gimple_assign_rhs2 (assign);
	  tree ret;

	  if (!optimize
	      || !VECTOR_INTEGER_TYPE_P (type)
	      || TREE_CODE (rhs2) != VECTOR_CST
	      || !VECTOR_MODE_P (TYPE_MODE (type)))
	    break;

	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
	  if (ret != NULL_TREE)
	    return ret;
	  break;
	}

      default:
	break;
      }

  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}

/* Return a type for the widest vector mode whose components are of type
   TYPE, or NULL_TREE if none is found.  */
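/* For instance (hypothetical target capabilities), if TYPE is a
   32-bit float type and the target implements OP for both V4SF and
   V8SF, the scan below picks V8SF, the mode with the most subparts.  */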

static tree
type_for_widest_vector_mode (tree type, optab op)
{
  enum machine_mode inner_mode = TYPE_MODE (type);
  enum machine_mode best_mode = VOIDmode, mode;
  int best_nunits = 0;

  if (SCALAR_FLOAT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FLOAT;
  else if (SCALAR_FRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_FRACT;
  else if (SCALAR_UFRACT_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UFRACT;
  else if (SCALAR_ACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_ACCUM;
  else if (SCALAR_UACCUM_MODE_P (inner_mode))
    mode = MIN_MODE_VECTOR_UACCUM;
  else
    mode = MIN_MODE_VECTOR_INT;

  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_INNER (mode) == inner_mode
        && GET_MODE_NUNITS (mode) > best_nunits
	&& optab_handler (op, mode) != CODE_FOR_nothing)
      best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);

  if (best_mode == VOIDmode)
    return NULL_TREE;
  else
    return build_vector_type_for_mode (type, best_mode);
}

/* Build a reference to the element of the vector VECT.  The function
   returns either the element itself, a BIT_FIELD_REF, or an
   ARRAY_REF expression.

   GSI is required to insert temporary variables while building a
   reference to the element of the vector VECT.

   PTMPVEC is a pointer to a temporary variable used for caching
   purposes.  If PTMPVEC is NULL, a new temporary variable will be
   created.  */
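/* For example (a sketch), extracting element 2 of the constant vector
   {1, 2, 3, 4} folds directly to 3, while a variable index is turned
   into an ARRAY_REF of a temporary array view of the vector.  */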
static tree
vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
{
  tree vect_type, vect_elt_type;
  gimple asgn;
  tree tmpvec;
  tree arraytype;
  bool need_asgn = true;
  unsigned int elements;

  vect_type = TREE_TYPE (vect);
  vect_elt_type = TREE_TYPE (vect_type);
  elements = TYPE_VECTOR_SUBPARTS (vect_type);

  if (TREE_CODE (idx) == INTEGER_CST)
    {
      unsigned HOST_WIDE_INT index;

      /* Given that we're about to compute a binary modulus,
	 we don't care about the high bits of the value.  */
      index = TREE_INT_CST_LOW (idx);
      if (!host_integerp (idx, 1) || index >= elements)
	{
	  index &= elements - 1;
	  idx = build_int_cst (TREE_TYPE (idx), index);
	}

      /* When lowering a vector statement sequence do some easy
         simplification by looking through intermediate vector results.  */
      if (TREE_CODE (vect) == SSA_NAME)
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (vect);
	  if (is_gimple_assign (def_stmt)
	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
	    vect = gimple_assign_rhs1 (def_stmt);
	}

      if (TREE_CODE (vect) == VECTOR_CST)
	return VECTOR_CST_ELT (vect, index);
      else if (TREE_CODE (vect) == CONSTRUCTOR
	       && (CONSTRUCTOR_NELTS (vect) == 0
		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
		      != VECTOR_TYPE))
        {
	  if (index < CONSTRUCTOR_NELTS (vect))
	    return CONSTRUCTOR_ELT (vect, index)->value;
          return build_zero_cst (vect_elt_type);
        }
      else
        {
	  tree size = TYPE_SIZE (vect_elt_type);
	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
				  size);
	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
        }
    }

  if (!ptmpvec)
    tmpvec = create_tmp_var (vect_type, "vectmp");
  else if (!*ptmpvec)
    tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
  else
    {
      tmpvec = *ptmpvec;
      need_asgn = false;
    }

  if (need_asgn)
    {
      TREE_ADDRESSABLE (tmpvec) = 1;
      asgn = gimple_build_assign (tmpvec, vect);
      gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
    }

  arraytype = build_array_type_nelts (vect_elt_type, elements);
  return build4 (ARRAY_REF, vect_elt_type,
                 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
                 idx, NULL_TREE, NULL_TREE);
}

/* Check if VEC_PERM_EXPR within the given setting is supported
   by hardware, or lower it piecewise.

   When VEC_PERM_EXPR has the same first and second operands,
   VEC_PERM_EXPR <v0, v0, mask>, the lowered version would be
   {v0[mask[0]], v0[mask[1]], ...}.
   MASK and V0 must have the same number of elements.

   Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
   {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}.
   V0 and V1 must have the same type.  MASK, V0, V1 must have the
   same number of elements.  */
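/* Concretely (an illustration), VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h},
   {0,5,2,7}> selects {a, f, c, h}: mask values below 4 index into the
   first vector and the remaining values into the second.  */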

static void
lower_vec_perm (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree mask = gimple_assign_rhs3 (stmt);
  tree vec0 = gimple_assign_rhs1 (stmt);
  tree vec1 = gimple_assign_rhs2 (stmt);
  tree vect_type = TREE_TYPE (vec0);
  tree mask_type = TREE_TYPE (mask);
  tree vect_elt_type = TREE_TYPE (vect_type);
  tree mask_elt_type = TREE_TYPE (mask_type);
  unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
  vec<constructor_elt, va_gc> *v;
  tree constr, t, si, i_val;
  tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
  bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
  location_t loc = gimple_location (gsi_stmt (*gsi));
  unsigned i;

  if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (mask);
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
	mask = gimple_assign_rhs1 (def_stmt);
    }

  if (TREE_CODE (mask) == VECTOR_CST)
    {
      unsigned char *sel_int = XALLOCAVEC (unsigned char, elements);

      for (i = 0; i < elements; ++i)
	sel_int[i] = (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
		      & (2 * elements - 1));

      if (can_vec_perm_p (TYPE_MODE (vect_type), false, sel_int))
	{
	  gimple_assign_set_rhs3 (stmt, mask);
	  update_stmt (stmt);
	  return;
	}
    }
  else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
    return;

  warning_at (loc, OPT_Wvector_operation_performance,
              "vector shuffling operation will be expanded piecewise");

  vec_alloc (v, elements);
  for (i = 0; i < elements; i++)
    {
      si = size_int (i);
      i_val = vector_element (gsi, mask, si, &masktmp);

      if (TREE_CODE (i_val) == INTEGER_CST)
        {
	  unsigned HOST_WIDE_INT index;

	  index = TREE_INT_CST_LOW (i_val);
	  if (!host_integerp (i_val, 1) || index >= elements)
	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));

          if (two_operand_p && (index & elements) != 0)
	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
	  else
	    t = vector_element (gsi, vec0, i_val, &vec0tmp);

          t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
					true, GSI_SAME_STMT);
        }
      else
        {
	  tree cond = NULL_TREE, v0_val;

	  if (two_operand_p)
	    {
	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			          build_int_cst (mask_elt_type, elements));
	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					       true, GSI_SAME_STMT);
	    }

	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
			       build_int_cst (mask_elt_type, elements - 1));
	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
					    true, GSI_SAME_STMT);

	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
					     true, GSI_SAME_STMT);

	  if (two_operand_p)
	    {
	      tree v1_val;

	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
						 true, GSI_SAME_STMT);

	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
				  cond, build_zero_cst (mask_elt_type));
	      cond = fold_build3 (COND_EXPR, vect_elt_type,
				  cond, v0_val, v1_val);
              t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
					    true, GSI_SAME_STMT);
            }
	  else
	    t = v0_val;
        }

      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
    }

  constr = build_constructor (vect_type, v);
  gimple_assign_set_rhs_from_tree (gsi, constr);
  update_stmt (gsi_stmt (*gsi));
}

/* Process one statement.  If we identify a vector operation, expand it.  */

static void
expand_vector_operations_1 (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);
  tree lhs, rhs1, rhs2 = NULL, type, compute_type;
  enum tree_code code;
  enum machine_mode compute_mode;
  optab op = unknown_optab;
  enum gimple_rhs_class rhs_class;
  tree new_rhs;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = get_gimple_rhs_class (code);
  lhs = gimple_assign_lhs (stmt);

  if (code == VEC_PERM_EXPR)
    {
      lower_vec_perm (gsi);
      return;
    }

  if (code == VEC_COND_EXPR)
    {
      expand_vector_condition (gsi);
      return;
    }
  if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
    return;

  rhs1 = gimple_assign_rhs1 (stmt);
  type = gimple_expr_type (stmt);
  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (type) != VECTOR_TYPE)
    return;

  if (code == NOP_EXPR
      || code == FLOAT_EXPR
      || code == FIX_TRUNC_EXPR
      || code == VIEW_CONVERT_EXPR)
    return;

  gcc_assert (code != CONVERT_EXPR);

  /* The signedness is determined from the input argument.  */
  if (code == VEC_UNPACK_FLOAT_HI_EXPR
      || code == VEC_UNPACK_FLOAT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* For widening/narrowing vector operations, the relevant type is that of
     the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
     handled in the same way above.  */
  if (code == WIDEN_SUM_EXPR
      || code == VEC_WIDEN_MULT_HI_EXPR
      || code == VEC_WIDEN_MULT_LO_EXPR
      || code == VEC_WIDEN_MULT_EVEN_EXPR
      || code == VEC_WIDEN_MULT_ODD_EXPR
      || code == VEC_UNPACK_HI_EXPR
      || code == VEC_UNPACK_LO_EXPR
      || code == VEC_PACK_TRUNC_EXPR
      || code == VEC_PACK_SAT_EXPR
      || code == VEC_PACK_FIX_TRUNC_EXPR
      || code == VEC_WIDEN_LSHIFT_HI_EXPR
      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
    type = TREE_TYPE (rhs1);

  /* Choose between vector shift/rotate by vector and vector shift/rotate
     by scalar.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    {
      optab opv;

      /* Check whether we have vector <op> {x,x,x,x} where x
         could be a scalar variable or a constant.  Transform
         vector <op> {x,x,x,x} ==> vector <op> scalar.  */
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
        {
          tree first;
          gimple def_stmt;

          if ((TREE_CODE (rhs2) == VECTOR_CST
	       && (first = uniform_vector_p (rhs2)) != NULL_TREE)
	      || (TREE_CODE (rhs2) == SSA_NAME
		  && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
		  && gimple_assign_single_p (def_stmt)
		  && (first = uniform_vector_p
		      (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))
            {
              gimple_assign_set_rhs2 (stmt, first);
              update_stmt (stmt);
              rhs2 = first;
            }
        }

      opv = optab_for_tree_code (code, type, optab_vector);
      if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
	op = opv;
      else
	{
          op = optab_for_tree_code (code, type, optab_scalar);

	  /* The rtl expander will expand vector/scalar as vector/vector
	     if necessary.  Don't bother converting the stmt here.  */
	  if (optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing
	      && optab_handler (opv, TYPE_MODE (type)) != CODE_FOR_nothing)
	    return;
	}
    }
  else
    op = optab_for_tree_code (code, type, optab_default);

  /* Optabs will try converting a negation into a subtraction, so
     look for it as well.  TODO: negation of floating-point vectors
     might be turned into an exclusive OR toggling the sign bit.  */
  if (op == unknown_optab
      && code == NEGATE_EXPR
      && INTEGRAL_TYPE_P (TREE_TYPE (type)))
    op = optab_for_tree_code (MINUS_EXPR, type, optab_default);

  /* For very wide vectors, try using a smaller vector mode.  */
  compute_type = type;
  if (!VECTOR_MODE_P (TYPE_MODE (type)) && op)
    {
      tree vector_compute_type
        = type_for_widest_vector_mode (TREE_TYPE (type), op);
      if (vector_compute_type != NULL_TREE
	  && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
	      < TYPE_VECTOR_SUBPARTS (compute_type))
	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
	      != CODE_FOR_nothing))
	compute_type = vector_compute_type;
    }

  /* If we are breaking a BLKmode vector into smaller pieces,
     type_for_widest_vector_mode has already looked into the optab,
     so skip these checks.  */
  if (compute_type == type)
    {
      compute_mode = TYPE_MODE (compute_type);
      if (VECTOR_MODE_P (compute_mode))
	{
          if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
	    return;
	  if (code == MULT_HIGHPART_EXPR
	      && can_mult_highpart_p (compute_mode,
				      TYPE_UNSIGNED (compute_type)))
	    return;
	}
      /* There is no operation in hardware, so fall back to scalars.  */
      compute_type = TREE_TYPE (type);
    }

  gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
  new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);

  /* Leave expression untouched for later expansion.  */
  if (new_rhs == NULL_TREE)
    return;

  if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
    new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
                               new_rhs);

  /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
     way to do it is to change expand_vector_operation and its callees to
     return a tree_code, RHS1 and RHS2 instead of a tree.  */
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
}

/* Use this to lower vector operations introduced by the vectorizer,
   if it may need the bit-twiddling tricks implemented in this file.  */

static bool
gate_expand_vector_operations_ssa (void)
{
  return optimize == 0;
}

static unsigned int
expand_vector_operations (void)
{
  gimple_stmt_iterator gsi;
  basic_block bb;
  bool cfg_changed = false;

  FOR_EACH_BB (bb)
    {
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  expand_vector_operations_1 (&gsi);
	  /* ???  If we do not cleanup EH then we will ICE in
	     verification.  But in reality we have created wrong-code
	     as we did not properly transition EH info and edges to
	     the piecewise computations.  */
	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
	      && gimple_purge_dead_eh_edges (bb))
	    cfg_changed = true;
	}
    }

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",				/* name */
  OPTGROUP_VEC,                         /* optinfo_flags */
  gate_expand_vector_operations_ssa,    /* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa	                /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",				/* name */
  OPTGROUP_VEC,                         /* optinfo_flags */
  0,	                                /* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa	                /* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
    | TODO_cleanup_cfg
 }
};

#include "gt-tree-vect-generic.h"