xref: /netbsd-src/external/gpl3/gcc/dist/gcc/tree-vect-generic.cc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "gimple-fold.h"
45 #include "gimple-match.h"
46 #include "recog.h"		/* FIXME: for insn_data */
47 
48 
49 /* Build a ternary operation and gimplify it.  Emit code before GSI.
50    Return the gimple_val holding the result.  */
51 
52 static tree
53 gimplify_build3 (gimple_stmt_iterator *gsi, enum tree_code code,
54 		 tree type, tree a, tree b, tree c)
55 {
56   location_t loc = gimple_location (gsi_stmt (*gsi));
57   gimple_seq stmts = NULL;
58   tree ret = gimple_build (&stmts, loc, code, type, a, b, c);
59   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
60   return ret;
61 }
62 
63 /* Build a binary operation and gimplify it.  Emit code before GSI.
64    Return the gimple_val holding the result.  */
65 
66 static tree
67 gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
68 		 tree type, tree a, tree b)
69 {
70   location_t loc = gimple_location (gsi_stmt (*gsi));
71   gimple_seq stmts = NULL;
72   tree ret = gimple_build (&stmts, loc, code, type, a, b);
73   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
74   return ret;
75 }
76 
77 /* Build a unary operation and gimplify it.  Emit code before GSI.
78    Return the gimple_val holding the result.  */
79 
80 static tree
81 gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
82 		 tree a)
83 {
84   location_t loc = gimple_location (gsi_stmt (*gsi));
85   gimple_seq stmts = NULL;
86   tree ret = gimple_build (&stmts, loc, code, type, a);
87   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
88   return ret;
89 }
90 
91 
92 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
93 
94 /* Return the number of elements in a vector type TYPE that we have
95    already decided needs to be expanded piecewise.  We don't support
96    this kind of expansion for variable-length vectors, since we should
97    always check for target support before introducing uses of those.  */
98 static unsigned int
99 nunits_for_known_piecewise_op (const_tree type)
100 {
101   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
102 }
103 
104 /* Return true if TYPE1 has more elements than TYPE2, where either
105    type may be a vector or a scalar.  */
106 
107 static inline bool
108 subparts_gt (tree type1, tree type2)
109 {
110   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
111   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
112   return known_gt (n1, n2);
113 }
114 
115 /* Build a constant of type TYPE, made of VALUE's bits replicated
116    every WIDTH bits to fit TYPE's precision.  */
117 static tree
118 build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
119 {
120   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
121     / HOST_BITS_PER_WIDE_INT;
122   unsigned HOST_WIDE_INT low, mask;
123   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
124   int i;
125 
126   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
127 
128   if (width == HOST_BITS_PER_WIDE_INT)
129     low = value;
130   else
131     {
132       mask = ((HOST_WIDE_INT)1 << width) - 1;
133       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
134     }
135 
136   for (i = 0; i < n; i++)
137     a[i] = low;
138 
139   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
140   return wide_int_to_tree
141     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
142 }
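
/* Illustrative example (an editorial sketch, assuming a 64-bit
   HOST_WIDE_INT): with a 32-bit integer TYPE, WIDTH == 8 and VALUE == 0x80,
   MASK is 0xff, so LOW becomes ~(unsigned HOST_WIDE_INT) 0 / 0xff * 0x80
   == 0x8080808080808080, and truncating to TYPE's 32-bit precision gives
   0x80808080 -- the per-element sign-bit mask used by do_plus_minus and
   do_negate below.  */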
143 
144 static GTY(()) tree vector_inner_type;
145 static GTY(()) tree vector_last_type;
146 static GTY(()) int vector_last_nunits;
147 
148 /* Return a suitable vector type made of NUNITS units, each of mode
149    "word_mode" (the global variable).  */
150 static tree
151 build_word_mode_vector_type (int nunits)
152 {
153   if (!vector_inner_type)
154     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
155   else if (vector_last_nunits == nunits)
156     {
157       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
158       return vector_last_type;
159     }
160 
161   vector_last_nunits = nunits;
162   vector_last_type = build_vector_type (vector_inner_type, nunits);
163   return vector_last_type;
164 }
165 
166 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
167 			      tree, tree, tree, tree, tree, enum tree_code,
168 			      tree);
169 
170 /* Extract the vector element of type TYPE at BITPOS with BITSIZE from T
171    and return it.  */
172 
173 tree
174 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
175 		  tree t, tree bitsize, tree bitpos)
176 {
177   /* We're using the resimplify API and maybe_push_res_to_seq to
178      simplify the BIT_FIELD_REF but restrict the simplification to
179      a single stmt while at the same time following SSA edges for
180      simplification with already emitted CTORs.  */
181   gimple_match_op opr;
182   opr.set_op (BIT_FIELD_REF, type, t, bitsize, bitpos);
183   opr.resimplify (NULL, follow_all_ssa_edges);
184   gimple_seq stmts = NULL;
185   tree res = maybe_push_res_to_seq (&opr, &stmts);
186   if (!res)
187     {
188       /* This can happen if SSA_NAME_OCCURS_IN_ABNORMAL_PHI are
189 	 used.  Build BIT_FIELD_REF manually otherwise.  */
190       t = build3 (BIT_FIELD_REF, type, t, bitsize, bitpos);
191       res = make_ssa_name (type);
192       gimple *g = gimple_build_assign (res, t);
193       gsi_insert_before (gsi, g, GSI_SAME_STMT);
194       return res;
195     }
196   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
197   return res;
198 }
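
/* Illustrative example: for a V4SI vector V, extracting element 2 as a
   32-bit integer corresponds to BIT_FIELD_REF <V, 32, 64> in the usage
   above -- BITSIZE is the element width in bits and BITPOS the bit offset
   of the element within the vector.  */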
199 
200 static tree
201 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
202 	 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
203 	 enum tree_code code, tree type ATTRIBUTE_UNUSED)
204 {
205   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
206   return gimplify_build1 (gsi, code, inner_type, a);
207 }
208 
209 static tree
210 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
211 	  tree bitpos, tree bitsize, enum tree_code code,
212 	  tree type ATTRIBUTE_UNUSED)
213 {
214   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
215     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
216   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
217     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
218   return gimplify_build2 (gsi, code, inner_type, a, b);
219 }
220 
221 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
222 
223    INNER_TYPE is the type of the elements of A and B.
224 
225    The returned expression is of signed integer type with a
226    size equal to the size of INNER_TYPE.  */
227 static tree
228 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
229 	    tree bitpos, tree bitsize, enum tree_code code, tree type)
230 {
231   tree stype = TREE_TYPE (type);
232   tree cst_false = build_zero_cst (stype);
233   tree cst_true = build_all_ones_cst (stype);
234   tree cmp;
235 
236   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
237   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
238 
239   cmp = build2 (code, boolean_type_node, a, b);
240   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
241 }
242 
243 /* Expand vector addition to scalars.  This does bit twiddling
244    in order to increase parallelism:
245 
246    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
247            (a ^ b) & 0x80808080
248 
249    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
250             (a ^ ~b) & 0x80808080
251 
252    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
253 
254    This optimization should be done only if 4 vector items or more
255    fit into a word.  */
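/* Illustrative example (editorial, assuming 8-bit elements packed into a
   word): masking each element down to its low 7 bits before adding means
   no partial sum can exceed 0xfe, so no carry ever crosses an element
   boundary; the top bit of each element is then reconstructed with
   (a ^ b) & 0x80.  E.g. for bytes a = 0xff, b = 0x01:
     (0x7f + 0x01) ^ ((0xff ^ 0x01) & 0x80) == 0x80 ^ 0x80 == 0x00,
   which is 0xff + 0x01 modulo 256.  */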
256 static tree
257 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
258 	       tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
259 	       enum tree_code code, tree type ATTRIBUTE_UNUSED)
260 {
261   unsigned int width = vector_element_bits (TREE_TYPE (a));
262   tree inner_type = TREE_TYPE (TREE_TYPE (a));
263   unsigned HOST_WIDE_INT max;
264   tree low_bits, high_bits, a_low, b_low, result_low, signs;
265 
266   max = GET_MODE_MASK (TYPE_MODE (inner_type));
267   low_bits = build_replicated_const (word_type, width, max >> 1);
268   high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
269 
270   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
271   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
272 
273   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
274   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
275   if (code == PLUS_EXPR)
276     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
277   else
278     {
279       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
280       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
281     }
282 
283   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
284   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
285   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
286 }
287 
288 static tree
289 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
290 	   tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
291 	   tree bitsize ATTRIBUTE_UNUSED,
292 	   enum tree_code code ATTRIBUTE_UNUSED,
293 	   tree type ATTRIBUTE_UNUSED)
294 {
295   unsigned int width = vector_element_bits (TREE_TYPE (b));
296   tree inner_type = TREE_TYPE (TREE_TYPE (b));
297   HOST_WIDE_INT max;
298   tree low_bits, high_bits, b_low, result_low, signs;
299 
300   max = GET_MODE_MASK (TYPE_MODE (inner_type));
301   low_bits = build_replicated_const (word_type, width, max >> 1);
302   high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
303 
304   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
305 
306   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
307   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
308   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
309   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
310   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
311 }
312 
313 /* Expand a vector operation to scalars, by using many operations
314    whose type is the vector type's inner type.  */
315 static tree
316 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
317 			 tree type, tree inner_type,
318 			 tree a, tree b, enum tree_code code,
319 			 bool parallel_p, tree ret_type = NULL_TREE)
320 {
321   vec<constructor_elt, va_gc> *v;
322   tree part_width = TYPE_SIZE (inner_type);
323   tree index = bitsize_int (0);
324   int nunits = nunits_for_known_piecewise_op (type);
325   int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
326   int i;
327   location_t loc = gimple_location (gsi_stmt (*gsi));
328 
329   if (nunits == 1
330       || warning_suppressed_p (gsi_stmt (*gsi),
331 			       OPT_Wvector_operation_performance))
332     /* Do not diagnose decomposing single-element vectors or when
333        decomposing vectorizer-produced operations.  */
334     ;
335   else if (ret_type || !parallel_p)
336     warning_at (loc, OPT_Wvector_operation_performance,
337 		"vector operation will be expanded piecewise");
338   else
339     warning_at (loc, OPT_Wvector_operation_performance,
340 		"vector operation will be expanded in parallel");
341 
342   if (!ret_type)
343     ret_type = type;
344   vec_alloc (v, (nunits + delta - 1) / delta);
345   bool constant_p = true;
346   for (i = 0; i < nunits;
347        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
348     {
349       tree result = f (gsi, inner_type, a, b, index, part_width, code,
350 		       ret_type);
351       if (!CONSTANT_CLASS_P (result))
352 	constant_p = false;
353       constructor_elt ce = {NULL_TREE, result};
354       v->quick_push (ce);
355     }
356 
357   if (constant_p)
358     return build_vector_from_ctor (ret_type, v);
359   else
360     return build_constructor (ret_type, v);
361 }
362 
363 /* Expand a vector operation to scalars with the freedom to use
364    a scalar integer type, or to use a different size for the items
365    in the vector type.  */
366 static tree
367 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
368 			tree a, tree b, enum tree_code code)
369 {
370   tree result, compute_type;
371   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
372   location_t loc = gimple_location (gsi_stmt (*gsi));
373 
374   /* We have three strategies.  If the type is already correct, just do
375      the operation an element at a time.  Else, if the vector is wider than
376      one word, do it a word at a time; finally, if the vector is smaller
377      than one word, do it as a scalar.  */
378   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
379      return expand_vector_piecewise (gsi, f,
380 				     type, TREE_TYPE (type),
381 				     a, b, code, true);
382   else if (n_words > 1)
383     {
384       tree word_type = build_word_mode_vector_type (n_words);
385       result = expand_vector_piecewise (gsi, f,
386 				        word_type, TREE_TYPE (word_type),
387 					a, b, code, true);
388       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
389                                          GSI_SAME_STMT);
390     }
391   else
392     {
393       /* Use a single scalar operation with a mode no wider than word_mode.  */
394       if (!warning_suppressed_p (gsi_stmt (*gsi),
395 				 OPT_Wvector_operation_performance))
396 	warning_at (loc, OPT_Wvector_operation_performance,
397 		    "vector operation will be expanded with a "
398 		    "single scalar operation");
399       scalar_int_mode mode
400 	= int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
401       compute_type = lang_hooks.types.type_for_mode (mode, 1);
402       result = f (gsi, compute_type, a, b, bitsize_zero_node,
403 		  TYPE_SIZE (compute_type), code, type);
404     }
405 
406   return result;
407 }
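
/* Illustrative example (editorial, assuming a 64-bit target, i.e.
   UNITS_PER_WORD == 8): a bitwise AND on a 4-byte vector of four chars has
   n_words == 0, so the final branch above rewrites it as a single 32-bit
   scalar AND via int_mode_for_size, while a 16-byte vector would take the
   word-at-a-time path with a two-element word_mode vector type.  */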
408 
409 /* Expand a vector operation to scalars; for integer types we can use
410    special bit twiddling tricks to do the sums a word at a time, using
411    function F_PARALLEL instead of F.  These tricks are done only if
412    they can process at least four items, that is, only if the vector
413    holds at least four items and if a word can hold four items.  */
414 static tree
415 expand_vector_addition (gimple_stmt_iterator *gsi,
416 			elem_op_func f, elem_op_func f_parallel,
417 			tree type, tree a, tree b, enum tree_code code)
418 {
419   int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
420 
421   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
422       && parts_per_word >= 4
423       && nunits_for_known_piecewise_op (type) >= 4)
424     return expand_vector_parallel (gsi, f_parallel,
425 				   type, a, b, code);
426   else
427     return expand_vector_piecewise (gsi, f,
428 				    type, TREE_TYPE (type),
429 				    a, b, code, false);
430 }
431 
432 static bool
433 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
434 
435 /* Try to expand the vector comparison expression OP0 CODE OP1 by
436    querying the optab to see whether the following expression:
437 	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
438    can be expanded.  */
439 static tree
440 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
441 			  tree op1, enum tree_code code,
442 			  bitmap dce_ssa_names)
443 {
444   tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
445   use_operand_p use_p;
446   imm_use_iterator iterator;
447   bool vec_cond_expr_only = true;
448 
449   /* As seen in PR95830, we should not expand comparisons that are only
450      feeding a VEC_COND_EXPR statement.  */
451   auto_vec<gimple *> uses;
452   FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
453     {
454       gimple *use = USE_STMT (use_p);
455       if (is_gimple_debug (use))
456 	continue;
457       if (is_gimple_assign (use)
458 	  && gimple_assign_rhs_code (use) == VEC_COND_EXPR
459 	  && gimple_assign_rhs1 (use) == lhs
460 	  && gimple_assign_rhs2 (use) != lhs
461 	  && gimple_assign_rhs3 (use) != lhs)
462 	uses.safe_push (use);
463       else
464 	vec_cond_expr_only = false;
465     }
466 
467   if (vec_cond_expr_only)
468     for (gimple *use : uses)
469       {
470 	gimple_stmt_iterator it = gsi_for_stmt (use);
471 	if (!expand_vector_condition (&it, dce_ssa_names))
472 	  {
473 	    vec_cond_expr_only = false;
474 	    break;
475 	  }
476       }
477 
478   if (!uses.is_empty () && vec_cond_expr_only)
479     return NULL_TREE;
480 
481   tree t;
482   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
483     {
484       if (VECTOR_BOOLEAN_TYPE_P (type)
485 	  && SCALAR_INT_MODE_P (TYPE_MODE (type))
486 	  && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
487 		       TYPE_VECTOR_SUBPARTS (type)
488 		       * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
489 						(TREE_TYPE (type)))))
490 	{
491 	  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
492 	  tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
493 	  tree index = bitsize_int (0);
494 	  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
495 	  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
496 	  tree ret_type = build_nonstandard_integer_type (prec, 1);
497 	  tree ret_inner_type = boolean_type_node;
498 	  int i;
499 	  location_t loc = gimple_location (gsi_stmt (*gsi));
500 	  t = build_zero_cst (ret_type);
501 
502 	  if (TYPE_PRECISION (ret_inner_type) != 1)
503 	    ret_inner_type = build_nonstandard_integer_type (1, 1);
504 	  if (!warning_suppressed_p (gsi_stmt (*gsi),
505 				     OPT_Wvector_operation_performance))
506 	    warning_at (loc, OPT_Wvector_operation_performance,
507 			"vector operation will be expanded piecewise");
508 	  for (i = 0; i < nunits;
509 	       i++, index = int_const_binop (PLUS_EXPR, index, part_width))
510 	    {
511 	      tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
512 					 index);
513 	      tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
514 					 index);
515 	      tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
516 	      t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
517 				   bitsize_int (i));
518 	    }
519 	  t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
520 	}
521       else
522 	t = expand_vector_piecewise (gsi, do_compare, type,
523 				     TREE_TYPE (TREE_TYPE (op0)), op0, op1,
524 				     code, false);
525     }
526   else
527     t = NULL_TREE;
528 
529   return t;
530 }
531 
532 /* Helper function of expand_vector_divmod.  Gimplify an RSHIFT_EXPR in the
533    type of OP0 with the shift counts in the SHIFTCNTS array and return the
534    temporary holding the result if successful, otherwise return NULL_TREE.  */
535 static tree
536 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
537 {
538   optab op;
539   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
540   bool scalar_shift = true;
541 
542   for (i = 1; i < nunits; i++)
543     {
544       if (shiftcnts[i] != shiftcnts[0])
545 	scalar_shift = false;
546     }
547 
548   if (scalar_shift && shiftcnts[0] == 0)
549     return op0;
550 
551   if (scalar_shift)
552     {
553       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
554       if (op != unknown_optab
555 	  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
556 	return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
557 				build_int_cst (NULL_TREE, shiftcnts[0]));
558     }
559 
560   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
561   if (op != unknown_optab
562       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
563     {
564       tree_vector_builder vec (type, nunits, 1);
565       for (i = 0; i < nunits; i++)
566 	vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
567       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
568     }
569 
570   return NULL_TREE;
571 }
572 
573 /* Try to expand integer vector division by constant using
574    widening multiply, shifts and additions.  */
575 static tree
576 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
577 		      tree op1, enum tree_code code)
578 {
579   bool use_pow2 = true;
580   bool has_vector_shift = true;
581   bool use_abs_op1 = false;
582   int mode = -1, this_mode;
583   int pre_shift = -1, post_shift;
584   unsigned int nunits = nunits_for_known_piecewise_op (type);
585   int *shifts = XALLOCAVEC (int, nunits * 4);
586   int *pre_shifts = shifts + nunits;
587   int *post_shifts = pre_shifts + nunits;
588   int *shift_temps = post_shifts + nunits;
589   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
590   int prec = TYPE_PRECISION (TREE_TYPE (type));
591   int dummy_int;
592   unsigned int i;
593   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
594   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
595   tree cur_op, mulcst, tem;
596   optab op;
597 
598   if (prec > HOST_BITS_PER_WIDE_INT)
599     return NULL_TREE;
600 
601   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
602   if (op == unknown_optab
603       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
604     has_vector_shift = false;
605 
606   /* Analysis phase.  Determine whether all op1 elements are powers of
607      two, making it possible to expand the operation using shifts (or, for
608      remainder, using masking).  Additionally compute the multiplicative
609      constants and pre- and post-shifts if the division is to be expanded
610      using widening or high-part multiplication plus shifts.  */
611   for (i = 0; i < nunits; i++)
612     {
613       tree cst = VECTOR_CST_ELT (op1, i);
614       unsigned HOST_WIDE_INT ml;
615 
616       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
617 	return NULL_TREE;
618       pre_shifts[i] = 0;
619       post_shifts[i] = 0;
620       mulc[i] = 0;
621       if (use_pow2
622 	  && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
623 	use_pow2 = false;
624       if (use_pow2)
625 	{
626 	  shifts[i] = tree_log2 (cst);
627 	  if (shifts[i] != shifts[0]
628 	      && code == TRUNC_DIV_EXPR
629 	      && !has_vector_shift)
630 	    use_pow2 = false;
631 	}
632       if (mode == -2)
633 	continue;
634       if (sign_p == UNSIGNED)
635 	{
636 	  unsigned HOST_WIDE_INT mh;
637 	  unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
638 
639 	  if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
640 	    /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
641 	    return NULL_TREE;
642 
643 	  if (d <= 1)
644 	    {
645 	      mode = -2;
646 	      continue;
647 	    }
648 
649 	  /* Find a suitable multiplier and right shift count
650 	     instead of multiplying with D.  */
651 	  mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
652 
653 	  /* If the suggested multiplier is more than SIZE bits, we can
654 	     do better for even divisors, using an initial right shift.  */
655 	  if ((mh != 0 && (d & 1) == 0)
656 	      || (!has_vector_shift && pre_shift != -1))
657 	    {
658 	      if (has_vector_shift)
659 		pre_shift = ctz_or_zero (d);
660 	      else if (pre_shift == -1)
661 		{
662 		  unsigned int j;
663 		  for (j = 0; j < nunits; j++)
664 		    {
665 		      tree cst2 = VECTOR_CST_ELT (op1, j);
666 		      unsigned HOST_WIDE_INT d2;
667 		      int this_pre_shift;
668 
669 		      if (!tree_fits_uhwi_p (cst2))
670 			return NULL_TREE;
671 		      d2 = tree_to_uhwi (cst2) & mask;
672 		      if (d2 == 0)
673 			return NULL_TREE;
674 		      this_pre_shift = floor_log2 (d2 & -d2);
675 		      if (pre_shift == -1 || this_pre_shift < pre_shift)
676 			pre_shift = this_pre_shift;
677 		    }
678 		  if (i != 0 && pre_shift != 0)
679 		    {
680 		      /* Restart.  */
681 		      i = -1U;
682 		      mode = -1;
683 		      continue;
684 		    }
685 		}
686 	      if (pre_shift != 0)
687 		{
688 		  if ((d >> pre_shift) <= 1)
689 		    {
690 		      mode = -2;
691 		      continue;
692 		    }
693 		  mh = choose_multiplier (d >> pre_shift, prec,
694 					  prec - pre_shift,
695 					  &ml, &post_shift, &dummy_int);
696 		  gcc_assert (!mh);
697 		  pre_shifts[i] = pre_shift;
698 		}
699 	    }
700 	  if (!mh)
701 	    this_mode = 0;
702 	  else
703 	    this_mode = 1;
704 	}
705       else
706 	{
707 	  HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
708 	  unsigned HOST_WIDE_INT abs_d;
709 
710 	  if (d == -1)
711 	    return NULL_TREE;
712 
713 	  /* Since d might be INT_MIN, we have to cast to
714 	     unsigned HOST_WIDE_INT before negating to avoid
715 	     undefined signed overflow.  */
716 	  abs_d = (d >= 0
717 		  ? (unsigned HOST_WIDE_INT) d
718 		  : - (unsigned HOST_WIDE_INT) d);
719 
720 	  /* n rem d = n rem -d */
721 	  if (code == TRUNC_MOD_EXPR && d < 0)
722 	    {
723 	      d = abs_d;
724 	      use_abs_op1 = true;
725 	    }
726 	  if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
727 	    {
728 	      /* This case is not handled correctly below.  */
729 	      mode = -2;
730 	      continue;
731 	    }
732 	  if (abs_d <= 1)
733 	    {
734 	      mode = -2;
735 	      continue;
736 	    }
737 
738 	  choose_multiplier (abs_d, prec, prec - 1, &ml,
739 			     &post_shift, &dummy_int);
740 	  if (ml >= HOST_WIDE_INT_1U << (prec - 1))
741 	    {
742 	      this_mode = 4 + (d < 0);
743 	      ml |= HOST_WIDE_INT_M1U << (prec - 1);
744 	    }
745 	  else
746 	    this_mode = 2 + (d < 0);
747 	}
748       mulc[i] = ml;
749       post_shifts[i] = post_shift;
750       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
751 	  || post_shift >= prec
752 	  || pre_shifts[i] >= prec)
753 	this_mode = -2;
754 
755       if (i == 0)
756 	mode = this_mode;
757       else if (mode != this_mode)
758 	mode = -2;
759     }
760 
761   if (use_pow2)
762     {
763       tree addend = NULL_TREE;
764       if (sign_p == SIGNED)
765 	{
766 	  tree uns_type;
767 
768 	  /* Both division and remainder sequences need
769 	     op0 < 0 ? mask : 0 computed.  It can be either computed as
770 	     (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
771 	     if none of the shifts is 0, or as the conditional.  */
772 	  for (i = 0; i < nunits; i++)
773 	    if (shifts[i] == 0)
774 	      break;
775 	  uns_type
776 	    = build_vector_type (build_nonstandard_integer_type (prec, 1),
777 				 nunits);
778 	  if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
779 	    {
780 	      for (i = 0; i < nunits; i++)
781 		shift_temps[i] = prec - 1;
782 	      cur_op = add_rshift (gsi, type, op0, shift_temps);
783 	      if (cur_op != NULL_TREE)
784 		{
785 		  cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
786 					    uns_type, cur_op);
787 		  for (i = 0; i < nunits; i++)
788 		    shift_temps[i] = prec - shifts[i];
789 		  cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
790 		  if (cur_op != NULL_TREE)
791 		    addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
792 					      type, cur_op);
793 		}
794 	    }
795 	  if (addend == NULL_TREE
796 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
797 	    {
798 	      tree zero, cst, mask_type, mask;
799 	      gimple *stmt, *cond;
800 
801 	      mask_type = truth_type_for (type);
802 	      zero = build_zero_cst (type);
803 	      mask = make_ssa_name (mask_type);
804 	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
805 	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
806 	      tree_vector_builder vec (type, nunits, 1);
807 	      for (i = 0; i < nunits; i++)
808 		vec.quick_push (build_int_cst (TREE_TYPE (type),
809 					       (HOST_WIDE_INT_1U
810 						<< shifts[i]) - 1));
811 	      cst = vec.build ();
812 	      addend = make_ssa_name (type);
813 	      stmt
814 		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
815 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
816 	    }
817 	}
818       if (code == TRUNC_DIV_EXPR)
819 	{
820 	  if (sign_p == UNSIGNED)
821 	    {
822 	      /* q = op0 >> shift;  */
823 	      cur_op = add_rshift (gsi, type, op0, shifts);
824 	      if (cur_op != NULL_TREE)
825 		return cur_op;
826 	    }
827 	  else if (addend != NULL_TREE)
828 	    {
829 	      /* t1 = op0 + addend;
830 		 q = t1 >> shift;  */
831 	      op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
832 	      if (op != unknown_optab
833 		  && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
834 		{
835 		  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
836 		  cur_op = add_rshift (gsi, type, cur_op, shifts);
837 		  if (cur_op != NULL_TREE)
838 		    return cur_op;
839 		}
840 	    }
841 	}
842       else
843 	{
844 	  tree mask;
845 	  tree_vector_builder vec (type, nunits, 1);
846 	  for (i = 0; i < nunits; i++)
847 	    vec.quick_push (build_int_cst (TREE_TYPE (type),
848 					   (HOST_WIDE_INT_1U
849 					    << shifts[i]) - 1));
850 	  mask = vec.build ();
851 	  op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
852 	  if (op != unknown_optab
853 	      && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
854 	    {
855 	      if (sign_p == UNSIGNED)
856 		/* r = op0 & mask;  */
857 		return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
858 	      else if (addend != NULL_TREE)
859 		{
860 		  /* t1 = op0 + addend;
861 		     t2 = t1 & mask;
862 		     r = t2 - addend;  */
863 		  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
864 		  if (op != unknown_optab
865 		      && optab_handler (op, TYPE_MODE (type))
866 			 != CODE_FOR_nothing)
867 		    {
868 		      cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
869 						addend);
870 		      cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
871 						cur_op, mask);
872 		      op = optab_for_tree_code (MINUS_EXPR, type,
873 						optab_default);
874 		      if (op != unknown_optab
875 			  && optab_handler (op, TYPE_MODE (type))
876 			     != CODE_FOR_nothing)
877 			return gimplify_build2 (gsi, MINUS_EXPR, type,
878 						cur_op, addend);
879 		    }
880 		}
881 	    }
882 	}
883     }
884 
885   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
886     return NULL_TREE;
887 
888   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
889     return NULL_TREE;
890 
891   cur_op = op0;
892 
893   switch (mode)
894     {
895     case 0:
896       gcc_assert (sign_p == UNSIGNED);
897       /* t1 = oprnd0 >> pre_shift;
898 	 t2 = t1 h* ml;
899 	 q = t2 >> post_shift;  */
900       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
901       if (cur_op == NULL_TREE)
902 	return NULL_TREE;
903       break;
904     case 1:
905       gcc_assert (sign_p == UNSIGNED);
906       for (i = 0; i < nunits; i++)
907 	{
908 	  shift_temps[i] = 1;
909 	  post_shifts[i]--;
910 	}
911       break;
912     case 2:
913     case 3:
914     case 4:
915     case 5:
916       gcc_assert (sign_p == SIGNED);
917       for (i = 0; i < nunits; i++)
918 	shift_temps[i] = prec - 1;
919       break;
920     default:
921       return NULL_TREE;
922     }
923 
924   tree_vector_builder vec (type, nunits, 1);
925   for (i = 0; i < nunits; i++)
926     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
927   mulcst = vec.build ();
928 
929   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
930 
931   switch (mode)
932     {
933     case 0:
934       /* t1 = oprnd0 >> pre_shift;
935 	 t2 = t1 h* ml;
936 	 q = t2 >> post_shift;  */
937       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
938       break;
939     case 1:
940       /* t1 = oprnd0 h* ml;
941 	 t2 = oprnd0 - t1;
942 	 t3 = t2 >> 1;
943 	 t4 = t1 + t3;
944 	 q = t4 >> (post_shift - 1);  */
945       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
946       if (op == unknown_optab
947 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
948 	return NULL_TREE;
949       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
950       tem = add_rshift (gsi, type, tem, shift_temps);
951       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
952       if (op == unknown_optab
953 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
954 	return NULL_TREE;
955       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
956       cur_op = add_rshift (gsi, type, tem, post_shifts);
957       if (cur_op == NULL_TREE)
958 	return NULL_TREE;
959       break;
960     case 2:
961     case 3:
962     case 4:
963     case 5:
964       /* t1 = oprnd0 h* ml;
965 	 t2 = t1; [ iff (mode & 2) != 0 ]
966 	 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
967 	 t3 = t2 >> post_shift;
968 	 t4 = oprnd0 >> (prec - 1);
969 	 q = t3 - t4; [ iff (mode & 1) == 0 ]
970 	 q = t4 - t3; [ iff (mode & 1) != 0 ]  */
971       if ((mode & 2) == 0)
972 	{
973 	  op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
974 	  if (op == unknown_optab
975 	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
976 	    return NULL_TREE;
977 	  cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
978 	}
979       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
980       if (cur_op == NULL_TREE)
981 	return NULL_TREE;
982       tem = add_rshift (gsi, type, op0, shift_temps);
983       if (tem == NULL_TREE)
984 	return NULL_TREE;
985       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
986       if (op == unknown_optab
987 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
988 	return NULL_TREE;
989       if ((mode & 1) == 0)
990 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
991       else
992 	cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
993       break;
994     default:
995       gcc_unreachable ();
996     }
997 
998   if (code == TRUNC_DIV_EXPR)
999     return cur_op;
1000 
1001   /* We divided.  Now finish by:
1002      t1 = q * oprnd1;
1003      r = oprnd0 - t1;  */
1004   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
1005   if (op == unknown_optab
1006       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1007     return NULL_TREE;
1008   if (use_abs_op1)
1009     {
1010       tree_vector_builder elts;
1011       if (!elts.new_unary_operation (type, op1, false))
1012 	return NULL_TREE;
1013       unsigned int count = elts.encoded_nelts ();
1014       for (unsigned int i = 0; i < count; ++i)
1015 	{
1016 	  tree elem1 = VECTOR_CST_ELT (op1, i);
1017 
1018 	  tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
1019 	  if (elt == NULL_TREE)
1020 	    return NULL_TREE;
1021 	  elts.quick_push (elt);
1022 	}
1023       op1 = elts.build ();
1024     }
1025   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
1026   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1027   if (op == unknown_optab
1028       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1029     return NULL_TREE;
1030   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
1031 }
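
/* Illustrative examples of the expansions above (editorial): an unsigned
   V4SI division by {4, 4, 4, 4} takes the power-of-two path and becomes
   op0 >> 2, while the signed variant first adds (op0 < 0 ? 3 : 0)
   element-wise and then shifts; constant divisors that are not powers of
   two fall through to the MULT_HIGHPART_EXPR sequences selected by "mode",
   provided the target supports a high-part multiply for the vector mode.  */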
1032 
1033 /* Expand a vector condition to scalars, by using many conditions
1034    on the vector's elements.  */
1035 
1036 static bool
1037 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
1038 {
1039   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1040   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1041   tree a = gimple_assign_rhs1 (stmt);
1042   tree a1 = a;
1043   tree a2 = NULL_TREE;
1044   bool a_is_comparison = false;
1045   bool a_is_scalar_bitmask = false;
1046   tree b = gimple_assign_rhs2 (stmt);
1047   tree c = gimple_assign_rhs3 (stmt);
1048   vec<constructor_elt, va_gc> *v;
1049   tree constr;
1050   tree inner_type = TREE_TYPE (type);
1051   tree width = vector_element_bits_tree (type);
1052   tree cond_type = TREE_TYPE (TREE_TYPE (a));
1053   tree comp_inner_type = cond_type;
1054   tree index = bitsize_int (0);
1055   tree comp_width = width;
1056   tree comp_index = index;
1057   location_t loc = gimple_location (gsi_stmt (*gsi));
1058   tree_code code = TREE_CODE (a);
1059   gassign *assign = NULL;
1060 
1061   if (code == SSA_NAME)
1062     {
1063       assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
1064       if (assign != NULL
1065 	  && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
1066 	{
1067 	  a_is_comparison = true;
1068 	  a1 = gimple_assign_rhs1 (assign);
1069 	  a2 = gimple_assign_rhs2 (assign);
1070 	  code = gimple_assign_rhs_code (assign);
1071 	  comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
1072 	  comp_width = vector_element_bits_tree (TREE_TYPE (a1));
1073 	}
1074     }
1075 
1076   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)
1077       || (integer_all_onesp (b) && integer_zerop (c)
1078 	  && expand_vec_cmp_expr_p (type, TREE_TYPE (a1), code)))
1079     {
1080       gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
1081       return true;
1082     }
1083 
1084   /* If A has a vector boolean type and is a comparison, the
1085      expand_vec_cond_expr_p check above might fail, even if both the
1086      comparison and the VEC_COND_EXPR could be supported individually.
1087      See PR109176.  */
1087   if (a_is_comparison
1088       && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1089       && expand_vec_cond_expr_p (type, TREE_TYPE (a), SSA_NAME)
1090       && expand_vec_cmp_expr_p (TREE_TYPE (a1), TREE_TYPE (a), code))
1091     return true;
1092 
1093   /* Handle vector boolean types with bitmasks.  If there is a comparison
1094      and we can expand the comparison into the vector boolean bitmask,
1095      or otherwise if it is compatible with type, we can transform
1096       vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
1097      into
1098       tmp_6 = x_2 < y_3;
1099       tmp_7 = tmp_6 & vbfld_4;
1100       tmp_8 = ~tmp_6;
1101       tmp_9 = tmp_8 & vbfld_5;
1102       vbfld_1 = tmp_7 | tmp_9;
1103      Similarly for vbfld_10 instead of x_2 < y_3.  */
1104   if (VECTOR_BOOLEAN_TYPE_P (type)
1105       && SCALAR_INT_MODE_P (TYPE_MODE (type))
1106       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
1107 		   TYPE_VECTOR_SUBPARTS (type)
1108 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
1109       && (a_is_comparison
1110 	  ? useless_type_conversion_p (type, TREE_TYPE (a))
1111 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1112     {
1113       if (a_is_comparison)
1114 	a = gimplify_build2 (gsi, code, type, a1, a2);
1115       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1116       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1117       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1118       a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1119       gimple_assign_set_rhs_from_tree (gsi, a);
1120       update_stmt (gsi_stmt (*gsi));
1121       return true;
1122     }
1123 
1124   /* TODO: try and find a smaller vector type.  */
1125 
1126   if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1127     warning_at (loc, OPT_Wvector_operation_performance,
1128 		"vector condition will be expanded piecewise");
1129 
1130   if (!a_is_comparison
1131       && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1132       && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1133       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1134 		   TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1135 		   * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1136 						(TREE_TYPE (TREE_TYPE (a))))))
1137     {
1138       a_is_scalar_bitmask = true;
1139       int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1140       tree atype = build_nonstandard_integer_type (prec, 1);
1141       a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1142     }
1143   else if (!a_is_comparison
1144 	   && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)))
1145     comp_width = vector_element_bits_tree (TREE_TYPE (a));
1146 
1147   int nunits = nunits_for_known_piecewise_op (type);
1148   vec_alloc (v, nunits);
1149   bool constant_p = true;
1150   for (int i = 0; i < nunits; i++)
1151     {
1152       tree aa, result;
1153       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1154       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1155       if (a_is_comparison)
1156 	{
1157 	  tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1158 				       comp_width, comp_index);
1159 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1160 				       comp_width, comp_index);
1161 	  aa = build2 (code, cond_type, aa1, aa2);
1162 	}
1163       else if (a_is_scalar_bitmask)
1164 	{
1165 	  wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1166 	  result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1167 				    a, wide_int_to_tree (TREE_TYPE (a), w));
1168 	  aa = build2 (NE_EXPR, boolean_type_node, result,
1169 		       build_zero_cst (TREE_TYPE (a)));
1170 	}
1171       else
1172 	aa = tree_vec_extract (gsi, cond_type, a, comp_width, comp_index);
1173       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1174       if (!CONSTANT_CLASS_P (result))
1175 	constant_p = false;
1176       constructor_elt ce = {NULL_TREE, result};
1177       v->quick_push (ce);
1178       index = int_const_binop (PLUS_EXPR, index, width);
1179       if (width == comp_width)
1180 	comp_index = index;
1181       else
1182 	comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1183     }
1184 
1185   if (constant_p)
1186     constr = build_vector_from_ctor (type, v);
1187   else
1188     constr = build_constructor (type, v);
1189   gimple_assign_set_rhs_from_tree (gsi, constr);
1190   update_stmt (gsi_stmt (*gsi));
1191 
1192   if (a_is_comparison)
1193     bitmap_set_bit (dce_ssa_names,
1194 		    SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1195 
1196   return false;
1197 }
1198 
1199 static tree
1200 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1201 			 gassign *assign, enum tree_code code,
1202 			 bitmap dce_ssa_names)
1203 {
1204   machine_mode compute_mode = TYPE_MODE (compute_type);
1205 
1206   /* If the compute mode is not a vector mode (hence we are not decomposing
1207      a BLKmode vector to smaller, hardware-supported vectors), we may want
1208      to expand the operations in parallel.  */
1209   if (!VECTOR_MODE_P (compute_mode))
1210     switch (code)
1211       {
1212       case PLUS_EXPR:
1213       case MINUS_EXPR:
1214         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1215 	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1216 					 gimple_assign_rhs1 (assign),
1217 					 gimple_assign_rhs2 (assign), code);
1218 	break;
1219 
1220       case NEGATE_EXPR:
1221         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1222           return expand_vector_addition (gsi, do_unop, do_negate, type,
1223 		      		         gimple_assign_rhs1 (assign),
1224 					 NULL_TREE, code);
1225 	break;
1226 
1227       case BIT_AND_EXPR:
1228       case BIT_IOR_EXPR:
1229       case BIT_XOR_EXPR:
1230         return expand_vector_parallel (gsi, do_binop, type,
1231 		      		       gimple_assign_rhs1 (assign),
1232 				       gimple_assign_rhs2 (assign), code);
1233 
1234       case BIT_NOT_EXPR:
1235         return expand_vector_parallel (gsi, do_unop, type,
1236 		      		       gimple_assign_rhs1 (assign),
1237         			       NULL_TREE, code);
1238       case EQ_EXPR:
1239       case NE_EXPR:
1240       case GT_EXPR:
1241       case LT_EXPR:
1242       case GE_EXPR:
1243       case LE_EXPR:
1244       case UNEQ_EXPR:
1245       case UNGT_EXPR:
1246       case UNLT_EXPR:
1247       case UNGE_EXPR:
1248       case UNLE_EXPR:
1249       case LTGT_EXPR:
1250       case ORDERED_EXPR:
1251       case UNORDERED_EXPR:
1252 	{
1253 	  tree rhs1 = gimple_assign_rhs1 (assign);
1254 	  tree rhs2 = gimple_assign_rhs2 (assign);
1255 
1256 	  return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
1257 					   dce_ssa_names);
1258 	}
1259 
1260       case TRUNC_DIV_EXPR:
1261       case TRUNC_MOD_EXPR:
1262 	{
1263 	  tree rhs1 = gimple_assign_rhs1 (assign);
1264 	  tree rhs2 = gimple_assign_rhs2 (assign);
1265 	  tree ret;
1266 
1267 	  if (!optimize
1268 	      || !VECTOR_INTEGER_TYPE_P (type)
1269 	      || TREE_CODE (rhs2) != VECTOR_CST
1270 	      || !VECTOR_MODE_P (TYPE_MODE (type)))
1271 	    break;
1272 
1273 	  ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1274 	  if (ret != NULL_TREE)
1275 	    return ret;
1276 	  break;
1277 	}
1278 
1279       default:
1280 	break;
1281       }
1282 
1283   if (TREE_CODE_CLASS (code) == tcc_unary)
1284     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1285 				    gimple_assign_rhs1 (assign),
1286 				    NULL_TREE, code, false);
1287   else
1288     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1289 				    gimple_assign_rhs1 (assign),
1290 				    gimple_assign_rhs2 (assign), code, false);
1291 }
1292 
1293 /* Try to optimize
1294    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1295    style stmts into:
1296    _9 = { b_7, b_7, b_7, b_7 };
1297    a_5 = _9 + { 0, 3, 6, 9 };
1298    because a vector splat operation is usually more efficient
1299    than piecewise initialization of the vector.  */
1300 
1301 static void
1302 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1303 {
1304   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1305   tree lhs = gimple_assign_lhs (stmt);
1306   tree rhs = gimple_assign_rhs1 (stmt);
1307   tree type = TREE_TYPE (rhs);
1308   unsigned int i, j;
1309   unsigned HOST_WIDE_INT nelts;
1310   bool all_same = true;
1311   constructor_elt *elt;
1312   gimple *g;
1313   tree base = NULL_TREE;
1314   optab op;
1315 
1316   if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1317       || nelts <= 2
1318       || CONSTRUCTOR_NELTS (rhs) != nelts)
1319     return;
1320   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1321   if (op == unknown_optab
1322       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1323     return;
1324   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1325     if (TREE_CODE (elt->value) != SSA_NAME
1326 	|| TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1327       return;
1328     else
1329       {
1330 	tree this_base = elt->value;
1331 	if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1332 	  all_same = false;
1333 	for (j = 0; j < nelts + 1; j++)
1334 	  {
1335 	    g = SSA_NAME_DEF_STMT (this_base);
1336 	    if (is_gimple_assign (g)
1337 		&& gimple_assign_rhs_code (g) == PLUS_EXPR
1338 		&& TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1339 		&& TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1340 		&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1341 	      this_base = gimple_assign_rhs1 (g);
1342 	    else
1343 	      break;
1344 	  }
1345 	if (i == 0)
1346 	  base = this_base;
1347 	else if (this_base != base)
1348 	  return;
1349       }
1350   if (all_same)
1351     return;
1352   tree_vector_builder cst (type, nelts, 1);
1353   for (i = 0; i < nelts; i++)
1354     {
1355       tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1356       tree elt = build_zero_cst (TREE_TYPE (base));
1357       while (this_base != base)
1358 	{
1359 	  g = SSA_NAME_DEF_STMT (this_base);
1360 	  elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1361 			     elt, gimple_assign_rhs2 (g));
1362 	  if (elt == NULL_TREE
1363 	      || TREE_CODE (elt) != INTEGER_CST
1364 	      || TREE_OVERFLOW (elt))
1365 	    return;
1366 	  this_base = gimple_assign_rhs1 (g);
1367 	}
1368       cst.quick_push (elt);
1369     }
1370   for (i = 0; i < nelts; i++)
1371     CONSTRUCTOR_ELT (rhs, i)->value = base;
1372   g = gimple_build_assign (make_ssa_name (type), rhs);
1373   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1374   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1375 			   cst.build ());
1376   gsi_replace (gsi, g, false);
1377 }
1378 
1379 /* Return a type for the widest vector mode with the same element type as
1380    type ORIGINAL_VECTOR_TYPE, with at most the same number of elements as type
1381    ORIGINAL_VECTOR_TYPE and that is supported by the target for an operation
1382    with optab OP, or return NULL_TREE if none is found.  */
1383 
1384 static tree
1385 type_for_widest_vector_mode (tree original_vector_type, optab op)
1386 {
1387   gcc_assert (VECTOR_TYPE_P (original_vector_type));
1388   tree type = TREE_TYPE (original_vector_type);
1389   machine_mode inner_mode = TYPE_MODE (type);
1390   machine_mode best_mode = VOIDmode, mode;
1391   poly_int64 best_nunits = 0;
1392 
1393   if (SCALAR_FLOAT_MODE_P (inner_mode))
1394     mode = MIN_MODE_VECTOR_FLOAT;
1395   else if (SCALAR_FRACT_MODE_P (inner_mode))
1396     mode = MIN_MODE_VECTOR_FRACT;
1397   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1398     mode = MIN_MODE_VECTOR_UFRACT;
1399   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1400     mode = MIN_MODE_VECTOR_ACCUM;
1401   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1402     mode = MIN_MODE_VECTOR_UACCUM;
1403   else if (inner_mode == BImode)
1404     mode = MIN_MODE_VECTOR_BOOL;
1405   else
1406     mode = MIN_MODE_VECTOR_INT;
1407 
1408   FOR_EACH_MODE_FROM (mode, mode)
1409     if (GET_MODE_INNER (mode) == inner_mode
1410 	&& maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1411 	&& optab_handler (op, mode) != CODE_FOR_nothing
1412 	&& known_le (GET_MODE_NUNITS (mode),
1413 		     TYPE_VECTOR_SUBPARTS (original_vector_type)))
1414       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1415 
1416   if (best_mode == VOIDmode)
1417     return NULL_TREE;
1418   else
1419     return build_vector_type_for_mode (type, best_mode);
1420 }
1421 
1422 
1423 /* Build a reference to the element of the vector VECT.  The function
1424    returns either the element itself, a BIT_FIELD_REF, or an
1425    ARRAY_REF expression.
1426 
1427    GSI is required to insert temporary variables while building a
1428    reference to the element of the vector VECT.
1429 
1430    PTMPVEC is a pointer to a temporary variable used for caching
1431    purposes.  If PTMPVEC is NULL, a new temporary variable
1432    will be created.  */
1433 static tree
1434 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1435 {
1436   tree vect_type, vect_elt_type;
1437   gimple *asgn;
1438   tree tmpvec;
1439   tree arraytype;
1440   bool need_asgn = true;
1441   unsigned int elements;
1442 
1443   vect_type = TREE_TYPE (vect);
1444   vect_elt_type = TREE_TYPE (vect_type);
1445   elements = nunits_for_known_piecewise_op (vect_type);
1446 
1447   if (TREE_CODE (idx) == INTEGER_CST)
1448     {
1449       unsigned HOST_WIDE_INT index;
1450 
1451       /* Given that we're about to compute a binary modulus,
1452 	 we don't care about the high bits of the value.  */
1453       index = TREE_INT_CST_LOW (idx);
1454       if (!tree_fits_uhwi_p (idx) || index >= elements)
1455 	{
1456 	  index &= elements - 1;
1457 	  idx = build_int_cst (TREE_TYPE (idx), index);
1458 	}
1459 
1460       /* When lowering a vector statement sequence do some easy
1461          simplification by looking through intermediate vector results.  */
1462       if (TREE_CODE (vect) == SSA_NAME)
1463 	{
1464 	  gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1465 	  if (is_gimple_assign (def_stmt)
1466 	      && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1467 		  || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1468 	    vect = gimple_assign_rhs1 (def_stmt);
1469 	}
1470 
1471       if (TREE_CODE (vect) == VECTOR_CST)
1472 	return VECTOR_CST_ELT (vect, index);
1473       else if (TREE_CODE (vect) == CONSTRUCTOR
1474 	       && (CONSTRUCTOR_NELTS (vect) == 0
1475 		   || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1476 		      != VECTOR_TYPE))
1477         {
1478 	  if (index < CONSTRUCTOR_NELTS (vect))
1479 	    return CONSTRUCTOR_ELT (vect, index)->value;
1480           return build_zero_cst (vect_elt_type);
1481         }
1482       else
1483         {
1484 	  tree size = vector_element_bits_tree (vect_type);
1485 	  tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1486 				  size);
1487 	  return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1488         }
1489     }
1490 
1491   if (!ptmpvec)
1492     tmpvec = create_tmp_var (vect_type, "vectmp");
1493   else if (!*ptmpvec)
1494     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1495   else
1496     {
1497       tmpvec = *ptmpvec;
1498       need_asgn = false;
1499     }
1500 
1501   if (need_asgn)
1502     {
1503       TREE_ADDRESSABLE (tmpvec) = 1;
1504       asgn = gimple_build_assign (tmpvec, vect);
1505       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1506     }
1507 
1508   arraytype = build_array_type_nelts (vect_elt_type, elements);
1509   return build4 (ARRAY_REF, vect_elt_type,
1510                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1511                  idx, NULL_TREE, NULL_TREE);
1512 }
1513 
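/* Illustrative sketch only: a rough C-level analogue of the two cases
   vector_element above distinguishes.  A constant index can be folded to
   the element itself (or a BIT_FIELD_REF), while a variable index forces
   the vector into addressable storage that is then indexed as an array,
   which is what the VIEW_CONVERT_EXPR/ARRAY_REF built above models.  The
   helper name and the spill via __builtin_memcpy are hypothetical and
   only mirror the variable-index path.  */

typedef int v4si __attribute__ ((vector_size (16)));

static int
element_at_example (v4si v, unsigned int i)
{
  int tmp[4];
  /* Spill the vector to memory and index it as an array of its
     elements; "i & 3" mirrors the "index &= elements - 1" above.  */
  __builtin_memcpy (tmp, &v, sizeof tmp);
  return tmp[i & 3];
}
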
1514 /* Check if VEC_PERM_EXPR within the given setting is supported
1515    by hardware, or lower it piecewise.
1516 
1517    When VEC_PERM_EXPR has the same first and second operands:
1518    VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1519    {v0[mask[0]], v0[mask[1]], ...}
1520    MASK and V0 must have the same number of elements.
1521 
1522    Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1523    {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1524    V0 and V1 must have the same type.  MASK, V0 and V1 must have the
1525    same number of elements; see the example after this function.  */
1526 
1527 static void
1528 lower_vec_perm (gimple_stmt_iterator *gsi)
1529 {
1530   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1531   tree mask = gimple_assign_rhs3 (stmt);
1532   tree vec0 = gimple_assign_rhs1 (stmt);
1533   tree vec1 = gimple_assign_rhs2 (stmt);
1534   tree vect_type = TREE_TYPE (vec0);
1535   tree mask_type = TREE_TYPE (mask);
1536   tree vect_elt_type = TREE_TYPE (vect_type);
1537   tree mask_elt_type = TREE_TYPE (mask_type);
1538   unsigned HOST_WIDE_INT elements;
1539   vec<constructor_elt, va_gc> *v;
1540   tree constr, t, si, i_val;
1541   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1542   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1543   location_t loc = gimple_location (gsi_stmt (*gsi));
1544   unsigned i;
1545 
1546   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1547     return;
1548 
1549   if (TREE_CODE (mask) == SSA_NAME)
1550     {
1551       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1552       if (is_gimple_assign (def_stmt)
1553 	  && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1554 	mask = gimple_assign_rhs1 (def_stmt);
1555     }
1556 
1557   vec_perm_builder sel_int;
1558 
1559   if (TREE_CODE (mask) == VECTOR_CST
1560       && tree_to_vec_perm_builder (&sel_int, mask))
1561     {
1562       vec_perm_indices indices (sel_int, 2, elements);
1563       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1564 	{
1565 	  gimple_assign_set_rhs3 (stmt, mask);
1566 	  update_stmt (stmt);
1567 	  return;
1568 	}
1569       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1570 	 vector as VEC1 and a right element shift MASK.  */
1571       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1572 	  != CODE_FOR_nothing
1573 	  && TREE_CODE (vec1) == VECTOR_CST
1574 	  && initializer_zerop (vec1)
1575 	  && maybe_ne (indices[0], 0)
1576 	  && known_lt (poly_uint64 (indices[0]), elements))
1577 	{
1578 	  bool ok_p = indices.series_p (0, 1, indices[0], 1);
1579 	  if (!ok_p)
1580 	    {
1581 	      for (i = 1; i < elements; ++i)
1582 		{
1583 		  poly_uint64 actual = indices[i];
1584 		  poly_uint64 expected = i + indices[0];
1585 		  /* Indices into the second vector are all equivalent.  */
1586 		  if (maybe_lt (actual, elements)
1587 		      ? maybe_ne (actual, expected)
1588 		      : maybe_lt (expected, elements))
1589 		    break;
1590 		}
1591 	      ok_p = i == elements;
1592 	    }
1593 	  if (ok_p)
1594 	    {
1595 	      gimple_assign_set_rhs3 (stmt, mask);
1596 	      update_stmt (stmt);
1597 	      return;
1598 	    }
1599 	}
1600       /* And similarly vec_shl pattern.  */
1601       if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1602 	  != CODE_FOR_nothing
1603 	  && TREE_CODE (vec0) == VECTOR_CST
1604 	  && initializer_zerop (vec0))
1605 	{
1606 	  unsigned int first = 0;
1607 	  for (i = 0; i < elements; ++i)
1608 	    if (known_eq (poly_uint64 (indices[i]), elements))
1609 	      {
1610 		if (i == 0 || first)
1611 		  break;
1612 		first = i;
1613 	      }
1614 	    else if (first
1615 		     ? maybe_ne (poly_uint64 (indices[i]),
1616 					      elements + i - first)
1617 		     : maybe_ge (poly_uint64 (indices[i]), elements))
1618 	      break;
1619 	  if (first && i == elements)
1620 	    {
1621 	      gimple_assign_set_rhs3 (stmt, mask);
1622 	      update_stmt (stmt);
1623 	      return;
1624 	    }
1625 	}
1626     }
1627   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1628     return;
1629 
1630   if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1631     warning_at (loc, OPT_Wvector_operation_performance,
1632 		"vector shuffling operation will be expanded piecewise");
1633 
1634   vec_alloc (v, elements);
1635   bool constant_p = true;
1636   for (i = 0; i < elements; i++)
1637     {
1638       si = size_int (i);
1639       i_val = vector_element (gsi, mask, si, &masktmp);
1640 
1641       if (TREE_CODE (i_val) == INTEGER_CST)
1642         {
1643 	  unsigned HOST_WIDE_INT index;
1644 
1645 	  index = TREE_INT_CST_LOW (i_val);
1646 	  if (!tree_fits_uhwi_p (i_val) || index >= elements)
1647 	    i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1648 
1649           if (two_operand_p && (index & elements) != 0)
1650 	    t = vector_element (gsi, vec1, i_val, &vec1tmp);
1651 	  else
1652 	    t = vector_element (gsi, vec0, i_val, &vec0tmp);
1653 
1654           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1655 					true, GSI_SAME_STMT);
1656         }
1657       else
1658         {
1659 	  tree cond = NULL_TREE, v0_val;
1660 
1661 	  if (two_operand_p)
1662 	    {
1663 	      cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1664 			          build_int_cst (mask_elt_type, elements));
1665 	      cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1666 					       true, GSI_SAME_STMT);
1667 	    }
1668 
1669 	  i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1670 			       build_int_cst (mask_elt_type, elements - 1));
1671 	  i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1672 					    true, GSI_SAME_STMT);
1673 
1674 	  v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1675 	  v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1676 					     true, GSI_SAME_STMT);
1677 
1678 	  if (two_operand_p)
1679 	    {
1680 	      tree v1_val;
1681 
1682 	      v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1683 	      v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1684 						 true, GSI_SAME_STMT);
1685 
1686 	      cond = fold_build2 (EQ_EXPR, boolean_type_node,
1687 				  cond, build_zero_cst (mask_elt_type));
1688 	      cond = fold_build3 (COND_EXPR, vect_elt_type,
1689 				  cond, v0_val, v1_val);
1690               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1691 					    true, GSI_SAME_STMT);
1692             }
1693 	  else
1694 	    t = v0_val;
1695         }
1696 
1697       if (!CONSTANT_CLASS_P (t))
1698 	constant_p = false;
1699       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1700     }
1701 
1702   if (constant_p)
1703     constr = build_vector_from_ctor (vect_type, v);
1704   else
1705     constr = build_constructor (vect_type, v);
1706   gimple_assign_set_rhs_from_tree (gsi, constr);
1707   update_stmt (gsi_stmt (*gsi));
1708 }
1709 
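/* Illustrative example only: __builtin_shuffle is the usual C-level
   source of the VEC_PERM_EXPRs that lower_vec_perm above handles.  With
   the constant mask below, indices 0-3 select from A and 4-7 select from
   B, i.e. exactly the "mask[i] < len(v0) ? v0[mask[i]] : v1[mask[i]]"
   rule from the function comment.  If the target has no matching permute
   instruction, the statement is expanded into the element-by-element
   CONSTRUCTOR built above.  The function name is hypothetical.  */

typedef int v4si __attribute__ ((vector_size (16)));

static v4si
shuffle_example (v4si a, v4si b)
{
  v4si mask = { 0, 5, 2, 7 };
  return __builtin_shuffle (a, b, mask);
}
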
1710 /* If OP is a uniform vector, return the element it is a splat from.  */
1711 
1712 static tree
1713 ssa_uniform_vector_p (tree op)
1714 {
1715   if (TREE_CODE (op) == VECTOR_CST
1716       || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1717       || TREE_CODE (op) == CONSTRUCTOR)
1718     return uniform_vector_p (op);
1719   if (TREE_CODE (op) == SSA_NAME)
1720     {
1721       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1722       if (gimple_assign_single_p (def_stmt))
1723 	return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1724     }
1725   return NULL_TREE;
1726 }
1727 
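/* Illustrative example only: the kind of statement that
   ssa_uniform_vector_p above lets expand_vector_operations_1 recognize.
   Both operands are splats, so when the target lacks a vector add the
   whole operation can be done as a single scalar add followed by a splat
   of the result instead of four element-wise adds.  The function name is
   hypothetical.  */

typedef int v4si __attribute__ ((vector_size (16)));

static v4si
splat_add_example (int a, int b)
{
  v4si va = { a, a, a, a };	/* uniform vector: splat of A */
  v4si vb = { b, b, b, b };	/* uniform vector: splat of B */
  return va + vb;
}
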
1728 /* Return the type in which the operation CODE with optab OP can be
1729    computed.  See the example following this function.  */
1730 
1731 static tree
1732 get_compute_type (enum tree_code code, optab op, tree type)
1733 {
1734   /* For very wide vectors, try using a smaller vector mode.  */
1735   tree compute_type = type;
1736   if (op
1737       && (!VECTOR_MODE_P (TYPE_MODE (type))
1738 	  || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1739     {
1740       tree vector_compute_type
1741 	= type_for_widest_vector_mode (type, op);
1742       if (vector_compute_type != NULL_TREE
1743 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1744 	  && (optab_handler (op, TYPE_MODE (vector_compute_type))
1745 	      != CODE_FOR_nothing))
1746 	compute_type = vector_compute_type;
1747     }
1748 
1749   /* If we are breaking a BLKmode vector into smaller pieces,
1750      type_for_widest_vector_mode has already looked into the optab,
1751      so skip these checks.  */
1752   if (compute_type == type)
1753     {
1754       machine_mode compute_mode = TYPE_MODE (compute_type);
1755       if (VECTOR_MODE_P (compute_mode))
1756 	{
1757 	  if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1758 	    return compute_type;
1759 	  if (code == MULT_HIGHPART_EXPR
1760 	      && can_mult_highpart_p (compute_mode,
1761 				      TYPE_UNSIGNED (compute_type)))
1762 	    return compute_type;
1763 	}
1764       /* There is no operation in hardware, so fall back to scalars.  */
1765       compute_type = TREE_TYPE (type);
1766     }
1767 
1768   return compute_type;
1769 }
1770 
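/* Illustrative example only, assuming a target whose widest supported
   integer vector is 16 bytes: the 64-byte vector type below then has
   BLKmode, so get_compute_type above picks the widest vector mode for
   which the addition optab exists (four 16-byte pieces here), or falls
   back to the scalar element type if there is none.  The type and
   function names are hypothetical.  */

typedef int v16si __attribute__ ((vector_size (64)));

static v16si
wide_add_example (v16si a, v16si b)
{
  return a + b;		/* lowered piecewise per get_compute_type */
}
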
1771 static tree
1772 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1773 	 tree bitpos, tree bitsize, enum tree_code code,
1774 	 tree type ATTRIBUTE_UNUSED)
1775 {
1776   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1777     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1778   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1779     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1780   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1781   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1782 }
1783 
1784 /* Expand a vector COND_EXPR to scalars, piecewise.  */
1785 static void
1786 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1787 {
1788   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1789   tree lhs = gimple_assign_lhs (stmt);
1790   tree type = TREE_TYPE (lhs);
1791   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1792   machine_mode compute_mode = TYPE_MODE (compute_type);
1793   gcc_assert (compute_mode != BLKmode);
1794   tree rhs2 = gimple_assign_rhs2 (stmt);
1795   tree rhs3 = gimple_assign_rhs3 (stmt);
1796   tree new_rhs;
1797 
1798   /* If the compute mode is not a vector mode (hence we are not decomposing
1799      a BLKmode vector to smaller, hardware-supported vectors), we may want
1800      to expand the operations in parallel.  */
1801   if (!VECTOR_MODE_P (compute_mode))
1802     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1803 				      COND_EXPR);
1804   else
1805     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1806 				       rhs2, rhs3, COND_EXPR, false);
1807   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1808     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1809 			       new_rhs);
1810 
1811   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
1812      way to do it is to change expand_vector_operation and its callees
1813      to return a tree_code, RHS1 and RHS2 instead of a tree.  */
1814   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1815   update_stmt (gsi_stmt (*gsi));
1816 }
1817 
1818 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1819    lowering.  If INNER_TYPE is not a vector type, this is a scalar
1820    fallback.  */
1821 
1822 static tree
1823 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1824 		   tree decl, tree bitpos, tree bitsize,
1825 		   enum tree_code code, tree type)
1826 {
1827   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1828   if (!VECTOR_TYPE_P (inner_type))
1829     return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1830   if (code == CALL_EXPR)
1831     {
1832       gimple *g = gimple_build_call (decl, 1, a);
1833       tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1834       gimple_call_set_lhs (g, lhs);
1835       gsi_insert_before (gsi, g, GSI_SAME_STMT);
1836       return lhs;
1837     }
1838   else
1839     {
1840       tree outer_type = build_vector_type (TREE_TYPE (type),
1841 					   TYPE_VECTOR_SUBPARTS (inner_type));
1842       return gimplify_build1 (gsi, code, outer_type, a);
1843     }
1844 }
1845 
1846 /* Similarly, but for narrowing conversion.  */
1847 
1848 static tree
1849 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1850 			  tree, tree bitpos, tree, enum tree_code code,
1851 			  tree type)
1852 {
1853   tree itype = build_vector_type (TREE_TYPE (inner_type),
1854 				  exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1855 					     2));
1856   tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1857   tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1858 			     int_const_binop (PLUS_EXPR, bitpos,
1859 					      TYPE_SIZE (itype)));
1860   tree outer_type = build_vector_type (TREE_TYPE (type),
1861 				       TYPE_VECTOR_SUBPARTS (inner_type));
1862   return gimplify_build2 (gsi, code, outer_type, b, c);
1863 }
1864 
1865 /* Expand VEC_CONVERT ifn call.  */
1866 
1867 static void
1868 expand_vector_conversion (gimple_stmt_iterator *gsi)
1869 {
1870   gimple *stmt = gsi_stmt (*gsi);
1871   gimple *g;
1872   tree lhs = gimple_call_lhs (stmt);
1873   if (lhs == NULL_TREE)
1874     {
1875       g = gimple_build_nop ();
1876       gsi_replace (gsi, g, false);
1877       return;
1878     }
1879   tree arg = gimple_call_arg (stmt, 0);
1880   tree ret_type = TREE_TYPE (lhs);
1881   tree arg_type = TREE_TYPE (arg);
1882   tree new_rhs, compute_type = TREE_TYPE (arg_type);
1883   enum tree_code code = NOP_EXPR;
1884   enum tree_code code1 = ERROR_MARK;
1885   enum { NARROW, NONE, WIDEN } modifier = NONE;
1886   optab optab1 = unknown_optab;
1887 
1888   gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1889   if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1890       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1891     code = FIX_TRUNC_EXPR;
1892   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1893 	   && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1894     code = FLOAT_EXPR;
1895   unsigned int ret_elt_bits = vector_element_bits (ret_type);
1896   unsigned int arg_elt_bits = vector_element_bits (arg_type);
1897   if (ret_elt_bits < arg_elt_bits)
1898     modifier = NARROW;
1899   else if (ret_elt_bits > arg_elt_bits)
1900     modifier = WIDEN;
1901 
1902   if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1903     {
1904       if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1905 	{
1906 	  g = gimple_build_assign (lhs, code1, arg);
1907 	  gsi_replace (gsi, g, false);
1908 	  return;
1909 	}
1910       /* Can't use get_compute_type here, as supportable_convert_operation
1911 	 doesn't necessarily use an optab and needs two arguments.  */
1912       tree vec_compute_type
1913 	= type_for_widest_vector_mode (arg_type, mov_optab);
1914       if (vec_compute_type
1915 	  && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
1916 	{
1917 	  unsigned HOST_WIDE_INT nelts
1918 	    = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1919 	  while (nelts > 1)
1920 	    {
1921 	      tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1922 	      tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1923 	      if (supportable_convert_operation (code, ret1_type, arg1_type,
1924 						 &code1))
1925 		{
1926 		  new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1927 						     ret_type, arg1_type, arg,
1928 						     NULL_TREE, code1, false);
1929 		  g = gimple_build_assign (lhs, new_rhs);
1930 		  gsi_replace (gsi, g, false);
1931 		  return;
1932 		}
1933 	      nelts = nelts / 2;
1934 	    }
1935 	}
1936     }
1937   else if (modifier == NARROW)
1938     {
1939       switch (code)
1940 	{
1941 	CASE_CONVERT:
1942 	  code1 = VEC_PACK_TRUNC_EXPR;
1943 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1944 	  break;
1945 	case FIX_TRUNC_EXPR:
1946 	  code1 = VEC_PACK_FIX_TRUNC_EXPR;
1947 	  /* The signedness is determined from output operand.  */
1948 	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1949 	  break;
1950 	case FLOAT_EXPR:
1951 	  code1 = VEC_PACK_FLOAT_EXPR;
1952 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1953 	  break;
1954 	default:
1955 	  gcc_unreachable ();
1956 	}
1957 
1958       if (optab1)
1959 	compute_type = get_compute_type (code1, optab1, arg_type);
1960       enum insn_code icode1;
1961       if (VECTOR_TYPE_P (compute_type)
1962 	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1963 	      != CODE_FOR_nothing)
1964 	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1965 	{
1966 	  tree cretd_type
1967 	    = build_vector_type (TREE_TYPE (ret_type),
1968 				 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1969 	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1970 	    {
1971 	      if (compute_type == arg_type)
1972 		{
1973 		  new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1974 					     arg, build_zero_cst (arg_type));
1975 		  new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1976 					      TYPE_SIZE (ret_type),
1977 					      bitsize_int (0));
1978 		  g = gimple_build_assign (lhs, new_rhs);
1979 		  gsi_replace (gsi, g, false);
1980 		  return;
1981 		}
1982 	      tree dcompute_type
1983 		= build_vector_type (TREE_TYPE (compute_type),
1984 				     TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1985 	      if (TYPE_MAIN_VARIANT (dcompute_type)
1986 		  == TYPE_MAIN_VARIANT (arg_type))
1987 		new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1988 						    NULL_TREE, bitsize_int (0),
1989 						    NULL_TREE, code1,
1990 						    ret_type);
1991 	      else
1992 		new_rhs = expand_vector_piecewise (gsi,
1993 						   do_vec_narrow_conversion,
1994 						   arg_type, dcompute_type,
1995 						   arg, NULL_TREE, code1,
1996 						   false, ret_type);
1997 	      g = gimple_build_assign (lhs, new_rhs);
1998 	      gsi_replace (gsi, g, false);
1999 	      return;
2000 	    }
2001 	}
2002     }
2003   else if (modifier == WIDEN)
2004     {
2005       enum tree_code code2 = ERROR_MARK;
2006       optab optab2 = unknown_optab;
2007       switch (code)
2008 	{
2009 	CASE_CONVERT:
2010 	  code1 = VEC_UNPACK_LO_EXPR;
2011           code2 = VEC_UNPACK_HI_EXPR;
2012 	  break;
2013 	case FIX_TRUNC_EXPR:
2014 	  code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
2015 	  code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
2016 	  break;
2017 	case FLOAT_EXPR:
2018 	  code1 = VEC_UNPACK_FLOAT_LO_EXPR;
2019 	  code2 = VEC_UNPACK_FLOAT_HI_EXPR;
2020 	  break;
2021 	default:
2022 	  gcc_unreachable ();
2023 	}
2024       if (BYTES_BIG_ENDIAN)
2025 	std::swap (code1, code2);
2026 
2027       if (code == FIX_TRUNC_EXPR)
2028 	{
2029 	  /* The signedness is determined from output operand.  */
2030 	  optab1 = optab_for_tree_code (code1, ret_type, optab_default);
2031 	  optab2 = optab_for_tree_code (code2, ret_type, optab_default);
2032 	}
2033       else
2034 	{
2035 	  optab1 = optab_for_tree_code (code1, arg_type, optab_default);
2036 	  optab2 = optab_for_tree_code (code2, arg_type, optab_default);
2037 	}
2038 
2039       if (optab1 && optab2)
2040 	compute_type = get_compute_type (code1, optab1, arg_type);
2041 
2042       enum insn_code icode1, icode2;
2043       if (VECTOR_TYPE_P (compute_type)
2044 	  && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
2045 	      != CODE_FOR_nothing)
2046 	  && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
2047 	      != CODE_FOR_nothing)
2048 	  && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
2049 	  && (insn_data[icode1].operand[0].mode
2050 	      == insn_data[icode2].operand[0].mode))
2051 	{
2052 	  poly_uint64 nunits
2053 	    = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
2054 	  tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
2055 	  if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
2056 	    {
2057 	      vec<constructor_elt, va_gc> *v;
2058 	      tree part_width = TYPE_SIZE (compute_type);
2059 	      tree index = bitsize_int (0);
2060 	      int nunits = nunits_for_known_piecewise_op (arg_type);
2061 	      int delta = tree_to_uhwi (part_width) / arg_elt_bits;
2062 	      int i;
2063 	      location_t loc = gimple_location (gsi_stmt (*gsi));
2064 
2065 	      if (compute_type != arg_type)
2066 		{
2067 		  if (!warning_suppressed_p (gsi_stmt (*gsi),
2068 					     OPT_Wvector_operation_performance))
2069 		    warning_at (loc, OPT_Wvector_operation_performance,
2070 				"vector operation will be expanded piecewise");
2071 		}
2072 	      else
2073 		{
2074 		  nunits = 1;
2075 		  delta = 1;
2076 		}
2077 
2078 	      vec_alloc (v, (nunits + delta - 1) / delta * 2);
2079 	      bool constant_p = true;
2080 	      for (i = 0; i < nunits;
2081 		   i += delta, index = int_const_binop (PLUS_EXPR, index,
2082 							part_width))
2083 		{
2084 		  tree a = arg;
2085 		  if (compute_type != arg_type)
2086 		    a = tree_vec_extract (gsi, compute_type, a, part_width,
2087 					  index);
2088 		  tree result = gimplify_build1 (gsi, code1, cretd_type, a);
2089 		  constructor_elt ce = { NULL_TREE, result };
2090 		  if (!CONSTANT_CLASS_P (ce.value))
2091 		    constant_p = false;
2092 		  v->quick_push (ce);
2093 		  ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
2094 		  if (!CONSTANT_CLASS_P (ce.value))
2095 		    constant_p = false;
2096 		  v->quick_push (ce);
2097 		}
2098 
2099 	      if (constant_p)
2100 		new_rhs = build_vector_from_ctor (ret_type, v);
2101 	      else
2102 		new_rhs = build_constructor (ret_type, v);
2103 	      g = gimple_build_assign (lhs, new_rhs);
2104 	      gsi_replace (gsi, g, false);
2105 	      return;
2106 	    }
2107 	}
2108     }
2109 
2110   new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
2111 				     TREE_TYPE (arg_type), arg,
2112 				     NULL_TREE, code, false, ret_type);
2113   g = gimple_build_assign (lhs, new_rhs);
2114   gsi_replace (gsi, g, false);
2115 }
2116 
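/* Illustrative example only: __builtin_convertvector is the usual
   C-level source of the .VEC_CONVERT internal call that
   expand_vector_conversion above lowers.  Here each int element is
   converted to float; if the target has no suitable vector conversion
   instruction, the call is expanded piecewise as above.  The function
   name is hypothetical.  */

typedef int v4si __attribute__ ((vector_size (16)));
typedef float v4sf __attribute__ ((vector_size (16)));

static v4sf
convert_example (v4si a)
{
  return __builtin_convertvector (a, v4sf);
}
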
2117 /* Process one statement.  If we identify a vector operation, expand it.  */
2118 
2119 static void
2120 expand_vector_operations_1 (gimple_stmt_iterator *gsi,
2121 			    bitmap dce_ssa_names)
2122 {
2123   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
2124   enum tree_code code;
2125   optab op = unknown_optab;
2126   enum gimple_rhs_class rhs_class;
2127   tree new_rhs;
2128 
2129   /* Only consider code == GIMPLE_ASSIGN. */
2130   gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
2131   if (!stmt)
2132     {
2133       if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
2134 	expand_vector_conversion (gsi);
2135       return;
2136     }
2137 
2138   code = gimple_assign_rhs_code (stmt);
2139   rhs_class = get_gimple_rhs_class (code);
2140   lhs = gimple_assign_lhs (stmt);
2141 
2142   if (code == VEC_PERM_EXPR)
2143     {
2144       lower_vec_perm (gsi);
2145       return;
2146     }
2147 
2148   if (code == VEC_COND_EXPR)
2149     {
2150       expand_vector_condition (gsi, dce_ssa_names);
2151       return;
2152     }
2153 
2154   if (code == COND_EXPR
2155       && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2156       && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2157     {
2158       expand_vector_scalar_condition (gsi);
2159       return;
2160     }
2161 
2162   if (code == CONSTRUCTOR
2163       && TREE_CODE (lhs) == SSA_NAME
2164       && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2165       && !gimple_clobber_p (stmt)
2166       && optimize)
2167     {
2168       optimize_vector_constructor (gsi);
2169       return;
2170     }
2171 
2172   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2173     return;
2174 
2175   rhs1 = gimple_assign_rhs1 (stmt);
2176   if (rhs_class == GIMPLE_BINARY_RHS)
2177     rhs2 = gimple_assign_rhs2 (stmt);
2178 
2179   type = TREE_TYPE (lhs);
2180   if (!VECTOR_TYPE_P (type)
2181       || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2182     return;
2183 
2184   /* A scalar operation pretending to be a vector one.  */
2185   if (VECTOR_BOOLEAN_TYPE_P (type)
2186       && !VECTOR_MODE_P (TYPE_MODE (type))
2187       && TYPE_MODE (type) != BLKmode
2188       && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2189 	  || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2190 	      && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2191 	      && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2192     return;
2193 
2194   /* If the vector operation is operating on vectors whose elements are
2195      all the same, implement it with a scalar operation and a splat if
2196      the target supports the scalar operation.  */
2197   tree srhs1, srhs2 = NULL_TREE;
2198   if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2199       && (rhs2 == NULL_TREE
2200 	  || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2201 	      && (srhs2 = rhs2))
2202 	  || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2203       /* As we query direct optabs restrict to non-convert operations.  */
2204       && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2205     {
2206       op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2207       if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2208 	  && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2209 	{
2210 	  tree stype = TREE_TYPE (TREE_TYPE (lhs));
2211 	  tree slhs = (rhs2 != NULL_TREE)
2212 		      ? gimplify_build2 (gsi, code, stype, srhs1, srhs2)
2213 		      : gimplify_build1 (gsi, code, stype, srhs1);
2214 	  gimple_assign_set_rhs_from_tree (gsi,
2215 					   build_vector_from_val (type, slhs));
2216 	  update_stmt (stmt);
2217 	  return;
2218 	}
2219     }
2220 
2221   if (CONVERT_EXPR_CODE_P (code)
2222       || code == FLOAT_EXPR
2223       || code == FIX_TRUNC_EXPR
2224       || code == VIEW_CONVERT_EXPR)
2225     return;
2226 
2227   /* The signedness is determined from input argument.  */
2228   if (code == VEC_UNPACK_FLOAT_HI_EXPR
2229       || code == VEC_UNPACK_FLOAT_LO_EXPR
2230       || code == VEC_PACK_FLOAT_EXPR)
2231     {
2232       /* We do not know how to scalarize those.  */
2233       return;
2234     }
2235 
2236   /* For widening/narrowing vector operations, the relevant type is that of
2237      the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
2238      handled in the same way above.  */
2239   if (code == WIDEN_SUM_EXPR
2240       || code == VEC_WIDEN_PLUS_HI_EXPR
2241       || code == VEC_WIDEN_PLUS_LO_EXPR
2242       || code == VEC_WIDEN_MINUS_HI_EXPR
2243       || code == VEC_WIDEN_MINUS_LO_EXPR
2244       || code == VEC_WIDEN_MULT_HI_EXPR
2245       || code == VEC_WIDEN_MULT_LO_EXPR
2246       || code == VEC_WIDEN_MULT_EVEN_EXPR
2247       || code == VEC_WIDEN_MULT_ODD_EXPR
2248       || code == VEC_UNPACK_HI_EXPR
2249       || code == VEC_UNPACK_LO_EXPR
2250       || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2251       || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2252       || code == VEC_PACK_TRUNC_EXPR
2253       || code == VEC_PACK_SAT_EXPR
2254       || code == VEC_PACK_FIX_TRUNC_EXPR
2255       || code == VEC_WIDEN_LSHIFT_HI_EXPR
2256       || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2257     {
2258       /* We do not know how to scalarize those.  */
2259       return;
2260     }
2261 
2262   /* Choose between vector shift/rotate by vector and vector shift/rotate by
2263      scalar.  */
2264   if (code == LSHIFT_EXPR
2265       || code == RSHIFT_EXPR
2266       || code == LROTATE_EXPR
2267       || code == RROTATE_EXPR)
2268     {
2269       optab opv;
2270 
2271       /* Check whether we have vector <op> {x,x,x,x} where x
2272          could be a scalar variable or a constant.  Transform
2273          vector <op> {x,x,x,x} ==> vector <op> scalar.  */
2274       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2275         {
2276           tree first;
2277 
2278           if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2279             {
2280               gimple_assign_set_rhs2 (stmt, first);
2281               update_stmt (stmt);
2282               rhs2 = first;
2283             }
2284         }
2285 
2286       opv = optab_for_tree_code (code, type, optab_vector);
2287       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2288 	op = opv;
2289       else
2290 	{
2291           op = optab_for_tree_code (code, type, optab_scalar);
2292 
2293 	  compute_type = get_compute_type (code, op, type);
2294 	  if (compute_type == type)
2295 	    return;
2296 	  /* The rtl expander will expand vector/scalar as vector/vector
2297 	     if necessary.  Pick one with wider vector type.  */
2298 	  tree compute_vtype = get_compute_type (code, opv, type);
2299 	  if (subparts_gt (compute_vtype, compute_type))
2300 	    {
2301 	      compute_type = compute_vtype;
2302 	      op = opv;
2303 	    }
2304 	}
2305 
2306       if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2307 	{
2308 	  if (compute_type == NULL_TREE)
2309 	    compute_type = get_compute_type (code, op, type);
2310 	  if (compute_type == type)
2311 	    return;
2312 	  /* Before splitting vector rotates into scalar rotates,
2313 	     see if we can't use vector shifts and BIT_IOR_EXPR
2314 	     instead.  For vector by vector rotates we'd also
2315 	     need to check BIT_AND_EXPR and NEGATE_EXPR; punt there
2316 	     for now, as fold doesn't seem to create such rotates anyway.  */
2317 	  if (compute_type == TREE_TYPE (type)
2318 	      && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2319 	    {
2320 	      optab oplv = vashl_optab, opl = ashl_optab;
2321 	      optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2322 	      tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2323 	      tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2324 	      tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2325 	      tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2326 	      tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2327 	      /* The rtl expander will expand vector/scalar as vector/vector
2328 		 if necessary.  Pick one with wider vector type.  */
2329 	      if (subparts_gt (compute_lvtype, compute_ltype))
2330 		{
2331 		  compute_ltype = compute_lvtype;
2332 		  opl = oplv;
2333 		}
2334 	      if (subparts_gt (compute_rvtype, compute_rtype))
2335 		{
2336 		  compute_rtype = compute_rvtype;
2337 		  opr = oprv;
2338 		}
2339 	      /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2340 		 BIT_IOR_EXPR.  */
2341 	      compute_type = compute_ltype;
2342 	      if (subparts_gt (compute_type, compute_rtype))
2343 		compute_type = compute_rtype;
2344 	      if (subparts_gt (compute_type, compute_otype))
2345 		compute_type = compute_otype;
2346 	      /* Verify all 3 operations can be performed in that type.  */
2347 	      if (compute_type != TREE_TYPE (type))
2348 		{
2349 		  if (optab_handler (opl, TYPE_MODE (compute_type))
2350 		      == CODE_FOR_nothing
2351 		      || optab_handler (opr, TYPE_MODE (compute_type))
2352 			 == CODE_FOR_nothing
2353 		      || optab_handler (opo, TYPE_MODE (compute_type))
2354 			 == CODE_FOR_nothing)
2355 		    compute_type = TREE_TYPE (type);
2356 		}
2357 	    }
2358 	}
2359     }
2360   else
2361     op = optab_for_tree_code (code, type, optab_default);
2362 
2363   /* Optabs will try converting a negation into a subtraction, so
2364      look for it as well.  TODO: negation of floating-point vectors
2365      might be turned into an exclusive OR toggling the sign bit.  */
2366   if (op == unknown_optab
2367       && code == NEGATE_EXPR
2368       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2369     op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2370 
2371   if (compute_type == NULL_TREE)
2372     compute_type = get_compute_type (code, op, type);
2373   if (compute_type == type)
2374     return;
2375 
2376   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
2377 				     dce_ssa_names);
2378 
2379   /* Leave expression untouched for later expansion.  */
2380   if (new_rhs == NULL_TREE)
2381     return;
2382 
2383   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2384     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2385                                new_rhs);
2386 
2387   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree.  One
2388      way to do it is to change expand_vector_operation and its callees
2389      to return a tree_code, RHS1 and RHS2 instead of a tree.  */
2390   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2391   update_stmt (gsi_stmt (*gsi));
2392 }
2393 
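/* Illustrative example only: one of the rewrites done by
   expand_vector_operations_1 above.  The shift amount is a uniform
   vector, so the statement is rewritten into a shift by the scalar N,
   allowing the scalar-shift optab to be used instead of splitting the
   shift into pieces.  The function name is hypothetical.  */

typedef int v4si __attribute__ ((vector_size (16)));

static v4si
shift_example (v4si v, int n)
{
  v4si vn = { n, n, n, n };	/* uniform shift amount */
  return v << vn;
}
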
2394 /* Use this to lower vector operations introduced by the vectorizer,
2395    if it may need the bit-twiddling tricks implemented in this file.  */
2396 
2397 static unsigned int
2398 expand_vector_operations (void)
2399 {
2400   gimple_stmt_iterator gsi;
2401   basic_block bb;
2402   bool cfg_changed = false;
2403 
2404   auto_bitmap dce_ssa_names;
2405 
2406   FOR_EACH_BB_FN (bb, cfun)
2407     {
2408       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2409 	{
2410 	  expand_vector_operations_1 (&gsi, dce_ssa_names);
2411 	  /* ???  If we do not clean up EH then we will ICE in
2412 	     verification.  But in reality we have created wrong code,
2413 	     as we did not properly transfer EH info and edges to
2414 	     the piecewise computations.  */
2415 	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2416 	      && gimple_purge_dead_eh_edges (bb))
2417 	    cfg_changed = true;
2418 	}
2419     }
2420 
2421   simple_dce_from_worklist (dce_ssa_names);
2422 
2423   return cfg_changed ? TODO_cleanup_cfg : 0;
2424 }
2425 
2426 namespace {
2427 
2428 const pass_data pass_data_lower_vector =
2429 {
2430   GIMPLE_PASS, /* type */
2431   "veclower", /* name */
2432   OPTGROUP_VEC, /* optinfo_flags */
2433   TV_NONE, /* tv_id */
2434   PROP_cfg, /* properties_required */
2435   PROP_gimple_lvec, /* properties_provided */
2436   0, /* properties_destroyed */
2437   0, /* todo_flags_start */
2438   TODO_update_ssa, /* todo_flags_finish */
2439 };
2440 
2441 class pass_lower_vector : public gimple_opt_pass
2442 {
2443 public:
2444   pass_lower_vector (gcc::context *ctxt)
2445     : gimple_opt_pass (pass_data_lower_vector, ctxt)
2446   {}
2447 
2448   /* opt_pass methods: */
2449   virtual bool gate (function *fun)
2450     {
2451       return !(fun->curr_properties & PROP_gimple_lvec);
2452     }
2453 
2454   virtual unsigned int execute (function *)
2455     {
2456       return expand_vector_operations ();
2457     }
2458 
2459 }; // class pass_lower_vector
2460 
2461 } // anon namespace
2462 
2463 gimple_opt_pass *
2464 make_pass_lower_vector (gcc::context *ctxt)
2465 {
2466   return new pass_lower_vector (ctxt);
2467 }
2468 
2469 namespace {
2470 
2471 const pass_data pass_data_lower_vector_ssa =
2472 {
2473   GIMPLE_PASS, /* type */
2474   "veclower2", /* name */
2475   OPTGROUP_VEC, /* optinfo_flags */
2476   TV_NONE, /* tv_id */
2477   PROP_cfg, /* properties_required */
2478   PROP_gimple_lvec, /* properties_provided */
2479   0, /* properties_destroyed */
2480   0, /* todo_flags_start */
2481   ( TODO_update_ssa
2482     | TODO_cleanup_cfg ), /* todo_flags_finish */
2483 };
2484 
2485 class pass_lower_vector_ssa : public gimple_opt_pass
2486 {
2487 public:
2488   pass_lower_vector_ssa (gcc::context *ctxt)
2489     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2490   {}
2491 
2492   /* opt_pass methods: */
2493   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2494   virtual unsigned int execute (function *)
2495     {
2496       return expand_vector_operations ();
2497     }
2498 
2499 }; // class pass_lower_vector_ssa
2500 
2501 } // anon namespace
2502 
2503 gimple_opt_pass *
2504 make_pass_lower_vector_ssa (gcc::context *ctxt)
2505 {
2506   return new pass_lower_vector_ssa (ctxt);
2507 }
2508 
2509 #include "gt-tree-vect-generic.h"
2510