1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "gimple-fold.h"
45 #include "gimple-match.h"
46 #include "recog.h" /* FIXME: for insn_data */
47
48
49 /* Build a ternary operation and gimplify it. Emit code before GSI.
50 Return the gimple_val holding the result. */
51
52 static tree
53 gimplify_build3 (gimple_stmt_iterator *gsi, enum tree_code code,
54 tree type, tree a, tree b, tree c)
55 {
56 location_t loc = gimple_location (gsi_stmt (*gsi));
57 gimple_seq stmts = NULL;
58 tree ret = gimple_build (&stmts, loc, code, type, a, b, c);
59 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
60 return ret;
61 }
62
63 /* Build a binary operation and gimplify it. Emit code before GSI.
64 Return the gimple_val holding the result. */
65
66 static tree
67 gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
68 tree type, tree a, tree b)
69 {
70 location_t loc = gimple_location (gsi_stmt (*gsi));
71 gimple_seq stmts = NULL;
72 tree ret = gimple_build (&stmts, loc, code, type, a, b);
73 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
74 return ret;
75 }
76
77 /* Build a unary operation and gimplify it. Emit code before GSI.
78 Return the gimple_val holding the result. */
79
80 static tree
81 gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
82 tree a)
83 {
84 location_t loc = gimple_location (gsi_stmt (*gsi));
85 gimple_seq stmts = NULL;
86 tree ret = gimple_build (&stmts, loc, code, type, a);
87 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
88 return ret;
89 }
90
91
92 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
93
94 /* Return the number of elements in a vector type TYPE that we have
95 already decided needs to be expanded piecewise. We don't support
96 this kind of expansion for variable-length vectors, since we should
97 always check for target support before introducing uses of those. */
98 static unsigned int
99 nunits_for_known_piecewise_op (const_tree type)
100 {
101 return TYPE_VECTOR_SUBPARTS (type).to_constant ();
102 }
103
104 /* Return true if TYPE1 has more elements than TYPE2, where either
105 type may be a vector or a scalar. */
106
107 static inline bool
108 subparts_gt (tree type1, tree type2)
109 {
110 poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
111 poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
112 return known_gt (n1, n2);
113 }
114
115 /* Build a constant of type TYPE, made of VALUE's bits replicated
116 every WIDTH bits to fit TYPE's precision. */
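/* For instance (an illustrative case, independent of any target),
   replicating VALUE 0x7f every WIDTH = 8 bits into a 32-bit TYPE yields
   the constant 0x7f7f7f7f; do_plus_minus and do_negate below use such
   constants as per-element "low bits" and "high bit" masks.  */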
117 static tree
118 build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
119 {
120 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
121 / HOST_BITS_PER_WIDE_INT;
122 unsigned HOST_WIDE_INT low, mask;
123 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
124 int i;
125
126 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
127
128 if (width == HOST_BITS_PER_WIDE_INT)
129 low = value;
130 else
131 {
132 mask = ((HOST_WIDE_INT)1 << width) - 1;
133 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
134 }
135
136 for (i = 0; i < n; i++)
137 a[i] = low;
138
139 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
140 return wide_int_to_tree
141 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
142 }
143
144 static GTY(()) tree vector_inner_type;
145 static GTY(()) tree vector_last_type;
146 static GTY(()) int vector_last_nunits;
147
148 /* Return a suitable vector type made of NUNITS units each of mode
149 "word_mode" (the global variable). */
150 static tree
151 build_word_mode_vector_type (int nunits)
152 {
153 if (!vector_inner_type)
154 vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
155 else if (vector_last_nunits == nunits)
156 {
157 gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
158 return vector_last_type;
159 }
160
161 vector_last_nunits = nunits;
162 vector_last_type = build_vector_type (vector_inner_type, nunits);
163 return vector_last_type;
164 }
165
166 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
167 tree, tree, tree, tree, tree, enum tree_code,
168 tree);
169
170 /* Extract the vector element of type TYPE at BITPOS with BITSIZE from T
171 and return it. */
172
173 tree
174 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
175 tree t, tree bitsize, tree bitpos)
176 {
177 /* We're using the resimplify API and maybe_push_res_to_seq to
178 simplify the BIT_FIELD_REF but restrict the simplification to
179 a single stmt while at the same time following SSA edges for
180 simplification with already emitted CTORs. */
181 gimple_match_op opr;
182 opr.set_op (BIT_FIELD_REF, type, t, bitsize, bitpos);
183 opr.resimplify (NULL, follow_all_ssa_edges);
184 gimple_seq stmts = NULL;
185 tree res = maybe_push_res_to_seq (&opr, &stmts);
186 if (!res)
187 {
188 /* This can happen if SSA names with SSA_NAME_OCCURS_IN_ABNORMAL_PHI
189 set are used. Build the BIT_FIELD_REF manually in that case. */
190 t = build3 (BIT_FIELD_REF, type, t, bitsize, bitpos);
191 res = make_ssa_name (type);
192 gimple *g = gimple_build_assign (res, t);
193 gsi_insert_before (gsi, g, GSI_SAME_STMT);
194 return res;
195 }
196 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
197 return res;
198 }
199
200 static tree
201 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
202 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
203 enum tree_code code, tree type ATTRIBUTE_UNUSED)
204 {
205 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
206 return gimplify_build1 (gsi, code, inner_type, a);
207 }
208
209 static tree
210 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
211 tree bitpos, tree bitsize, enum tree_code code,
212 tree type ATTRIBUTE_UNUSED)
213 {
214 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
215 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
216 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
217 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
218 return gimplify_build2 (gsi, code, inner_type, a, b);
219 }
220
221 /* Construct the expression (A[BITPOS] code B[BITPOS]) ? -1 : 0.
222 
223 INNER_TYPE is the type of A's and B's elements.
224 
225 The returned expression is of signed integer type with the
226 same size as INNER_TYPE. */
227 static tree
228 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
229 tree bitpos, tree bitsize, enum tree_code code, tree type)
230 {
231 tree stype = TREE_TYPE (type);
232 tree cst_false = build_zero_cst (stype);
233 tree cst_true = build_all_ones_cst (stype);
234 tree cmp;
235
236 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
237 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
238
239 cmp = build2 (code, boolean_type_node, a, b);
240 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
241 }
242
243 /* Expand vector addition to scalars. This does bit twiddling
244 in order to increase parallelism:
245
246 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
247 (a ^ b) & 0x80808080
248
249 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
250 (a ^ ~b) & 0x80808080
251
252 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
253
254 This optimization should be done only if 4 vector items or more
255 fit into a word. */
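/* A worked example (illustrative only), with 8-bit elements packed into a
   32-bit word, a = 0x017f0205 and b = 0x01010304:

     a & 0x7f7f7f7f        = 0x017f0205
     b & 0x7f7f7f7f        = 0x01010304
     masked sum            = 0x02800509
     (a ^ b) & 0x80808080  = 0x00000000
     result                = 0x02800509

   which matches the element-wise sums {0x02, 0x80, 0x05, 0x09}.  Clearing
   the top bit of each element before adding keeps carries from leaking
   into the next element, and the final XOR restores each element's top
   bit.  */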
256 static tree
257 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
258 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
259 enum tree_code code, tree type ATTRIBUTE_UNUSED)
260 {
261 unsigned int width = vector_element_bits (TREE_TYPE (a));
262 tree inner_type = TREE_TYPE (TREE_TYPE (a));
263 unsigned HOST_WIDE_INT max;
264 tree low_bits, high_bits, a_low, b_low, result_low, signs;
265
266 max = GET_MODE_MASK (TYPE_MODE (inner_type));
267 low_bits = build_replicated_const (word_type, width, max >> 1);
268 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
269
270 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
271 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
272
273 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
274 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
275 if (code == PLUS_EXPR)
276 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
277 else
278 {
279 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
280 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
281 }
282
283 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
284 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
285 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
286 }
287
288 static tree
289 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
290 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
291 tree bitsize ATTRIBUTE_UNUSED,
292 enum tree_code code ATTRIBUTE_UNUSED,
293 tree type ATTRIBUTE_UNUSED)
294 {
295 unsigned int width = vector_element_bits (TREE_TYPE (b));
296 tree inner_type = TREE_TYPE (TREE_TYPE (b));
297 HOST_WIDE_INT max;
298 tree low_bits, high_bits, b_low, result_low, signs;
299
300 max = GET_MODE_MASK (TYPE_MODE (inner_type));
301 low_bits = build_replicated_const (word_type, width, max >> 1);
302 high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
303
304 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
305
306 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
307 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
308 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
309 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
310 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
311 }
312
313 /* Expand a vector operation to scalars, by using many operations
314 whose type is the vector type's inner type. */
315 static tree
316 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
317 tree type, tree inner_type,
318 tree a, tree b, enum tree_code code,
319 bool parallel_p, tree ret_type = NULL_TREE)
320 {
321 vec<constructor_elt, va_gc> *v;
322 tree part_width = TYPE_SIZE (inner_type);
323 tree index = bitsize_int (0);
324 int nunits = nunits_for_known_piecewise_op (type);
325 int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
326 int i;
327 location_t loc = gimple_location (gsi_stmt (*gsi));
328
329 if (nunits == 1
330 || warning_suppressed_p (gsi_stmt (*gsi),
331 OPT_Wvector_operation_performance))
332 /* Do not diagnose decomposing single-element vectors or when
333 decomposing vectorizer-produced operations. */
334 ;
335 else if (ret_type || !parallel_p)
336 warning_at (loc, OPT_Wvector_operation_performance,
337 "vector operation will be expanded piecewise");
338 else
339 warning_at (loc, OPT_Wvector_operation_performance,
340 "vector operation will be expanded in parallel");
341
342 if (!ret_type)
343 ret_type = type;
344 vec_alloc (v, (nunits + delta - 1) / delta);
345 bool constant_p = true;
346 for (i = 0; i < nunits;
347 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
348 {
349 tree result = f (gsi, inner_type, a, b, index, part_width, code,
350 ret_type);
351 if (!CONSTANT_CLASS_P (result))
352 constant_p = false;
353 constructor_elt ce = {NULL_TREE, result};
354 v->quick_push (ce);
355 }
356
357 if (constant_p)
358 return build_vector_from_ctor (ret_type, v);
359 else
360 return build_constructor (ret_type, v);
361 }
362
363 /* Expand a vector operation to scalars with the freedom to use
364 a scalar integer type, or to use a different size for the items
365 in the vector type. */
366 static tree
367 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
368 tree a, tree b, enum tree_code code)
369 {
370 tree result, compute_type;
371 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
372 location_t loc = gimple_location (gsi_stmt (*gsi));
373
374 /* We have three strategies. If the element type already has word mode,
375 just do the operation one element at a time. Else, if the vector is
376 wider than one word, do it a word at a time; finally, if the vector is
377 smaller than one word, do it as a single scalar. */
378 if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
379 return expand_vector_piecewise (gsi, f,
380 type, TREE_TYPE (type),
381 a, b, code, true);
382 else if (n_words > 1)
383 {
384 tree word_type = build_word_mode_vector_type (n_words);
385 result = expand_vector_piecewise (gsi, f,
386 word_type, TREE_TYPE (word_type),
387 a, b, code, true);
388 result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
389 GSI_SAME_STMT);
390 }
391 else
392 {
393 /* Use a single scalar operation with a mode no wider than word_mode. */
394 if (!warning_suppressed_p (gsi_stmt (*gsi),
395 OPT_Wvector_operation_performance))
396 warning_at (loc, OPT_Wvector_operation_performance,
397 "vector operation will be expanded with a "
398 "single scalar operation");
399 scalar_int_mode mode
400 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
401 compute_type = lang_hooks.types.type_for_mode (mode, 1);
402 result = f (gsi, compute_type, a, b, bitsize_zero_node,
403 TYPE_SIZE (compute_type), code, type);
404 }
405
406 return result;
407 }
408
409 /* Expand a vector operation to scalars; for integer types we can use
410 special bit twiddling tricks to do the sums a word at a time, using
411 function F_PARALLEL instead of F. These tricks are done only if
412 they can process at least four items, that is, only if the vector
413 holds at least four items and if a word can hold four items. */
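/* For example (illustrative figures), a vector of sixteen 8-bit elements
   on a target with 64-bit words gives parts_per_word = 64 / 8 = 8 and
   sixteen elements, so both checks below pass and the word-parallel path
   is used; a vector of two 64-bit elements fails the parts_per_word >= 4
   check and is expanded element by element instead.  */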
414 static tree
415 expand_vector_addition (gimple_stmt_iterator *gsi,
416 elem_op_func f, elem_op_func f_parallel,
417 tree type, tree a, tree b, enum tree_code code)
418 {
419 int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
420
421 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
422 && parts_per_word >= 4
423 && nunits_for_known_piecewise_op (type) >= 4)
424 return expand_vector_parallel (gsi, f_parallel,
425 type, a, b, code);
426 else
427 return expand_vector_piecewise (gsi, f,
428 type, TREE_TYPE (type),
429 a, b, code, false);
430 }
431
432 static bool
433 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
434
435 /* Try to expand the vector comparison expression OP0 CODE OP1 by
436 querying the optab to see whether the following expression:
437 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
438 can be expanded. */
439 static tree
440 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
441 tree op1, enum tree_code code,
442 bitmap dce_ssa_names)
443 {
444 tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
445 use_operand_p use_p;
446 imm_use_iterator iterator;
447 bool vec_cond_expr_only = true;
448
449 /* As seen in PR95830, we should not expand comparisons that are only
450 feeding a VEC_COND_EXPR statement. */
451 auto_vec<gimple *> uses;
452 FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
453 {
454 gimple *use = USE_STMT (use_p);
455 if (is_gimple_debug (use))
456 continue;
457 if (is_gimple_assign (use)
458 && gimple_assign_rhs_code (use) == VEC_COND_EXPR
459 && gimple_assign_rhs1 (use) == lhs
460 && gimple_assign_rhs2 (use) != lhs
461 && gimple_assign_rhs3 (use) != lhs)
462 uses.safe_push (use);
463 else
464 vec_cond_expr_only = false;
465 }
466
467 if (vec_cond_expr_only)
468 for (gimple *use : uses)
469 {
470 gimple_stmt_iterator it = gsi_for_stmt (use);
471 if (!expand_vector_condition (&it, dce_ssa_names))
472 {
473 vec_cond_expr_only = false;
474 break;
475 }
476 }
477
478 if (!uses.is_empty () && vec_cond_expr_only)
479 return NULL_TREE;
480
481 tree t;
482 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
483 {
484 if (VECTOR_BOOLEAN_TYPE_P (type)
485 && SCALAR_INT_MODE_P (TYPE_MODE (type))
486 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
487 TYPE_VECTOR_SUBPARTS (type)
488 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
489 (TREE_TYPE (type)))))
490 {
491 tree inner_type = TREE_TYPE (TREE_TYPE (op0));
492 tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
493 tree index = bitsize_int (0);
494 int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
495 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
496 tree ret_type = build_nonstandard_integer_type (prec, 1);
497 tree ret_inner_type = boolean_type_node;
498 int i;
499 location_t loc = gimple_location (gsi_stmt (*gsi));
500 t = build_zero_cst (ret_type);
501
502 if (TYPE_PRECISION (ret_inner_type) != 1)
503 ret_inner_type = build_nonstandard_integer_type (1, 1);
504 if (!warning_suppressed_p (gsi_stmt (*gsi),
505 OPT_Wvector_operation_performance))
506 warning_at (loc, OPT_Wvector_operation_performance,
507 "vector operation will be expanded piecewise");
508 for (i = 0; i < nunits;
509 i++, index = int_const_binop (PLUS_EXPR, index, part_width))
510 {
511 tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
512 index);
513 tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
514 index);
515 tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
516 t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
517 bitsize_int (i));
518 }
519 t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
520 }
521 else
522 t = expand_vector_piecewise (gsi, do_compare, type,
523 TREE_TYPE (TREE_TYPE (op0)), op0, op1,
524 code, false);
525 }
526 else
527 t = NULL_TREE;
528
529 return t;
530 }
531
532 /* Helper function of expand_vector_divmod. Gimplify an RSHIFT_EXPR in the
533 type of OP0 with the shift counts in the SHIFTCNTS array and return the
534 temporary holding the result if successful, otherwise return NULL_TREE. */
535 static tree
536 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
537 {
538 optab op;
539 unsigned int i, nunits = nunits_for_known_piecewise_op (type);
540 bool scalar_shift = true;
541
542 for (i = 1; i < nunits; i++)
543 {
544 if (shiftcnts[i] != shiftcnts[0])
545 scalar_shift = false;
546 }
547
548 if (scalar_shift && shiftcnts[0] == 0)
549 return op0;
550
551 if (scalar_shift)
552 {
553 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
554 if (op != unknown_optab
555 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
556 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
557 build_int_cst (NULL_TREE, shiftcnts[0]));
558 }
559
560 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
561 if (op != unknown_optab
562 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
563 {
564 tree_vector_builder vec (type, nunits, 1);
565 for (i = 0; i < nunits; i++)
566 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
567 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
568 }
569
570 return NULL_TREE;
571 }
572
573 /* Try to expand integer vector division by constant using
574 widening multiply, shifts and additions. */
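/* As a sketch of the underlying scalar identity (assuming 16-bit unsigned
   elements; the actual constants come from choose_multiplier), dividing
   by 3 can be done as

     q = (x h* 0xAAAB) >> 1

   where "h*" is a high-part multiply returning the top 16 bits of the
   32-bit product, i.e. q = (x * 0xAAAB) >> 17, which equals x / 3 for
   every 16-bit x.  The code below builds the vector analogue of such
   sequences and, for TRUNC_MOD_EXPR, derives the remainder as
   r = x - q * d.  */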
575 static tree
576 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
577 tree op1, enum tree_code code)
578 {
579 bool use_pow2 = true;
580 bool has_vector_shift = true;
581 bool use_abs_op1 = false;
582 int mode = -1, this_mode;
583 int pre_shift = -1, post_shift;
584 unsigned int nunits = nunits_for_known_piecewise_op (type);
585 int *shifts = XALLOCAVEC (int, nunits * 4);
586 int *pre_shifts = shifts + nunits;
587 int *post_shifts = pre_shifts + nunits;
588 int *shift_temps = post_shifts + nunits;
589 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
590 int prec = TYPE_PRECISION (TREE_TYPE (type));
591 int dummy_int;
592 unsigned int i;
593 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
594 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
595 tree cur_op, mulcst, tem;
596 optab op;
597
598 if (prec > HOST_BITS_PER_WIDE_INT)
599 return NULL_TREE;
600
601 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
602 if (op == unknown_optab
603 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
604 has_vector_shift = false;
605
606 /* Analysis phase. Determine whether all op1 elements are powers
607 of two, in which case the operation can be expanded using shifts (or,
608 for remainder, using masking). Additionally compute the multiplicative
609 constants and pre- and post-shifts in case the division is to be
610 expanded using widening or high-part multiplication plus shifts. */
611 for (i = 0; i < nunits; i++)
612 {
613 tree cst = VECTOR_CST_ELT (op1, i);
614 unsigned HOST_WIDE_INT ml;
615
616 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
617 return NULL_TREE;
618 pre_shifts[i] = 0;
619 post_shifts[i] = 0;
620 mulc[i] = 0;
621 if (use_pow2
622 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
623 use_pow2 = false;
624 if (use_pow2)
625 {
626 shifts[i] = tree_log2 (cst);
627 if (shifts[i] != shifts[0]
628 && code == TRUNC_DIV_EXPR
629 && !has_vector_shift)
630 use_pow2 = false;
631 }
632 if (mode == -2)
633 continue;
634 if (sign_p == UNSIGNED)
635 {
636 unsigned HOST_WIDE_INT mh;
637 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
638
639 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
640 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
641 return NULL_TREE;
642
643 if (d <= 1)
644 {
645 mode = -2;
646 continue;
647 }
648
649 /* Find a suitable multiplier and right shift count
650 instead of multiplying with D. */
651 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
652
653 /* If the suggested multiplier is more than PREC bits, we can
654 do better for even divisors, using an initial right shift. */
655 if ((mh != 0 && (d & 1) == 0)
656 || (!has_vector_shift && pre_shift != -1))
657 {
658 if (has_vector_shift)
659 pre_shift = ctz_or_zero (d);
660 else if (pre_shift == -1)
661 {
662 unsigned int j;
663 for (j = 0; j < nunits; j++)
664 {
665 tree cst2 = VECTOR_CST_ELT (op1, j);
666 unsigned HOST_WIDE_INT d2;
667 int this_pre_shift;
668
669 if (!tree_fits_uhwi_p (cst2))
670 return NULL_TREE;
671 d2 = tree_to_uhwi (cst2) & mask;
672 if (d2 == 0)
673 return NULL_TREE;
674 this_pre_shift = floor_log2 (d2 & -d2);
675 if (pre_shift == -1 || this_pre_shift < pre_shift)
676 pre_shift = this_pre_shift;
677 }
678 if (i != 0 && pre_shift != 0)
679 {
680 /* Restart. */
681 i = -1U;
682 mode = -1;
683 continue;
684 }
685 }
686 if (pre_shift != 0)
687 {
688 if ((d >> pre_shift) <= 1)
689 {
690 mode = -2;
691 continue;
692 }
693 mh = choose_multiplier (d >> pre_shift, prec,
694 prec - pre_shift,
695 &ml, &post_shift, &dummy_int);
696 gcc_assert (!mh);
697 pre_shifts[i] = pre_shift;
698 }
699 }
700 if (!mh)
701 this_mode = 0;
702 else
703 this_mode = 1;
704 }
705 else
706 {
707 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
708 unsigned HOST_WIDE_INT abs_d;
709
710 if (d == -1)
711 return NULL_TREE;
712
713 /* Since d might be INT_MIN, we have to cast to
714 unsigned HOST_WIDE_INT before negating to avoid
715 undefined signed overflow. */
716 abs_d = (d >= 0
717 ? (unsigned HOST_WIDE_INT) d
718 : - (unsigned HOST_WIDE_INT) d);
719
720 /* n rem d = n rem -d */
721 if (code == TRUNC_MOD_EXPR && d < 0)
722 {
723 d = abs_d;
724 use_abs_op1 = true;
725 }
726 if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
727 {
728 /* This case is not handled correctly below. */
729 mode = -2;
730 continue;
731 }
732 if (abs_d <= 1)
733 {
734 mode = -2;
735 continue;
736 }
737
738 choose_multiplier (abs_d, prec, prec - 1, &ml,
739 &post_shift, &dummy_int);
740 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
741 {
742 this_mode = 4 + (d < 0);
743 ml |= HOST_WIDE_INT_M1U << (prec - 1);
744 }
745 else
746 this_mode = 2 + (d < 0);
747 }
748 mulc[i] = ml;
749 post_shifts[i] = post_shift;
750 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
751 || post_shift >= prec
752 || pre_shifts[i] >= prec)
753 this_mode = -2;
754
755 if (i == 0)
756 mode = this_mode;
757 else if (mode != this_mode)
758 mode = -2;
759 }
760
761 if (use_pow2)
762 {
763 tree addend = NULL_TREE;
764 if (sign_p == SIGNED)
765 {
766 tree uns_type;
767
768 /* Both the division and remainder sequences need
769 op0 < 0 ? mask : 0 computed. It can be computed either as
770 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
771 if none of the shifts is 0, or as the conditional below. */
772 for (i = 0; i < nunits; i++)
773 if (shifts[i] == 0)
774 break;
775 uns_type
776 = build_vector_type (build_nonstandard_integer_type (prec, 1),
777 nunits);
778 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
779 {
780 for (i = 0; i < nunits; i++)
781 shift_temps[i] = prec - 1;
782 cur_op = add_rshift (gsi, type, op0, shift_temps);
783 if (cur_op != NULL_TREE)
784 {
785 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
786 uns_type, cur_op);
787 for (i = 0; i < nunits; i++)
788 shift_temps[i] = prec - shifts[i];
789 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
790 if (cur_op != NULL_TREE)
791 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
792 type, cur_op);
793 }
794 }
795 if (addend == NULL_TREE
796 && expand_vec_cond_expr_p (type, type, LT_EXPR))
797 {
798 tree zero, cst, mask_type, mask;
799 gimple *stmt, *cond;
800
801 mask_type = truth_type_for (type);
802 zero = build_zero_cst (type);
803 mask = make_ssa_name (mask_type);
804 cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
805 gsi_insert_before (gsi, cond, GSI_SAME_STMT);
806 tree_vector_builder vec (type, nunits, 1);
807 for (i = 0; i < nunits; i++)
808 vec.quick_push (build_int_cst (TREE_TYPE (type),
809 (HOST_WIDE_INT_1U
810 << shifts[i]) - 1));
811 cst = vec.build ();
812 addend = make_ssa_name (type);
813 stmt
814 = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
815 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
816 }
817 }
818 if (code == TRUNC_DIV_EXPR)
819 {
820 if (sign_p == UNSIGNED)
821 {
822 /* q = op0 >> shift; */
823 cur_op = add_rshift (gsi, type, op0, shifts);
824 if (cur_op != NULL_TREE)
825 return cur_op;
826 }
827 else if (addend != NULL_TREE)
828 {
829 /* t1 = op0 + addend;
830 q = t1 >> shift; */
831 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
832 if (op != unknown_optab
833 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
834 {
835 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
836 cur_op = add_rshift (gsi, type, cur_op, shifts);
837 if (cur_op != NULL_TREE)
838 return cur_op;
839 }
840 }
841 }
842 else
843 {
844 tree mask;
845 tree_vector_builder vec (type, nunits, 1);
846 for (i = 0; i < nunits; i++)
847 vec.quick_push (build_int_cst (TREE_TYPE (type),
848 (HOST_WIDE_INT_1U
849 << shifts[i]) - 1));
850 mask = vec.build ();
851 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
852 if (op != unknown_optab
853 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
854 {
855 if (sign_p == UNSIGNED)
856 /* r = op0 & mask; */
857 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
858 else if (addend != NULL_TREE)
859 {
860 /* t1 = op0 + addend;
861 t2 = t1 & mask;
862 r = t2 - addend; */
863 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
864 if (op != unknown_optab
865 && optab_handler (op, TYPE_MODE (type))
866 != CODE_FOR_nothing)
867 {
868 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
869 addend);
870 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
871 cur_op, mask);
872 op = optab_for_tree_code (MINUS_EXPR, type,
873 optab_default);
874 if (op != unknown_optab
875 && optab_handler (op, TYPE_MODE (type))
876 != CODE_FOR_nothing)
877 return gimplify_build2 (gsi, MINUS_EXPR, type,
878 cur_op, addend);
879 }
880 }
881 }
882 }
883 }
884
885 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
886 return NULL_TREE;
887
888 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
889 return NULL_TREE;
890
891 cur_op = op0;
892
893 switch (mode)
894 {
895 case 0:
896 gcc_assert (sign_p == UNSIGNED);
897 /* t1 = oprnd0 >> pre_shift;
898 t2 = t1 h* ml;
899 q = t2 >> post_shift; */
900 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
901 if (cur_op == NULL_TREE)
902 return NULL_TREE;
903 break;
904 case 1:
905 gcc_assert (sign_p == UNSIGNED);
906 for (i = 0; i < nunits; i++)
907 {
908 shift_temps[i] = 1;
909 post_shifts[i]--;
910 }
911 break;
912 case 2:
913 case 3:
914 case 4:
915 case 5:
916 gcc_assert (sign_p == SIGNED);
917 for (i = 0; i < nunits; i++)
918 shift_temps[i] = prec - 1;
919 break;
920 default:
921 return NULL_TREE;
922 }
923
924 tree_vector_builder vec (type, nunits, 1);
925 for (i = 0; i < nunits; i++)
926 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
927 mulcst = vec.build ();
928
929 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
930
931 switch (mode)
932 {
933 case 0:
934 /* t1 = oprnd0 >> pre_shift;
935 t2 = t1 h* ml;
936 q = t2 >> post_shift; */
937 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
938 break;
939 case 1:
940 /* t1 = oprnd0 h* ml;
941 t2 = oprnd0 - t1;
942 t3 = t2 >> 1;
943 t4 = t1 + t3;
944 q = t4 >> (post_shift - 1); */
945 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
946 if (op == unknown_optab
947 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
948 return NULL_TREE;
949 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
950 tem = add_rshift (gsi, type, tem, shift_temps);
951 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
952 if (op == unknown_optab
953 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
954 return NULL_TREE;
955 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
956 cur_op = add_rshift (gsi, type, tem, post_shifts);
957 if (cur_op == NULL_TREE)
958 return NULL_TREE;
959 break;
960 case 2:
961 case 3:
962 case 4:
963 case 5:
964 /* t1 = oprnd0 h* ml;
965 t2 = t1; [ iff (mode & 2) != 0 ]
966 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
967 t3 = t2 >> post_shift;
968 t4 = oprnd0 >> (prec - 1);
969 q = t3 - t4; [ iff (mode & 1) == 0 ]
970 q = t4 - t3; [ iff (mode & 1) != 0 ] */
971 if ((mode & 2) == 0)
972 {
973 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
974 if (op == unknown_optab
975 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
976 return NULL_TREE;
977 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
978 }
979 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
980 if (cur_op == NULL_TREE)
981 return NULL_TREE;
982 tem = add_rshift (gsi, type, op0, shift_temps);
983 if (tem == NULL_TREE)
984 return NULL_TREE;
985 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
986 if (op == unknown_optab
987 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
988 return NULL_TREE;
989 if ((mode & 1) == 0)
990 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
991 else
992 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
993 break;
994 default:
995 gcc_unreachable ();
996 }
997
998 if (code == TRUNC_DIV_EXPR)
999 return cur_op;
1000
1001 /* We divided. Now finish by:
1002 t1 = q * oprnd1;
1003 r = oprnd0 - t1; */
1004 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
1005 if (op == unknown_optab
1006 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1007 return NULL_TREE;
1008 if (use_abs_op1)
1009 {
1010 tree_vector_builder elts;
1011 if (!elts.new_unary_operation (type, op1, false))
1012 return NULL_TREE;
1013 unsigned int count = elts.encoded_nelts ();
1014 for (unsigned int i = 0; i < count; ++i)
1015 {
1016 tree elem1 = VECTOR_CST_ELT (op1, i);
1017
1018 tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
1019 if (elt == NULL_TREE)
1020 return NULL_TREE;
1021 elts.quick_push (elt);
1022 }
1023 op1 = elts.build ();
1024 }
1025 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
1026 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1027 if (op == unknown_optab
1028 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1029 return NULL_TREE;
1030 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
1031 }
1032
1033 /* Expand a vector condition to scalars, by using many conditions
1034 on the vector's elements. */
1035
1036 static bool
1037 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
1038 {
1039 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1040 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1041 tree a = gimple_assign_rhs1 (stmt);
1042 tree a1 = a;
1043 tree a2 = NULL_TREE;
1044 bool a_is_comparison = false;
1045 bool a_is_scalar_bitmask = false;
1046 tree b = gimple_assign_rhs2 (stmt);
1047 tree c = gimple_assign_rhs3 (stmt);
1048 vec<constructor_elt, va_gc> *v;
1049 tree constr;
1050 tree inner_type = TREE_TYPE (type);
1051 tree width = vector_element_bits_tree (type);
1052 tree cond_type = TREE_TYPE (TREE_TYPE (a));
1053 tree comp_inner_type = cond_type;
1054 tree index = bitsize_int (0);
1055 tree comp_width = width;
1056 tree comp_index = index;
1057 location_t loc = gimple_location (gsi_stmt (*gsi));
1058 tree_code code = TREE_CODE (a);
1059 gassign *assign = NULL;
1060
1061 if (code == SSA_NAME)
1062 {
1063 assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
1064 if (assign != NULL
1065 && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
1066 {
1067 a_is_comparison = true;
1068 a1 = gimple_assign_rhs1 (assign);
1069 a2 = gimple_assign_rhs2 (assign);
1070 code = gimple_assign_rhs_code (assign);
1071 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
1072 comp_width = vector_element_bits_tree (TREE_TYPE (a1));
1073 }
1074 }
1075
1076 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)
1077 || (integer_all_onesp (b) && integer_zerop (c)
1078 && expand_vec_cmp_expr_p (type, TREE_TYPE (a1), code)))
1079 {
1080 gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
1081 return true;
1082 }
1083
1084 /* If A has a vector boolean type and is a comparison, the
1085 expand_vec_cond_expr_p check above might fail, even if both the comparison
1086 and the VEC_COND_EXPR could be supported individually. See PR109176. */
1087 if (a_is_comparison
1088 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1089 && expand_vec_cond_expr_p (type, TREE_TYPE (a), SSA_NAME)
1090 && expand_vec_cmp_expr_p (TREE_TYPE (a1), TREE_TYPE (a), code))
1091 return true;
1092
1093 /* Handle vector boolean types with bitmasks. If there is a comparison
1094 and we can expand the comparison into the vector boolean bitmask,
1095 or otherwise if it is compatible with type, we can transform
1096 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
1097 into
1098 tmp_6 = x_2 < y_3;
1099 tmp_7 = tmp_6 & vbfld_4;
1100 tmp_8 = ~tmp_6;
1101 tmp_9 = tmp_8 & vbfld_5;
1102 vbfld_1 = tmp_7 | tmp_9;
1103 Similarly for vbfld_10 instead of x_2 < y_3. */
1104 if (VECTOR_BOOLEAN_TYPE_P (type)
1105 && SCALAR_INT_MODE_P (TYPE_MODE (type))
1106 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
1107 TYPE_VECTOR_SUBPARTS (type)
1108 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
1109 && (a_is_comparison
1110 ? useless_type_conversion_p (type, TREE_TYPE (a))
1111 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1112 {
1113 if (a_is_comparison)
1114 a = gimplify_build2 (gsi, code, type, a1, a2);
1115 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1116 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1117 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1118 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1119 gimple_assign_set_rhs_from_tree (gsi, a);
1120 update_stmt (gsi_stmt (*gsi));
1121 return true;
1122 }
1123
1124 /* TODO: try and find a smaller vector type. */
1125
1126 if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1127 warning_at (loc, OPT_Wvector_operation_performance,
1128 "vector condition will be expanded piecewise");
1129
1130 if (!a_is_comparison
1131 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1132 && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1133 && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1134 TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1135 * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1136 (TREE_TYPE (TREE_TYPE (a))))))
1137 {
1138 a_is_scalar_bitmask = true;
1139 int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1140 tree atype = build_nonstandard_integer_type (prec, 1);
1141 a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1142 }
1143 else if (!a_is_comparison
1144 && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)))
1145 comp_width = vector_element_bits_tree (TREE_TYPE (a));
1146
1147 int nunits = nunits_for_known_piecewise_op (type);
1148 vec_alloc (v, nunits);
1149 bool constant_p = true;
1150 for (int i = 0; i < nunits; i++)
1151 {
1152 tree aa, result;
1153 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1154 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1155 if (a_is_comparison)
1156 {
1157 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1158 comp_width, comp_index);
1159 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1160 comp_width, comp_index);
1161 aa = build2 (code, cond_type, aa1, aa2);
1162 }
1163 else if (a_is_scalar_bitmask)
1164 {
1165 wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1166 result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1167 a, wide_int_to_tree (TREE_TYPE (a), w));
1168 aa = build2 (NE_EXPR, boolean_type_node, result,
1169 build_zero_cst (TREE_TYPE (a)));
1170 }
1171 else
1172 aa = tree_vec_extract (gsi, cond_type, a, comp_width, comp_index);
1173 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1174 if (!CONSTANT_CLASS_P (result))
1175 constant_p = false;
1176 constructor_elt ce = {NULL_TREE, result};
1177 v->quick_push (ce);
1178 index = int_const_binop (PLUS_EXPR, index, width);
1179 if (width == comp_width)
1180 comp_index = index;
1181 else
1182 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1183 }
1184
1185 if (constant_p)
1186 constr = build_vector_from_ctor (type, v);
1187 else
1188 constr = build_constructor (type, v);
1189 gimple_assign_set_rhs_from_tree (gsi, constr);
1190 update_stmt (gsi_stmt (*gsi));
1191
1192 if (a_is_comparison)
1193 bitmap_set_bit (dce_ssa_names,
1194 SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1195
1196 return false;
1197 }
1198
1199 static tree
1200 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1201 gassign *assign, enum tree_code code,
1202 bitmap dce_ssa_names)
1203 {
1204 machine_mode compute_mode = TYPE_MODE (compute_type);
1205
1206 /* If the compute mode is not a vector mode (hence we are not decomposing
1207 a BLKmode vector to smaller, hardware-supported vectors), we may want
1208 to expand the operations in parallel. */
1209 if (!VECTOR_MODE_P (compute_mode))
1210 switch (code)
1211 {
1212 case PLUS_EXPR:
1213 case MINUS_EXPR:
1214 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1215 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1216 gimple_assign_rhs1 (assign),
1217 gimple_assign_rhs2 (assign), code);
1218 break;
1219
1220 case NEGATE_EXPR:
1221 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1222 return expand_vector_addition (gsi, do_unop, do_negate, type,
1223 gimple_assign_rhs1 (assign),
1224 NULL_TREE, code);
1225 break;
1226
1227 case BIT_AND_EXPR:
1228 case BIT_IOR_EXPR:
1229 case BIT_XOR_EXPR:
1230 return expand_vector_parallel (gsi, do_binop, type,
1231 gimple_assign_rhs1 (assign),
1232 gimple_assign_rhs2 (assign), code);
1233
1234 case BIT_NOT_EXPR:
1235 return expand_vector_parallel (gsi, do_unop, type,
1236 gimple_assign_rhs1 (assign),
1237 NULL_TREE, code);
1238 case EQ_EXPR:
1239 case NE_EXPR:
1240 case GT_EXPR:
1241 case LT_EXPR:
1242 case GE_EXPR:
1243 case LE_EXPR:
1244 case UNEQ_EXPR:
1245 case UNGT_EXPR:
1246 case UNLT_EXPR:
1247 case UNGE_EXPR:
1248 case UNLE_EXPR:
1249 case LTGT_EXPR:
1250 case ORDERED_EXPR:
1251 case UNORDERED_EXPR:
1252 {
1253 tree rhs1 = gimple_assign_rhs1 (assign);
1254 tree rhs2 = gimple_assign_rhs2 (assign);
1255
1256 return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
1257 dce_ssa_names);
1258 }
1259
1260 case TRUNC_DIV_EXPR:
1261 case TRUNC_MOD_EXPR:
1262 {
1263 tree rhs1 = gimple_assign_rhs1 (assign);
1264 tree rhs2 = gimple_assign_rhs2 (assign);
1265 tree ret;
1266
1267 if (!optimize
1268 || !VECTOR_INTEGER_TYPE_P (type)
1269 || TREE_CODE (rhs2) != VECTOR_CST
1270 || !VECTOR_MODE_P (TYPE_MODE (type)))
1271 break;
1272
1273 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1274 if (ret != NULL_TREE)
1275 return ret;
1276 break;
1277 }
1278
1279 default:
1280 break;
1281 }
1282
1283 if (TREE_CODE_CLASS (code) == tcc_unary)
1284 return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1285 gimple_assign_rhs1 (assign),
1286 NULL_TREE, code, false);
1287 else
1288 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1289 gimple_assign_rhs1 (assign),
1290 gimple_assign_rhs2 (assign), code, false);
1291 }
1292
1293 /* Try to optimize
1294 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1295 style stmts into:
1296 _9 = { b_7, b_7, b_7, b_7 };
1297 a_5 = _9 + { 0, 3, 6, 9 };
1298 because a vector splat operation is usually more efficient
1299 than piecewise initialization of the vector. */
1300
1301 static void
1302 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1303 {
1304 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1305 tree lhs = gimple_assign_lhs (stmt);
1306 tree rhs = gimple_assign_rhs1 (stmt);
1307 tree type = TREE_TYPE (rhs);
1308 unsigned int i, j;
1309 unsigned HOST_WIDE_INT nelts;
1310 bool all_same = true;
1311 constructor_elt *elt;
1312 gimple *g;
1313 tree base = NULL_TREE;
1314 optab op;
1315
1316 if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1317 || nelts <= 2
1318 || CONSTRUCTOR_NELTS (rhs) != nelts)
1319 return;
1320 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1321 if (op == unknown_optab
1322 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1323 return;
1324 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1325 if (TREE_CODE (elt->value) != SSA_NAME
1326 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1327 return;
1328 else
1329 {
1330 tree this_base = elt->value;
1331 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1332 all_same = false;
1333 for (j = 0; j < nelts + 1; j++)
1334 {
1335 g = SSA_NAME_DEF_STMT (this_base);
1336 if (is_gimple_assign (g)
1337 && gimple_assign_rhs_code (g) == PLUS_EXPR
1338 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1339 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1340 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1341 this_base = gimple_assign_rhs1 (g);
1342 else
1343 break;
1344 }
1345 if (i == 0)
1346 base = this_base;
1347 else if (this_base != base)
1348 return;
1349 }
1350 if (all_same)
1351 return;
1352 tree_vector_builder cst (type, nelts, 1);
1353 for (i = 0; i < nelts; i++)
1354 {
1355 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1356 tree elt = build_zero_cst (TREE_TYPE (base));
1357 while (this_base != base)
1358 {
1359 g = SSA_NAME_DEF_STMT (this_base);
1360 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1361 elt, gimple_assign_rhs2 (g));
1362 if (elt == NULL_TREE
1363 || TREE_CODE (elt) != INTEGER_CST
1364 || TREE_OVERFLOW (elt))
1365 return;
1366 this_base = gimple_assign_rhs1 (g);
1367 }
1368 cst.quick_push (elt);
1369 }
1370 for (i = 0; i < nelts; i++)
1371 CONSTRUCTOR_ELT (rhs, i)->value = base;
1372 g = gimple_build_assign (make_ssa_name (type), rhs);
1373 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1374 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1375 cst.build ());
1376 gsi_replace (gsi, g, false);
1377 }
1378
1379 /* Return a type for the widest vector mode with the same element type as
1380 type ORIGINAL_VECTOR_TYPE, with at most the same number of elements as type
1381 ORIGINAL_VECTOR_TYPE and that is supported by the target for an operation
1382 with optab OP, or return NULL_TREE if none is found. */
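/* For instance (illustrative; the result depends on which modes the
   target implements OP for), asking for the widest type for a 32-element
   char vector on a target that only supports 16-byte vector modes would
   yield the 16-element char vector type, letting the caller split the
   operation into halves.  */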
1383
1384 static tree
1385 type_for_widest_vector_mode (tree original_vector_type, optab op)
1386 {
1387 gcc_assert (VECTOR_TYPE_P (original_vector_type));
1388 tree type = TREE_TYPE (original_vector_type);
1389 machine_mode inner_mode = TYPE_MODE (type);
1390 machine_mode best_mode = VOIDmode, mode;
1391 poly_int64 best_nunits = 0;
1392
1393 if (SCALAR_FLOAT_MODE_P (inner_mode))
1394 mode = MIN_MODE_VECTOR_FLOAT;
1395 else if (SCALAR_FRACT_MODE_P (inner_mode))
1396 mode = MIN_MODE_VECTOR_FRACT;
1397 else if (SCALAR_UFRACT_MODE_P (inner_mode))
1398 mode = MIN_MODE_VECTOR_UFRACT;
1399 else if (SCALAR_ACCUM_MODE_P (inner_mode))
1400 mode = MIN_MODE_VECTOR_ACCUM;
1401 else if (SCALAR_UACCUM_MODE_P (inner_mode))
1402 mode = MIN_MODE_VECTOR_UACCUM;
1403 else if (inner_mode == BImode)
1404 mode = MIN_MODE_VECTOR_BOOL;
1405 else
1406 mode = MIN_MODE_VECTOR_INT;
1407
1408 FOR_EACH_MODE_FROM (mode, mode)
1409 if (GET_MODE_INNER (mode) == inner_mode
1410 && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1411 && optab_handler (op, mode) != CODE_FOR_nothing
1412 && known_le (GET_MODE_NUNITS (mode),
1413 TYPE_VECTOR_SUBPARTS (original_vector_type)))
1414 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1415
1416 if (best_mode == VOIDmode)
1417 return NULL_TREE;
1418 else
1419 return build_vector_type_for_mode (type, best_mode);
1420 }
1421
1422
1423 /* Build a reference to the element of the vector VECT. The function
1424 returns either the element itself, a BIT_FIELD_REF, or an
1425 ARRAY_REF expression.
1426 
1427 GSI is required to insert temporary variables while building a
1428 reference to the element of the vector VECT.
1429 
1430 PTMPVEC is a pointer to the temporary variable used for caching
1431 purposes. If PTMPVEC is NULL, a new temporary variable
1432 will be created. */
1433 static tree
1434 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1435 {
1436 tree vect_type, vect_elt_type;
1437 gimple *asgn;
1438 tree tmpvec;
1439 tree arraytype;
1440 bool need_asgn = true;
1441 unsigned int elements;
1442
1443 vect_type = TREE_TYPE (vect);
1444 vect_elt_type = TREE_TYPE (vect_type);
1445 elements = nunits_for_known_piecewise_op (vect_type);
1446
1447 if (TREE_CODE (idx) == INTEGER_CST)
1448 {
1449 unsigned HOST_WIDE_INT index;
1450
1451 /* Given that we're about to compute a binary modulus,
1452 we don't care about the high bits of the value. */
1453 index = TREE_INT_CST_LOW (idx);
1454 if (!tree_fits_uhwi_p (idx) || index >= elements)
1455 {
1456 index &= elements - 1;
1457 idx = build_int_cst (TREE_TYPE (idx), index);
1458 }
1459
1460 /* When lowering a vector statement sequence do some easy
1461 simplification by looking through intermediate vector results. */
1462 if (TREE_CODE (vect) == SSA_NAME)
1463 {
1464 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1465 if (is_gimple_assign (def_stmt)
1466 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1467 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1468 vect = gimple_assign_rhs1 (def_stmt);
1469 }
1470
1471 if (TREE_CODE (vect) == VECTOR_CST)
1472 return VECTOR_CST_ELT (vect, index);
1473 else if (TREE_CODE (vect) == CONSTRUCTOR
1474 && (CONSTRUCTOR_NELTS (vect) == 0
1475 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1476 != VECTOR_TYPE))
1477 {
1478 if (index < CONSTRUCTOR_NELTS (vect))
1479 return CONSTRUCTOR_ELT (vect, index)->value;
1480 return build_zero_cst (vect_elt_type);
1481 }
1482 else
1483 {
1484 tree size = vector_element_bits_tree (vect_type);
1485 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1486 size);
1487 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1488 }
1489 }
1490
1491 if (!ptmpvec)
1492 tmpvec = create_tmp_var (vect_type, "vectmp");
1493 else if (!*ptmpvec)
1494 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1495 else
1496 {
1497 tmpvec = *ptmpvec;
1498 need_asgn = false;
1499 }
1500
1501 if (need_asgn)
1502 {
1503 TREE_ADDRESSABLE (tmpvec) = 1;
1504 asgn = gimple_build_assign (tmpvec, vect);
1505 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1506 }
1507
1508 arraytype = build_array_type_nelts (vect_elt_type, elements);
1509 return build4 (ARRAY_REF, vect_elt_type,
1510 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1511 idx, NULL_TREE, NULL_TREE);
1512 }
1513
1514 /* Check if VEC_PERM_EXPR within the given setting is supported
1515 by hardware, or lower it piecewise.
1516
1517 When VEC_PERM_EXPR has the same first and second operands:
1518 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1519 {v0[mask[0]], v0[mask[1]], ...}
1520 MASK and V0 must have the same number of elements.
1521
1522 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1523 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1524 V0 and V1 must have the same type. MASK, V0 and V1 must have the
1525 same number of elements. */
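/* For example (purely illustrative), with four-element vectors
   VEC_PERM_EXPR <{a,b,c,d}, {e,f,g,h}, {0,5,2,7}> selects element 0 of
   the first operand, element 5 - 4 = 1 of the second, and so on, giving
   {a, f, c, h}; the loop below emits that selection one element at a
   time when no matching permute instruction exists.  */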
1526
1527 static void
1528 lower_vec_perm (gimple_stmt_iterator *gsi)
1529 {
1530 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1531 tree mask = gimple_assign_rhs3 (stmt);
1532 tree vec0 = gimple_assign_rhs1 (stmt);
1533 tree vec1 = gimple_assign_rhs2 (stmt);
1534 tree vect_type = TREE_TYPE (vec0);
1535 tree mask_type = TREE_TYPE (mask);
1536 tree vect_elt_type = TREE_TYPE (vect_type);
1537 tree mask_elt_type = TREE_TYPE (mask_type);
1538 unsigned HOST_WIDE_INT elements;
1539 vec<constructor_elt, va_gc> *v;
1540 tree constr, t, si, i_val;
1541 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1542 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1543 location_t loc = gimple_location (gsi_stmt (*gsi));
1544 unsigned i;
1545
1546 if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1547 return;
1548
1549 if (TREE_CODE (mask) == SSA_NAME)
1550 {
1551 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1552 if (is_gimple_assign (def_stmt)
1553 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1554 mask = gimple_assign_rhs1 (def_stmt);
1555 }
1556
1557 vec_perm_builder sel_int;
1558
1559 if (TREE_CODE (mask) == VECTOR_CST
1560 && tree_to_vec_perm_builder (&sel_int, mask))
1561 {
1562 vec_perm_indices indices (sel_int, 2, elements);
1563 if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1564 {
1565 gimple_assign_set_rhs3 (stmt, mask);
1566 update_stmt (stmt);
1567 return;
1568 }
1569 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1570 vector as VEC1 and a right element shift MASK. */
1571 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1572 != CODE_FOR_nothing
1573 && TREE_CODE (vec1) == VECTOR_CST
1574 && initializer_zerop (vec1)
1575 && maybe_ne (indices[0], 0)
1576 && known_lt (poly_uint64 (indices[0]), elements))
1577 {
1578 bool ok_p = indices.series_p (0, 1, indices[0], 1);
1579 if (!ok_p)
1580 {
1581 for (i = 1; i < elements; ++i)
1582 {
1583 poly_uint64 actual = indices[i];
1584 poly_uint64 expected = i + indices[0];
1585 /* Indices into the second vector are all equivalent. */
1586 if (maybe_lt (actual, elements)
1587 ? maybe_ne (actual, expected)
1588 : maybe_lt (expected, elements))
1589 break;
1590 }
1591 ok_p = i == elements;
1592 }
1593 if (ok_p)
1594 {
1595 gimple_assign_set_rhs3 (stmt, mask);
1596 update_stmt (stmt);
1597 return;
1598 }
1599 }
1600 /* And similarly vec_shl pattern. */
1601 if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1602 != CODE_FOR_nothing
1603 && TREE_CODE (vec0) == VECTOR_CST
1604 && initializer_zerop (vec0))
1605 {
1606 unsigned int first = 0;
1607 for (i = 0; i < elements; ++i)
1608 if (known_eq (poly_uint64 (indices[i]), elements))
1609 {
1610 if (i == 0 || first)
1611 break;
1612 first = i;
1613 }
1614 else if (first
1615 ? maybe_ne (poly_uint64 (indices[i]),
1616 elements + i - first)
1617 : maybe_ge (poly_uint64 (indices[i]), elements))
1618 break;
1619 if (first && i == elements)
1620 {
1621 gimple_assign_set_rhs3 (stmt, mask);
1622 update_stmt (stmt);
1623 return;
1624 }
1625 }
1626 }
1627 else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1628 return;
1629
1630 if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1631 warning_at (loc, OPT_Wvector_operation_performance,
1632 "vector shuffling operation will be expanded piecewise");
1633
1634 vec_alloc (v, elements);
1635 bool constant_p = true;
1636 for (i = 0; i < elements; i++)
1637 {
1638 si = size_int (i);
1639 i_val = vector_element (gsi, mask, si, &masktmp);
1640
1641 if (TREE_CODE (i_val) == INTEGER_CST)
1642 {
1643 unsigned HOST_WIDE_INT index;
1644
1645 index = TREE_INT_CST_LOW (i_val);
1646 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1647 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1648
1649 if (two_operand_p && (index & elements) != 0)
1650 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1651 else
1652 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1653
1654 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1655 true, GSI_SAME_STMT);
1656 }
1657 else
1658 {
1659 tree cond = NULL_TREE, v0_val;
1660
1661 if (two_operand_p)
1662 {
1663 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1664 build_int_cst (mask_elt_type, elements));
1665 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1666 true, GSI_SAME_STMT);
1667 }
1668
1669 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1670 build_int_cst (mask_elt_type, elements - 1));
1671 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1672 true, GSI_SAME_STMT);
1673
1674 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1675 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1676 true, GSI_SAME_STMT);
1677
1678 if (two_operand_p)
1679 {
1680 tree v1_val;
1681
1682 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1683 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1684 true, GSI_SAME_STMT);
1685
1686 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1687 cond, build_zero_cst (mask_elt_type));
1688 cond = fold_build3 (COND_EXPR, vect_elt_type,
1689 cond, v0_val, v1_val);
1690 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1691 true, GSI_SAME_STMT);
1692 }
1693 else
1694 t = v0_val;
1695 }
1696
1697 if (!CONSTANT_CLASS_P (t))
1698 constant_p = false;
1699 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1700 }
1701
1702 if (constant_p)
1703 constr = build_vector_from_ctor (vect_type, v);
1704 else
1705 constr = build_constructor (vect_type, v);
1706 gimple_assign_set_rhs_from_tree (gsi, constr);
1707 update_stmt (gsi_stmt (*gsi));
1708 }
1709
1710 /* If OP is a uniform vector, return the element it is a splat from. */
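/* E.g. the VECTOR_CST { 4, 4, 4, 4 } and a CONSTRUCTOR
   { x_1, x_1, x_1, x_1 } are both uniform; 4 resp. x_1 is returned.  */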
1711
1712 static tree
1713 ssa_uniform_vector_p (tree op)
1714 {
1715 if (TREE_CODE (op) == VECTOR_CST
1716 || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1717 || TREE_CODE (op) == CONSTRUCTOR)
1718 return uniform_vector_p (op);
1719 if (TREE_CODE (op) == SSA_NAME)
1720 {
1721 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1722 if (gimple_assign_single_p (def_stmt))
1723 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1724 }
1725 return NULL_TREE;
1726 }
1727
1728 /* Return type in which CODE operation with optab OP can be
1729 computed. */
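/* For instance, a PLUS_EXPR on an 8-element int vector on a target that
   only implements V4SImode addition yields the V4SI type, so callers
   split the operation into two halves; with no suitable vector mode at
   all the scalar element type is returned instead.  */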
1730
1731 static tree
1732 get_compute_type (enum tree_code code, optab op, tree type)
1733 {
1734 /* For very wide vectors, try using a smaller vector mode. */
1735 tree compute_type = type;
1736 if (op
1737 && (!VECTOR_MODE_P (TYPE_MODE (type))
1738 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1739 {
1740 tree vector_compute_type
1741 = type_for_widest_vector_mode (type, op);
1742 if (vector_compute_type != NULL_TREE
1743 && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1744 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1745 != CODE_FOR_nothing))
1746 compute_type = vector_compute_type;
1747 }
1748
1749 /* If we are breaking a BLKmode vector into smaller pieces,
1750 type_for_widest_vector_mode has already looked into the optab,
1751 so skip these checks. */
1752 if (compute_type == type)
1753 {
1754 machine_mode compute_mode = TYPE_MODE (compute_type);
1755 if (VECTOR_MODE_P (compute_mode))
1756 {
1757 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1758 return compute_type;
1759 if (code == MULT_HIGHPART_EXPR
1760 && can_mult_highpart_p (compute_mode,
1761 TYPE_UNSIGNED (compute_type)))
1762 return compute_type;
1763 }
1764 /* There is no operation in hardware, so fall back to scalars. */
1765 compute_type = TREE_TYPE (type);
1766 }
1767
1768 return compute_type;
1769 }
1770
1771 static tree
1772 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1773 tree bitpos, tree bitsize, enum tree_code code,
1774 tree type ATTRIBUTE_UNUSED)
1775 {
1776 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1777 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1778 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1779 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1780 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1781 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1782 }
1783
1784 /* Expand a vector COND_EXPR to scalars, piecewise. */
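/* This is used for statements like lhs = c ? v1 : v2 where C is a scalar
   condition but LHS has a BLKmode vector type; the same condition is
   shared by every extracted piece (see do_cond above).  */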
1785 static void
1786 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1787 {
1788 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1789 tree lhs = gimple_assign_lhs (stmt);
1790 tree type = TREE_TYPE (lhs);
1791 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1792 machine_mode compute_mode = TYPE_MODE (compute_type);
1793 gcc_assert (compute_mode != BLKmode);
1794 tree rhs2 = gimple_assign_rhs2 (stmt);
1795 tree rhs3 = gimple_assign_rhs3 (stmt);
1796 tree new_rhs;
1797
1798 /* If the compute mode is not a vector mode (hence we are not decomposing
1799 a BLKmode vector to smaller, hardware-supported vectors), we may want
1800 to expand the operations in parallel. */
1801 if (!VECTOR_MODE_P (compute_mode))
1802 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1803 COND_EXPR);
1804 else
1805 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1806 rhs2, rhs3, COND_EXPR, false);
1807 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1808 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1809 new_rhs);
1810
1811 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1812 way to do it is change expand_vector_operation and its callees to
1813 return a tree_code, RHS1 and RHS2 instead of a tree. */
1814 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1815 update_stmt (gsi_stmt (*gsi));
1816 }
1817
1818 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1819 lowering. If INNER_TYPE is not a vector type, this is a scalar
1820 fallback. */
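/* E.g. with a scalar INNER_TYPE one element is extracted and converted with
   CODE; with a vector INNER_TYPE either DECL is called on the extracted
   subvector (CODE == CALL_EXPR) or a vector conversion with the same
   number of elements is built.  */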
1821
1822 static tree
1823 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1824 tree decl, tree bitpos, tree bitsize,
1825 enum tree_code code, tree type)
1826 {
1827 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1828 if (!VECTOR_TYPE_P (inner_type))
1829 return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1830 if (code == CALL_EXPR)
1831 {
1832 gimple *g = gimple_build_call (decl, 1, a);
1833 tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1834 gimple_call_set_lhs (g, lhs);
1835 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1836 return lhs;
1837 }
1838 else
1839 {
1840 tree outer_type = build_vector_type (TREE_TYPE (type),
1841 TYPE_VECTOR_SUBPARTS (inner_type));
1842 return gimplify_build1 (gsi, code, outer_type, a);
1843 }
1844 }
1845
1846 /* Similarly, but for narrowing conversion. */
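/* E.g. when narrowing V4DImode pieces to SImode elements, the two
   V2DImode halves of A at BITPOS and BITPOS + half-size are extracted
   and combined with a VEC_PACK_TRUNC_EXPR into a V4SImode result.  */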
1847
1848 static tree
1849 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1850 tree, tree bitpos, tree, enum tree_code code,
1851 tree type)
1852 {
1853 tree itype = build_vector_type (TREE_TYPE (inner_type),
1854 exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1855 2));
1856 tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1857 tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1858 int_const_binop (PLUS_EXPR, bitpos,
1859 TYPE_SIZE (itype)));
1860 tree outer_type = build_vector_type (TREE_TYPE (type),
1861 TYPE_VECTOR_SUBPARTS (inner_type));
1862 return gimplify_build2 (gsi, code, outer_type, b, c);
1863 }
1864
1865 /* Expand VEC_CONVERT ifn call. */
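/* The call has the form lhs = .VEC_CONVERT (arg) (emitted e.g. for
   __builtin_convertvector) and is lowered to a supported conversion,
   a pack/unpack sequence, or a piecewise expansion.  */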
1866
1867 static void
1868 expand_vector_conversion (gimple_stmt_iterator *gsi)
1869 {
1870 gimple *stmt = gsi_stmt (*gsi);
1871 gimple *g;
1872 tree lhs = gimple_call_lhs (stmt);
1873 if (lhs == NULL_TREE)
1874 {
1875 g = gimple_build_nop ();
1876 gsi_replace (gsi, g, false);
1877 return;
1878 }
1879 tree arg = gimple_call_arg (stmt, 0);
1880 tree ret_type = TREE_TYPE (lhs);
1881 tree arg_type = TREE_TYPE (arg);
1882 tree new_rhs, compute_type = TREE_TYPE (arg_type);
1883 enum tree_code code = NOP_EXPR;
1884 enum tree_code code1 = ERROR_MARK;
1885 enum { NARROW, NONE, WIDEN } modifier = NONE;
1886 optab optab1 = unknown_optab;
1887
1888 gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1889 if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1890 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1891 code = FIX_TRUNC_EXPR;
1892 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1893 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1894 code = FLOAT_EXPR;
1895 unsigned int ret_elt_bits = vector_element_bits (ret_type);
1896 unsigned int arg_elt_bits = vector_element_bits (arg_type);
1897 if (ret_elt_bits < arg_elt_bits)
1898 modifier = NARROW;
1899 else if (ret_elt_bits > arg_elt_bits)
1900 modifier = WIDEN;
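/* E.g. a V4DF -> V4SI conversion narrows (64-bit to 32-bit elements),
   V4SI -> V4DF widens, and V4SI -> V4SF keeps the element width.  */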
1901
1902 if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1903 {
1904 if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1905 {
1906 g = gimple_build_assign (lhs, code1, arg);
1907 gsi_replace (gsi, g, false);
1908 return;
1909 }
1910 /* Can't use get_compute_type here, as supportable_convert_operation
1911 doesn't necessarily use an optab and needs two arguments. */
1912 tree vec_compute_type
1913 = type_for_widest_vector_mode (arg_type, mov_optab);
1914 if (vec_compute_type
1915 && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
1916 {
1917 unsigned HOST_WIDE_INT nelts
1918 = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1919 while (nelts > 1)
1920 {
1921 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1922 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1923 if (supportable_convert_operation (code, ret1_type, arg1_type,
1924 &code1))
1925 {
1926 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1927 ret_type, arg1_type, arg,
1928 NULL_TREE, code1, false);
1929 g = gimple_build_assign (lhs, new_rhs);
1930 gsi_replace (gsi, g, false);
1931 return;
1932 }
1933 nelts = nelts / 2;
1934 }
1935 }
1936 }
1937 else if (modifier == NARROW)
1938 {
1939 switch (code)
1940 {
1941 CASE_CONVERT:
1942 code1 = VEC_PACK_TRUNC_EXPR;
1943 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1944 break;
1945 case FIX_TRUNC_EXPR:
1946 code1 = VEC_PACK_FIX_TRUNC_EXPR;
1947 /* The signedness is determined from the output operand. */
1948 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1949 break;
1950 case FLOAT_EXPR:
1951 code1 = VEC_PACK_FLOAT_EXPR;
1952 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1953 break;
1954 default:
1955 gcc_unreachable ();
1956 }
1957
1958 if (optab1)
1959 compute_type = get_compute_type (code1, optab1, arg_type);
1960 enum insn_code icode1;
1961 if (VECTOR_TYPE_P (compute_type)
1962 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1963 != CODE_FOR_nothing)
1964 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1965 {
1966 tree cretd_type
1967 = build_vector_type (TREE_TYPE (ret_type),
1968 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1969 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1970 {
1971 if (compute_type == arg_type)
1972 {
1973 new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1974 arg, build_zero_cst (arg_type));
1975 new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1976 TYPE_SIZE (ret_type),
1977 bitsize_int (0));
1978 g = gimple_build_assign (lhs, new_rhs);
1979 gsi_replace (gsi, g, false);
1980 return;
1981 }
1982 tree dcompute_type
1983 = build_vector_type (TREE_TYPE (compute_type),
1984 TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1985 if (TYPE_MAIN_VARIANT (dcompute_type)
1986 == TYPE_MAIN_VARIANT (arg_type))
1987 new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1988 NULL_TREE, bitsize_int (0),
1989 NULL_TREE, code1,
1990 ret_type);
1991 else
1992 new_rhs = expand_vector_piecewise (gsi,
1993 do_vec_narrow_conversion,
1994 arg_type, dcompute_type,
1995 arg, NULL_TREE, code1,
1996 false, ret_type);
1997 g = gimple_build_assign (lhs, new_rhs);
1998 gsi_replace (gsi, g, false);
1999 return;
2000 }
2001 }
2002 }
2003 else if (modifier == WIDEN)
2004 {
2005 enum tree_code code2 = ERROR_MARK;
2006 optab optab2 = unknown_optab;
2007 switch (code)
2008 {
2009 CASE_CONVERT:
2010 code1 = VEC_UNPACK_LO_EXPR;
2011 code2 = VEC_UNPACK_HI_EXPR;
2012 break;
2013 case FIX_TRUNC_EXPR:
2014 code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
2015 code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
2016 break;
2017 case FLOAT_EXPR:
2018 code1 = VEC_UNPACK_FLOAT_LO_EXPR;
2019 code2 = VEC_UNPACK_FLOAT_HI_EXPR;
2020 break;
2021 default:
2022 gcc_unreachable ();
2023 }
2024 if (BYTES_BIG_ENDIAN)
2025 std::swap (code1, code2);
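/* Presumably because on big-endian targets element 0 lives in the "high"
   half of the wide mode, the LO/HI unpack codes have to be swapped to
   keep the result elements in order.  */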
2026
2027 if (code == FIX_TRUNC_EXPR)
2028 {
2029 /* The signedness is determined from the output operand. */
2030 optab1 = optab_for_tree_code (code1, ret_type, optab_default);
2031 optab2 = optab_for_tree_code (code2, ret_type, optab_default);
2032 }
2033 else
2034 {
2035 optab1 = optab_for_tree_code (code1, arg_type, optab_default);
2036 optab2 = optab_for_tree_code (code2, arg_type, optab_default);
2037 }
2038
2039 if (optab1 && optab2)
2040 compute_type = get_compute_type (code1, optab1, arg_type);
2041
2042 enum insn_code icode1, icode2;
2043 if (VECTOR_TYPE_P (compute_type)
2044 && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
2045 != CODE_FOR_nothing)
2046 && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
2047 != CODE_FOR_nothing)
2048 && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
2049 && (insn_data[icode1].operand[0].mode
2050 == insn_data[icode2].operand[0].mode))
2051 {
2052 poly_uint64 nunits
2053 = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
2054 tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
2055 if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
2056 {
2057 vec<constructor_elt, va_gc> *v;
2058 tree part_width = TYPE_SIZE (compute_type);
2059 tree index = bitsize_int (0);
2060 int nunits = nunits_for_known_piecewise_op (arg_type);
2061 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
2062 int i;
2063 location_t loc = gimple_location (gsi_stmt (*gsi));
2064
2065 if (compute_type != arg_type)
2066 {
2067 if (!warning_suppressed_p (gsi_stmt (*gsi),
2068 OPT_Wvector_operation_performance))
2069 warning_at (loc, OPT_Wvector_operation_performance,
2070 "vector operation will be expanded piecewise");
2071 }
2072 else
2073 {
2074 nunits = 1;
2075 delta = 1;
2076 }
2077
2078 vec_alloc (v, (nunits + delta - 1) / delta * 2);
2079 bool constant_p = true;
2080 for (i = 0; i < nunits;
2081 i += delta, index = int_const_binop (PLUS_EXPR, index,
2082 part_width))
2083 {
2084 tree a = arg;
2085 if (compute_type != arg_type)
2086 a = tree_vec_extract (gsi, compute_type, a, part_width,
2087 index);
2088 tree result = gimplify_build1 (gsi, code1, cretd_type, a);
2089 constructor_elt ce = { NULL_TREE, result };
2090 if (!CONSTANT_CLASS_P (ce.value))
2091 constant_p = false;
2092 v->quick_push (ce);
2093 ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
2094 if (!CONSTANT_CLASS_P (ce.value))
2095 constant_p = false;
2096 v->quick_push (ce);
2097 }
2098
2099 if (constant_p)
2100 new_rhs = build_vector_from_ctor (ret_type, v);
2101 else
2102 new_rhs = build_constructor (ret_type, v);
2103 g = gimple_build_assign (lhs, new_rhs);
2104 gsi_replace (gsi, g, false);
2105 return;
2106 }
2107 }
2108 }
2109
2110 new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
2111 TREE_TYPE (arg_type), arg,
2112 NULL_TREE, code, false, ret_type);
2113 g = gimple_build_assign (lhs, new_rhs);
2114 gsi_replace (gsi, g, false);
2115 }
2116
2117 /* Process one statement. If we identify a vector operation, expand it. */
2118
2119 static void
2120 expand_vector_operations_1 (gimple_stmt_iterator *gsi,
2121 bitmap dce_ssa_names)
2122 {
2123 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
2124 enum tree_code code;
2125 optab op = unknown_optab;
2126 enum gimple_rhs_class rhs_class;
2127 tree new_rhs;
2128
2129 /* Only consider code == GIMPLE_ASSIGN. */
2130 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
2131 if (!stmt)
2132 {
2133 if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
2134 expand_vector_conversion (gsi);
2135 return;
2136 }
2137
2138 code = gimple_assign_rhs_code (stmt);
2139 rhs_class = get_gimple_rhs_class (code);
2140 lhs = gimple_assign_lhs (stmt);
2141
2142 if (code == VEC_PERM_EXPR)
2143 {
2144 lower_vec_perm (gsi);
2145 return;
2146 }
2147
2148 if (code == VEC_COND_EXPR)
2149 {
2150 expand_vector_condition (gsi, dce_ssa_names);
2151 return;
2152 }
2153
2154 if (code == COND_EXPR
2155 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2156 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2157 {
2158 expand_vector_scalar_condition (gsi);
2159 return;
2160 }
2161
2162 if (code == CONSTRUCTOR
2163 && TREE_CODE (lhs) == SSA_NAME
2164 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2165 && !gimple_clobber_p (stmt)
2166 && optimize)
2167 {
2168 optimize_vector_constructor (gsi);
2169 return;
2170 }
2171
2172 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2173 return;
2174
2175 rhs1 = gimple_assign_rhs1 (stmt);
2176 if (rhs_class == GIMPLE_BINARY_RHS)
2177 rhs2 = gimple_assign_rhs2 (stmt);
2178
2179 type = TREE_TYPE (lhs);
2180 if (!VECTOR_TYPE_P (type)
2181 || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2182 return;
2183
2184 /* A scalar operation pretending to be a vector one. */
2185 if (VECTOR_BOOLEAN_TYPE_P (type)
2186 && !VECTOR_MODE_P (TYPE_MODE (type))
2187 && TYPE_MODE (type) != BLKmode
2188 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2189 || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2190 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2191 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2192 return;
2193
2194 /* If the vector operation operates on the same value in every element,
2195 implement it as a scalar operation followed by a splat, provided the
2196 target supports the scalar operation. */
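/* E.g. { x, x, x, x } + { y, y, y, y } becomes tem = x + y;
   lhs = { tem, tem, tem, tem }.  */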
2197 tree srhs1, srhs2 = NULL_TREE;
2198 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2199 && (rhs2 == NULL_TREE
2200 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2201 && (srhs2 = rhs2))
2202 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2203 /* As we query direct optabs restrict to non-convert operations. */
2204 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2205 {
2206 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2207 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2208 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2209 {
2210 tree stype = TREE_TYPE (TREE_TYPE (lhs));
2211 tree slhs = (rhs2 != NULL_TREE)
2212 ? gimplify_build2 (gsi, code, stype, srhs1, srhs2)
2213 : gimplify_build1 (gsi, code, stype, srhs1);
2214 gimple_assign_set_rhs_from_tree (gsi,
2215 build_vector_from_val (type, slhs));
2216 update_stmt (stmt);
2217 return;
2218 }
2219 }
2220
2221 if (CONVERT_EXPR_CODE_P (code)
2222 || code == FLOAT_EXPR
2223 || code == FIX_TRUNC_EXPR
2224 || code == VIEW_CONVERT_EXPR)
2225 return;
2226
2227 /* The signedness is determined from the input argument. */
2228 if (code == VEC_UNPACK_FLOAT_HI_EXPR
2229 || code == VEC_UNPACK_FLOAT_LO_EXPR
2230 || code == VEC_PACK_FLOAT_EXPR)
2231 {
2232 /* We do not know how to scalarize those. */
2233 return;
2234 }
2235
2236 /* For widening/narrowing vector operations, the relevant type is that of
2237 the arguments, not of the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
2238 handled in the same way above. */
2239 if (code == WIDEN_SUM_EXPR
2240 || code == VEC_WIDEN_PLUS_HI_EXPR
2241 || code == VEC_WIDEN_PLUS_LO_EXPR
2242 || code == VEC_WIDEN_MINUS_HI_EXPR
2243 || code == VEC_WIDEN_MINUS_LO_EXPR
2244 || code == VEC_WIDEN_MULT_HI_EXPR
2245 || code == VEC_WIDEN_MULT_LO_EXPR
2246 || code == VEC_WIDEN_MULT_EVEN_EXPR
2247 || code == VEC_WIDEN_MULT_ODD_EXPR
2248 || code == VEC_UNPACK_HI_EXPR
2249 || code == VEC_UNPACK_LO_EXPR
2250 || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2251 || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2252 || code == VEC_PACK_TRUNC_EXPR
2253 || code == VEC_PACK_SAT_EXPR
2254 || code == VEC_PACK_FIX_TRUNC_EXPR
2255 || code == VEC_WIDEN_LSHIFT_HI_EXPR
2256 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2257 {
2258 /* We do not know how to scalarize those. */
2259 return;
2260 }
2261
2262 /* Choose between vector shift/rotate by vector and vector shift/rotate by
2263 scalar */
2264 if (code == LSHIFT_EXPR
2265 || code == RSHIFT_EXPR
2266 || code == LROTATE_EXPR
2267 || code == RROTATE_EXPR)
2268 {
2269 optab opv;
2270
2271 /* Check whether we have vector <op> {x,x,x,x} where x
2272 could be a scalar variable or a constant. Transform
2273 vector <op> {x,x,x,x} ==> vector <op> scalar. */
2274 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2275 {
2276 tree first;
2277
2278 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2279 {
2280 gimple_assign_set_rhs2 (stmt, first);
2281 update_stmt (stmt);
2282 rhs2 = first;
2283 }
2284 }
2285
2286 opv = optab_for_tree_code (code, type, optab_vector);
2287 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2288 op = opv;
2289 else
2290 {
2291 op = optab_for_tree_code (code, type, optab_scalar);
2292
2293 compute_type = get_compute_type (code, op, type);
2294 if (compute_type == type)
2295 return;
2296 /* The rtl expander will expand vector/scalar as vector/vector
2297 if necessary. Pick one with wider vector type. */
2298 tree compute_vtype = get_compute_type (code, opv, type);
2299 if (subparts_gt (compute_vtype, compute_type))
2300 {
2301 compute_type = compute_vtype;
2302 op = opv;
2303 }
2304 }
2305
2306 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2307 {
2308 if (compute_type == NULL_TREE)
2309 compute_type = get_compute_type (code, op, type);
2310 if (compute_type == type)
2311 return;
2312 /* Before splitting vector rotates into scalar rotates,
2313 see if we can't use vector shifts and BIT_IOR_EXPR
2314 instead. For vector by vector rotates we'd also
2315 need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
2316 for now, fold doesn't seem to create such rotates anyway. */
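/* A rotate by a scalar amount N is essentially
   (x << N) | (x >> (PREC - N)), hence the three optabs checked below.  */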
2317 if (compute_type == TREE_TYPE (type)
2318 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2319 {
2320 optab oplv = vashl_optab, opl = ashl_optab;
2321 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2322 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2323 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2324 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2325 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2326 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2327 /* The rtl expander will expand vector/scalar as vector/vector
2328 if necessary. Pick one with wider vector type. */
2329 if (subparts_gt (compute_lvtype, compute_ltype))
2330 {
2331 compute_ltype = compute_lvtype;
2332 opl = oplv;
2333 }
2334 if (subparts_gt (compute_rvtype, compute_rtype))
2335 {
2336 compute_rtype = compute_rvtype;
2337 opr = oprv;
2338 }
2339 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2340 BIT_IOR_EXPR. */
2341 compute_type = compute_ltype;
2342 if (subparts_gt (compute_type, compute_rtype))
2343 compute_type = compute_rtype;
2344 if (subparts_gt (compute_type, compute_otype))
2345 compute_type = compute_otype;
2346 /* Verify all 3 operations can be performed in that type. */
2347 if (compute_type != TREE_TYPE (type))
2348 {
2349 if (optab_handler (opl, TYPE_MODE (compute_type))
2350 == CODE_FOR_nothing
2351 || optab_handler (opr, TYPE_MODE (compute_type))
2352 == CODE_FOR_nothing
2353 || optab_handler (opo, TYPE_MODE (compute_type))
2354 == CODE_FOR_nothing)
2355 compute_type = TREE_TYPE (type);
2356 }
2357 }
2358 }
2359 }
2360 else
2361 op = optab_for_tree_code (code, type, optab_default);
2362
2363 /* Optabs will try converting a negation into a subtraction, so
2364 look for it as well. TODO: negation of floating-point vectors
2365 might be turned into an exclusive OR toggling the sign bit. */
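/* I.e. -x can be computed as 0 - x, so the MINUS optab below is enough
   to pick a vector compute type for integral negation.  */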
2366 if (op == unknown_optab
2367 && code == NEGATE_EXPR
2368 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2369 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2370
2371 if (compute_type == NULL_TREE)
2372 compute_type = get_compute_type (code, op, type);
2373 if (compute_type == type)
2374 return;
2375
2376 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
2377 dce_ssa_names);
2378
2379 /* Leave expression untouched for later expansion. */
2380 if (new_rhs == NULL_TREE)
2381 return;
2382
2383 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2384 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2385 new_rhs);
2386
2387 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
2388 way to do it is change expand_vector_operation and its callees to
2389 return a tree_code, RHS1 and RHS2 instead of a tree. */
2390 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2391 update_stmt (gsi_stmt (*gsi));
2392 }
2393
2394 /* Use this to lower vector operations introduced by the vectorizer,
2395 if it may need the bit-twiddling tricks implemented in this file. */
2396
2397 static unsigned int
2398 expand_vector_operations (void)
2399 {
2400 gimple_stmt_iterator gsi;
2401 basic_block bb;
2402 bool cfg_changed = false;
2403
2404 auto_bitmap dce_ssa_names;
2405
2406 FOR_EACH_BB_FN (bb, cfun)
2407 {
2408 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2409 {
2410 expand_vector_operations_1 (&gsi, dce_ssa_names);
2411 /* ??? If we do not cleanup EH then we will ICE in
2412 verification. But in reality we have created wrong-code
2413 as we did not properly transition EH info and edges to
2414 the piecewise computations. */
2415 if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2416 && gimple_purge_dead_eh_edges (bb))
2417 cfg_changed = true;
2418 }
2419 }
2420
2421 simple_dce_from_worklist (dce_ssa_names);
2422
2423 return cfg_changed ? TODO_cleanup_cfg : 0;
2424 }
2425
2426 namespace {
2427
2428 const pass_data pass_data_lower_vector =
2429 {
2430 GIMPLE_PASS, /* type */
2431 "veclower", /* name */
2432 OPTGROUP_VEC, /* optinfo_flags */
2433 TV_NONE, /* tv_id */
2434 PROP_cfg, /* properties_required */
2435 PROP_gimple_lvec, /* properties_provided */
2436 0, /* properties_destroyed */
2437 0, /* todo_flags_start */
2438 TODO_update_ssa, /* todo_flags_finish */
2439 };
2440
2441 class pass_lower_vector : public gimple_opt_pass
2442 {
2443 public:
2444 pass_lower_vector (gcc::context *ctxt)
2445 : gimple_opt_pass (pass_data_lower_vector, ctxt)
2446 {}
2447
2448 /* opt_pass methods: */
2449 virtual bool gate (function *fun)
2450 {
2451 return !(fun->curr_properties & PROP_gimple_lvec);
2452 }
2453
2454 virtual unsigned int execute (function *)
2455 {
2456 return expand_vector_operations ();
2457 }
2458
2459 }; // class pass_lower_vector
2460
2461 } // anon namespace
2462
2463 gimple_opt_pass *
2464 make_pass_lower_vector (gcc::context *ctxt)
2465 {
2466 return new pass_lower_vector (ctxt);
2467 }
2468
2469 namespace {
2470
2471 const pass_data pass_data_lower_vector_ssa =
2472 {
2473 GIMPLE_PASS, /* type */
2474 "veclower2", /* name */
2475 OPTGROUP_VEC, /* optinfo_flags */
2476 TV_NONE, /* tv_id */
2477 PROP_cfg, /* properties_required */
2478 PROP_gimple_lvec, /* properties_provided */
2479 0, /* properties_destroyed */
2480 0, /* todo_flags_start */
2481 ( TODO_update_ssa
2482 | TODO_cleanup_cfg ), /* todo_flags_finish */
2483 };
2484
2485 class pass_lower_vector_ssa : public gimple_opt_pass
2486 {
2487 public:
2488 pass_lower_vector_ssa (gcc::context *ctxt)
2489 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2490 {}
2491
2492 /* opt_pass methods: */
2493 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
2494 virtual unsigned int execute (function *)
2495 {
2496 return expand_vector_operations ();
2497 }
2498
2499 }; // class pass_lower_vector_ssa
2500
2501 } // anon namespace
2502
2503 gimple_opt_pass *
2504 make_pass_lower_vector_ssa (gcc::context *ctxt)
2505 {
2506 return new pass_lower_vector_ssa (ctxt);
2507 }
2508
2509 #include "gt-tree-vect-generic.h"
2510