1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h" /* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "tree-eh.h"
36 #include "gimplify.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "dumpfile.h"
41 #include "builtins.h"
42 #include "internal-fn.h"
43 #include "case-cfn-macros.h"
44 #include "fold-const-call.h"
45 #include "attribs.h"
46 #include "cgraph.h"
47 #include "omp-simd-clone.h"
48 #include "predict.h"
49 #include "tree-vector-builder.h"
50 #include "vec-perm-indices.h"
51 #include "gimple-range.h"
52
53 /* Return true if we have a useful VR_RANGE range for VAR, storing it
54 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
55
56 static bool
vect_get_range_info(tree var,wide_int * min_value,wide_int * max_value)57 vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
58 {
59 value_range vr;
60 get_range_query (cfun)->range_of_expr (vr, var);
61 if (vr.undefined_p ())
62 vr.set_varying (TREE_TYPE (var));
63 *min_value = wi::to_wide (vr.min ());
64 *max_value = wi::to_wide (vr.max ());
65 value_range_kind vr_type = vr.kind ();
66 wide_int nonzero = get_nonzero_bits (var);
67 signop sgn = TYPE_SIGN (TREE_TYPE (var));
68 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
69 nonzero, sgn) == VR_RANGE)
70 {
71 if (dump_enabled_p ())
72 {
73 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
74 dump_printf (MSG_NOTE, " has range [");
75 dump_hex (MSG_NOTE, *min_value);
76 dump_printf (MSG_NOTE, ", ");
77 dump_hex (MSG_NOTE, *max_value);
78 dump_printf (MSG_NOTE, "]\n");
79 }
80 return true;
81 }
82 else
83 {
84 if (dump_enabled_p ())
85 {
86 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
87 dump_printf (MSG_NOTE, " has no range info\n");
88 }
89 return false;
90 }
91 }
92
93 /* Report that we've found an instance of pattern PATTERN in
94 statement STMT. */
95
96 static void
vect_pattern_detected(const char * name,gimple * stmt)97 vect_pattern_detected (const char *name, gimple *stmt)
98 {
99 if (dump_enabled_p ())
100 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
101 }
102
103 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
104 return the pattern statement's stmt_vec_info. Set its vector type to
105 VECTYPE if it doesn't have one already. */
106
107 static stmt_vec_info
vect_init_pattern_stmt(vec_info * vinfo,gimple * pattern_stmt,stmt_vec_info orig_stmt_info,tree vectype)108 vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
109 stmt_vec_info orig_stmt_info, tree vectype)
110 {
111 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
112 if (pattern_stmt_info == NULL)
113 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
114 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
115
116 pattern_stmt_info->pattern_stmt_p = true;
117 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
118 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
119 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
120 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
121 {
122 gcc_assert (!vectype
123 || (VECTOR_BOOLEAN_TYPE_P (vectype)
124 == vect_use_mask_type_p (orig_stmt_info)));
125 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
126 pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
127 }
128 return pattern_stmt_info;
129 }
130
131 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
132 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
133 have one already. */
134
135 static void
vect_set_pattern_stmt(vec_info * vinfo,gimple * pattern_stmt,stmt_vec_info orig_stmt_info,tree vectype)136 vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt,
137 stmt_vec_info orig_stmt_info, tree vectype)
138 {
139 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
140 STMT_VINFO_RELATED_STMT (orig_stmt_info)
141 = vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype);
142 }
143
144 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
145 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
146 be different from the vector type of the final pattern statement.
147 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
148 from which it was derived. */
149
150 static inline void
append_pattern_def_seq(vec_info * vinfo,stmt_vec_info stmt_info,gimple * new_stmt,tree vectype=NULL_TREE,tree scalar_type_for_mask=NULL_TREE)151 append_pattern_def_seq (vec_info *vinfo,
152 stmt_vec_info stmt_info, gimple *new_stmt,
153 tree vectype = NULL_TREE,
154 tree scalar_type_for_mask = NULL_TREE)
155 {
156 gcc_assert (!scalar_type_for_mask
157 == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
158 if (vectype)
159 {
160 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
161 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
162 if (scalar_type_for_mask)
163 new_stmt_info->mask_precision
164 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
165 }
166 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
167 new_stmt);
168 }
169
170 /* The caller wants to perform new operations on vect_external variable
171 VAR, so that the result of the operations would also be vect_external.
172 Return the edge on which the operations can be performed, if one exists.
173 Return null if the operations should instead be treated as part of
174 the pattern that needs them. */
175
176 static edge
vect_get_external_def_edge(vec_info * vinfo,tree var)177 vect_get_external_def_edge (vec_info *vinfo, tree var)
178 {
179 edge e = NULL;
180 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
181 {
182 e = loop_preheader_edge (loop_vinfo->loop);
183 if (!SSA_NAME_IS_DEFAULT_DEF (var))
184 {
185 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
186 if (bb == NULL
187 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
188 e = NULL;
189 }
190 }
191 return e;
192 }
193
194 /* Return true if the target supports a vector version of CODE,
195 where CODE is known to map to a direct optab with the given SUBTYPE.
196 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
197 specifies the type of the scalar result.
198
199 If CODE allows the inputs and outputs to have different type
200 (such as for WIDEN_SUM_EXPR), it is the input mode rather
201 than the output mode that determines the appropriate target pattern.
202 Operand 0 of the target pattern then specifies the mode that the output
203 must have.
204
205 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
206 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
207 is nonnull. */
208
209 static bool
vect_supportable_direct_optab_p(vec_info * vinfo,tree otype,tree_code code,tree itype,tree * vecotype_out,tree * vecitype_out=NULL,enum optab_subtype subtype=optab_default)210 vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
211 tree itype, tree *vecotype_out,
212 tree *vecitype_out = NULL,
213 enum optab_subtype subtype = optab_default)
214 {
215 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
216 if (!vecitype)
217 return false;
218
219 tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
220 if (!vecotype)
221 return false;
222
223 optab optab = optab_for_tree_code (code, vecitype, subtype);
224 if (!optab)
225 return false;
226
227 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
228 if (icode == CODE_FOR_nothing
229 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
230 return false;
231
232 *vecotype_out = vecotype;
233 if (vecitype_out)
234 *vecitype_out = vecitype;
235 return true;
236 }
237
238 /* Round bit precision PRECISION up to a full element. */
239
240 static unsigned int
vect_element_precision(unsigned int precision)241 vect_element_precision (unsigned int precision)
242 {
243 precision = 1 << ceil_log2 (precision);
244 return MAX (precision, BITS_PER_UNIT);
245 }
246
247 /* If OP is defined by a statement that's being considered for vectorization,
248 return information about that statement, otherwise return NULL. */
249
250 static stmt_vec_info
vect_get_internal_def(vec_info * vinfo,tree op)251 vect_get_internal_def (vec_info *vinfo, tree op)
252 {
253 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
254 if (def_stmt_info
255 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
256 return def_stmt_info;
257 return NULL;
258 }
259
260 /* Check whether NAME, an ssa-name used in STMT_VINFO,
261 is a result of a type promotion, such that:
262 DEF_STMT: NAME = NOP (name0)
263 If CHECK_SIGN is TRUE, check that either both types are signed or both are
264 unsigned. */
265
266 static bool
type_conversion_p(vec_info * vinfo,tree name,bool check_sign,tree * orig_type,gimple ** def_stmt,bool * promotion)267 type_conversion_p (vec_info *vinfo, tree name, bool check_sign,
268 tree *orig_type, gimple **def_stmt, bool *promotion)
269 {
270 tree type = TREE_TYPE (name);
271 tree oprnd0;
272 enum vect_def_type dt;
273
274 stmt_vec_info def_stmt_info;
275 if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt))
276 return false;
277
278 if (dt != vect_internal_def
279 && dt != vect_external_def && dt != vect_constant_def)
280 return false;
281
282 if (!*def_stmt)
283 return false;
284
285 if (!is_gimple_assign (*def_stmt))
286 return false;
287
288 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
289 return false;
290
291 oprnd0 = gimple_assign_rhs1 (*def_stmt);
292
293 *orig_type = TREE_TYPE (oprnd0);
294 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
295 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
296 return false;
297
298 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
299 *promotion = true;
300 else
301 *promotion = false;
302
303 if (!vect_is_simple_use (oprnd0, vinfo, &dt))
304 return false;
305
306 return true;
307 }
308
309 /* Holds information about an input operand after some sign changes
310 and type promotions have been peeled away. */
311 class vect_unpromoted_value {
312 public:
313 vect_unpromoted_value ();
314
315 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
316
317 /* The value obtained after peeling away zero or more casts. */
318 tree op;
319
320 /* The type of OP. */
321 tree type;
322
323 /* The definition type of OP. */
324 vect_def_type dt;
325
326 /* If OP is the result of peeling at least one cast, and if the cast
327 of OP itself is a vectorizable statement, CASTER identifies that
328 statement, otherwise it is null. */
329 stmt_vec_info caster;
330 };
331
vect_unpromoted_value()332 inline vect_unpromoted_value::vect_unpromoted_value ()
333 : op (NULL_TREE),
334 type (NULL_TREE),
335 dt (vect_uninitialized_def),
336 caster (NULL)
337 {
338 }
339
340 /* Set the operand to OP_IN, its definition type to DT_IN, and the
341 statement that casts it to CASTER_IN. */
342
343 inline void
set_op(tree op_in,vect_def_type dt_in,stmt_vec_info caster_in)344 vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
345 stmt_vec_info caster_in)
346 {
347 op = op_in;
348 type = TREE_TYPE (op);
349 dt = dt_in;
350 caster = caster_in;
351 }
352
353 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
354 to reach some vectorizable inner operand OP', continuing as long as it
355 is possible to convert OP' back to OP using a possible sign change
356 followed by a possible promotion P. Return this OP', or null if OP is
357 not a vectorizable SSA name. If there is a promotion P, describe its
358 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
359 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
360 have more than one user.
361
362 A successful return means that it is possible to go from OP' to OP
363 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
364 whereas the cast from UNPROM to OP might be a promotion, a sign
365 change, or a nop.
366
367 E.g. say we have:
368
369 signed short *ptr = ...;
370 signed short C = *ptr;
371 unsigned short B = (unsigned short) C; // sign change
372 signed int A = (signed int) B; // unsigned promotion
373 ...possible other uses of A...
374 unsigned int OP = (unsigned int) A; // sign change
375
376 In this case it's possible to go directly from C to OP using:
377
378 OP = (unsigned int) (unsigned short) C;
379 +------------+ +--------------+
380 promotion sign change
381
382 so OP' would be C. The input to the promotion is B, so UNPROM
383 would describe B. */
384
385 static tree
vect_look_through_possible_promotion(vec_info * vinfo,tree op,vect_unpromoted_value * unprom,bool * single_use_p=NULL)386 vect_look_through_possible_promotion (vec_info *vinfo, tree op,
387 vect_unpromoted_value *unprom,
388 bool *single_use_p = NULL)
389 {
390 tree res = NULL_TREE;
391 tree op_type = TREE_TYPE (op);
392 unsigned int orig_precision = TYPE_PRECISION (op_type);
393 unsigned int min_precision = orig_precision;
394 stmt_vec_info caster = NULL;
395 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
396 {
397 /* See whether OP is simple enough to vectorize. */
398 stmt_vec_info def_stmt_info;
399 gimple *def_stmt;
400 vect_def_type dt;
401 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
402 break;
403
404 /* If OP is the input of a demotion, skip over it to see whether
405 OP is itself the result of a promotion. If so, the combined
406 effect of the promotion and the demotion might fit the required
407 pattern, otherwise neither operation fits.
408
409 This copes with cases such as the result of an arithmetic
410 operation being truncated before being stored, and where that
411 arithmetic operation has been recognized as an over-widened one. */
412 if (TYPE_PRECISION (op_type) <= min_precision)
413 {
414 /* Use OP as the UNPROM described above if we haven't yet
415 found a promotion, or if using the new input preserves the
416 sign of the previous promotion. */
417 if (!res
418 || TYPE_PRECISION (unprom->type) == orig_precision
419 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
420 {
421 unprom->set_op (op, dt, caster);
422 min_precision = TYPE_PRECISION (op_type);
423 }
424 /* Stop if we've already seen a promotion and if this
425 conversion does more than change the sign. */
426 else if (TYPE_PRECISION (op_type)
427 != TYPE_PRECISION (unprom->type))
428 break;
429
430 /* The sequence now extends to OP. */
431 res = op;
432 }
433
434 /* See whether OP is defined by a cast. Record it as CASTER if
435 the cast is potentially vectorizable. */
436 if (!def_stmt)
437 break;
438 caster = def_stmt_info;
439
440 /* Ignore pattern statements, since we don't link uses for them. */
441 if (caster
442 && single_use_p
443 && !STMT_VINFO_RELATED_STMT (caster)
444 && !has_single_use (res))
445 *single_use_p = false;
446
447 gassign *assign = dyn_cast <gassign *> (def_stmt);
448 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
449 break;
450
451 /* Continue with the input to the cast. */
452 op = gimple_assign_rhs1 (def_stmt);
453 op_type = TREE_TYPE (op);
454 }
455 return res;
456 }
457
458 /* OP is an integer operand to an operation that returns TYPE, and we
459 want to treat the operation as a widening one. So far we can treat
460 it as widening from *COMMON_TYPE.
461
462 Return true if OP is suitable for such a widening operation,
463 either widening from *COMMON_TYPE or from some supertype of it.
464 Update *COMMON_TYPE to the supertype in the latter case.
465
466 SHIFT_P is true if OP is a shift amount. */
467
468 static bool
vect_joust_widened_integer(tree type,bool shift_p,tree op,tree * common_type)469 vect_joust_widened_integer (tree type, bool shift_p, tree op,
470 tree *common_type)
471 {
472 /* Calculate the minimum precision required by OP, without changing
473 the sign of either operand. */
474 unsigned int precision;
475 if (shift_p)
476 {
477 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
478 return false;
479 precision = TREE_INT_CST_LOW (op);
480 }
481 else
482 {
483 precision = wi::min_precision (wi::to_widest (op),
484 TYPE_SIGN (*common_type));
485 if (precision * 2 > TYPE_PRECISION (type))
486 return false;
487 }
488
489 /* If OP requires a wider type, switch to that type. The checks
490 above ensure that this is still narrower than the result. */
491 precision = vect_element_precision (precision);
492 if (TYPE_PRECISION (*common_type) < precision)
493 *common_type = build_nonstandard_integer_type
494 (precision, TYPE_UNSIGNED (*common_type));
495 return true;
496 }
497
498 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
499 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
500
501 static bool
vect_joust_widened_type(tree type,tree new_type,tree * common_type)502 vect_joust_widened_type (tree type, tree new_type, tree *common_type)
503 {
504 if (types_compatible_p (*common_type, new_type))
505 return true;
506
507 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
508 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
509 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
510 return true;
511
512 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
513 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
514 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
515 {
516 *common_type = new_type;
517 return true;
518 }
519
520 /* We have mismatched signs, with the signed type being
521 no wider than the unsigned type. In this case we need
522 a wider signed type. */
523 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
524 TYPE_PRECISION (new_type));
525 precision *= 2;
526
527 if (precision * 2 > TYPE_PRECISION (type))
528 return false;
529
530 *common_type = build_nonstandard_integer_type (precision, false);
531 return true;
532 }
533
534 /* Check whether STMT_INFO can be viewed as a tree of integer operations
535 in which each node either performs CODE or WIDENED_CODE, and where
536 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
537 specifies the maximum number of leaf operands. SHIFT_P says whether
538 CODE and WIDENED_CODE are some sort of shift.
539
540 If STMT_INFO is such a tree, return the number of leaf operands
541 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
542 to a type that (a) is narrower than the result of STMT_INFO and
543 (b) can hold all leaf operand values.
544
545 If SUBTYPE then allow that the signs of the operands
546 may differ in signs but not in precision. SUBTYPE is updated to reflect
547 this.
548
549 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
550 exists. */
551
552 static unsigned int
vect_widened_op_tree(vec_info * vinfo,stmt_vec_info stmt_info,tree_code code,tree_code widened_code,bool shift_p,unsigned int max_nops,vect_unpromoted_value * unprom,tree * common_type,enum optab_subtype * subtype=NULL)553 vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code,
554 tree_code widened_code, bool shift_p,
555 unsigned int max_nops,
556 vect_unpromoted_value *unprom, tree *common_type,
557 enum optab_subtype *subtype = NULL)
558 {
559 /* Check for an integer operation with the right code. */
560 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
561 if (!assign)
562 return 0;
563
564 tree_code rhs_code = gimple_assign_rhs_code (assign);
565 if (rhs_code != code && rhs_code != widened_code)
566 return 0;
567
568 tree type = TREE_TYPE (gimple_assign_lhs (assign));
569 if (!INTEGRAL_TYPE_P (type))
570 return 0;
571
572 /* Assume that both operands will be leaf operands. */
573 max_nops -= 2;
574
575 /* Check the operands. */
576 unsigned int next_op = 0;
577 for (unsigned int i = 0; i < 2; ++i)
578 {
579 vect_unpromoted_value *this_unprom = &unprom[next_op];
580 unsigned int nops = 1;
581 tree op = gimple_op (assign, i + 1);
582 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
583 {
584 /* We already have a common type from earlier operands.
585 Update it to account for OP. */
586 this_unprom->set_op (op, vect_constant_def);
587 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
588 return 0;
589 }
590 else
591 {
592 /* Only allow shifts by constants. */
593 if (shift_p && i == 1)
594 return 0;
595
596 if (rhs_code != code)
597 {
598 /* If rhs_code is widened_code, don't look through further
599 possible promotions, there is a promotion already embedded
600 in the WIDEN_*_EXPR. */
601 if (TREE_CODE (op) != SSA_NAME
602 || !INTEGRAL_TYPE_P (TREE_TYPE (op)))
603 return 0;
604
605 stmt_vec_info def_stmt_info;
606 gimple *def_stmt;
607 vect_def_type dt;
608 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info,
609 &def_stmt))
610 return 0;
611 this_unprom->set_op (op, dt, NULL);
612 }
613 else if (!vect_look_through_possible_promotion (vinfo, op,
614 this_unprom))
615 return 0;
616
617 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
618 {
619 /* The operand isn't widened. If STMT_INFO has the code
620 for an unwidened operation, recursively check whether
621 this operand is a node of the tree. */
622 if (rhs_code != code
623 || max_nops == 0
624 || this_unprom->dt != vect_internal_def)
625 return 0;
626
627 /* Give back the leaf slot allocated above now that we're
628 not treating this as a leaf operand. */
629 max_nops += 1;
630
631 /* Recursively process the definition of the operand. */
632 stmt_vec_info def_stmt_info
633 = vinfo->lookup_def (this_unprom->op);
634 nops = vect_widened_op_tree (vinfo, def_stmt_info, code,
635 widened_code, shift_p, max_nops,
636 this_unprom, common_type,
637 subtype);
638 if (nops == 0)
639 return 0;
640
641 max_nops -= nops;
642 }
643 else
644 {
645 /* Make sure that the operand is narrower than the result. */
646 if (TYPE_PRECISION (this_unprom->type) * 2
647 > TYPE_PRECISION (type))
648 return 0;
649
650 /* Update COMMON_TYPE for the new operand. */
651 if (i == 0)
652 *common_type = this_unprom->type;
653 else if (!vect_joust_widened_type (type, this_unprom->type,
654 common_type))
655 {
656 if (subtype)
657 {
658 /* See if we can sign extend the smaller type. */
659 if (TYPE_PRECISION (this_unprom->type)
660 > TYPE_PRECISION (*common_type))
661 *common_type = this_unprom->type;
662 *subtype = optab_vector_mixed_sign;
663 }
664 else
665 return 0;
666 }
667 }
668 }
669 next_op += nops;
670 }
671 return next_op;
672 }
673
674 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
675 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
676
677 static tree
vect_recog_temp_ssa_var(tree type,gimple * stmt)678 vect_recog_temp_ssa_var (tree type, gimple *stmt)
679 {
680 return make_temp_ssa_name (type, stmt, "patt");
681 }
682
683 /* STMT2_INFO describes a type conversion that could be split into STMT1
684 followed by a version of STMT2_INFO that takes NEW_RHS as its first
685 input. Try to do this using pattern statements, returning true on
686 success. */
687
688 static bool
vect_split_statement(vec_info * vinfo,stmt_vec_info stmt2_info,tree new_rhs,gimple * stmt1,tree vectype)689 vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs,
690 gimple *stmt1, tree vectype)
691 {
692 if (is_pattern_stmt_p (stmt2_info))
693 {
694 /* STMT2_INFO is part of a pattern. Get the statement to which
695 the pattern is attached. */
696 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
697 vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype);
698
699 if (dump_enabled_p ())
700 dump_printf_loc (MSG_NOTE, vect_location,
701 "Splitting pattern statement: %G", stmt2_info->stmt);
702
703 /* Since STMT2_INFO is a pattern statement, we can change it
704 in-situ without worrying about changing the code for the
705 containing block. */
706 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
707
708 if (dump_enabled_p ())
709 {
710 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
711 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
712 stmt2_info->stmt);
713 }
714
715 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
716 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
717 /* STMT2_INFO is the actual pattern statement. Add STMT1
718 to the end of the definition sequence. */
719 gimple_seq_add_stmt_without_update (def_seq, stmt1);
720 else
721 {
722 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
723 before it. */
724 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
725 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
726 }
727 return true;
728 }
729 else
730 {
731 /* STMT2_INFO doesn't yet have a pattern. Try to create a
732 two-statement pattern now. */
733 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
734 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
735 tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
736 if (!lhs_vectype)
737 return false;
738
739 if (dump_enabled_p ())
740 dump_printf_loc (MSG_NOTE, vect_location,
741 "Splitting statement: %G", stmt2_info->stmt);
742
743 /* Add STMT1 as a singleton pattern definition sequence. */
744 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
745 vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype);
746 gimple_seq_add_stmt_without_update (def_seq, stmt1);
747
748 /* Build the second of the two pattern statements. */
749 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
750 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
751 vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype);
752
753 if (dump_enabled_p ())
754 {
755 dump_printf_loc (MSG_NOTE, vect_location,
756 "into pattern statements: %G", stmt1);
757 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
758 }
759
760 return true;
761 }
762 }
763
764 /* Convert UNPROM to TYPE and return the result, adding new statements
765 to STMT_INFO's pattern definition statements if no better way is
766 available. VECTYPE is the vector form of TYPE.
767
768 If SUBTYPE then convert the type based on the subtype. */
769
770 static tree
vect_convert_input(vec_info * vinfo,stmt_vec_info stmt_info,tree type,vect_unpromoted_value * unprom,tree vectype,enum optab_subtype subtype=optab_default)771 vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
772 vect_unpromoted_value *unprom, tree vectype,
773 enum optab_subtype subtype = optab_default)
774 {
775
776 /* Update the type if the signs differ. */
777 if (subtype == optab_vector_mixed_sign
778 && TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op)))
779 type = build_nonstandard_integer_type (TYPE_PRECISION (type),
780 TYPE_SIGN (unprom->type));
781
782 /* Check for a no-op conversion. */
783 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
784 return unprom->op;
785
786 /* Allow the caller to create constant vect_unpromoted_values. */
787 if (TREE_CODE (unprom->op) == INTEGER_CST)
788 return wide_int_to_tree (type, wi::to_widest (unprom->op));
789
790 tree input = unprom->op;
791 if (unprom->caster)
792 {
793 tree lhs = gimple_get_lhs (unprom->caster->stmt);
794 tree lhs_type = TREE_TYPE (lhs);
795
796 /* If the result of the existing cast is the right width, use it
797 instead of the source of the cast. */
798 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
799 input = lhs;
800 /* If the precision we want is between the source and result
801 precisions of the existing cast, try splitting the cast into
802 two and tapping into a mid-way point. */
803 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
804 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
805 {
806 /* In order to preserve the semantics of the original cast,
807 give the mid-way point the same signedness as the input value.
808
809 It would be possible to use a signed type here instead if
810 TYPE is signed and UNPROM->TYPE is unsigned, but that would
811 make the sign of the midtype sensitive to the order in
812 which we process the statements, since the signedness of
813 TYPE is the signedness required by just one of possibly
814 many users. Also, unsigned promotions are usually as cheap
815 as or cheaper than signed ones, so it's better to keep an
816 unsigned promotion. */
817 tree midtype = build_nonstandard_integer_type
818 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
819 tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
820 if (vec_midtype)
821 {
822 input = vect_recog_temp_ssa_var (midtype, NULL);
823 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
824 unprom->op);
825 if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt,
826 vec_midtype))
827 append_pattern_def_seq (vinfo, stmt_info,
828 new_stmt, vec_midtype);
829 }
830 }
831
832 /* See if we can reuse an existing result. */
833 if (types_compatible_p (type, TREE_TYPE (input)))
834 return input;
835 }
836
837 /* We need a new conversion statement. */
838 tree new_op = vect_recog_temp_ssa_var (type, NULL);
839 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
840
841 /* If OP is an external value, see if we can insert the new statement
842 on an incoming edge. */
843 if (input == unprom->op && unprom->dt == vect_external_def)
844 if (edge e = vect_get_external_def_edge (vinfo, input))
845 {
846 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
847 gcc_assert (!new_bb);
848 return new_op;
849 }
850
851 /* As a (common) last resort, add the statement to the pattern itself. */
852 append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype);
853 return new_op;
854 }
855
856 /* Invoke vect_convert_input for N elements of UNPROM and store the
857 result in the corresponding elements of RESULT.
858
859 If SUBTYPE then convert the type based on the subtype. */
860
861 static void
vect_convert_inputs(vec_info * vinfo,stmt_vec_info stmt_info,unsigned int n,tree * result,tree type,vect_unpromoted_value * unprom,tree vectype,enum optab_subtype subtype=optab_default)862 vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n,
863 tree *result, tree type, vect_unpromoted_value *unprom,
864 tree vectype, enum optab_subtype subtype = optab_default)
865 {
866 for (unsigned int i = 0; i < n; ++i)
867 {
868 unsigned int j;
869 for (j = 0; j < i; ++j)
870 if (unprom[j].op == unprom[i].op)
871 break;
872
873 if (j < i)
874 result[i] = result[j];
875 else
876 result[i] = vect_convert_input (vinfo, stmt_info,
877 type, &unprom[i], vectype, subtype);
878 }
879 }
880
881 /* The caller has created a (possibly empty) sequence of pattern definition
882 statements followed by a single statement PATTERN_STMT. Cast the result
883 of this final statement to TYPE. If a new statement is needed, add
884 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
885 and return the new statement, otherwise return PATTERN_STMT as-is.
886 VECITYPE is the vector form of PATTERN_STMT's result type. */
887
888 static gimple *
vect_convert_output(vec_info * vinfo,stmt_vec_info stmt_info,tree type,gimple * pattern_stmt,tree vecitype)889 vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type,
890 gimple *pattern_stmt, tree vecitype)
891 {
892 tree lhs = gimple_get_lhs (pattern_stmt);
893 if (!types_compatible_p (type, TREE_TYPE (lhs)))
894 {
895 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype);
896 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
897 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
898 }
899 return pattern_stmt;
900 }
901
902 /* Return true if STMT_VINFO describes a reduction for which reassociation
903 is allowed. If STMT_INFO is part of a group, assume that it's part of
904 a reduction chain and optimistically assume that all statements
905 except the last allow reassociation.
906 Also require it to have code CODE and to be a reduction
907 in the outermost loop. When returning true, store the operands in
908 *OP0_OUT and *OP1_OUT. */
909
910 static bool
vect_reassociating_reduction_p(vec_info * vinfo,stmt_vec_info stmt_info,tree_code code,tree * op0_out,tree * op1_out)911 vect_reassociating_reduction_p (vec_info *vinfo,
912 stmt_vec_info stmt_info, tree_code code,
913 tree *op0_out, tree *op1_out)
914 {
915 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
916 if (!loop_info)
917 return false;
918
919 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
920 if (!assign || gimple_assign_rhs_code (assign) != code)
921 return false;
922
923 /* We don't allow changing the order of the computation in the inner-loop
924 when doing outer-loop vectorization. */
925 class loop *loop = LOOP_VINFO_LOOP (loop_info);
926 if (loop && nested_in_vect_loop_p (loop, stmt_info))
927 return false;
928
929 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
930 {
931 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
932 code))
933 return false;
934 }
935 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
936 return false;
937
938 *op0_out = gimple_assign_rhs1 (assign);
939 *op1_out = gimple_assign_rhs2 (assign);
940 if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
941 std::swap (*op0_out, *op1_out);
942 return true;
943 }
944
945 /* match.pd function to match
946 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
947 with conditions:
948 1) @1, @2, c, d, a, b are all integral type.
949 2) There's single_use for both @1 and @2.
950 3) a, c have same precision.
951 4) c and @1 have different precision.
952 5) c, d are the same type or they can differ in sign when convert is
953 truncation.
954
955 record a and c and d and @3. */
956
957 extern bool gimple_cond_expr_convert_p (tree, tree*, tree (*)(tree));
958
959 /* Function vect_recog_cond_expr_convert
960
961 Try to find the following pattern:
962
963 TYPE_AB A,B;
964 TYPE_CD C,D;
965 TYPE_E E;
966 TYPE_E op_true = (TYPE_E) A;
967 TYPE_E op_false = (TYPE_E) B;
968
969 E = C cmp D ? op_true : op_false;
970
971 where
972 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
973 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
974 single_use of op_true and op_false.
975 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
976
977 Input:
978
979 * STMT_VINFO: The stmt from which the pattern search begins.
980 here it starts with E = c cmp D ? op_true : op_false;
981
982 Output:
983
984 TYPE1 E' = C cmp D ? A : B;
985 TYPE3 E = (TYPE3) E';
986
987 There may extra nop_convert for A or B to handle different signness.
988
989 * TYPE_OUT: The vector type of the output of this pattern.
990
991 * Return value: A new stmt that will be used to replace the sequence of
992 stmts that constitute the pattern. In this case it will be:
993 E = (TYPE3)E';
994 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
995
996 static gimple *
vect_recog_cond_expr_convert_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)997 vect_recog_cond_expr_convert_pattern (vec_info *vinfo,
998 stmt_vec_info stmt_vinfo, tree *type_out)
999 {
1000 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1001 tree lhs, match[4], temp, type, new_lhs, op2;
1002 gimple *cond_stmt;
1003 gimple *pattern_stmt;
1004
1005 if (!last_stmt)
1006 return NULL;
1007
1008 lhs = gimple_assign_lhs (last_stmt);
1009
1010 /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
1011 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1012 if (!gimple_cond_expr_convert_p (lhs, &match[0], NULL))
1013 return NULL;
1014
1015 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt);
1016
1017 op2 = match[2];
1018 type = TREE_TYPE (match[1]);
1019 if (TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (match[2])))
1020 {
1021 op2 = vect_recog_temp_ssa_var (type, NULL);
1022 gimple* nop_stmt = gimple_build_assign (op2, NOP_EXPR, match[2]);
1023 append_pattern_def_seq (vinfo, stmt_vinfo, nop_stmt,
1024 get_vectype_for_scalar_type (vinfo, type));
1025 }
1026
1027 temp = vect_recog_temp_ssa_var (type, NULL);
1028 cond_stmt = gimple_build_assign (temp, build3 (COND_EXPR, type, match[3],
1029 match[1], op2));
1030 append_pattern_def_seq (vinfo, stmt_vinfo, cond_stmt,
1031 get_vectype_for_scalar_type (vinfo, type));
1032 new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
1033 pattern_stmt = gimple_build_assign (new_lhs, NOP_EXPR, temp);
1034 *type_out = STMT_VINFO_VECTYPE (stmt_vinfo);
1035
1036 if (dump_enabled_p ())
1037 dump_printf_loc (MSG_NOTE, vect_location,
1038 "created pattern stmt: %G", pattern_stmt);
1039 return pattern_stmt;
1040 }
1041
1042 /* Function vect_recog_dot_prod_pattern
1043
1044 Try to find the following pattern:
1045
1046 type1a x_t
1047 type1b y_t;
1048 TYPE1 prod;
1049 TYPE2 sum = init;
1050 loop:
1051 sum_0 = phi <init, sum_1>
1052 S1 x_t = ...
1053 S2 y_t = ...
1054 S3 x_T = (TYPE1) x_t;
1055 S4 y_T = (TYPE1) y_t;
1056 S5 prod = x_T * y_T;
1057 [S6 prod = (TYPE2) prod; #optional]
1058 S7 sum_1 = prod + sum_0;
1059
1060 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1061 the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1062 'type1a' and 'type1b' can differ.
1063
1064 Input:
1065
1066 * STMT_VINFO: The stmt from which the pattern search begins. In the
1067 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1068 will be detected.
1069
1070 Output:
1071
1072 * TYPE_OUT: The type of the output of this pattern.
1073
1074 * Return value: A new stmt that will be used to replace the sequence of
1075 stmts that constitute the pattern. In this case it will be:
1076 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1077
1078 Note: The dot-prod idiom is a widening reduction pattern that is
1079 vectorized without preserving all the intermediate results. It
1080 produces only N/2 (widened) results (by summing up pairs of
1081 intermediate results) rather than all N results. Therefore, we
1082 cannot allow this pattern when we want to get all the results and in
1083 the correct order (as is the case when this computation is in an
1084 inner-loop nested in an outer-loop that us being vectorized). */
1085
1086 static gimple *
vect_recog_dot_prod_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1087 vect_recog_dot_prod_pattern (vec_info *vinfo,
1088 stmt_vec_info stmt_vinfo, tree *type_out)
1089 {
1090 tree oprnd0, oprnd1;
1091 gimple *last_stmt = stmt_vinfo->stmt;
1092 tree type, half_type;
1093 gimple *pattern_stmt;
1094 tree var;
1095
1096 /* Look for the following pattern
1097 DX = (TYPE1) X;
1098 DY = (TYPE1) Y;
1099 DPROD = DX * DY;
1100 DDPROD = (TYPE2) DPROD;
1101 sum_1 = DDPROD + sum_0;
1102 In which
1103 - DX is double the size of X
1104 - DY is double the size of Y
1105 - DX, DY, DPROD all have the same type but the sign
1106 between X, Y and DPROD can differ.
1107 - sum is the same size of DPROD or bigger
1108 - sum has been recognized as a reduction variable.
1109
1110 This is equivalent to:
1111 DPROD = X w* Y; #widen mult
1112 sum_1 = DPROD w+ sum_0; #widen summation
1113 or
1114 DPROD = X w* Y; #widen mult
1115 sum_1 = DPROD + sum_0; #summation
1116 */
1117
1118 /* Starting from LAST_STMT, follow the defs of its uses in search
1119 of the above pattern. */
1120
1121 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1122 &oprnd0, &oprnd1))
1123 return NULL;
1124
1125 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1126
1127 vect_unpromoted_value unprom_mult;
1128 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
1129
1130 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1131 we know that oprnd1 is the reduction variable (defined by a loop-header
1132 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1133 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1134 if (!oprnd0)
1135 return NULL;
1136
1137 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
1138 if (!mult_vinfo)
1139 return NULL;
1140
1141 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1142 inside the loop (in case we are analyzing an outer-loop). */
1143 vect_unpromoted_value unprom0[2];
1144 enum optab_subtype subtype = optab_vector;
1145 if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
1146 false, 2, unprom0, &half_type, &subtype))
1147 return NULL;
1148
1149 /* If there are two widening operations, make sure they agree on the sign
1150 of the extension. The result of an optab_vector_mixed_sign operation
1151 is signed; otherwise, the result has the same sign as the operands. */
1152 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
1153 && (subtype == optab_vector_mixed_sign
1154 ? TYPE_UNSIGNED (unprom_mult.type)
1155 : TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type)))
1156 return NULL;
1157
1158 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
1159
1160 tree half_vectype;
1161 if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
1162 type_out, &half_vectype, subtype))
1163 return NULL;
1164
1165 /* Get the inputs in the appropriate types. */
1166 tree mult_oprnd[2];
1167 vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type,
1168 unprom0, half_vectype, subtype);
1169
1170 var = vect_recog_temp_ssa_var (type, NULL);
1171 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
1172 mult_oprnd[0], mult_oprnd[1], oprnd1);
1173
1174 return pattern_stmt;
1175 }
1176
1177
1178 /* Function vect_recog_sad_pattern
1179
1180 Try to find the following Sum of Absolute Difference (SAD) pattern:
1181
1182 type x_t, y_t;
1183 signed TYPE1 diff, abs_diff;
1184 TYPE2 sum = init;
1185 loop:
1186 sum_0 = phi <init, sum_1>
1187 S1 x_t = ...
1188 S2 y_t = ...
1189 S3 x_T = (TYPE1) x_t;
1190 S4 y_T = (TYPE1) y_t;
1191 S5 diff = x_T - y_T;
1192 S6 abs_diff = ABS_EXPR <diff>;
1193 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1194 S8 sum_1 = abs_diff + sum_0;
1195
1196 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1197 same size of 'TYPE1' or bigger. This is a special case of a reduction
1198 computation.
1199
1200 Input:
1201
1202 * STMT_VINFO: The stmt from which the pattern search begins. In the
1203 example, when this function is called with S8, the pattern
1204 {S3,S4,S5,S6,S7,S8} will be detected.
1205
1206 Output:
1207
1208 * TYPE_OUT: The type of the output of this pattern.
1209
1210 * Return value: A new stmt that will be used to replace the sequence of
1211 stmts that constitute the pattern. In this case it will be:
1212 SAD_EXPR <x_t, y_t, sum_0>
1213 */
1214
1215 static gimple *
vect_recog_sad_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1216 vect_recog_sad_pattern (vec_info *vinfo,
1217 stmt_vec_info stmt_vinfo, tree *type_out)
1218 {
1219 gimple *last_stmt = stmt_vinfo->stmt;
1220 tree half_type;
1221
1222 /* Look for the following pattern
1223 DX = (TYPE1) X;
1224 DY = (TYPE1) Y;
1225 DDIFF = DX - DY;
1226 DAD = ABS_EXPR <DDIFF>;
1227 DDPROD = (TYPE2) DPROD;
1228 sum_1 = DAD + sum_0;
1229 In which
1230 - DX is at least double the size of X
1231 - DY is at least double the size of Y
1232 - DX, DY, DDIFF, DAD all have the same type
1233 - sum is the same size of DAD or bigger
1234 - sum has been recognized as a reduction variable.
1235
1236 This is equivalent to:
1237 DDIFF = X w- Y; #widen sub
1238 DAD = ABS_EXPR <DDIFF>;
1239 sum_1 = DAD w+ sum_0; #widen summation
1240 or
1241 DDIFF = X w- Y; #widen sub
1242 DAD = ABS_EXPR <DDIFF>;
1243 sum_1 = DAD + sum_0; #summation
1244 */
1245
1246 /* Starting from LAST_STMT, follow the defs of its uses in search
1247 of the above pattern. */
1248
1249 tree plus_oprnd0, plus_oprnd1;
1250 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1251 &plus_oprnd0, &plus_oprnd1))
1252 return NULL;
1253
1254 tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt));
1255
1256 /* Any non-truncating sequence of conversions is OK here, since
1257 with a successful match, the result of the ABS(U) is known to fit
1258 within the nonnegative range of the result type. (It cannot be the
1259 negative of the minimum signed value due to the range of the widening
1260 MINUS_EXPR.) */
1261 vect_unpromoted_value unprom_abs;
1262 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1263 &unprom_abs);
1264
1265 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1266 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1267 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1268 Then check that plus_oprnd0 is defined by an abs_expr. */
1269
1270 if (!plus_oprnd0)
1271 return NULL;
1272
1273 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1274 if (!abs_stmt_vinfo)
1275 return NULL;
1276
1277 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1278 inside the loop (in case we are analyzing an outer-loop). */
1279 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1280 if (!abs_stmt
1281 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1282 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
1283 return NULL;
1284
1285 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
1286 tree abs_type = TREE_TYPE (abs_oprnd);
1287 if (TYPE_UNSIGNED (abs_type))
1288 return NULL;
1289
1290 /* Peel off conversions from the ABS input. This can involve sign
1291 changes (e.g. from an unsigned subtraction to a signed ABS input)
1292 or signed promotion, but it can't include unsigned promotion.
1293 (Note that ABS of an unsigned promotion should have been folded
1294 away before now anyway.) */
1295 vect_unpromoted_value unprom_diff;
1296 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1297 &unprom_diff);
1298 if (!abs_oprnd)
1299 return NULL;
1300 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1301 && TYPE_UNSIGNED (unprom_diff.type))
1302 return NULL;
1303
1304 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
1305 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1306 if (!diff_stmt_vinfo)
1307 return NULL;
1308
1309 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1310 inside the loop (in case we are analyzing an outer-loop). */
1311 vect_unpromoted_value unprom[2];
1312 if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
1313 false, 2, unprom, &half_type))
1314 return NULL;
1315
1316 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1317
1318 tree half_vectype;
1319 if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
1320 type_out, &half_vectype))
1321 return NULL;
1322
1323 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1324 tree sad_oprnd[2];
1325 vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type,
1326 unprom, half_vectype);
1327
1328 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
1329 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1330 sad_oprnd[1], plus_oprnd1);
1331
1332 return pattern_stmt;
1333 }
1334
1335 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1336 so that it can be treated as though it had the form:
1337
1338 A_TYPE a;
1339 B_TYPE b;
1340 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1341 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1342 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1343 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1344 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1345
1346 Try to replace the pattern with:
1347
1348 A_TYPE a;
1349 B_TYPE b;
1350 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1351 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1352 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1353 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1354
1355 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1356
1357 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1358 name of the pattern being matched, for dump purposes. */
1359
1360 static gimple *
vect_recog_widen_op_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out,tree_code orig_code,tree_code wide_code,bool shift_p,const char * name)1361 vect_recog_widen_op_pattern (vec_info *vinfo,
1362 stmt_vec_info last_stmt_info, tree *type_out,
1363 tree_code orig_code, tree_code wide_code,
1364 bool shift_p, const char *name)
1365 {
1366 gimple *last_stmt = last_stmt_info->stmt;
1367
1368 vect_unpromoted_value unprom[2];
1369 tree half_type;
1370 if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code,
1371 shift_p, 2, unprom, &half_type))
1372 return NULL;
1373
1374 /* Pattern detected. */
1375 vect_pattern_detected (name, last_stmt);
1376
1377 tree type = TREE_TYPE (gimple_get_lhs (last_stmt));
1378 tree itype = type;
1379 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1380 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1381 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1382 TYPE_UNSIGNED (half_type));
1383
1384 /* Check target support */
1385 tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
1386 tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
1387 tree ctype = itype;
1388 tree vecctype = vecitype;
1389 if (orig_code == MINUS_EXPR
1390 && TYPE_UNSIGNED (itype)
1391 && TYPE_PRECISION (type) > TYPE_PRECISION (itype))
1392 {
1393 /* Subtraction is special, even if half_type is unsigned and no matter
1394 whether type is signed or unsigned, if type is wider than itype,
1395 we need to sign-extend from the widening operation result to the
1396 result type.
1397 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1398 itype unsigned short and type either int or unsigned int.
1399 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1400 (unsigned short) 0xffff, but for type int we want the result -1
1401 and for type unsigned int 0xffffffff rather than 0xffff. */
1402 ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0);
1403 vecctype = get_vectype_for_scalar_type (vinfo, ctype);
1404 }
1405
1406 enum tree_code dummy_code;
1407 int dummy_int;
1408 auto_vec<tree> dummy_vec;
1409 if (!vectype
1410 || !vecitype
1411 || !vecctype
1412 || !supportable_widening_operation (vinfo, wide_code, last_stmt_info,
1413 vecitype, vectype,
1414 &dummy_code, &dummy_code,
1415 &dummy_int, &dummy_vec))
1416 return NULL;
1417
1418 *type_out = get_vectype_for_scalar_type (vinfo, type);
1419 if (!*type_out)
1420 return NULL;
1421
1422 tree oprnd[2];
1423 vect_convert_inputs (vinfo, last_stmt_info,
1424 2, oprnd, half_type, unprom, vectype);
1425
1426 tree var = vect_recog_temp_ssa_var (itype, NULL);
1427 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1428 oprnd[0], oprnd[1]);
1429
1430 if (vecctype != vecitype)
1431 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype,
1432 pattern_stmt, vecitype);
1433
1434 return vect_convert_output (vinfo, last_stmt_info,
1435 type, pattern_stmt, vecctype);
1436 }
1437
1438 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1439 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1440
1441 static gimple *
vect_recog_widen_mult_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1442 vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1443 tree *type_out)
1444 {
1445 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1446 MULT_EXPR, WIDEN_MULT_EXPR, false,
1447 "vect_recog_widen_mult_pattern");
1448 }
1449
1450 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1451 to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */
1452
1453 static gimple *
vect_recog_widen_plus_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1454 vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1455 tree *type_out)
1456 {
1457 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1458 PLUS_EXPR, WIDEN_PLUS_EXPR, false,
1459 "vect_recog_widen_plus_pattern");
1460 }
1461
1462 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1463 to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */
1464 static gimple *
vect_recog_widen_minus_pattern(vec_info * vinfo,stmt_vec_info last_stmt_info,tree * type_out)1465 vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
1466 tree *type_out)
1467 {
1468 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
1469 MINUS_EXPR, WIDEN_MINUS_EXPR, false,
1470 "vect_recog_widen_minus_pattern");
1471 }
1472
1473 /* Function vect_recog_popcount_pattern
1474
1475 Try to find the following pattern:
1476
1477 UTYPE1 A;
1478 TYPE1 B;
1479 UTYPE2 temp_in;
1480 TYPE3 temp_out;
1481 temp_in = (UTYPE2)A;
1482
1483 temp_out = __builtin_popcount{,l,ll} (temp_in);
1484 B = (TYPE1) temp_out;
1485
1486 TYPE2 may or may not be equal to TYPE3.
1487 i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
1488 i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
1489
1490 Input:
1491
1492 * STMT_VINFO: The stmt from which the pattern search begins.
1493 here it starts with B = (TYPE1) temp_out;
1494
1495 Output:
1496
1497 * TYPE_OUT: The vector type of the output of this pattern.
1498
1499 * Return value: A new stmt that will be used to replace the sequence of
1500 stmts that constitute the pattern. In this case it will be:
1501 B = .POPCOUNT (A);
1502 */
1503
1504 static gimple *
vect_recog_popcount_pattern(vec_info * vinfo,stmt_vec_info stmt_vinfo,tree * type_out)1505 vect_recog_popcount_pattern (vec_info *vinfo,
1506 stmt_vec_info stmt_vinfo, tree *type_out)
1507 {
1508 gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
1509 gimple *popcount_stmt, *pattern_stmt;
1510 tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var;
1511 auto_vec<tree> vargs;
1512
1513 /* Find B = (TYPE1) temp_out. */
1514 if (!last_stmt)
1515 return NULL;
1516 tree_code code = gimple_assign_rhs_code (last_stmt);
1517 if (!CONVERT_EXPR_CODE_P (code))
1518 return NULL;
1519
1520 lhs_oprnd = gimple_assign_lhs (last_stmt);
1521 lhs_type = TREE_TYPE (lhs_oprnd);
1522 if (!INTEGRAL_TYPE_P (lhs_type))
1523 return NULL;
1524
1525 rhs_oprnd = gimple_assign_rhs1 (last_stmt);
1526 if (TREE_CODE (rhs_oprnd) != SSA_NAME
1527 || !has_single_use (rhs_oprnd))
1528 return NULL;
1529 popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd);
1530
1531 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
1532 if (!is_gimple_call (popcount_stmt))
1533 return NULL;
1534 switch (gimple_call_combined_fn (popcount_stmt))
1535 {
1536 CASE_CFN_POPCOUNT:
1537 break;
1538 default:
1539 return NULL;
1540 }
1541
1542 if (gimple_call_num_args (popcount_stmt) != 1)
1543 return NULL;
1544
1545 rhs_oprnd = gimple_call_arg (popcount_stmt, 0);
1546 vect_unpromoted_value unprom_diff;
1547 rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd,
1548 &unprom_diff);
1549
1550 if (!rhs_origin)
1551 return NULL;
1552
1553 /* Input and output of .POPCOUNT should be same-precision integer.
1554 Also A should be unsigned or same precision as temp_in,
1555 otherwise there would be sign_extend from A to temp_in. */
1556 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)
1557 || (!TYPE_UNSIGNED (unprom_diff.type)
1558 && (TYPE_PRECISION (unprom_diff.type)
1559 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd)))))
1560 return NULL;
1561 vargs.safe_push (unprom_diff.op);
1562
1563   vect_pattern_detected ("vect_recog_popcount_pattern", popcount_stmt);
1564 vec_type = get_vectype_for_scalar_type (vinfo, lhs_type);
1565 /* Do it only if the backend has popcount<vector_mode>2 pattern. */
1566 if (!vec_type
1567 || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type,
1568 OPTIMIZE_FOR_SPEED))
1569 return NULL;
1570
1571 /* Create B = .POPCOUNT (A). */
1572 new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1573 pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs);
1574 gimple_call_set_lhs (pattern_stmt, new_var);
1575 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1576 *type_out = vec_type;
1577
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location,
1580 "created pattern stmt: %G", pattern_stmt);
1581 return pattern_stmt;
1582 }
1583
1584 /* Function vect_recog_pow_pattern
1585
1586 Try to find the following pattern:
1587
1588 x = POW (y, N);
1589
1590 with POW being one of pow, powf, powi, powif and N being
1591 either 2 or 0.5.
1592
1593 Input:
1594
1595 * STMT_VINFO: The stmt from which the pattern search begins.
1596
1597 Output:
1598
1599 * TYPE_OUT: The type of the output of this pattern.
1600
1601 * Return value: A new stmt that will be used to replace the sequence of
1602 stmts that constitute the pattern. In this case it will be:
1603 x = x * x
1604 or
1605 x = sqrt (x)
1606 */
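
/* Illustrative only: subject to the checks below, a call such as

     x = pow (y, 2.0);

   is replaced by x = y * y, and

     x = pow (y, 0.5);

   by a .SQRT internal call, in each case only if the corresponding vector
   operation is supported by the target.  */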
1607
1608 static gimple *
1609 vect_recog_pow_pattern (vec_info *vinfo,
1610 stmt_vec_info stmt_vinfo, tree *type_out)
1611 {
1612 gimple *last_stmt = stmt_vinfo->stmt;
1613 tree base, exp;
1614 gimple *stmt;
1615 tree var;
1616
1617 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
1618 return NULL;
1619
1620 switch (gimple_call_combined_fn (last_stmt))
1621 {
1622 CASE_CFN_POW:
1623 CASE_CFN_POWI:
1624 break;
1625
1626 default:
1627 return NULL;
1628 }
1629
1630 base = gimple_call_arg (last_stmt, 0);
1631 exp = gimple_call_arg (last_stmt, 1);
1632 if (TREE_CODE (exp) != REAL_CST
1633 && TREE_CODE (exp) != INTEGER_CST)
1634 {
1635 if (flag_unsafe_math_optimizations
1636 && TREE_CODE (base) == REAL_CST
1637 && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
1638 {
1639 combined_fn log_cfn;
1640 built_in_function exp_bfn;
1641 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1642 {
1643 case BUILT_IN_POW:
1644 log_cfn = CFN_BUILT_IN_LOG;
1645 exp_bfn = BUILT_IN_EXP;
1646 break;
1647 case BUILT_IN_POWF:
1648 log_cfn = CFN_BUILT_IN_LOGF;
1649 exp_bfn = BUILT_IN_EXPF;
1650 break;
1651 case BUILT_IN_POWL:
1652 log_cfn = CFN_BUILT_IN_LOGL;
1653 exp_bfn = BUILT_IN_EXPL;
1654 break;
1655 default:
1656 return NULL;
1657 }
1658 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1659 tree exp_decl = builtin_decl_implicit (exp_bfn);
1660 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1661 does that, but if C is a power of 2, we want to use
1662 exp2 (log2 (C) * x) in the non-vectorized version, but for
1663 vectorization we don't have vectorized exp2. */
1664 if (logc
1665 && TREE_CODE (logc) == REAL_CST
1666 && exp_decl
1667 && lookup_attribute ("omp declare simd",
1668 DECL_ATTRIBUTES (exp_decl)))
1669 {
1670 cgraph_node *node = cgraph_node::get_create (exp_decl);
1671 if (node->simd_clones == NULL)
1672 {
1673 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1674 || node->definition)
1675 return NULL;
1676 expand_simd_clones (node);
1677 if (node->simd_clones == NULL)
1678 return NULL;
1679 }
1680 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1681 if (!*type_out)
1682 return NULL;
1683 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1684 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
1685 append_pattern_def_seq (vinfo, stmt_vinfo, g);
1686 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1687 g = gimple_build_call (exp_decl, 1, def);
1688 gimple_call_set_lhs (g, res);
1689 return g;
1690 }
1691 }
1692
1693 return NULL;
1694 }
1695
1696 /* We now have a pow or powi builtin function call with a constant
1697 exponent. */
1698
1699 /* Catch squaring. */
1700 if ((tree_fits_shwi_p (exp)
1701 && tree_to_shwi (exp) == 2)
1702 || (TREE_CODE (exp) == REAL_CST
1703 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
1704 {
1705 if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
1706 TREE_TYPE (base), type_out))
1707 return NULL;
1708
1709 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1710 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
1711 return stmt;
1712 }
1713
1714 /* Catch square root. */
1715 if (TREE_CODE (exp) == REAL_CST
1716 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
1717 {
1718 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
1719 if (*type_out
1720 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
1721 OPTIMIZE_FOR_SPEED))
1722 {
1723 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1724 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1725 gimple_call_set_lhs (stmt, var);
1726 gimple_call_set_nothrow (stmt, true);
1727 return stmt;
1728 }
1729 }
1730
1731 return NULL;
1732 }
1733
1734
1735 /* Function vect_recog_widen_sum_pattern
1736
1737 Try to find the following pattern:
1738
1739 type x_t;
1740 TYPE x_T, sum = init;
1741 loop:
1742 sum_0 = phi <init, sum_1>
1743 S1 x_t = *p;
1744 S2 x_T = (TYPE) x_t;
1745 S3 sum_1 = x_T + sum_0;
1746
1747    where type 'TYPE' is at least double the size of type 'type', i.e. we're
1748 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
1749 a special case of a reduction computation.
1750
1751 Input:
1752
1753 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
1754 when this function is called with S3, the pattern {S2,S3} will be detected.
1755
1756 Output:
1757
1758 * TYPE_OUT: The type of the output of this pattern.
1759
1760 * Return value: A new stmt that will be used to replace the sequence of
1761 stmts that constitute the pattern. In this case it will be:
1762 WIDEN_SUM <x_t, sum_0>
1763
1764 Note: The widening-sum idiom is a widening reduction pattern that is
1765 vectorized without preserving all the intermediate results. It
1766 produces only N/2 (widened) results (by summing up pairs of
1767 intermediate results) rather than all N results. Therefore, we
1768 cannot allow this pattern when we want to get all the results and in
1769 the correct order (as is the case when this computation is in an
1770    inner-loop nested in an outer-loop that is being vectorized).  */
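
/* A concrete instance of the pattern above (illustrative; names are made
   up):

     short *p;
     int sum = 0;
     for (int i = 0; i < n; ++i)
       sum += p[i];

   Here S2 widens each short and S3 adds it into the int accumulator;
   with target support for WIDEN_SUM_EXPR the widen-then-add sequence
   becomes a single widening-sum reduction.  */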
1771
1772 static gimple *
1773 vect_recog_widen_sum_pattern (vec_info *vinfo,
1774 stmt_vec_info stmt_vinfo, tree *type_out)
1775 {
1776 gimple *last_stmt = stmt_vinfo->stmt;
1777 tree oprnd0, oprnd1;
1778 tree type;
1779 gimple *pattern_stmt;
1780 tree var;
1781
1782 /* Look for the following pattern
1783 DX = (TYPE) X;
1784 sum_1 = DX + sum_0;
1785 In which DX is at least double the size of X, and sum_1 has been
1786 recognized as a reduction variable.
1787 */
1788
1789 /* Starting from LAST_STMT, follow the defs of its uses in search
1790 of the above pattern. */
1791
1792 if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR,
1793 &oprnd0, &oprnd1)
1794 || TREE_CODE (oprnd0) != SSA_NAME
1795 || !vinfo->lookup_def (oprnd0))
1796 return NULL;
1797
1798 type = TREE_TYPE (gimple_get_lhs (last_stmt));
1799
1800 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1801 we know that oprnd1 is the reduction variable (defined by a loop-header
1802 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1803 Left to check that oprnd0 is defined by a cast from type 'type' to type
1804 'TYPE'. */
1805
1806 vect_unpromoted_value unprom0;
1807 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1808 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1809 return NULL;
1810
1811 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1812
1813 if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
1814 unprom0.type, type_out))
1815 return NULL;
1816
1817 var = vect_recog_temp_ssa_var (type, NULL);
1818 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
1819
1820 return pattern_stmt;
1821 }
1822
1823 /* Recognize cases in which an operation is performed in one type WTYPE
1824 but could be done more efficiently in a narrower type NTYPE. For example,
1825 if we have:
1826
1827 ATYPE a; // narrower than NTYPE
1828 BTYPE b; // narrower than NTYPE
1829 WTYPE aw = (WTYPE) a;
1830 WTYPE bw = (WTYPE) b;
1831 WTYPE res = aw + bw; // only uses of aw and bw
1832
1833 then it would be more efficient to do:
1834
1835 NTYPE an = (NTYPE) a;
1836 NTYPE bn = (NTYPE) b;
1837 NTYPE resn = an + bn;
1838 WTYPE res = (WTYPE) resn;
1839
1840 Other situations include things like:
1841
1842 ATYPE a; // NTYPE or narrower
1843 WTYPE aw = (WTYPE) a;
1844 WTYPE res = aw + b;
1845
1846 when only "(NTYPE) res" is significant. In that case it's more efficient
1847 to truncate "b" and do the operation on NTYPE instead:
1848
1849 NTYPE an = (NTYPE) a;
1850 NTYPE bn = (NTYPE) b; // truncation
1851 NTYPE resn = an + bn;
1852 WTYPE res = (WTYPE) resn;
1853
1854 All users of "res" should then use "resn" instead, making the final
1855 statement dead (not marked as relevant). The final statement is still
1856 needed to maintain the type correctness of the IR.
1857
1858 vect_determine_precisions has already determined the minimum
1859    precision of the operation and the minimum precision required
1860 by users of the result. */
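
/* As an illustrative sketch of the transformation (types chosen only as
   an example):

     unsigned char a, b;
     int res = (int) a + (int) b;

   where only the low 16 bits of res are ever needed, can be narrowed to

     unsigned short an = a, bn = b;
     unsigned short resn = an + bn;
     int res = (int) resn;

   so that twice as many elements fit into each vector.  */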
1861
1862 static gimple *
1863 vect_recog_over_widening_pattern (vec_info *vinfo,
1864 stmt_vec_info last_stmt_info, tree *type_out)
1865 {
1866 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1867 if (!last_stmt)
1868 return NULL;
1869
1870 /* See whether we have found that this operation can be done on a
1871 narrower type without changing its semantics. */
1872 unsigned int new_precision = last_stmt_info->operation_precision;
1873 if (!new_precision)
1874 return NULL;
1875
1876 tree lhs = gimple_assign_lhs (last_stmt);
1877 tree type = TREE_TYPE (lhs);
1878 tree_code code = gimple_assign_rhs_code (last_stmt);
1879
1880 /* Punt for reductions where we don't handle the type conversions. */
1881 if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def)
1882 return NULL;
1883
1884 /* Keep the first operand of a COND_EXPR as-is: only the other two
1885 operands are interesting. */
1886 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1887
1888 /* Check the operands. */
1889 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1890 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1891 unprom.quick_grow (nops);
1892 unsigned int min_precision = 0;
1893 bool single_use_p = false;
1894 for (unsigned int i = 0; i < nops; ++i)
1895 {
1896 tree op = gimple_op (last_stmt, first_op + i);
1897 if (TREE_CODE (op) == INTEGER_CST)
1898 unprom[i].set_op (op, vect_constant_def);
1899 else if (TREE_CODE (op) == SSA_NAME)
1900 {
1901 bool op_single_use_p = true;
1902 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1903 &op_single_use_p))
1904 return NULL;
1905 /* If:
1906
1907 (1) N bits of the result are needed;
1908 (2) all inputs are widened from M<N bits; and
1909 (3) one operand OP is a single-use SSA name
1910
1911 we can shift the M->N widening from OP to the output
1912 without changing the number or type of extensions involved.
1913 This then reduces the number of copies of STMT_INFO.
1914
1915 If instead of (3) more than one operand is a single-use SSA name,
1916 shifting the extension to the output is even more of a win.
1917
1918 If instead:
1919
1920 (1) N bits of the result are needed;
1921 (2) one operand OP2 is widened from M2<N bits;
1922 (3) another operand OP1 is widened from M1<M2 bits; and
1923 (4) both OP1 and OP2 are single-use
1924
1925 the choice is between:
1926
1927 (a) truncating OP2 to M1, doing the operation on M1,
1928 and then widening the result to N
1929
1930 (b) widening OP1 to M2, doing the operation on M2, and then
1931 widening the result to N
1932
1933 Both shift the M2->N widening of the inputs to the output.
1934 (a) additionally shifts the M1->M2 widening to the output;
1935 it requires fewer copies of STMT_INFO but requires an extra
1936 M2->M1 truncation.
1937
1938 Which is better will depend on the complexity and cost of
1939 STMT_INFO, which is hard to predict at this stage. However,
1940 a clear tie-breaker in favor of (b) is the fact that the
1941 truncation in (a) increases the length of the operation chain.
1942
1943 If instead of (4) only one of OP1 or OP2 is single-use,
1944 (b) is still a win over doing the operation in N bits:
1945 it still shifts the M2->N widening on the single-use operand
1946 to the output and reduces the number of STMT_INFO copies.
1947
1948 If neither operand is single-use then operating on fewer than
1949 N bits might lead to more extensions overall. Whether it does
1950 or not depends on global information about the vectorization
1951 region, and whether that's a good trade-off would again
1952 depend on the complexity and cost of the statements involved,
1953 as well as things like register pressure that are not normally
1954 modelled at this stage. We therefore ignore these cases
1955 and just optimize the clear single-use wins above.
1956
1957 Thus we take the maximum precision of the unpromoted operands
1958 and record whether any operand is single-use. */
1959 if (unprom[i].dt == vect_internal_def)
1960 {
1961 min_precision = MAX (min_precision,
1962 TYPE_PRECISION (unprom[i].type));
1963 single_use_p |= op_single_use_p;
1964 }
1965 }
1966 else
1967 return NULL;
1968 }
1969
1970 /* Although the operation could be done in operation_precision, we have
1971 to balance that against introducing extra truncations or extensions.
1972 Calculate the minimum precision that can be handled efficiently.
1973
1974 The loop above determined that the operation could be handled
1975 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1976 extension from the inputs to the output without introducing more
1977 instructions, and would reduce the number of instructions required
1978 for STMT_INFO itself.
1979
1980 vect_determine_precisions has also determined that the result only
1981 needs min_output_precision bits. Truncating by a factor of N times
1982 requires a tree of N - 1 instructions, so if TYPE is N times wider
1983 than min_output_precision, doing the operation in TYPE and truncating
1984 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1985 In contrast:
1986
1987 - truncating the input to a unary operation and doing the operation
1988 in the new type requires at most N - 1 + 1 = N instructions per
1989 output vector
1990
1991 - doing the same for a binary operation requires at most
1992 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1993
1994 Both unary and binary operations require fewer instructions than
1995 this if the operands were extended from a suitable truncated form.
1996 Thus there is usually nothing to lose by doing operations in
1997 min_output_precision bits, but there can be something to gain. */
1998 if (!single_use_p)
1999 min_precision = last_stmt_info->min_output_precision;
2000 else
2001 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
2002
2003 /* Apply the minimum efficient precision we just calculated. */
2004 if (new_precision < min_precision)
2005 new_precision = min_precision;
2006 new_precision = vect_element_precision (new_precision);
2007 if (new_precision >= TYPE_PRECISION (type))
2008 return NULL;
2009
2010 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
2011
2012 *type_out = get_vectype_for_scalar_type (vinfo, type);
2013 if (!*type_out)
2014 return NULL;
2015
2016 /* We've found a viable pattern. Get the new type of the operation. */
2017 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
2018 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
2019
2020 /* If we're truncating an operation, we need to make sure that we
2021 don't introduce new undefined overflow. The codes tested here are
2022 a subset of those accepted by vect_truncatable_operation_p. */
2023 tree op_type = new_type;
2024 if (TYPE_OVERFLOW_UNDEFINED (new_type)
2025 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
2026 op_type = build_nonstandard_integer_type (new_precision, true);
2027
2028 /* We specifically don't check here whether the target supports the
2029 new operation, since it might be something that a later pattern
2030 wants to rewrite anyway. If targets have a minimum element size
2031 for some optabs, we should pattern-match smaller ops to larger ops
2032 where beneficial. */
2033 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2034 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
2035 if (!new_vectype || !op_vectype)
2036 return NULL;
2037
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
2040 type, new_type);
2041
2042 /* Calculate the rhs operands for an operation on OP_TYPE. */
2043 tree ops[3] = {};
2044 for (unsigned int i = 1; i < first_op; ++i)
2045 ops[i - 1] = gimple_op (last_stmt, i);
2046 vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1],
2047 op_type, &unprom[0], op_vectype);
2048
2049 /* Use the operation to produce a result of type OP_TYPE. */
2050 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
2051 gimple *pattern_stmt = gimple_build_assign (new_var, code,
2052 ops[0], ops[1], ops[2]);
2053 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2054
2055 if (dump_enabled_p ())
2056 dump_printf_loc (MSG_NOTE, vect_location,
2057 "created pattern stmt: %G", pattern_stmt);
2058
2059 /* Convert back to the original signedness, if OP_TYPE is different
2060 from NEW_TYPE. */
2061 if (op_type != new_type)
2062 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type,
2063 pattern_stmt, op_vectype);
2064
2065 /* Promote the result to the original type. */
2066 pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type,
2067 pattern_stmt, new_vectype);
2068
2069 return pattern_stmt;
2070 }
2071
2072 /* Recognize the following patterns:
2073
2074 ATYPE a; // narrower than TYPE
2075 BTYPE b; // narrower than TYPE
2076
2077 1) Multiply high with scaling
2078 TYPE res = ((TYPE) a * (TYPE) b) >> c;
2079 Here, c is bitsize (TYPE) / 2 - 1.
2080
2081 2) ... or also with rounding
2082      TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
2083 Here, d is bitsize (TYPE) / 2 - 2.
2084
2085 3) Normal multiply high
2086 TYPE res = ((TYPE) a * (TYPE) b) >> e;
2087 Here, e is bitsize (TYPE) / 2.
2088
2089 where only the bottom half of res is used. */
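
/* For instance (illustrative, with 16-bit inputs and a 32-bit TYPE):

     short a, b;
     int prod = (int) a * (int) b;
     int res = prod >> 16;

   where only the low 16 bits of res are used, corresponds to case 3)
   and would map to IFN_MULH; shift amounts of 15 and 14 (the latter with
   the rounding add) would instead select IFN_MULHS and IFN_MULHRS.  */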
2090
2091 static gimple *
2092 vect_recog_mulhs_pattern (vec_info *vinfo,
2093 stmt_vec_info last_stmt_info, tree *type_out)
2094 {
2095 /* Check for a right shift. */
2096 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2097 if (!last_stmt
2098 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
2099 return NULL;
2100
2101 /* Check that the shift result is wider than the users of the
2102 result need (i.e. that narrowing would be a natural choice). */
2103 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
2104 unsigned int target_precision
2105 = vect_element_precision (last_stmt_info->min_output_precision);
2106 if (!INTEGRAL_TYPE_P (lhs_type)
2107 || target_precision >= TYPE_PRECISION (lhs_type))
2108 return NULL;
2109
2110 /* Look through any change in sign on the outer shift input. */
2111 vect_unpromoted_value unprom_rshift_input;
2112 tree rshift_input = vect_look_through_possible_promotion
2113 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
2114 if (!rshift_input
2115 || TYPE_PRECISION (TREE_TYPE (rshift_input))
2116 != TYPE_PRECISION (lhs_type))
2117 return NULL;
2118
2119 /* Get the definition of the shift input. */
2120 stmt_vec_info rshift_input_stmt_info
2121 = vect_get_internal_def (vinfo, rshift_input);
2122 if (!rshift_input_stmt_info)
2123 return NULL;
2124 gassign *rshift_input_stmt
2125 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
2126 if (!rshift_input_stmt)
2127 return NULL;
2128
2129 stmt_vec_info mulh_stmt_info;
2130 tree scale_term;
2131 bool rounding_p = false;
2132
2133 /* Check for the presence of the rounding term. */
2134 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
2135 {
2136 /* Check that the outer shift was by 1. */
2137 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
2138 return NULL;
2139
2140 /* Check that the second operand of the PLUS_EXPR is 1. */
2141 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
2142 return NULL;
2143
2144 /* Look through any change in sign on the addition input. */
2145 vect_unpromoted_value unprom_plus_input;
2146 tree plus_input = vect_look_through_possible_promotion
2147 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
2148 if (!plus_input
2149 || TYPE_PRECISION (TREE_TYPE (plus_input))
2150 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
2151 return NULL;
2152
2153 /* Get the definition of the multiply-high-scale part. */
2154 stmt_vec_info plus_input_stmt_info
2155 = vect_get_internal_def (vinfo, plus_input);
2156 if (!plus_input_stmt_info)
2157 return NULL;
2158 gassign *plus_input_stmt
2159 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
2160 if (!plus_input_stmt
2161 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
2162 return NULL;
2163
2164 /* Look through any change in sign on the scaling input. */
2165 vect_unpromoted_value unprom_scale_input;
2166 tree scale_input = vect_look_through_possible_promotion
2167 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
2168 if (!scale_input
2169 || TYPE_PRECISION (TREE_TYPE (scale_input))
2170 != TYPE_PRECISION (TREE_TYPE (plus_input)))
2171 return NULL;
2172
2173 /* Get the definition of the multiply-high part. */
2174 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
2175 if (!mulh_stmt_info)
2176 return NULL;
2177
2178 /* Get the scaling term. */
2179 scale_term = gimple_assign_rhs2 (plus_input_stmt);
2180 rounding_p = true;
2181 }
2182 else
2183 {
2184 mulh_stmt_info = rshift_input_stmt_info;
2185 scale_term = gimple_assign_rhs2 (last_stmt);
2186 }
2187
2188 /* Check that the scaling factor is constant. */
2189 if (TREE_CODE (scale_term) != INTEGER_CST)
2190 return NULL;
2191
2192 /* Check whether the scaling input term can be seen as two widened
2193 inputs multiplied together. */
2194 vect_unpromoted_value unprom_mult[2];
2195 tree new_type;
2196 unsigned int nops
2197 = vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
2198 false, 2, unprom_mult, &new_type);
2199 if (nops != 2)
2200 return NULL;
2201
2202 /* Adjust output precision. */
2203 if (TYPE_PRECISION (new_type) < target_precision)
2204 new_type = build_nonstandard_integer_type
2205 (target_precision, TYPE_UNSIGNED (new_type));
2206
2207 unsigned mult_precision = TYPE_PRECISION (new_type);
2208 internal_fn ifn;
2209 /* Check that the scaling factor is expected. Instead of
2210 target_precision, we should use the one that we actually
2211      use for the internal function.  */
2212 if (rounding_p)
2213 {
2214 /* Check pattern 2). */
2215 if (wi::to_widest (scale_term) + mult_precision + 2
2216 != TYPE_PRECISION (lhs_type))
2217 return NULL;
2218
2219 ifn = IFN_MULHRS;
2220 }
2221 else
2222 {
2223 /* Check for pattern 1). */
2224 if (wi::to_widest (scale_term) + mult_precision + 1
2225 == TYPE_PRECISION (lhs_type))
2226 ifn = IFN_MULHS;
2227 /* Check for pattern 3). */
2228 else if (wi::to_widest (scale_term) + mult_precision
2229 == TYPE_PRECISION (lhs_type))
2230 ifn = IFN_MULH;
2231 else
2232 return NULL;
2233 }
2234
2235 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
2236
2237 /* Check for target support. */
2238 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2239 if (!new_vectype
2240 || !direct_internal_fn_supported_p
2241 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2242 return NULL;
2243
2244 /* The IR requires a valid vector type for the cast result, even though
2245 it's likely to be discarded. */
2246 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2247 if (!*type_out)
2248 return NULL;
2249
2250 /* Generate the IFN_MULHRS call. */
2251 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2252 tree new_ops[2];
2253 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2254 unprom_mult, new_vectype);
2255 gcall *mulhrs_stmt
2256 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
2257 gimple_call_set_lhs (mulhrs_stmt, new_var);
2258 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
2259
2260 if (dump_enabled_p ())
2261 dump_printf_loc (MSG_NOTE, vect_location,
2262 "created pattern stmt: %G", mulhrs_stmt);
2263
2264 return vect_convert_output (vinfo, last_stmt_info, lhs_type,
2265 mulhrs_stmt, new_vectype);
2266 }
2267
2268 /* Recognize the patterns:
2269
2270 ATYPE a; // narrower than TYPE
2271 BTYPE b; // narrower than TYPE
2272 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
2273 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
2274
2275 where only the bottom half of avg is used. Try to transform them into:
2276
2277 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
2278 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
2279
2280 followed by:
2281
2282 TYPE avg = (TYPE) avg';
2283
2284 where NTYPE is no wider than half of TYPE. Since only the bottom half
2285 of avg is used, all or part of the cast of avg' should become redundant.
2286
2287 If there is no target support available, generate code to distribute rshift
2288 over plus and add a carry. */
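
/* A typical source form (illustrative; names are made up):

     unsigned char *a, *b, *res;
     for (int i = 0; i < n; ++i)
       res[i] = (a[i] + b[i] + 1) >> 1;

   matches variant (2) and becomes .AVG_CEIL where supported; without
   the "+ 1" it matches variant (1) and becomes .AVG_FLOOR.  If the
   target supports neither, the shift-and-carry fallback described above
   is emitted instead.  */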
2289
2290 static gimple *
2291 vect_recog_average_pattern (vec_info *vinfo,
2292 stmt_vec_info last_stmt_info, tree *type_out)
2293 {
2294 /* Check for a shift right by one bit. */
2295 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2296 if (!last_stmt
2297 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
2298 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
2299 return NULL;
2300
2301 /* Check that the shift result is wider than the users of the
2302 result need (i.e. that narrowing would be a natural choice). */
2303 tree lhs = gimple_assign_lhs (last_stmt);
2304 tree type = TREE_TYPE (lhs);
2305 unsigned int target_precision
2306 = vect_element_precision (last_stmt_info->min_output_precision);
2307 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
2308 return NULL;
2309
2310 /* Look through any change in sign on the shift input. */
2311 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
2312 vect_unpromoted_value unprom_plus;
2313 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
2314 &unprom_plus);
2315 if (!rshift_rhs
2316 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
2317 return NULL;
2318
2319 /* Get the definition of the shift input. */
2320 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
2321 if (!plus_stmt_info)
2322 return NULL;
2323
2324 /* Check whether the shift input can be seen as a tree of additions on
2325 2 or 3 widened inputs.
2326
2327 Note that the pattern should be a win even if the result of one or
2328 more additions is reused elsewhere: if the pattern matches, we'd be
2329 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
2330 internal_fn ifn = IFN_AVG_FLOOR;
2331 vect_unpromoted_value unprom[3];
2332 tree new_type;
2333 unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
2334 WIDEN_PLUS_EXPR, false, 3,
2335 unprom, &new_type);
2336 if (nops == 0)
2337 return NULL;
2338 if (nops == 3)
2339 {
2340 /* Check that one operand is 1. */
2341 unsigned int i;
2342 for (i = 0; i < 3; ++i)
2343 if (integer_onep (unprom[i].op))
2344 break;
2345 if (i == 3)
2346 return NULL;
2347 /* Throw away the 1 operand and keep the other two. */
2348 if (i < 2)
2349 unprom[i] = unprom[2];
2350 ifn = IFN_AVG_CEIL;
2351 }
2352
2353 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2354
2355 /* We know that:
2356
2357 (a) the operation can be viewed as:
2358
2359 TYPE widened0 = (TYPE) UNPROM[0];
2360 TYPE widened1 = (TYPE) UNPROM[1];
2361 TYPE tmp1 = widened0 + widened1 {+ 1};
2362 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2363
2364 (b) the first two statements are equivalent to:
2365
2366 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2367 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2368
2369 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2370 where sensible;
2371
2372 (d) all the operations can be performed correctly at twice the width of
2373 NEW_TYPE, due to the nature of the average operation; and
2374
2375 (e) users of the result of the right shift need only TARGET_PRECISION
2376 bits, where TARGET_PRECISION is no more than half of TYPE's
2377 precision.
2378
2379 Under these circumstances, the only situation in which NEW_TYPE
2380 could be narrower than TARGET_PRECISION is if widened0, widened1
2381 and an addition result are all used more than once. Thus we can
2382 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2383 as "free", whereas widening the result of the average instruction
2384 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2385 therefore better not to go narrower than TARGET_PRECISION. */
2386 if (TYPE_PRECISION (new_type) < target_precision)
2387 new_type = build_nonstandard_integer_type (target_precision,
2388 TYPE_UNSIGNED (new_type));
2389
2390 /* Check for target support. */
2391 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
2392 if (!new_vectype)
2393 return NULL;
2394
2395 bool fallback_p = false;
2396
2397 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2398 ;
2399 else if (TYPE_UNSIGNED (new_type)
2400 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2401 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2402 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2403 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2404 fallback_p = true;
2405 else
2406 return NULL;
2407
2408 /* The IR requires a valid vector type for the cast result, even though
2409 it's likely to be discarded. */
2410 *type_out = get_vectype_for_scalar_type (vinfo, type);
2411 if (!*type_out)
2412 return NULL;
2413
2414 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2415 tree new_ops[2];
2416 vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type,
2417 unprom, new_vectype);
2418
2419 if (fallback_p)
2420 {
2421 /* As a fallback, generate code for following sequence:
2422
2423 shifted_op0 = new_ops[0] >> 1;
2424 shifted_op1 = new_ops[1] >> 1;
2425 sum_of_shifted = shifted_op0 + shifted_op1;
2426 unmasked_carry = new_ops[0] and/or new_ops[1];
2427 carry = unmasked_carry & 1;
2428 new_var = sum_of_shifted + carry;
2429 */
2430
2431 tree one_cst = build_one_cst (new_type);
2432 gassign *g;
2433
2434 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2435 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2436 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2437
2438 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2439 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2440 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2441
2442 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2443 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2444 shifted_op0, shifted_op1);
2445 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2446
2447 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2448 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2449 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2450 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2451
2452 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2453 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2454 append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype);
2455
2456 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2457 return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype);
2458 }
2459
2460 /* Generate the IFN_AVG* call. */
2461 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2462 new_ops[1]);
2463 gimple_call_set_lhs (average_stmt, new_var);
2464 gimple_set_location (average_stmt, gimple_location (last_stmt));
2465
2466 if (dump_enabled_p ())
2467 dump_printf_loc (MSG_NOTE, vect_location,
2468 "created pattern stmt: %G", average_stmt);
2469
2470 return vect_convert_output (vinfo, last_stmt_info,
2471 type, average_stmt, new_vectype);
2472 }
2473
2474 /* Recognize cases in which the input to a cast is wider than its
2475 output, and the input is fed by a widening operation. Fold this
2476 by removing the unnecessary intermediate widening. E.g.:
2477
2478 unsigned char a;
2479 unsigned int b = (unsigned int) a;
2480 unsigned short c = (unsigned short) b;
2481
2482 -->
2483
2484 unsigned short c = (unsigned short) a;
2485
2486 Although this is rare in input IR, it is an expected side-effect
2487 of the over-widening pattern above.
2488
2489 This is beneficial also for integer-to-float conversions, if the
2490 widened integer has more bits than the float, and if the unwidened
2491 input doesn't. */
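
/* An integer-to-float instance of the same idea (illustrative):

     unsigned char a;
     unsigned long long b = a;
     float c = (float) b;

   Here the widening to unsigned long long is unnecessary for the float
   conversion, so the cast is applied to the unpromoted value instead.  */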
2492
2493 static gimple *
2494 vect_recog_cast_forwprop_pattern (vec_info *vinfo,
2495 stmt_vec_info last_stmt_info, tree *type_out)
2496 {
2497 /* Check for a cast, including an integer-to-float conversion. */
2498 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
2499 if (!last_stmt)
2500 return NULL;
2501 tree_code code = gimple_assign_rhs_code (last_stmt);
2502 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2503 return NULL;
2504
2505 /* Make sure that the rhs is a scalar with a natural bitsize. */
2506 tree lhs = gimple_assign_lhs (last_stmt);
2507 if (!lhs)
2508 return NULL;
2509 tree lhs_type = TREE_TYPE (lhs);
2510 scalar_mode lhs_mode;
2511 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2512 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2513 return NULL;
2514
2515 /* Check for a narrowing operation (from a vector point of view). */
2516 tree rhs = gimple_assign_rhs1 (last_stmt);
2517 tree rhs_type = TREE_TYPE (rhs);
2518 if (!INTEGRAL_TYPE_P (rhs_type)
2519 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2520 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2521 return NULL;
2522
2523 /* Try to find an unpromoted input. */
2524 vect_unpromoted_value unprom;
2525 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2526 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2527 return NULL;
2528
2529 /* If the bits above RHS_TYPE matter, make sure that they're the
2530 same when extending from UNPROM as they are when extending from RHS. */
2531 if (!INTEGRAL_TYPE_P (lhs_type)
2532 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2533 return NULL;
2534
2535 /* We can get the same result by casting UNPROM directly, to avoid
2536 the unnecessary widening and narrowing. */
2537 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
2538
2539 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
2540 if (!*type_out)
2541 return NULL;
2542
2543 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2544 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2545 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
2546
2547 return pattern_stmt;
2548 }
2549
2550 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2551 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
2552
2553 static gimple *
2554 vect_recog_widen_shift_pattern (vec_info *vinfo,
2555 stmt_vec_info last_stmt_info, tree *type_out)
2556 {
2557 return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
2558 LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true,
2559 "vect_recog_widen_shift_pattern");
2560 }
2561
2562 /* Detect a rotate pattern that wouldn't otherwise be vectorized:
2563
2564 type a_t, b_t, c_t;
2565
2566 S0 a_t = b_t r<< c_t;
2567
2568 Input/Output:
2569
2570 * STMT_VINFO: The stmt from which the pattern search begins,
2571 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2572 with a sequence:
2573
2574 S1 d_t = -c_t;
2575 S2 e_t = d_t & (B - 1);
2576 S3 f_t = b_t << c_t;
2577 S4 g_t = b_t >> e_t;
2578 S0 a_t = f_t | g_t;
2579
2580 where B is element bitsize of type.
2581
2582 Output:
2583
2584 * TYPE_OUT: The type of the output of this pattern.
2585
2586 * Return value: A new stmt that will be used to replace the rotate
2587 S0 stmt. */
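
/* For example (illustrative), with a 32-bit unsigned type and no rotate
   support in the target,

     a_t = b_t r<< c_t;

   is emitted via the sequence above as roughly

     e_t = (-c_t) & 31;
     a_t = (b_t << c_t) | (b_t >> e_t);

   which needs only vector shifts and bitwise operations.  */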
2588
2589 static gimple *
2590 vect_recog_rotate_pattern (vec_info *vinfo,
2591 stmt_vec_info stmt_vinfo, tree *type_out)
2592 {
2593 gimple *last_stmt = stmt_vinfo->stmt;
2594 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
2595 gimple *pattern_stmt, *def_stmt;
2596 enum tree_code rhs_code;
2597 enum vect_def_type dt;
2598 optab optab1, optab2;
2599 edge ext_def = NULL;
2600 bool bswap16_p = false;
2601
2602 if (is_gimple_assign (last_stmt))
2603 {
2604 rhs_code = gimple_assign_rhs_code (last_stmt);
2605 switch (rhs_code)
2606 {
2607 case LROTATE_EXPR:
2608 case RROTATE_EXPR:
2609 break;
2610 default:
2611 return NULL;
2612 }
2613
2614 lhs = gimple_assign_lhs (last_stmt);
2615 oprnd0 = gimple_assign_rhs1 (last_stmt);
2616 type = TREE_TYPE (oprnd0);
2617 oprnd1 = gimple_assign_rhs2 (last_stmt);
2618 }
2619 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
2620 {
2621 /* __builtin_bswap16 (x) is another form of x r>> 8.
2622 The vectorizer has bswap support, but only if the argument isn't
2623 promoted. */
2624 lhs = gimple_call_lhs (last_stmt);
2625 oprnd0 = gimple_call_arg (last_stmt, 0);
2626 type = TREE_TYPE (oprnd0);
2627 if (!lhs
2628 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
2629 || TYPE_PRECISION (type) <= 16
2630 || TREE_CODE (oprnd0) != SSA_NAME
2631 || BITS_PER_UNIT != 8
2632 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2633 return NULL;
2634
2635 stmt_vec_info def_stmt_info;
2636 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2637 return NULL;
2638
2639 if (dt != vect_internal_def)
2640 return NULL;
2641
2642 if (gimple_assign_cast_p (def_stmt))
2643 {
2644 def = gimple_assign_rhs1 (def_stmt);
2645 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2646 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2647 oprnd0 = def;
2648 }
2649
2650 type = TREE_TYPE (lhs);
2651 vectype = get_vectype_for_scalar_type (vinfo, type);
2652 if (vectype == NULL_TREE)
2653 return NULL;
2654
2655 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2656 {
2657 /* The encoding uses one stepped pattern for each byte in the
2658 16-bit word. */
2659 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2660 for (unsigned i = 0; i < 3; ++i)
2661 for (unsigned j = 0; j < 2; ++j)
2662 elts.quick_push ((i + 1) * 2 - j - 1);
2663
2664 vec_perm_indices indices (elts, 1,
2665 TYPE_VECTOR_SUBPARTS (char_vectype));
2666 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2667 {
2668 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2669 undo the argument promotion. */
2670 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2671 {
2672 def = vect_recog_temp_ssa_var (type, NULL);
2673 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2674 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2675 oprnd0 = def;
2676 }
2677
2678 /* Pattern detected. */
2679 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2680
2681 *type_out = vectype;
2682
2683 /* Pattern supported. Create a stmt to be used to replace the
2684 pattern, with the unpromoted argument. */
2685 var = vect_recog_temp_ssa_var (type, NULL);
2686 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2687 1, oprnd0);
2688 gimple_call_set_lhs (pattern_stmt, var);
2689 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2690 gimple_call_fntype (last_stmt));
2691 return pattern_stmt;
2692 }
2693 }
2694
2695 oprnd1 = build_int_cst (integer_type_node, 8);
2696 rhs_code = LROTATE_EXPR;
2697 bswap16_p = true;
2698 }
2699 else
2700 return NULL;
2701
2702 if (TREE_CODE (oprnd0) != SSA_NAME
2703 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2704 || !INTEGRAL_TYPE_P (type)
2705 || !TYPE_UNSIGNED (type))
2706 return NULL;
2707
2708 stmt_vec_info def_stmt_info;
2709 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
2710 return NULL;
2711
2712 if (dt != vect_internal_def
2713 && dt != vect_constant_def
2714 && dt != vect_external_def)
2715 return NULL;
2716
2717 vectype = get_vectype_for_scalar_type (vinfo, type);
2718 if (vectype == NULL_TREE)
2719 return NULL;
2720
2721 /* If vector/vector or vector/scalar rotate is supported by the target,
2722 don't do anything here. */
2723 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2724 if (optab1
2725 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2726 {
2727 use_rotate:
2728 if (bswap16_p)
2729 {
2730 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2731 {
2732 def = vect_recog_temp_ssa_var (type, NULL);
2733 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2734 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2735 oprnd0 = def;
2736 }
2737
2738 /* Pattern detected. */
2739 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2740
2741 *type_out = vectype;
2742
2743 /* Pattern supported. Create a stmt to be used to replace the
2744 pattern. */
2745 var = vect_recog_temp_ssa_var (type, NULL);
2746 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2747 oprnd1);
2748 return pattern_stmt;
2749 }
2750 return NULL;
2751 }
2752
2753 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
2754 {
2755 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2756 if (optab2
2757 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2758 goto use_rotate;
2759 }
2760
2761 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2762 don't do anything here either. */
2763 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2764 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2765 if (!optab1
2766 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2767 || !optab2
2768 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2769 {
2770 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
2771 return NULL;
2772 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2773 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2774 if (!optab1
2775 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2776 || !optab2
2777 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2778 return NULL;
2779 }
2780
2781 *type_out = vectype;
2782
2783 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2784 {
2785 def = vect_recog_temp_ssa_var (type, NULL);
2786 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2787 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2788 oprnd0 = def;
2789 }
2790
2791 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
2792 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
2793
2794 def = NULL_TREE;
2795 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
2796 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
2797 def = oprnd1;
2798 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2799 {
2800 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2801 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
2802 && TYPE_PRECISION (TREE_TYPE (rhs1))
2803 == TYPE_PRECISION (type))
2804 def = rhs1;
2805 }
2806
2807 if (def == NULL_TREE)
2808 {
2809 def = vect_recog_temp_ssa_var (type, NULL);
2810 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2811 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2812 }
2813 stype = TREE_TYPE (def);
2814
2815 if (TREE_CODE (def) == INTEGER_CST)
2816 {
2817 if (!tree_fits_uhwi_p (def)
2818 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
2819 || integer_zerop (def))
2820 return NULL;
2821 def2 = build_int_cst (stype,
2822 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
2823 }
2824 else
2825 {
2826 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
2827
2828 if (vecstype == NULL_TREE)
2829 return NULL;
2830 def2 = vect_recog_temp_ssa_var (stype, NULL);
2831 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
2832 if (ext_def)
2833 {
2834 basic_block new_bb
2835 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2836 gcc_assert (!new_bb);
2837 }
2838 else
2839 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2840
2841 def2 = vect_recog_temp_ssa_var (stype, NULL);
2842 tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1);
2843 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2844 gimple_assign_lhs (def_stmt), mask);
2845 if (ext_def)
2846 {
2847 basic_block new_bb
2848 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2849 gcc_assert (!new_bb);
2850 }
2851 else
2852 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2853 }
2854
2855 var1 = vect_recog_temp_ssa_var (type, NULL);
2856 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2857 ? LSHIFT_EXPR : RSHIFT_EXPR,
2858 oprnd0, def);
2859 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2860
2861 var2 = vect_recog_temp_ssa_var (type, NULL);
2862 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2863 ? RSHIFT_EXPR : LSHIFT_EXPR,
2864 oprnd0, def2);
2865 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2866
2867 /* Pattern detected. */
2868 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2869
2870 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2871 var = vect_recog_temp_ssa_var (type, NULL);
2872 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
2873
2874 return pattern_stmt;
2875 }
2876
2877 /* Detect a vector by vector shift pattern that wouldn't be otherwise
2878 vectorized:
2879
2880 type a_t;
2881 TYPE b_T, res_T;
2882
2883 S1 a_t = ;
2884 S2 b_T = ;
2885 S3 res_T = b_T op a_t;
2886
2887 where type 'TYPE' is a type with different size than 'type',
2888 and op is <<, >> or rotate.
2889
2890 Also detect cases:
2891
2892 type a_t;
2893 TYPE b_T, c_T, res_T;
2894
2895 S0 c_T = ;
2896 S1 a_t = (type) c_T;
2897 S2 b_T = ;
2898 S3 res_T = b_T op a_t;
2899
2900 Input/Output:
2901
2902 * STMT_VINFO: The stmt from which the pattern search begins,
2903 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2904 with a shift/rotate which has same type on both operands, in the
2905 second case just b_T op c_T, in the first case with added cast
2906 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
2907
2908 Output:
2909
2910 * TYPE_OUT: The type of the output of this pattern.
2911
2912 * Return value: A new stmt that will be used to replace the shift/rotate
2913 S3 stmt. */
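
/* For example (illustrative):

     unsigned char a_t;
     long long b_T, res_T;
     res_T = b_T << a_t;

   where a_t is computed inside the loop.  The shift amount is converted
   to b_T's type in the pattern definition sequence so that both operands
   of the vector shift have the same vector type.  */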
2914
2915 static gimple *
2916 vect_recog_vector_vector_shift_pattern (vec_info *vinfo,
2917 stmt_vec_info stmt_vinfo,
2918 tree *type_out)
2919 {
2920 gimple *last_stmt = stmt_vinfo->stmt;
2921 tree oprnd0, oprnd1, lhs, var;
2922 gimple *pattern_stmt;
2923 enum tree_code rhs_code;
2924
2925 if (!is_gimple_assign (last_stmt))
2926 return NULL;
2927
2928 rhs_code = gimple_assign_rhs_code (last_stmt);
2929 switch (rhs_code)
2930 {
2931 case LSHIFT_EXPR:
2932 case RSHIFT_EXPR:
2933 case LROTATE_EXPR:
2934 case RROTATE_EXPR:
2935 break;
2936 default:
2937 return NULL;
2938 }
2939
2940 lhs = gimple_assign_lhs (last_stmt);
2941 oprnd0 = gimple_assign_rhs1 (last_stmt);
2942 oprnd1 = gimple_assign_rhs2 (last_stmt);
2943 if (TREE_CODE (oprnd0) != SSA_NAME
2944 || TREE_CODE (oprnd1) != SSA_NAME
2945 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2946 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
2947 || TYPE_PRECISION (TREE_TYPE (lhs))
2948 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2949 return NULL;
2950
2951 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2952 if (!def_vinfo)
2953 return NULL;
2954
2955 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
2956 if (*type_out == NULL_TREE)
2957 return NULL;
2958
2959 tree def = NULL_TREE;
2960 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
2961 if (def_stmt && gimple_assign_cast_p (def_stmt))
2962 {
2963 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2964 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2965 && TYPE_PRECISION (TREE_TYPE (rhs1))
2966 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2967 {
2968 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2969 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2970 def = rhs1;
2971 else
2972 {
2973 tree mask
2974 = build_low_bits_mask (TREE_TYPE (rhs1),
2975 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2976 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2977 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
2978 tree vecstype = get_vectype_for_scalar_type (vinfo,
2979 TREE_TYPE (rhs1));
2980 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype);
2981 }
2982 }
2983 }
2984
2985 if (def == NULL_TREE)
2986 {
2987 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2988 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
2989 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
2990 }
2991
2992 /* Pattern detected. */
2993 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
2994
2995 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2996 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2997 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
2998
2999 return pattern_stmt;
3000 }
3001
3002 /* Return true iff the target has a vector optab implementing the operation
3003 CODE on type VECTYPE. */
3004
3005 static bool
3006 target_has_vecop_for_code (tree_code code, tree vectype)
3007 {
3008 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
3009 return voptab
3010 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
3011 }
3012
3013 /* Verify that the target has optabs of VECTYPE to perform all the steps
3014 needed by the multiplication-by-immediate synthesis algorithm described by
3015 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
3016 present. Return true iff the target supports all the steps. */
3017
3018 static bool
3019 target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
3020 tree vectype, bool synth_shift_p)
3021 {
3022 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
3023 return false;
3024
3025 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
3026 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
3027
3028 if (var == negate_variant
3029 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
3030 return false;
3031
3032 /* If we must synthesize shifts with additions make sure that vector
3033 addition is available. */
3034 if ((var == add_variant || synth_shift_p) && !supports_vplus)
3035 return false;
3036
3037 for (int i = 1; i < alg->ops; i++)
3038 {
3039 switch (alg->op[i])
3040 {
3041 case alg_shift:
3042 break;
3043 case alg_add_t_m2:
3044 case alg_add_t2_m:
3045 case alg_add_factor:
3046 if (!supports_vplus)
3047 return false;
3048 break;
3049 case alg_sub_t_m2:
3050 case alg_sub_t2_m:
3051 case alg_sub_factor:
3052 if (!supports_vminus)
3053 return false;
3054 break;
3055 case alg_unknown:
3056 case alg_m:
3057 case alg_zero:
3058 case alg_impossible:
3059 return false;
3060 default:
3061 gcc_unreachable ();
3062 }
3063 }
3064
3065 return true;
3066 }
3067
3068 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
3069 putting the final result in DEST. Append all statements but the last into
3070 VINFO. Return the last statement. */
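
/* For instance (a sketch), a shift by 3 is synthesized as three
   self-additions:

     t1 = op + op;
     t2 = t1 + t1;
     dest = t2 + t2;

   Only the last statement is returned rather than appended, so the
   caller decides where it goes.  */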
3071
3072 static gimple *
3073 synth_lshift_by_additions (vec_info *vinfo,
3074 tree dest, tree op, HOST_WIDE_INT amnt,
3075 stmt_vec_info stmt_info)
3076 {
3077 HOST_WIDE_INT i;
3078 tree itype = TREE_TYPE (op);
3079 tree prev_res = op;
3080 gcc_assert (amnt >= 0);
3081 for (i = 0; i < amnt; i++)
3082 {
3083 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
3084 : dest;
3085 gimple *stmt
3086 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
3087 prev_res = tmp_var;
3088 if (i < amnt - 1)
3089 append_pattern_def_seq (vinfo, stmt_info, stmt);
3090 else
3091 return stmt;
3092 }
3093 gcc_unreachable ();
3094 return NULL;
3095 }
3096
3097 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
3098 CODE to operands OP1 and OP2, creating a new temporary SSA var in
3099 the process if necessary. Append the resulting assignment statements
3100 to the sequence in STMT_VINFO. Return the SSA variable that holds the
3101 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
3102 left shifts using additions. */
3103
3104 static tree
3105 apply_binop_and_append_stmt (vec_info *vinfo,
3106 tree_code code, tree op1, tree op2,
3107 stmt_vec_info stmt_vinfo, bool synth_shift_p)
3108 {
3109 if (integer_zerop (op2)
3110 && (code == LSHIFT_EXPR
3111 || code == PLUS_EXPR))
3112 {
3113 gcc_assert (TREE_CODE (op1) == SSA_NAME);
3114 return op1;
3115 }
3116
3117 gimple *stmt;
3118 tree itype = TREE_TYPE (op1);
3119 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
3120
3121 if (code == LSHIFT_EXPR
3122 && synth_shift_p)
3123 {
3124 stmt = synth_lshift_by_additions (vinfo, tmp_var, op1,
3125 TREE_INT_CST_LOW (op2), stmt_vinfo);
3126 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3127 return tmp_var;
3128 }
3129
3130 stmt = gimple_build_assign (tmp_var, code, op1, op2);
3131 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3132 return tmp_var;
3133 }
3134
3135 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
3136 and simple arithmetic operations to be vectorized. Record the statements
3137 produced in STMT_VINFO and return the last statement in the sequence or
3138 NULL if it's not possible to synthesize such a multiplication.
3139 This function mirrors the behavior of expand_mult_const in expmed.cc but
3140 works on tree-ssa form. */
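
/* For example (illustrative; the actual sequence is whatever
   choose_mult_variant selects for the target), a multiplication by 9
   might be synthesized as

     t1 = op << 3;
     res = t1 + op;

   with the shift itself synthesized from additions if the target lacks
   vector shifts, and the whole computation done in an unsigned copy of
   the type when the original type has undefined overflow.  */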
3141
3142 static gimple *
3143 vect_synth_mult_by_constant (vec_info *vinfo, tree op, tree val,
3144 stmt_vec_info stmt_vinfo)
3145 {
3146 tree itype = TREE_TYPE (op);
3147 machine_mode mode = TYPE_MODE (itype);
3148 struct algorithm alg;
3149 mult_variant variant;
3150 if (!tree_fits_shwi_p (val))
3151 return NULL;
3152
3153 /* Multiplication synthesis by shifts, adds and subs can introduce
3154 signed overflow where the original operation didn't. Perform the
3155 operations on an unsigned type and cast back to avoid this.
3156 In the future we may want to relax this for synthesis algorithms
3157 that we can prove do not cause unexpected overflow. */
3158 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
3159
3160 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
3161 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
3162 if (!vectype)
3163 return NULL;
3164
3165 /* Targets that don't support vector shifts but support vector additions
3166 can synthesize shifts that way. */
3167 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
3168
3169 HOST_WIDE_INT hwval = tree_to_shwi (val);
3170 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
3171 The vectorizer's benefit analysis will decide whether it's beneficial
3172 to do this. */
3173 bool possible = choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype))
3174 ? TYPE_MODE (vectype) : mode,
3175 hwval, &alg, &variant, MAX_COST);
3176 if (!possible)
3177 return NULL;
3178
3179 if (!target_supports_mult_synth_alg (&alg, variant, vectype, synth_shift_p))
3180 return NULL;
3181
3182 tree accumulator;
3183
3184 /* Clear out the sequence of statements so we can populate it below. */
3185 gimple *stmt = NULL;
3186
3187 if (cast_to_unsigned_p)
3188 {
3189 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
3190 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
3191 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3192 op = tmp_op;
3193 }
3194
3195 if (alg.op[0] == alg_zero)
3196 accumulator = build_int_cst (multtype, 0);
3197 else
3198 accumulator = op;
3199
3200 bool needs_fixup = (variant == negate_variant)
3201 || (variant == add_variant);
3202
3203 for (int i = 1; i < alg.ops; i++)
3204 {
3205 tree shft_log = build_int_cst (multtype, alg.log[i]);
3206 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3207 tree tmp_var = NULL_TREE;
3208
3209 switch (alg.op[i])
3210 {
3211 case alg_shift:
3212 if (synth_shift_p)
3213 stmt
3214 = synth_lshift_by_additions (vinfo, accum_tmp, accumulator,
3215 alg.log[i], stmt_vinfo);
3216 else
3217 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
3218 shft_log);
3219 break;
3220 case alg_add_t_m2:
3221 tmp_var
3222 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op, shft_log,
3223 stmt_vinfo, synth_shift_p);
3224 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3225 tmp_var);
3226 break;
3227 case alg_sub_t_m2:
3228 tmp_var = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, op,
3229 shft_log, stmt_vinfo,
3230 synth_shift_p);
3231 /* In some algorithms the first step involves zeroing the
3232 accumulator. If subtracting from such an accumulator
3233 just emit the negation directly. */
3234 if (integer_zerop (accumulator))
3235 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
3236 else
3237 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
3238 tmp_var);
3239 break;
3240 case alg_add_t2_m:
3241 tmp_var
3242 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3243 shft_log, stmt_vinfo, synth_shift_p);
3244 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
3245 break;
3246 case alg_sub_t2_m:
3247 tmp_var
3248 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3249 shft_log, stmt_vinfo, synth_shift_p);
3250 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
3251 break;
3252 case alg_add_factor:
3253 tmp_var
3254 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3255 shft_log, stmt_vinfo, synth_shift_p);
3256 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
3257 tmp_var);
3258 break;
3259 case alg_sub_factor:
3260 tmp_var
3261 = apply_binop_and_append_stmt (vinfo, LSHIFT_EXPR, accumulator,
3262 shft_log, stmt_vinfo, synth_shift_p);
3263 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
3264 accumulator);
3265 break;
3266 default:
3267 gcc_unreachable ();
3268 }
3269 /* We don't want to append the last stmt in the sequence to stmt_vinfo
3270 but rather return it directly. */
3271
3272 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
3273 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3274 accumulator = accum_tmp;
3275 }
3276 if (variant == negate_variant)
3277 {
3278 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3279 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
3280 accumulator = accum_tmp;
3281 if (cast_to_unsigned_p)
3282 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3283 }
3284 else if (variant == add_variant)
3285 {
3286 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
3287 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
3288 accumulator = accum_tmp;
3289 if (cast_to_unsigned_p)
3290 append_pattern_def_seq (vinfo, stmt_vinfo, stmt);
3291 }
3292 /* Move back to a signed type if needed. */
3293 if (cast_to_unsigned_p)
3294 {
3295 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
3296 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
3297 }
3298
3299 return stmt;
3300 }
3301
3302 /* Detect multiplication by constant and convert it into a sequence of
3303 shifts and additions, subtractions, negations. We reuse the
3304 choose_mult_variant algorithms from expmed.cc
3305
3306 Input/Output:
3307
3308 STMT_VINFO: The stmt from which the pattern search begins,
3309 i.e. the mult stmt.
3310
3311 Output:
3312
3313 * TYPE_OUT: The type of the output of this pattern.
3314
3315 * Return value: A new stmt that will be used to replace
3316 the multiplication. */
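/* Illustrative example (assuming the target lacks a native vector multiply
   but supports vector shifts): for
     S1  a_t = b_t * 4;
   the returned pattern statement is effectively
     a_t' = b_t << 2;
   possibly surrounded by conversions to and from an unsigned type, as done
   in vect_synth_mult_by_constant.  */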
3317
3318 static gimple *
3319 vect_recog_mult_pattern (vec_info *vinfo,
3320 stmt_vec_info stmt_vinfo, tree *type_out)
3321 {
3322 gimple *last_stmt = stmt_vinfo->stmt;
3323 tree oprnd0, oprnd1, vectype, itype;
3324 gimple *pattern_stmt;
3325
3326 if (!is_gimple_assign (last_stmt))
3327 return NULL;
3328
3329 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
3330 return NULL;
3331
3332 oprnd0 = gimple_assign_rhs1 (last_stmt);
3333 oprnd1 = gimple_assign_rhs2 (last_stmt);
3334 itype = TREE_TYPE (oprnd0);
3335
3336 if (TREE_CODE (oprnd0) != SSA_NAME
3337 || TREE_CODE (oprnd1) != INTEGER_CST
3338 || !INTEGRAL_TYPE_P (itype)
3339 || !type_has_mode_precision_p (itype))
3340 return NULL;
3341
3342 vectype = get_vectype_for_scalar_type (vinfo, itype);
3343 if (vectype == NULL_TREE)
3344 return NULL;
3345
3346 /* If the target can handle vectorized multiplication natively,
3347 don't attempt to optimize this. */
3348 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
3349 if (mul_optab != unknown_optab)
3350 {
3351 machine_mode vec_mode = TYPE_MODE (vectype);
3352 int icode = (int) optab_handler (mul_optab, vec_mode);
3353 if (icode != CODE_FOR_nothing)
3354 return NULL;
3355 }
3356
3357 pattern_stmt = vect_synth_mult_by_constant (vinfo,
3358 oprnd0, oprnd1, stmt_vinfo);
3359 if (!pattern_stmt)
3360 return NULL;
3361
3362 /* Pattern detected. */
3363 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
3364
3365 *type_out = vectype;
3366
3367 return pattern_stmt;
3368 }
3369
3370 /* Detect a signed division by a constant that wouldn't be
3371 otherwise vectorized:
3372
3373 type a_t, b_t;
3374
3375 S1 a_t = b_t / N;
3376
3377 where type 'type' is an integral type and N is a constant.
3378
3379 Similarly handle modulo by a constant:
3380
3381 S4 a_t = b_t % N;
3382
3383 Input/Output:
3384
3385 * STMT_VINFO: The stmt from which the pattern search begins,
3386 i.e. the division stmt. S1 is replaced by if N is a power
3387 of two constant and type is signed:
3388 S3 y_t = b_t < 0 ? N - 1 : 0;
3389 S2 x_t = b_t + y_t;
3390 S1' a_t = x_t >> log2 (N);
3391
3392 If N is a power of two constant and the type is signed, S4 is
3393 replaced by (where *_T temporaries have unsigned type):
3394 S9 y_T = b_t < 0 ? -1U : 0U;
3395 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3396 S7 z_t = (type) z_T;
3397 S6 w_t = b_t + z_t;
3398 S5 x_t = w_t & (N - 1);
3399 S4' a_t = x_t - z_t;
3400
3401 Output:
3402
3403 * TYPE_OUT: The type of the output of this pattern.
3404
3405 * Return value: A new stmt that will be used to replace the division
3406 S1 or modulo S4 stmt. */
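/* For illustration, with N == 4 and a signed 32-bit type the division S1
   above becomes
     S3  y_t = b_t < 0 ? 3 : 0;
     S2  x_t = b_t + y_t;
     S1' a_t = x_t >> 2;
   and the modulo S4 becomes
     S9  y_T = b_t < 0 ? -1U : 0U;
     S8  z_T = y_T >> 30;
     S7  z_t = (type) z_T;
     S6  w_t = b_t + z_t;
     S5  x_t = w_t & 3;
     S4' a_t = x_t - z_t;
   Targets that provide IFN_DIV_POW2 instead get a single .DIV_POW2 call.  */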
3407
3408 static gimple *
3409 vect_recog_divmod_pattern (vec_info *vinfo,
3410 stmt_vec_info stmt_vinfo, tree *type_out)
3411 {
3412 gimple *last_stmt = stmt_vinfo->stmt;
3413 tree oprnd0, oprnd1, vectype, itype, cond;
3414 gimple *pattern_stmt, *def_stmt;
3415 enum tree_code rhs_code;
3416 optab optab;
3417 tree q;
3418 int dummy_int, prec;
3419
3420 if (!is_gimple_assign (last_stmt))
3421 return NULL;
3422
3423 rhs_code = gimple_assign_rhs_code (last_stmt);
3424 switch (rhs_code)
3425 {
3426 case TRUNC_DIV_EXPR:
3427 case EXACT_DIV_EXPR:
3428 case TRUNC_MOD_EXPR:
3429 break;
3430 default:
3431 return NULL;
3432 }
3433
3434 oprnd0 = gimple_assign_rhs1 (last_stmt);
3435 oprnd1 = gimple_assign_rhs2 (last_stmt);
3436 itype = TREE_TYPE (oprnd0);
3437 if (TREE_CODE (oprnd0) != SSA_NAME
3438 || TREE_CODE (oprnd1) != INTEGER_CST
3439 || TREE_CODE (itype) != INTEGER_TYPE
3440 || !type_has_mode_precision_p (itype))
3441 return NULL;
3442
3443 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
3444 vectype = get_vectype_for_scalar_type (vinfo, itype);
3445 if (vectype == NULL_TREE)
3446 return NULL;
3447
3448 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
3449 {
3450 /* If the target can handle vectorized division or modulo natively,
3451 don't attempt to optimize this, since native division is likely
3452 to give smaller code. */
3453 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3454 if (optab != unknown_optab)
3455 {
3456 machine_mode vec_mode = TYPE_MODE (vectype);
3457 int icode = (int) optab_handler (optab, vec_mode);
3458 if (icode != CODE_FOR_nothing)
3459 return NULL;
3460 }
3461 }
3462
3463 prec = TYPE_PRECISION (itype);
3464 if (integer_pow2p (oprnd1))
3465 {
3466 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3467 return NULL;
3468
3469 /* Pattern detected. */
3470 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3471
3472 *type_out = vectype;
3473
3474 /* Check if the target supports this internal function. */
3475 internal_fn ifn = IFN_DIV_POW2;
3476 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3477 {
3478 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3479
3480 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3481 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3482 gimple_call_set_lhs (div_stmt, var_div);
3483
3484 if (rhs_code == TRUNC_MOD_EXPR)
3485 {
3486 append_pattern_def_seq (vinfo, stmt_vinfo, div_stmt);
3487 def_stmt
3488 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3489 LSHIFT_EXPR, var_div, shift);
3490 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3491 pattern_stmt
3492 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3493 MINUS_EXPR, oprnd0,
3494 gimple_assign_lhs (def_stmt));
3495 }
3496 else
3497 pattern_stmt = div_stmt;
3498 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3499
3500 return pattern_stmt;
3501 }
3502
3503 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3504 build_int_cst (itype, 0));
3505 if (rhs_code == TRUNC_DIV_EXPR
3506 || rhs_code == EXACT_DIV_EXPR)
3507 {
3508 tree var = vect_recog_temp_ssa_var (itype, NULL);
3509 tree shift;
3510 def_stmt
3511 = gimple_build_assign (var, COND_EXPR, cond,
3512 fold_build2 (MINUS_EXPR, itype, oprnd1,
3513 build_int_cst (itype, 1)),
3514 build_int_cst (itype, 0));
3515 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3516 var = vect_recog_temp_ssa_var (itype, NULL);
3517 def_stmt
3518 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3519 gimple_assign_lhs (def_stmt));
3520 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3521
3522 shift = build_int_cst (itype, tree_log2 (oprnd1));
3523 pattern_stmt
3524 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3525 RSHIFT_EXPR, var, shift);
3526 }
3527 else
3528 {
3529 tree signmask;
3530 if (compare_tree_int (oprnd1, 2) == 0)
3531 {
3532 signmask = vect_recog_temp_ssa_var (itype, NULL);
3533 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3534 build_int_cst (itype, 1),
3535 build_int_cst (itype, 0));
3536 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3537 }
3538 else
3539 {
3540 tree utype
3541 = build_nonstandard_integer_type (prec, 1);
3542 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
3543 tree shift
3544 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
3545 - tree_log2 (oprnd1));
3546 tree var = vect_recog_temp_ssa_var (utype, NULL);
3547
3548 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3549 build_int_cst (utype, -1),
3550 build_int_cst (utype, 0));
3551 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3552 var = vect_recog_temp_ssa_var (utype, NULL);
3553 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3554 gimple_assign_lhs (def_stmt),
3555 shift);
3556 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecutype);
3557 signmask = vect_recog_temp_ssa_var (itype, NULL);
3558 def_stmt
3559 = gimple_build_assign (signmask, NOP_EXPR, var);
3560 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3561 }
3562 def_stmt
3563 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3564 PLUS_EXPR, oprnd0, signmask);
3565 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3566 def_stmt
3567 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3568 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3569 fold_build2 (MINUS_EXPR, itype, oprnd1,
3570 build_int_cst (itype, 1)));
3571 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3572
3573 pattern_stmt
3574 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3575 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3576 signmask);
3577 }
3578
3579 return pattern_stmt;
3580 }
3581
3582 if (prec > HOST_BITS_PER_WIDE_INT
3583 || integer_zerop (oprnd1))
3584 return NULL;
3585
3586 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3587 return NULL;
3588
3589 if (TYPE_UNSIGNED (itype))
3590 {
3591 unsigned HOST_WIDE_INT mh, ml;
3592 int pre_shift, post_shift;
3593 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
3594 & GET_MODE_MASK (itype_mode));
3595 tree t1, t2, t3, t4;
3596
3597 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
3598 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3599 return NULL;
3600
3601 /* Find a suitable multiplier and right shift count
3602 instead of multiplying with D. */
3603 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3604
3605 /* If the suggested multiplier is more than PREC bits, we can do better
3606 for even divisors, using an initial right shift. */
3607 if (mh != 0 && (d & 1) == 0)
3608 {
3609 pre_shift = ctz_or_zero (d);
3610 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3611 &ml, &post_shift, &dummy_int);
3612 gcc_assert (!mh);
3613 }
3614 else
3615 pre_shift = 0;
3616
3617 if (mh != 0)
3618 {
3619 if (post_shift - 1 >= prec)
3620 return NULL;
3621
3622 /* t1 = oprnd0 h* ml;
3623 t2 = oprnd0 - t1;
3624 t3 = t2 >> 1;
3625 t4 = t1 + t3;
3626 q = t4 >> (post_shift - 1); */
3627 t1 = vect_recog_temp_ssa_var (itype, NULL);
3628 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3629 build_int_cst (itype, ml));
3630 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3631
3632 t2 = vect_recog_temp_ssa_var (itype, NULL);
3633 def_stmt
3634 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
3635 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3636
3637 t3 = vect_recog_temp_ssa_var (itype, NULL);
3638 def_stmt
3639 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
3640 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3641
3642 t4 = vect_recog_temp_ssa_var (itype, NULL);
3643 def_stmt
3644 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
3645
3646 if (post_shift != 1)
3647 {
3648 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3649
3650 q = vect_recog_temp_ssa_var (itype, NULL);
3651 pattern_stmt
3652 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3653 build_int_cst (itype, post_shift - 1));
3654 }
3655 else
3656 {
3657 q = t4;
3658 pattern_stmt = def_stmt;
3659 }
3660 }
3661 else
3662 {
3663 if (pre_shift >= prec || post_shift >= prec)
3664 return NULL;
3665
3666 /* t1 = oprnd0 >> pre_shift;
3667 t2 = t1 h* ml;
3668 q = t2 >> post_shift; */
3669 if (pre_shift)
3670 {
3671 t1 = vect_recog_temp_ssa_var (itype, NULL);
3672 def_stmt
3673 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3674 build_int_cst (NULL, pre_shift));
3675 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3676 }
3677 else
3678 t1 = oprnd0;
3679
3680 t2 = vect_recog_temp_ssa_var (itype, NULL);
3681 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3682 build_int_cst (itype, ml));
3683
3684 if (post_shift)
3685 {
3686 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3687
3688 q = vect_recog_temp_ssa_var (itype, NULL);
3689 def_stmt
3690 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3691 build_int_cst (itype, post_shift));
3692 }
3693 else
3694 q = t2;
3695
3696 pattern_stmt = def_stmt;
3697 }
3698 }
3699 else
3700 {
3701 unsigned HOST_WIDE_INT ml;
3702 int post_shift;
3703 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
3704 unsigned HOST_WIDE_INT abs_d;
3705 bool add = false;
3706 tree t1, t2, t3, t4;
3707
3708 /* Give up for -1. */
3709 if (d == -1)
3710 return NULL;
3711
3712 /* Since d might be INT_MIN, we have to cast to
3713 unsigned HOST_WIDE_INT before negating to avoid
3714 undefined signed overflow. */
3715 abs_d = (d >= 0
3716 ? (unsigned HOST_WIDE_INT) d
3717 : - (unsigned HOST_WIDE_INT) d);
3718
3719 /* n rem d = n rem -d */
3720 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3721 {
3722 d = abs_d;
3723 oprnd1 = build_int_cst (itype, abs_d);
3724 }
3725 if (HOST_BITS_PER_WIDE_INT >= prec
3726 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
3727 /* This case is not handled correctly below. */
3728 return NULL;
3729
3730 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
3731 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
3732 {
3733 add = true;
3734 ml |= HOST_WIDE_INT_M1U << (prec - 1);
3735 }
3736 if (post_shift >= prec)
3737 return NULL;
3738
3739 /* t1 = oprnd0 h* ml; */
3740 t1 = vect_recog_temp_ssa_var (itype, NULL);
3741 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3742 build_int_cst (itype, ml));
3743
3744 if (add)
3745 {
3746 /* t2 = t1 + oprnd0; */
3747 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3748 t2 = vect_recog_temp_ssa_var (itype, NULL);
3749 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
3750 }
3751 else
3752 t2 = t1;
3753
3754 if (post_shift)
3755 {
3756 /* t3 = t2 >> post_shift; */
3757 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3758 t3 = vect_recog_temp_ssa_var (itype, NULL);
3759 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3760 build_int_cst (itype, post_shift));
3761 }
3762 else
3763 t3 = t2;
3764
3765 int msb = 1;
3766 value_range r;
3767 get_range_query (cfun)->range_of_expr (r, oprnd0);
3768 if (r.kind () == VR_RANGE)
3769 {
3770 if (!wi::neg_p (r.lower_bound (), TYPE_SIGN (itype)))
3771 msb = 0;
3772 else if (wi::neg_p (r.upper_bound (), TYPE_SIGN (itype)))
3773 msb = -1;
3774 }
3775
3776 if (msb == 0 && d >= 0)
3777 {
3778 /* q = t3; */
3779 q = t3;
3780 pattern_stmt = def_stmt;
3781 }
3782 else
3783 {
3784 /* t4 = oprnd0 >> (prec - 1);
3785 or if we know from VRP that oprnd0 >= 0
3786 t4 = 0;
3787 or if we know from VRP that oprnd0 < 0
3788 t4 = -1; */
3789 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3790 t4 = vect_recog_temp_ssa_var (itype, NULL);
3791 if (msb != 1)
3792 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3793 build_int_cst (itype, msb));
3794 else
3795 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3796 build_int_cst (itype, prec - 1));
3797 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3798
3799 /* q = t3 - t4; or q = t4 - t3; */
3800 q = vect_recog_temp_ssa_var (itype, NULL);
3801 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3802 d < 0 ? t3 : t4);
3803 }
3804 }
3805
3806 if (rhs_code == TRUNC_MOD_EXPR)
3807 {
3808 tree r, t1;
3809
3810 /* We divided. Now finish by:
3811 t1 = q * oprnd1;
3812 r = oprnd0 - t1; */
3813 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt);
3814
3815 t1 = vect_recog_temp_ssa_var (itype, NULL);
3816 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
3817 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt);
3818
3819 r = vect_recog_temp_ssa_var (itype, NULL);
3820 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
3821 }
3822
3823 /* Pattern detected. */
3824 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
3825
3826 *type_out = vectype;
3827 return pattern_stmt;
3828 }
3829
3830 /* Function vect_recog_mixed_size_cond_pattern
3831
3832 Try to find the following pattern:
3833
3834 type x_t, y_t;
3835 TYPE a_T, b_T, c_T;
3836 loop:
3837 S1 a_T = x_t CMP y_t ? b_T : c_T;
3838
3839 where type 'TYPE' is an integral type which has different size
3840 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
3841 than 'type', the constants need to fit into an integer type
3842 with the same width as 'type') or results of conversion from 'type'.
3843
3844 Input:
3845
3846 * STMT_VINFO: The stmt from which the pattern search begins.
3847
3848 Output:
3849
3850 * TYPE_OUT: The type of the output of this pattern.
3851
3852 * Return value: A new stmt that will be used to replace the pattern.
3853 Additionally a def_stmt is added.
3854
3855 a_it = x_t CMP y_t ? b_it : c_it;
3856 a_T = (TYPE) a_it; */
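/* Illustrative example: for
     short x_t, y_t;
     int a_T;
     S1  a_T = x_t < y_t ? 5 : 10;
   the pattern emits a conditional on a short-sized integer type followed by
   a widening conversion:
     a_it = x_t < y_t ? 5 : 10;
     a_T = (int) a_it;
   so that the COND_EXPR operates on vectors with the same number of
   elements as the comparison.  */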
3857
3858 static gimple *
3859 vect_recog_mixed_size_cond_pattern (vec_info *vinfo,
3860 stmt_vec_info stmt_vinfo, tree *type_out)
3861 {
3862 gimple *last_stmt = stmt_vinfo->stmt;
3863 tree cond_expr, then_clause, else_clause;
3864 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3865 gimple *pattern_stmt, *def_stmt;
3866 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
3867 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
3868 bool promotion;
3869 tree comp_scalar_type;
3870
3871 if (!is_gimple_assign (last_stmt)
3872 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3873 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3874 return NULL;
3875
3876 cond_expr = gimple_assign_rhs1 (last_stmt);
3877 then_clause = gimple_assign_rhs2 (last_stmt);
3878 else_clause = gimple_assign_rhs3 (last_stmt);
3879
3880 if (!COMPARISON_CLASS_P (cond_expr))
3881 return NULL;
3882
3883 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3884 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
3885 if (comp_vectype == NULL_TREE)
3886 return NULL;
3887
3888 type = TREE_TYPE (gimple_assign_lhs (last_stmt));
3889 if (types_compatible_p (type, comp_scalar_type)
3890 || ((TREE_CODE (then_clause) != INTEGER_CST
3891 || TREE_CODE (else_clause) != INTEGER_CST)
3892 && !INTEGRAL_TYPE_P (comp_scalar_type))
3893 || !INTEGRAL_TYPE_P (type))
3894 return NULL;
3895
3896 if ((TREE_CODE (then_clause) != INTEGER_CST
3897 && !type_conversion_p (vinfo, then_clause, false,
3898 &orig_type0, &def_stmt0, &promotion))
3899 || (TREE_CODE (else_clause) != INTEGER_CST
3900 && !type_conversion_p (vinfo, else_clause, false,
3901 &orig_type1, &def_stmt1, &promotion)))
3902 return NULL;
3903
3904 if (orig_type0 && orig_type1
3905 && !types_compatible_p (orig_type0, orig_type1))
3906 return NULL;
3907
3908 if (orig_type0)
3909 {
3910 if (!types_compatible_p (orig_type0, comp_scalar_type))
3911 return NULL;
3912 then_clause = gimple_assign_rhs1 (def_stmt0);
3913 itype = orig_type0;
3914 }
3915
3916 if (orig_type1)
3917 {
3918 if (!types_compatible_p (orig_type1, comp_scalar_type))
3919 return NULL;
3920 else_clause = gimple_assign_rhs1 (def_stmt1);
3921 itype = orig_type1;
3922 }
3923
3924
3925 HOST_WIDE_INT cmp_mode_size
3926 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3927
3928 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3929 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
3930 return NULL;
3931
3932 vectype = get_vectype_for_scalar_type (vinfo, type);
3933 if (vectype == NULL_TREE)
3934 return NULL;
3935
3936 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
3937 return NULL;
3938
3939 if (itype == NULL_TREE)
3940 itype = build_nonstandard_integer_type (cmp_mode_size,
3941 TYPE_UNSIGNED (type));
3942
3943 if (itype == NULL_TREE
3944 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
3945 return NULL;
3946
3947 vecitype = get_vectype_for_scalar_type (vinfo, itype);
3948 if (vecitype == NULL_TREE)
3949 return NULL;
3950
3951 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
3952 return NULL;
3953
3954 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
3955 {
3956 if ((TREE_CODE (then_clause) == INTEGER_CST
3957 && !int_fits_type_p (then_clause, itype))
3958 || (TREE_CODE (else_clause) == INTEGER_CST
3959 && !int_fits_type_p (else_clause, itype)))
3960 return NULL;
3961 }
3962
3963 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3964 COND_EXPR, unshare_expr (cond_expr),
3965 fold_convert (itype, then_clause),
3966 fold_convert (itype, else_clause));
3967 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3968 NOP_EXPR, gimple_assign_lhs (def_stmt));
3969
3970 append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecitype);
3971 *type_out = vectype;
3972
3973 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
3974
3975 return pattern_stmt;
3976 }
3977
3978
3979 /* Helper function of vect_recog_bool_pattern. Called recursively, return
3980 true if bool VAR can and should be optimized that way. Assume it shouldn't
3981 in case it's a result of a comparison which can be directly vectorized into
3982 a vector comparison. Fills in STMTS with all stmts visited during the
3983 walk. */
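/* Illustrative sketch: for
     b_1 = x_1 < y_1;
     b_2 = x_2 < y_2;
     b_3 = b_1 & b_2;
   check_bool_pattern (b_3, ...) recurses through the definitions of b_3,
   b_1 and b_2 and returns true (recording all three defining statements in
   STMTS) provided neither comparison can be vectorized directly as a
   vector comparison.  */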
3984
3985 static bool
3986 check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
3987 {
3988 tree rhs1;
3989 enum tree_code rhs_code;
3990
3991 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3992 if (!def_stmt_info)
3993 return false;
3994
3995 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3996 if (!def_stmt)
3997 return false;
3998
3999 if (stmts.contains (def_stmt))
4000 return true;
4001
4002 rhs1 = gimple_assign_rhs1 (def_stmt);
4003 rhs_code = gimple_assign_rhs_code (def_stmt);
4004 switch (rhs_code)
4005 {
4006 case SSA_NAME:
4007 if (! check_bool_pattern (rhs1, vinfo, stmts))
4008 return false;
4009 break;
4010
4011 CASE_CONVERT:
4012 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
4013 return false;
4014 if (! check_bool_pattern (rhs1, vinfo, stmts))
4015 return false;
4016 break;
4017
4018 case BIT_NOT_EXPR:
4019 if (! check_bool_pattern (rhs1, vinfo, stmts))
4020 return false;
4021 break;
4022
4023 case BIT_AND_EXPR:
4024 case BIT_IOR_EXPR:
4025 case BIT_XOR_EXPR:
4026 if (! check_bool_pattern (rhs1, vinfo, stmts)
4027 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
4028 return false;
4029 break;
4030
4031 default:
4032 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
4033 {
4034 tree vecitype, comp_vectype;
4035
4036 /* If the comparison can throw, then is_gimple_condexpr will be
4037 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
4038 if (stmt_could_throw_p (cfun, def_stmt))
4039 return false;
4040
4041 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
4042 if (comp_vectype == NULL_TREE)
4043 return false;
4044
4045 tree mask_type = get_mask_type_for_scalar_type (vinfo,
4046 TREE_TYPE (rhs1));
4047 if (mask_type
4048 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
4049 return false;
4050
4051 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
4052 {
4053 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4054 tree itype
4055 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4056 vecitype = get_vectype_for_scalar_type (vinfo, itype);
4057 if (vecitype == NULL_TREE)
4058 return false;
4059 }
4060 else
4061 vecitype = comp_vectype;
4062 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
4063 return false;
4064 }
4065 else
4066 return false;
4067 break;
4068 }
4069
4070 bool res = stmts.add (def_stmt);
4071 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
4072 gcc_assert (!res);
4073
4074 return true;
4075 }
4076
4077
4078 /* Helper function of adjust_bool_pattern. Cast VAR (the result of a
4079 previous stmt) to TYPE, appending the cast to STMT_INFO's pattern
4080 sequence, and return the cast result. */
4081
4082 static tree
4083 adjust_bool_pattern_cast (vec_info *vinfo,
4084 tree type, tree var, stmt_vec_info stmt_info)
4085 {
4086 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
4087 NOP_EXPR, var);
4088 append_pattern_def_seq (vinfo, stmt_info, cast_stmt,
4089 get_vectype_for_scalar_type (vinfo, type));
4090 return gimple_assign_lhs (cast_stmt);
4091 }
4092
4093 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
4094 VAR is an SSA_NAME that should be transformed from bool to a wider integer
4095 type, OUT_TYPE is the desired final integer type of the whole pattern.
4096 STMT_INFO is the info of the pattern root, with which the pattern
4097 stmts should be associated. DEFS is a map of pattern defs. */
4098
4099 static void
4100 adjust_bool_pattern (vec_info *vinfo, tree var, tree out_type,
4101 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
4102 {
4103 gimple *stmt = SSA_NAME_DEF_STMT (var);
4104 enum tree_code rhs_code, def_rhs_code;
4105 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
4106 location_t loc;
4107 gimple *pattern_stmt, *def_stmt;
4108 tree trueval = NULL_TREE;
4109
4110 rhs1 = gimple_assign_rhs1 (stmt);
4111 rhs2 = gimple_assign_rhs2 (stmt);
4112 rhs_code = gimple_assign_rhs_code (stmt);
4113 loc = gimple_location (stmt);
4114 switch (rhs_code)
4115 {
4116 case SSA_NAME:
4117 CASE_CONVERT:
4118 irhs1 = *defs.get (rhs1);
4119 itype = TREE_TYPE (irhs1);
4120 pattern_stmt
4121 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4122 SSA_NAME, irhs1);
4123 break;
4124
4125 case BIT_NOT_EXPR:
4126 irhs1 = *defs.get (rhs1);
4127 itype = TREE_TYPE (irhs1);
4128 pattern_stmt
4129 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4130 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
4131 break;
4132
4133 case BIT_AND_EXPR:
4134 /* Try to optimize x = y & (a < b ? 1 : 0); into
4135 x = (a < b ? y : 0);
4136
4137 E.g. for:
4138 bool a_b, b_b, c_b;
4139 TYPE d_T;
4140
4141 S1 a_b = x1 CMP1 y1;
4142 S2 b_b = x2 CMP2 y2;
4143 S3 c_b = a_b & b_b;
4144 S4 d_T = (TYPE) c_b;
4145
4146 we would normally emit:
4147
4148 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4149 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4150 S3' c_T = a_T & b_T;
4151 S4' d_T = c_T;
4152
4153 but we can save one stmt by using the
4154 result of one of the COND_EXPRs in the other COND_EXPR and leave
4155 BIT_AND_EXPR stmt out:
4156
4157 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4158 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4159 S4' d_T = c_T;
4160
4161 At least when VEC_COND_EXPR is implemented using masks,
4162 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases the
4163 comparison mask is computed and ANDed, in one case with an
4164 all-ones vector, in the other case with a vector register.
4165 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
4166 often more expensive. */
4167 def_stmt = SSA_NAME_DEF_STMT (rhs2);
4168 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4169 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4170 {
4171 irhs1 = *defs.get (rhs1);
4172 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4173 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4174 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4175 {
4176 rhs_code = def_rhs_code;
4177 rhs1 = def_rhs1;
4178 rhs2 = gimple_assign_rhs2 (def_stmt);
4179 trueval = irhs1;
4180 goto do_compare;
4181 }
4182 else
4183 irhs2 = *defs.get (rhs2);
4184 goto and_ior_xor;
4185 }
4186 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4187 def_rhs_code = gimple_assign_rhs_code (def_stmt);
4188 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
4189 {
4190 irhs2 = *defs.get (rhs2);
4191 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
4192 if (TYPE_PRECISION (TREE_TYPE (irhs2))
4193 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
4194 {
4195 rhs_code = def_rhs_code;
4196 rhs1 = def_rhs1;
4197 rhs2 = gimple_assign_rhs2 (def_stmt);
4198 trueval = irhs2;
4199 goto do_compare;
4200 }
4201 else
4202 irhs1 = *defs.get (rhs1);
4203 goto and_ior_xor;
4204 }
4205 /* FALLTHRU */
4206 case BIT_IOR_EXPR:
4207 case BIT_XOR_EXPR:
4208 irhs1 = *defs.get (rhs1);
4209 irhs2 = *defs.get (rhs2);
4210 and_ior_xor:
4211 if (TYPE_PRECISION (TREE_TYPE (irhs1))
4212 != TYPE_PRECISION (TREE_TYPE (irhs2)))
4213 {
4214 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
4215 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
4216 int out_prec = TYPE_PRECISION (out_type);
4217 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
4218 irhs2 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs1), irhs2,
4219 stmt_info);
4220 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
4221 irhs1 = adjust_bool_pattern_cast (vinfo, TREE_TYPE (irhs2), irhs1,
4222 stmt_info);
4223 else
4224 {
4225 irhs1 = adjust_bool_pattern_cast (vinfo,
4226 out_type, irhs1, stmt_info);
4227 irhs2 = adjust_bool_pattern_cast (vinfo,
4228 out_type, irhs2, stmt_info);
4229 }
4230 }
4231 itype = TREE_TYPE (irhs1);
4232 pattern_stmt
4233 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4234 rhs_code, irhs1, irhs2);
4235 break;
4236
4237 default:
4238 do_compare:
4239 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
4240 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
4241 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
4242 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
4243 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
4244 {
4245 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
4246 itype
4247 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
4248 }
4249 else
4250 itype = TREE_TYPE (rhs1);
4251 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
4252 if (trueval == NULL_TREE)
4253 trueval = build_int_cst (itype, 1);
4254 else
4255 gcc_checking_assert (useless_type_conversion_p (itype,
4256 TREE_TYPE (trueval)));
4257 pattern_stmt
4258 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
4259 COND_EXPR, cond_expr, trueval,
4260 build_int_cst (itype, 0));
4261 break;
4262 }
4263
4264 gimple_set_location (pattern_stmt, loc);
4265 append_pattern_def_seq (vinfo, stmt_info, pattern_stmt,
4266 get_vectype_for_scalar_type (vinfo, itype));
4267 defs.put (var, gimple_assign_lhs (pattern_stmt));
4268 }
4269
4270 /* Comparison function to qsort a vector of gimple stmts by UID. */
4271
4272 static int
4273 sort_after_uid (const void *p1, const void *p2)
4274 {
4275 const gimple *stmt1 = *(const gimple * const *)p1;
4276 const gimple *stmt2 = *(const gimple * const *)p2;
4277 return gimple_uid (stmt1) - gimple_uid (stmt2);
4278 }
4279
4280 /* Create pattern stmts for all stmts participating in the bool pattern
4281 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
4282 OUT_TYPE. Return the def of the pattern root. */
4283
4284 static tree
4285 adjust_bool_stmts (vec_info *vinfo, hash_set <gimple *> &bool_stmt_set,
4286 tree out_type, stmt_vec_info stmt_info)
4287 {
4288 /* Gather original stmts in the bool pattern in their order of appearance
4289 in the IL. */
4290 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
4291 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
4292 i != bool_stmt_set.end (); ++i)
4293 bool_stmts.quick_push (*i);
4294 bool_stmts.qsort (sort_after_uid);
4295
4296 /* Now process them in that order, producing pattern stmts. */
4297 hash_map <tree, tree> defs;
4298 for (unsigned i = 0; i < bool_stmts.length (); ++i)
4299 adjust_bool_pattern (vinfo, gimple_assign_lhs (bool_stmts[i]),
4300 out_type, stmt_info, defs);
4301
4302 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
4303 gimple *pattern_stmt
4304 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4305 return gimple_assign_lhs (pattern_stmt);
4306 }
4307
4308 /* Return the proper type for converting bool VAR into
4309 an integer value or NULL_TREE if no such type exists.
4310 The type is chosen so that the converted value has the
4311 same number of elements as VAR's vector type. */
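/* For instance, if VAR is a bool set by a comparison of two 32-bit
   integers, its recorded mask precision is 32 and the returned type is a
   32-bit unsigned integer type; if VAR is not defined by a vectorizable
   mask operation the function returns NULL_TREE.  */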
4312
4313 static tree
4314 integer_type_for_mask (tree var, vec_info *vinfo)
4315 {
4316 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4317 return NULL_TREE;
4318
4319 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
4320 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
4321 return NULL_TREE;
4322
4323 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
4324 }
4325
4326 /* Function vect_recog_bool_pattern
4327
4328 Try to find pattern like following:
4329
4330 bool a_b, b_b, c_b, d_b, e_b;
4331 TYPE f_T;
4332 loop:
4333 S1 a_b = x1 CMP1 y1;
4334 S2 b_b = x2 CMP2 y2;
4335 S3 c_b = a_b & b_b;
4336 S4 d_b = x3 CMP3 y3;
4337 S5 e_b = c_b | d_b;
4338 S6 f_T = (TYPE) e_b;
4339
4340 where type 'TYPE' is an integral type. Or a similar pattern
4341 ending in
4342
4343 S6 f_Y = e_b ? r_Y : s_Y;
4344
4345 as results from if-conversion of a complex condition.
4346
4347 Input:
4348
4349 * STMT_VINFO: The stmt at the end from which the pattern
4350 search begins, i.e. cast of a bool to
4351 an integer type.
4352
4353 Output:
4354
4355 * TYPE_OUT: The type of the output of this pattern.
4356
4357 * Return value: A new stmt that will be used to replace the pattern.
4358
4359 Assuming size of TYPE is the same as size of all comparisons
4360 (otherwise some casts would be added where needed), for the above
4361 sequence we create related pattern stmts:
4362 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4363 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4364 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4365 S5' e_T = c_T | d_T;
4366 S6' f_T = e_T;
4367
4368 Instead of the above S3' we could emit:
4369 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4370 S3' c_T = a_T | b_T;
4371 but the above is more efficient. */
4372
4373 static gimple *
4374 vect_recog_bool_pattern (vec_info *vinfo,
4375 stmt_vec_info stmt_vinfo, tree *type_out)
4376 {
4377 gimple *last_stmt = stmt_vinfo->stmt;
4378 enum tree_code rhs_code;
4379 tree var, lhs, rhs, vectype;
4380 gimple *pattern_stmt;
4381
4382 if (!is_gimple_assign (last_stmt))
4383 return NULL;
4384
4385 var = gimple_assign_rhs1 (last_stmt);
4386 lhs = gimple_assign_lhs (last_stmt);
4387 rhs_code = gimple_assign_rhs_code (last_stmt);
4388
4389 if (rhs_code == VIEW_CONVERT_EXPR)
4390 var = TREE_OPERAND (var, 0);
4391
4392 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
4393 return NULL;
4394
4395 hash_set<gimple *> bool_stmts;
4396
4397 if (CONVERT_EXPR_CODE_P (rhs_code)
4398 || rhs_code == VIEW_CONVERT_EXPR)
4399 {
4400 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4401 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4402 return NULL;
4403 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4404
4405 if (check_bool_pattern (var, vinfo, bool_stmts))
4406 {
4407 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4408 TREE_TYPE (lhs), stmt_vinfo);
4409 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4410 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4411 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4412 else
4413 pattern_stmt
4414 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4415 }
4416 else
4417 {
4418 tree type = integer_type_for_mask (var, vinfo);
4419 tree cst0, cst1, tmp;
4420
4421 if (!type)
4422 return NULL;
4423
4424 /* We may use the cond directly with the narrowed type, performing
4425 a single cond with a packed mask instead of multiple cond exprs
4426 followed by result packing. In the widening case it is better
4427 to do the cond first and then extract the results. */
4429 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4430 type = TREE_TYPE (lhs);
4431
4432 cst0 = build_int_cst (type, 0);
4433 cst1 = build_int_cst (type, 1);
4434 tmp = vect_recog_temp_ssa_var (type, NULL);
4435 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
4436
4437 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4438 {
4439 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
4440 append_pattern_def_seq (vinfo, stmt_vinfo,
4441 pattern_stmt, new_vectype);
4442
4443 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4444 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4445 }
4446 }
4447
4448 *type_out = vectype;
4449 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4450
4451 return pattern_stmt;
4452 }
4453 else if (rhs_code == COND_EXPR
4454 && TREE_CODE (var) == SSA_NAME)
4455 {
4456 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4457 if (vectype == NULL_TREE)
4458 return NULL;
4459
4460 /* Build a scalar type for the boolean result that when
4461 vectorized matches the vector type of the result in
4462 size and number of elements. */
4463 unsigned prec
4464 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4465 TYPE_VECTOR_SUBPARTS (vectype));
4466
4467 tree type
4468 = build_nonstandard_integer_type (prec,
4469 TYPE_UNSIGNED (TREE_TYPE (var)));
4470 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
4471 return NULL;
4472
4473 if (!check_bool_pattern (var, vinfo, bool_stmts))
4474 return NULL;
4475
4476 rhs = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo);
4477
4478 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4479 pattern_stmt
4480 = gimple_build_assign (lhs, COND_EXPR,
4481 build2 (NE_EXPR, boolean_type_node,
4482 rhs, build_int_cst (type, 0)),
4483 gimple_assign_rhs2 (last_stmt),
4484 gimple_assign_rhs3 (last_stmt));
4485 *type_out = vectype;
4486 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4487
4488 return pattern_stmt;
4489 }
4490 else if (rhs_code == SSA_NAME
4491 && STMT_VINFO_DATA_REF (stmt_vinfo))
4492 {
4493 stmt_vec_info pattern_stmt_info;
4494 tree nunits_vectype;
4495 if (!vect_get_vector_types_for_stmt (vinfo, stmt_vinfo, &vectype,
4496 &nunits_vectype)
4497 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
4498 return NULL;
4499
4500 if (check_bool_pattern (var, vinfo, bool_stmts))
4501 rhs = adjust_bool_stmts (vinfo, bool_stmts,
4502 TREE_TYPE (vectype), stmt_vinfo);
4503 else
4504 {
4505 tree type = integer_type_for_mask (var, vinfo);
4506 tree cst0, cst1, new_vectype;
4507
4508 if (!type)
4509 return NULL;
4510
4511 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4512 type = TREE_TYPE (vectype);
4513
4514 cst0 = build_int_cst (type, 0);
4515 cst1 = build_int_cst (type, 1);
4516 new_vectype = get_vectype_for_scalar_type (vinfo, type);
4517
4518 rhs = vect_recog_temp_ssa_var (type, NULL);
4519 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
4520 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, new_vectype);
4521 }
4522
4523 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4524 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4525 {
4526 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4527 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
4528 append_pattern_def_seq (vinfo, stmt_vinfo, cast_stmt);
4529 rhs = rhs2;
4530 }
4531 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4532 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4533 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4534 *type_out = vectype;
4535 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4536
4537 return pattern_stmt;
4538 }
4539 else
4540 return NULL;
4541 }
4542
4543
4544 /* A helper for vect_recog_mask_conversion_pattern. Build
4545 conversion of MASK to a type suitable for masking VECTYPE.
4546 The built statement is given the required vectype and is appended
4547 to the pattern sequence of STMT_VINFO.
4548
4549 Return the converted mask. */
4550
4551 static tree
4552 build_mask_conversion (vec_info *vinfo,
4553 tree mask, tree vectype, stmt_vec_info stmt_vinfo)
4554 {
4555 gimple *stmt;
4556 tree masktype, tmp;
4557
4558 masktype = truth_type_for (vectype);
4559 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4560 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
4561 append_pattern_def_seq (vinfo, stmt_vinfo,
4562 stmt, masktype, TREE_TYPE (vectype));
4563
4564 return tmp;
4565 }
4566
4567
4568 /* Function vect_recog_mask_conversion_pattern
4569
4570 Try to find statements which require boolean type
4571 conversion. Additional conversion statements are
4572 added to handle such cases. For example:
4573
4574 bool m_1, m_2, m_3;
4575 int i_4, i_5;
4576 double d_6, d_7;
4577 char c_1, c_2, c_3;
4578
4579 S1 m_1 = i_4 > i_5;
4580 S2 m_2 = d_6 < d_7;
4581 S3 m_3 = m_1 & m_2;
4582 S4 c_1 = m_3 ? c_2 : c_3;
4583
4584 Will be transformed into:
4585
4586 S1 m_1 = i_4 > i_5;
4587 S2 m_2 = d_6 < d_7;
4588 S3'' m_2' = (_Bool[bitsize=32])m_2
4589 S3' m_3' = m_1 & m_2';
4590 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4591 S4' c_1' = m_3'' ? c_2 : c_3; */
4592
4593 static gimple *
4594 vect_recog_mask_conversion_pattern (vec_info *vinfo,
4595 stmt_vec_info stmt_vinfo, tree *type_out)
4596 {
4597 gimple *last_stmt = stmt_vinfo->stmt;
4598 enum tree_code rhs_code;
4599 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4600 tree vectype1, vectype2;
4601 stmt_vec_info pattern_stmt_info;
4602 tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
4603 tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
4604
4605 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4606 if (is_gimple_call (last_stmt)
4607 && gimple_call_internal_p (last_stmt))
4608 {
4609 gcall *pattern_stmt;
4610
4611 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4612 int mask_argno = internal_fn_mask_index (ifn);
4613 if (mask_argno < 0)
4614 return NULL;
4615
4616 bool store_p = internal_store_fn_p (ifn);
4617 if (store_p)
4618 {
4619 int rhs_index = internal_fn_stored_value_index (ifn);
4620 tree rhs = gimple_call_arg (last_stmt, rhs_index);
4621 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
4622 }
4623 else
4624 {
4625 lhs = gimple_call_lhs (last_stmt);
4626 if (!lhs)
4627 return NULL;
4628 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4629 }
4630
4631 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
4632 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
4633 if (!mask_arg_type)
4634 return NULL;
4635 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
4636
4637 if (!vectype1 || !vectype2
4638 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4639 TYPE_VECTOR_SUBPARTS (vectype2)))
4640 return NULL;
4641
4642 tmp = build_mask_conversion (vinfo, mask_arg, vectype1, stmt_vinfo);
4643
4644 auto_vec<tree, 8> args;
4645 unsigned int nargs = gimple_call_num_args (last_stmt);
4646 args.safe_grow (nargs, true);
4647 for (unsigned int i = 0; i < nargs; ++i)
4648 args[i] = ((int) i == mask_argno
4649 ? tmp
4650 : gimple_call_arg (last_stmt, i));
4651 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
4652
4653 if (!store_p)
4654 {
4655 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4656 gimple_call_set_lhs (pattern_stmt, lhs);
4657 }
4658 gimple_call_set_nothrow (pattern_stmt, true);
4659
4660 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
4661 if (STMT_VINFO_DATA_REF (stmt_vinfo))
4662 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
4663
4664 *type_out = vectype1;
4665 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4666
4667 return pattern_stmt;
4668 }
4669
4670 if (!is_gimple_assign (last_stmt))
4671 return NULL;
4672
4673 gimple *pattern_stmt;
4674 lhs = gimple_assign_lhs (last_stmt);
4675 rhs1 = gimple_assign_rhs1 (last_stmt);
4676 rhs_code = gimple_assign_rhs_code (last_stmt);
4677
4678 /* Check for cond expression requiring mask conversion. */
4679 if (rhs_code == COND_EXPR)
4680 {
4681 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
4682
4683 if (TREE_CODE (rhs1) == SSA_NAME)
4684 {
4685 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4686 if (!rhs1_type)
4687 return NULL;
4688 }
4689 else if (COMPARISON_CLASS_P (rhs1))
4690 {
4691 /* Check whether we're comparing scalar booleans and (if so)
4692 whether a better mask type exists than the mask associated
4693 with boolean-sized elements. This avoids unnecessary packs
4694 and unpacks if the booleans are set from comparisons of
4695 wider types. E.g. in:
4696
4697 int x1, x2, x3, x4, y1, y2;
4698 ...
4699 bool b1 = (x1 == x2);
4700 bool b2 = (x3 == x4);
4701 ... = b1 == b2 ? y1 : y2;
4702
4703 it is better for b1 and b2 to use the mask type associated
4704 with int elements rather than bool (byte) elements. */
4705 rhs1_op0 = TREE_OPERAND (rhs1, 0);
4706 rhs1_op1 = TREE_OPERAND (rhs1, 1);
4707 if (!rhs1_op0 || !rhs1_op1)
4708 return NULL;
4709 rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
4710 rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
4711
4712 if (!rhs1_op0_type)
4713 rhs1_type = TREE_TYPE (rhs1_op0);
4714 else if (!rhs1_op1_type)
4715 rhs1_type = TREE_TYPE (rhs1_op1);
4716 else if (TYPE_PRECISION (rhs1_op0_type)
4717 != TYPE_PRECISION (rhs1_op1_type))
4718 {
4719 int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
4720 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4721 int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
4722 - (int) TYPE_PRECISION (TREE_TYPE (lhs));
4723 if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
4724 {
4725 if (abs (tmp0) > abs (tmp1))
4726 rhs1_type = rhs1_op1_type;
4727 else
4728 rhs1_type = rhs1_op0_type;
4729 }
4730 else
4731 rhs1_type = build_nonstandard_integer_type
4732 (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
4733 }
4734 else
4735 rhs1_type = rhs1_op0_type;
4736 }
4737 else
4738 return NULL;
4739
4740 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4741
4742 if (!vectype1 || !vectype2)
4743 return NULL;
4744
4745 /* Continue if a conversion is needed. Also continue if we have
4746 a comparison whose vector type would normally be different from
4747 VECTYPE2 when considered in isolation. In that case we'll
4748 replace the comparison with an SSA name (so that we can record
4749 its vector type) and behave as though the comparison was an SSA
4750 name from the outset. */
4751 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4752 TYPE_VECTOR_SUBPARTS (vectype2))
4753 && !rhs1_op0_type
4754 && !rhs1_op1_type)
4755 return NULL;
4756
4757 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4758 in place, we can handle it in vectorizable_condition. This avoids
4759 unnecessary promotion stmts and increased vectorization factor. */
4760 if (COMPARISON_CLASS_P (rhs1)
4761 && INTEGRAL_TYPE_P (rhs1_type)
4762 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4763 TYPE_VECTOR_SUBPARTS (vectype2)))
4764 {
4765 enum vect_def_type dt;
4766 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
4767 && dt == vect_external_def
4768 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
4769 && (dt == vect_external_def
4770 || dt == vect_constant_def))
4771 {
4772 tree wide_scalar_type = build_nonstandard_integer_type
4773 (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
4774 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4775 wide_scalar_type);
4776 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4777 return NULL;
4778 }
4779 }
4780
4781 /* If rhs1 is a comparison we need to move it into a
4782 separate statement. */
4783 if (TREE_CODE (rhs1) != SSA_NAME)
4784 {
4785 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4786 if (rhs1_op0_type
4787 && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
4788 rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
4789 vectype2, stmt_vinfo);
4790 if (rhs1_op1_type
4791 && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
4792 rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
4793 vectype2, stmt_vinfo);
4794 pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
4795 rhs1_op0, rhs1_op1);
4796 rhs1 = tmp;
4797 append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vectype2,
4798 rhs1_type);
4799 }
4800
4801 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4802 TYPE_VECTOR_SUBPARTS (vectype2)))
4803 tmp = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4804 else
4805 tmp = rhs1;
4806
4807 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4808 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4809 gimple_assign_rhs2 (last_stmt),
4810 gimple_assign_rhs3 (last_stmt));
4811
4812 *type_out = vectype1;
4813 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4814
4815 return pattern_stmt;
4816 }
4817
4818 /* Now check for binary boolean operations requiring conversion for
4819 one of operands. */
4820 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
4821 return NULL;
4822
4823 if (rhs_code != BIT_IOR_EXPR
4824 && rhs_code != BIT_XOR_EXPR
4825 && rhs_code != BIT_AND_EXPR
4826 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
4827 return NULL;
4828
4829 rhs2 = gimple_assign_rhs2 (last_stmt);
4830
4831 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4832 rhs2_type = integer_type_for_mask (rhs2, vinfo);
4833
4834 if (!rhs1_type || !rhs2_type
4835 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4836 return NULL;
4837
4838 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4839 {
4840 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
4841 if (!vectype1)
4842 return NULL;
4843 rhs2 = build_mask_conversion (vinfo, rhs2, vectype1, stmt_vinfo);
4844 }
4845 else
4846 {
4847 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
4848 if (!vectype1)
4849 return NULL;
4850 rhs1 = build_mask_conversion (vinfo, rhs1, vectype1, stmt_vinfo);
4851 }
4852
4853 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4854 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4855
4856 *type_out = vectype1;
4857 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
4858
4859 return pattern_stmt;
4860 }
4861
4862 /* STMT_INFO is a load or store. If the load or store is conditional, return
4863 the boolean condition under which it occurs, otherwise return null. */
4864
4865 static tree
4866 vect_get_load_store_mask (stmt_vec_info stmt_info)
4867 {
4868 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
4869 {
4870 gcc_assert (gimple_assign_single_p (def_assign));
4871 return NULL_TREE;
4872 }
4873
4874 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
4875 {
4876 internal_fn ifn = gimple_call_internal_fn (def_call);
4877 int mask_index = internal_fn_mask_index (ifn);
4878 return gimple_call_arg (def_call, mask_index);
4879 }
4880
4881 gcc_unreachable ();
4882 }
4883
4884 /* Return MASK if MASK is suitable for masking an operation on vectors
4885 of type VECTYPE, otherwise convert it into such a form and return
4886 the result. Associate any conversion statements with STMT_INFO's
4887 pattern. */
4888
4889 static tree
4890 vect_convert_mask_for_vectype (tree mask, tree vectype,
4891 stmt_vec_info stmt_info, vec_info *vinfo)
4892 {
4893 tree mask_type = integer_type_for_mask (mask, vinfo);
4894 if (mask_type)
4895 {
4896 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
4897 if (mask_vectype
4898 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4899 TYPE_VECTOR_SUBPARTS (mask_vectype)))
4900 mask = build_mask_conversion (vinfo, mask, vectype, stmt_info);
4901 }
4902 return mask;
4903 }
4904
4905 /* Return the equivalent of:
4906
4907 fold_convert (TYPE, VALUE)
4908
4909 with the expectation that the operation will be vectorized.
4910 If new statements are needed, add them as pattern statements
4911 to STMT_INFO. */
4912
4913 static tree
4914 vect_add_conversion_to_pattern (vec_info *vinfo,
4915 tree type, tree value, stmt_vec_info stmt_info)
4916 {
4917 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4918 return value;
4919
4920 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4921 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
4922 append_pattern_def_seq (vinfo, stmt_info, conversion,
4923 get_vectype_for_scalar_type (vinfo, type));
4924 return new_value;
4925 }
4926
4927 /* Try to convert STMT_INFO into a call to a gather load or scatter store
4928 internal function. Return the final statement on success and set
4929 *TYPE_OUT to the vector type being loaded or stored.
4930
4931 This function only handles gathers and scatters that were recognized
4932 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
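
/* As a rough illustration (hypothetical SSA names, not actual output):
   a conditional load that was recognized as a gather from the outset,
   say

     data_5 = .MASK_LOAD (ptr_2, align, cond_4);

   could be replaced by a pattern of the form

     off_6 = (offset_type) idx_3;
     patt_7 = .MASK_GATHER_LOAD (base, off_6, scale, zero, cond_4);

   matching the five-operand call built below for masked reads; the
   unmasked and store variants are built analogously.  */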
4933
4934 static gimple *
4935 vect_recog_gather_scatter_pattern (vec_info *vinfo,
4936 stmt_vec_info stmt_info, tree *type_out)
4937 {
4938 /* Currently we only support this for loop vectorization. */
4939 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4940 if (!loop_vinfo)
4941 return NULL;
4942
4943 /* Make sure that we're looking at a gather load or scatter store. */
4944 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4945 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4946 return NULL;
4947
4948 /* Get the boolean that controls whether the load or store happens.
4949 This is null if the operation is unconditional. */
4950 tree mask = vect_get_load_store_mask (stmt_info);
4951
4952 /* Make sure that the target supports an appropriate internal
4953 function for the gather/scatter operation. */
4954 gather_scatter_info gs_info;
4955 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
4956 || gs_info.ifn == IFN_LAST)
4957 return NULL;
4958
4959 /* Convert the mask to the right form. */
4960 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4961 gs_info.element_type);
4962 if (mask)
4963 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
4964 loop_vinfo);
4965 else if (gs_info.ifn == IFN_MASK_SCATTER_STORE
4966 || gs_info.ifn == IFN_MASK_GATHER_LOAD)
4967 mask = build_int_cst (TREE_TYPE (truth_type_for (gs_vectype)), -1);
4968
4969 /* Get the invariant base and non-invariant offset, converting the
4970 latter to the same width as the vector elements. */
4971 tree base = gs_info.base;
4972 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
4973 tree offset = vect_add_conversion_to_pattern (vinfo, offset_type,
4974 gs_info.offset, stmt_info);
4975
4976 /* Build the new pattern statement. */
4977 tree scale = size_int (gs_info.scale);
4978 gcall *pattern_stmt;
4979 if (DR_IS_READ (dr))
4980 {
4981 tree zero = build_zero_cst (gs_info.element_type);
4982 if (mask != NULL)
4983 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4984 offset, scale, zero, mask);
4985 else
4986 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4987 offset, scale, zero);
4988 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4989 gimple_call_set_lhs (pattern_stmt, load_lhs);
4990 }
4991 else
4992 {
4993 tree rhs = vect_get_store_rhs (stmt_info);
4994 if (mask != NULL)
4995 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
4996 base, offset, scale, rhs,
4997 mask);
4998 else
4999 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
5000 base, offset, scale, rhs);
5001 }
5002 gimple_call_set_nothrow (pattern_stmt, true);
5003
5004 /* Copy across relevant vectorization info and associate DR with the
5005 new pattern statement instead of the original statement. */
5006 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5007 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
5008
5009 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5010 *type_out = vectype;
5011 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
5012
5013 return pattern_stmt;
5014 }
5015
5016 /* Return true if TYPE is a non-boolean integer type. These are the types
5017 that we want to consider for narrowing. */
5018
5019 static bool
5020 vect_narrowable_type_p (tree type)
5021 {
5022 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
5023 }
5024
5025 /* Return true if the operation given by CODE can be truncated to N bits
5026 when only N bits of the output are needed. This is only true if bit N+1
5027 of the inputs has no effect on the low N bits of the result. */
5028
5029 static bool
5030 vect_truncatable_operation_p (tree_code code)
5031 {
5032 switch (code)
5033 {
5034 case NEGATE_EXPR:
5035 case PLUS_EXPR:
5036 case MINUS_EXPR:
5037 case MULT_EXPR:
5038 case BIT_NOT_EXPR:
5039 case BIT_AND_EXPR:
5040 case BIT_IOR_EXPR:
5041 case BIT_XOR_EXPR:
5042 case COND_EXPR:
5043 return true;
5044
5045 default:
5046 return false;
5047 }
5048 }
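
/* For example, addition is truncatable: with 32-bit operands

     0x00001234 + 0x000000ff = 0x00001333

   the low 8 bits of the result (0x33) depend only on the low 8 bits of
   the operands (0x34 + 0xff = 0x133, truncated to 0x33), so the addition
   could be done in 8 bits if only 8 bits of the output are needed.
   Right shifts are not in the list above because their low output bits
   depend on higher input bits.  */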
5049
5050 /* Record that STMT_INFO could be changed from operating on TYPE to
5051 operating on a type with the precision and sign given by PRECISION
5052 and SIGN respectively. PRECISION is an arbitrary bit precision;
5053 it might not be a whole number of bytes. */
5054
5055 static void
5056 vect_set_operation_type (stmt_vec_info stmt_info, tree type,
5057 unsigned int precision, signop sign)
5058 {
5059 /* Round the precision up to a whole number of bytes. */
5060 precision = vect_element_precision (precision);
5061 if (precision < TYPE_PRECISION (type)
5062 && (!stmt_info->operation_precision
5063 || stmt_info->operation_precision > precision))
5064 {
5065 stmt_info->operation_precision = precision;
5066 stmt_info->operation_sign = sign;
5067 }
5068 }
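
/* For example (assuming vect_element_precision rounds up to a power-of-two
   number of bits, with a minimum of one byte), a request to operate in
   12 bits is recorded as a 16-bit operation_precision, and a later request
   for 9 bits would leave the recorded 16-bit precision unchanged.  */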
5069
5070 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
5071 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
5072 is an arbitrary bit precision; it might not be a whole number of bytes. */
5073
5074 static void
5075 vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
5076 unsigned int min_input_precision)
5077 {
5078 /* This operation in isolation only requires the inputs to have
5079      MIN_INPUT_PRECISION of precision.  However, that doesn't mean
5080 that MIN_INPUT_PRECISION is a natural precision for the chain
5081 as a whole. E.g. consider something like:
5082
5083 unsigned short *x, *y;
5084 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5085
5086 The right shift can be done on unsigned chars, and only requires the
5087 result of "*x & 0xf0" to be done on unsigned chars. But taking that
5088 approach would mean turning a natural chain of single-vector unsigned
5089 short operations into one that truncates "*x" and then extends
5090 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
5091 operation and one vector for each unsigned char operation.
5092 This would be a significant pessimization.
5093
5094 Instead only propagate the maximum of this precision and the precision
5095 required by the users of the result. This means that we don't pessimize
5096 the case above but continue to optimize things like:
5097
5098 unsigned char *y;
5099 unsigned short *x;
5100 *y = ((*x & 0xf0) >> 4) | (*y << 4);
5101
5102 Here we would truncate two vectors of *x to a single vector of
5103 unsigned chars and use single-vector unsigned char operations for
5104 everything else, rather than doing two unsigned short copies of
5105 "(*x & 0xf0) >> 4" and then truncating the result. */
5106 min_input_precision = MAX (min_input_precision,
5107 stmt_info->min_output_precision);
5108
5109 if (min_input_precision < TYPE_PRECISION (type)
5110 && (!stmt_info->min_input_precision
5111 || stmt_info->min_input_precision > min_input_precision))
5112 stmt_info->min_input_precision = min_input_precision;
5113 }
5114
5115 /* Subroutine of vect_determine_min_output_precision. Return true if
5116 we can calculate a reduced number of output bits for STMT_INFO,
5117 whose result is LHS. */
5118
5119 static bool
5120 vect_determine_min_output_precision_1 (vec_info *vinfo,
5121 stmt_vec_info stmt_info, tree lhs)
5122 {
5123 /* Take the maximum precision required by users of the result. */
5124 unsigned int precision = 0;
5125 imm_use_iterator iter;
5126 use_operand_p use;
5127 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
5128 {
5129 gimple *use_stmt = USE_STMT (use);
5130 if (is_gimple_debug (use_stmt))
5131 continue;
5132 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
5133 if (!use_stmt_info || !use_stmt_info->min_input_precision)
5134 return false;
5135 /* The input precision recorded for COND_EXPRs applies only to the
5136 "then" and "else" values. */
5137       gassign *assign = dyn_cast <gassign *> (use_stmt);
5138 if (assign
5139 && gimple_assign_rhs_code (assign) == COND_EXPR
5140 && use->use != gimple_assign_rhs2_ptr (assign)
5141 && use->use != gimple_assign_rhs3_ptr (assign))
5142 return false;
5143 precision = MAX (precision, use_stmt_info->min_input_precision);
5144 }
5145
5146 if (dump_enabled_p ())
5147 dump_printf_loc (MSG_NOTE, vect_location,
5148 "only the low %d bits of %T are significant\n",
5149 precision, lhs);
5150 stmt_info->min_output_precision = precision;
5151 return true;
5152 }
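
/* As a sketch: if LHS has a 32-bit type and its only non-debug users have
   recorded min_input_precision values of 8 and 16, the loop above records
   a min_output_precision of 16; if any user lacks a recorded
   min_input_precision, the caller falls back to the full 32 bits.  */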
5153
5154 /* Calculate min_output_precision for STMT_INFO. */
5155
5156 static void
5157 vect_determine_min_output_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5158 {
5159 /* We're only interested in statements with a narrowable result. */
5160 tree lhs = gimple_get_lhs (stmt_info->stmt);
5161 if (!lhs
5162 || TREE_CODE (lhs) != SSA_NAME
5163 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
5164 return;
5165
5166 if (!vect_determine_min_output_precision_1 (vinfo, stmt_info, lhs))
5167 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
5168 }
5169
5170 /* Use range information to decide whether STMT (described by STMT_INFO)
5171 could be done in a narrower type. This is effectively a forward
5172 propagation, since it uses context-independent information that applies
5173 to all users of an SSA name. */
5174
5175 static void
5176 vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
5177 {
5178 tree lhs = gimple_assign_lhs (stmt);
5179 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
5180 return;
5181
5182 tree type = TREE_TYPE (lhs);
5183 if (!vect_narrowable_type_p (type))
5184 return;
5185
5186 /* First see whether we have any useful range information for the result. */
5187 unsigned int precision = TYPE_PRECISION (type);
5188 signop sign = TYPE_SIGN (type);
5189 wide_int min_value, max_value;
5190 if (!vect_get_range_info (lhs, &min_value, &max_value))
5191 return;
5192
5193 tree_code code = gimple_assign_rhs_code (stmt);
5194 unsigned int nops = gimple_num_ops (stmt);
5195
5196 if (!vect_truncatable_operation_p (code))
5197 {
5198 /* Handle operations that can be computed in type T if all inputs
5199 and outputs can be represented in type T. Also handle left and
5200 right shifts, where (in addition) the maximum shift amount must
5201 be less than the number of bits in T. */
5202 bool is_shift;
5203 switch (code)
5204 {
5205 case LSHIFT_EXPR:
5206 case RSHIFT_EXPR:
5207 is_shift = true;
5208 break;
5209
5210 case ABS_EXPR:
5211 case MIN_EXPR:
5212 case MAX_EXPR:
5213 case TRUNC_DIV_EXPR:
5214 case CEIL_DIV_EXPR:
5215 case FLOOR_DIV_EXPR:
5216 case ROUND_DIV_EXPR:
5217 case EXACT_DIV_EXPR:
5218 /* Modulus is excluded because it is typically calculated by doing
5219 a division, for which minimum signed / -1 isn't representable in
5220 the original signed type. We could take the division range into
5221 account instead, if handling modulus ever becomes important. */
5222 is_shift = false;
5223 break;
5224
5225 default:
5226 return;
5227 }
5228 for (unsigned int i = 1; i < nops; ++i)
5229 {
5230 tree op = gimple_op (stmt, i);
5231 wide_int op_min_value, op_max_value;
5232 if (TREE_CODE (op) == INTEGER_CST)
5233 {
5234 unsigned int op_precision = TYPE_PRECISION (TREE_TYPE (op));
5235 op_min_value = op_max_value = wi::to_wide (op, op_precision);
5236 }
5237 else if (TREE_CODE (op) == SSA_NAME)
5238 {
5239 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
5240 return;
5241 }
5242 else
5243 return;
5244
5245 if (is_shift && i == 2)
5246 {
5247 /* There needs to be one more bit than the maximum shift amount.
5248
5249 If the maximum shift amount is already 1 less than PRECISION
5250 then we can't narrow the shift further. Dealing with that
5251 case first ensures that we can safely use an unsigned range
5252 below.
5253
5254 op_min_value isn't relevant, since shifts by negative amounts
5255 are UB. */
5256 if (wi::geu_p (op_max_value, precision - 1))
5257 return;
5258 unsigned int min_bits = op_max_value.to_uhwi () + 1;
5259
5260 /* As explained below, we can convert a signed shift into an
5261 unsigned shift if the sign bit is always clear. At this
5262 point we've already processed the ranges of the output and
5263 the first input. */
5264 auto op_sign = sign;
5265 if (sign == SIGNED && !wi::neg_p (min_value))
5266 op_sign = UNSIGNED;
5267 op_min_value = wide_int::from (wi::min_value (min_bits, op_sign),
5268 precision, op_sign);
5269 op_max_value = wide_int::from (wi::max_value (min_bits, op_sign),
5270 precision, op_sign);
5271 }
5272 min_value = wi::min (min_value, op_min_value, sign);
5273 max_value = wi::max (max_value, op_max_value, sign);
5274 }
5275 }
5276
5277 /* Try to switch signed types for unsigned types if we can.
5278 This is better for two reasons. First, unsigned ops tend
5279 to be cheaper than signed ops. Second, it means that we can
5280 handle things like:
5281
5282 signed char c;
5283 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
5284
5285 as:
5286
5287 signed char c;
5288 unsigned short res_1 = (unsigned short) c & 0xff00;
5289 int res = (int) res_1;
5290
5291 where the intermediate result res_1 has unsigned rather than
5292 signed type. */
5293 if (sign == SIGNED && !wi::neg_p (min_value))
5294 sign = UNSIGNED;
5295
5296 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
5297 unsigned int precision1 = wi::min_precision (min_value, sign);
5298 unsigned int precision2 = wi::min_precision (max_value, sign);
5299 unsigned int value_precision = MAX (precision1, precision2);
5300 if (value_precision >= precision)
5301 return;
5302
5303 if (dump_enabled_p ())
5304 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5305 " without loss of precision: %G",
5306 sign == SIGNED ? "signed" : "unsigned",
5307 value_precision, stmt);
5308
5309 vect_set_operation_type (stmt_info, type, value_precision, sign);
5310 vect_set_min_input_precision (stmt_info, type, value_precision);
5311 }
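
/* A worked example with hypothetical ranges: for

     unsigned int x;              // known range [0, 200]
     unsigned int y;              // known range [0, 55]
     unsigned int sum = x + y;    // known range [0, 255]

   PLUS_EXPR is truncatable, so only the range of SUM matters; it fits in
   8 unsigned bits, so the statement is recorded as being computable in an
   8-bit unsigned type.  For a non-truncatable operation such as MAX_EXPR,
   the operand ranges would first be merged into MIN_VALUE/MAX_VALUE by the
   loop above.  */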
5312
5313 /* Use information about the users of STMT's result to decide whether
5314 STMT (described by STMT_INFO) could be done in a narrower type.
5315 This is effectively a backward propagation. */
5316
5317 static void
5318 vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
5319 {
5320 tree_code code = gimple_assign_rhs_code (stmt);
5321 unsigned int opno = (code == COND_EXPR ? 2 : 1);
5322 tree type = TREE_TYPE (gimple_op (stmt, opno));
5323 if (!vect_narrowable_type_p (type))
5324 return;
5325
5326 unsigned int precision = TYPE_PRECISION (type);
5327 unsigned int operation_precision, min_input_precision;
5328 switch (code)
5329 {
5330 CASE_CONVERT:
5331 /* Only the bits that contribute to the output matter. Don't change
5332 the precision of the operation itself. */
5333 operation_precision = precision;
5334 min_input_precision = stmt_info->min_output_precision;
5335 break;
5336
5337 case LSHIFT_EXPR:
5338 case RSHIFT_EXPR:
5339 {
5340 tree shift = gimple_assign_rhs2 (stmt);
5341 if (TREE_CODE (shift) != INTEGER_CST
5342 || !wi::ltu_p (wi::to_widest (shift), precision))
5343 return;
5344 unsigned int const_shift = TREE_INT_CST_LOW (shift);
5345 if (code == LSHIFT_EXPR)
5346 {
5347 /* Avoid creating an undefined shift.
5348
5349 ??? We could instead use min_output_precision as-is and
5350 optimize out-of-range shifts to zero. However, only
5351 degenerate testcases shift away all their useful input data,
5352 and it isn't natural to drop input operations in the middle
5353 of vectorization. This sort of thing should really be
5354 handled before vectorization. */
5355 operation_precision = MAX (stmt_info->min_output_precision,
5356 const_shift + 1);
5357 /* We need CONST_SHIFT fewer bits of the input. */
5358 min_input_precision = (MAX (operation_precision, const_shift)
5359 - const_shift);
5360 }
5361 else
5362 {
5363 /* We need CONST_SHIFT extra bits to do the operation. */
5364 operation_precision = (stmt_info->min_output_precision
5365 + const_shift);
5366 min_input_precision = operation_precision;
5367 }
5368 break;
5369 }
5370
5371 default:
5372 if (vect_truncatable_operation_p (code))
5373 {
5374 /* Input bit N has no effect on output bits N-1 and lower. */
5375 operation_precision = stmt_info->min_output_precision;
5376 min_input_precision = operation_precision;
5377 break;
5378 }
5379 return;
5380 }
5381
5382 if (operation_precision < precision)
5383 {
5384 if (dump_enabled_p ())
5385 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
5386 " without affecting users: %G",
5387 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
5388 operation_precision, stmt);
5389 vect_set_operation_type (stmt_info, type, operation_precision,
5390 TYPE_SIGN (type));
5391 }
5392 vect_set_min_input_precision (stmt_info, type, min_input_precision);
5393 }
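
/* A worked shift example with hypothetical values: for "res = x << 3",
   where the users of RES only need its low 8 bits, the code above computes
   operation_precision = MAX (8, 3 + 1) = 8 and min_input_precision
   = MAX (8, 3) - 3 = 5; i.e. the shift itself can be done in 8 bits and
   only the low 5 bits of X matter.  */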
5394
5395 /* Return true if the statement described by STMT_INFO sets a boolean
5396 SSA_NAME and if we know how to vectorize this kind of statement using
5397 vector mask types. */
5398
5399 static bool
5400 possible_vector_mask_operation_p (stmt_vec_info stmt_info)
5401 {
5402 tree lhs = gimple_get_lhs (stmt_info->stmt);
5403 if (!lhs
5404 || TREE_CODE (lhs) != SSA_NAME
5405 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
5406 return false;
5407
5408 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5409 {
5410 tree_code rhs_code = gimple_assign_rhs_code (assign);
5411 switch (rhs_code)
5412 {
5413 CASE_CONVERT:
5414 case SSA_NAME:
5415 case BIT_NOT_EXPR:
5416 case BIT_IOR_EXPR:
5417 case BIT_XOR_EXPR:
5418 case BIT_AND_EXPR:
5419 return true;
5420
5421 default:
5422 return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
5423 }
5424 }
5425 else if (is_a <gphi *> (stmt_info->stmt))
5426 return true;
5427 return false;
5428 }
5429
5430 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
5431 a vector mask type instead of a normal vector type. Record the
5432 result in STMT_INFO->mask_precision. */
5433
5434 static void
5435 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
5436 {
5437 if (!possible_vector_mask_operation_p (stmt_info))
5438 return;
5439
5440 /* If at least one boolean input uses a vector mask type,
5441 pick the mask type with the narrowest elements.
5442
5443 ??? This is the traditional behavior. It should always produce
5444 the smallest number of operations, but isn't necessarily the
5445 optimal choice. For example, if we have:
5446
5447 a = b & c
5448
5449 where:
5450
5451 - the user of a wants it to have a mask type for 16-bit elements (M16)
5452 - b also uses M16
5453 - c uses a mask type for 8-bit elements (M8)
5454
5455 then picking M8 gives:
5456
5457 - 1 M16->M8 pack for b
5458 - 1 M8 AND for a
5459 - 2 M8->M16 unpacks for the user of a
5460
5461 whereas picking M16 would have given:
5462
5463 - 2 M8->M16 unpacks for c
5464 - 2 M16 ANDs for a
5465
5466      The number of operations is the same, but M16 would have given
5467 a shorter dependency chain and allowed more ILP. */
5468 unsigned int precision = ~0U;
5469 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
5470 {
5471 unsigned int nops = gimple_num_ops (assign);
5472 for (unsigned int i = 1; i < nops; ++i)
5473 {
5474 tree rhs = gimple_op (assign, i);
5475 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
5476 continue;
5477
5478 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5479 if (!def_stmt_info)
5480 /* Don't let external or constant operands influence the choice.
5481 We can convert them to whichever vector type we pick. */
5482 continue;
5483
5484 if (def_stmt_info->mask_precision)
5485 {
5486 if (precision > def_stmt_info->mask_precision)
5487 precision = def_stmt_info->mask_precision;
5488 }
5489 }
5490
5491 /* If the statement compares two values that shouldn't use vector masks,
5492 try comparing the values as normal scalars instead. */
5493 tree_code rhs_code = gimple_assign_rhs_code (assign);
5494 if (precision == ~0U
5495 && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
5496 {
5497 tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
5498 scalar_mode mode;
5499 tree vectype, mask_type;
5500 if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
5501 && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
5502 && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
5503 && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
5504 precision = GET_MODE_BITSIZE (mode);
5505 }
5506 }
5507 else
5508 {
5509 gphi *phi = as_a <gphi *> (stmt_info->stmt);
5510 for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
5511 {
5512 tree rhs = gimple_phi_arg_def (phi, i);
5513
5514 stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
5515 if (!def_stmt_info)
5516 /* Don't let external or constant operands influence the choice.
5517 We can convert them to whichever vector type we pick. */
5518 continue;
5519
5520 if (def_stmt_info->mask_precision)
5521 {
5522 if (precision > def_stmt_info->mask_precision)
5523 precision = def_stmt_info->mask_precision;
5524 }
5525 }
5526 }
5527
5528 if (dump_enabled_p ())
5529 {
5530 if (precision == ~0U)
5531 dump_printf_loc (MSG_NOTE, vect_location,
5532 "using normal nonmask vectors for %G",
5533 stmt_info->stmt);
5534 else
5535 dump_printf_loc (MSG_NOTE, vect_location,
5536 "using boolean precision %d for %G",
5537 precision, stmt_info->stmt);
5538 }
5539
5540 stmt_info->mask_precision = precision;
5541 }
5542
5543 /* Handle vect_determine_precisions for STMT_INFO, given that we
5544 have already done so for the users of its result. */
5545
5546 void
5547 vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
5548 {
5549 vect_determine_min_output_precision (vinfo, stmt_info);
5550 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
5551 {
5552 vect_determine_precisions_from_range (stmt_info, stmt);
5553 vect_determine_precisions_from_users (stmt_info, stmt);
5554 }
5555 }
5556
5557 /* Walk backwards through the vectorizable region to determine the
5558 values of these fields:
5559
5560 - min_output_precision
5561 - min_input_precision
5562 - operation_precision
5563 - operation_sign. */
5564
5565 void
5566 vect_determine_precisions (vec_info *vinfo)
5567 {
5568 DUMP_VECT_SCOPE ("vect_determine_precisions");
5569
5570 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5571 {
5572 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5573 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5574 unsigned int nbbs = loop->num_nodes;
5575
5576 for (unsigned int i = 0; i < nbbs; i++)
5577 {
5578 basic_block bb = bbs[i];
5579 for (auto gsi = gsi_start_phis (bb);
5580 !gsi_end_p (gsi); gsi_next (&gsi))
5581 {
5582 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5583 if (stmt_info)
5584 vect_determine_mask_precision (vinfo, stmt_info);
5585 }
5586 for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5587 if (!is_gimple_debug (gsi_stmt (si)))
5588 vect_determine_mask_precision
5589 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5590 }
5591 for (unsigned int i = 0; i < nbbs; i++)
5592 {
5593 basic_block bb = bbs[nbbs - i - 1];
5594 for (gimple_stmt_iterator si = gsi_last_bb (bb);
5595 !gsi_end_p (si); gsi_prev (&si))
5596 if (!is_gimple_debug (gsi_stmt (si)))
5597 vect_determine_stmt_precisions
5598 (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
5599 for (auto gsi = gsi_start_phis (bb);
5600 !gsi_end_p (gsi); gsi_next (&gsi))
5601 {
5602 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5603 if (stmt_info)
5604 vect_determine_stmt_precisions (vinfo, stmt_info);
5605 }
5606 }
5607 }
5608 else
5609 {
5610 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
5611 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
5612 {
5613 basic_block bb = bb_vinfo->bbs[i];
5614 for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5615 {
5616 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5617 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5618 vect_determine_mask_precision (vinfo, stmt_info);
5619 }
5620 for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5621 {
5622 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5623 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5624 vect_determine_mask_precision (vinfo, stmt_info);
5625 }
5626 }
5627 for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
5628 {
5629 for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
5630 !gsi_end_p (gsi); gsi_prev (&gsi))
5631 {
5632 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
5633 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5634 vect_determine_stmt_precisions (vinfo, stmt_info);
5635 }
5636 for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
5637 !gsi_end_p (gsi); gsi_next (&gsi))
5638 {
5639 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
5640 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
5641 vect_determine_stmt_precisions (vinfo, stmt_info);
5642 }
5643 }
5644 }
5645 }
5646
5647 typedef gimple *(*vect_recog_func_ptr) (vec_info *, stmt_vec_info, tree *);
5648
5649 struct vect_recog_func
5650 {
5651 vect_recog_func_ptr fn;
5652 const char *name;
5653 };
5654
5655 /* Note that ordering matters - the first pattern matching on a stmt is
5656    taken, which means the more complex pattern usually needs to precede the
5657    less complex ones (widen_sum only after dot_prod or sad, for example).  */
5658 static vect_recog_func vect_vect_recog_func_ptrs[] = {
5659 { vect_recog_over_widening_pattern, "over_widening" },
5660 /* Must come after over_widening, which narrows the shift as much as
5661 possible beforehand. */
5662 { vect_recog_average_pattern, "average" },
5663 { vect_recog_cond_expr_convert_pattern, "cond_expr_convert" },
5664 { vect_recog_mulhs_pattern, "mult_high" },
5665 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
5666 { vect_recog_widen_mult_pattern, "widen_mult" },
5667 { vect_recog_dot_prod_pattern, "dot_prod" },
5668 { vect_recog_sad_pattern, "sad" },
5669 { vect_recog_widen_sum_pattern, "widen_sum" },
5670 { vect_recog_pow_pattern, "pow" },
5671 { vect_recog_popcount_pattern, "popcount" },
5672 { vect_recog_widen_shift_pattern, "widen_shift" },
5673 { vect_recog_rotate_pattern, "rotate" },
5674 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
5675 { vect_recog_divmod_pattern, "divmod" },
5676 { vect_recog_mult_pattern, "mult" },
5677 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
5678 { vect_recog_bool_pattern, "bool" },
5679 /* This must come before mask conversion, and includes the parts
5680 of mask conversion that are needed for gather and scatter
5681 internal functions. */
5682 { vect_recog_gather_scatter_pattern, "gather_scatter" },
5683 { vect_recog_mask_conversion_pattern, "mask_conversion" },
5684 { vect_recog_widen_plus_pattern, "widen_plus" },
5685 { vect_recog_widen_minus_pattern, "widen_minus" },
5686 };
5687
5688 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
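
/* A minimal sketch of the shape such a recognizer takes (this is not an
   existing pattern; vect_recog_example_pattern and its locals are
   hypothetical).  It inspects STMT_VINFO, builds the replacement with
   vect_recog_temp_ssa_var, sets *TYPE_OUT to the vector type of the result
   and returns the final pattern statement, or NULL on failure:

     static gimple *
     vect_recog_example_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                                 tree *type_out)
     {
       gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
       if (!last_stmt || gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
         return NULL;
       tree lhs = gimple_assign_lhs (last_stmt);
       tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
       if (!vectype)
         return NULL;
       tree new_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
       gimple *pattern_stmt
         = gimple_build_assign (new_lhs, MULT_EXPR,
                                gimple_assign_rhs1 (last_stmt),
                                gimple_assign_rhs2 (last_stmt));
       vect_pattern_detected ("vect_recog_example_pattern", last_stmt);
       *type_out = vectype;
       return pattern_stmt;
     }

   It would be registered by adding { vect_recog_example_pattern, "example" }
   to vect_vect_recog_func_ptrs above, bearing in mind that order matters.  */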
5689
5690 /* Mark statements that are involved in a pattern. */
5691
5692 void
5693 vect_mark_pattern_stmts (vec_info *vinfo,
5694 stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
5695 tree pattern_vectype)
5696 {
5697 stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
5698 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5699
5700 gimple *orig_pattern_stmt = NULL;
5701 if (is_pattern_stmt_p (orig_stmt_info))
5702 {
5703 /* We're replacing a statement in an existing pattern definition
5704 sequence. */
5705 orig_pattern_stmt = orig_stmt_info->stmt;
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_NOTE, vect_location,
5708 "replacing earlier pattern %G", orig_pattern_stmt);
5709
5710 /* To keep the book-keeping simple, just swap the lhs of the
5711 old and new statements, so that the old one has a valid but
5712 unused lhs. */
5713 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
5714 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
5715 gimple_set_lhs (pattern_stmt, old_lhs);
5716
5717 if (dump_enabled_p ())
5718 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
5719
5720 /* Switch to the statement that ORIG replaces. */
5721 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
5722
5723 /* We shouldn't be replacing the main pattern statement. */
5724 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
5725 != orig_pattern_stmt);
5726 }
5727
5728 if (def_seq)
5729 for (gimple_stmt_iterator si = gsi_start (def_seq);
5730 !gsi_end_p (si); gsi_next (&si))
5731 {
5732 if (dump_enabled_p ())
5733 dump_printf_loc (MSG_NOTE, vect_location,
5734 "extra pattern stmt: %G", gsi_stmt (si));
5735 stmt_vec_info pattern_stmt_info
5736 = vect_init_pattern_stmt (vinfo, gsi_stmt (si),
5737 orig_stmt_info, pattern_vectype);
5738 /* Stmts in the def sequence are not vectorizable cycle or
5739 induction defs, instead they should all be vect_internal_def
5740 feeding the main pattern stmt which retains this def type. */
5741 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
5742 }
5743
5744 if (orig_pattern_stmt)
5745 {
5746 vect_init_pattern_stmt (vinfo, pattern_stmt,
5747 orig_stmt_info, pattern_vectype);
5748
5749 /* Insert all the new pattern statements before the original one. */
5750 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
5751 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
5752 orig_def_seq);
5753 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
5754 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
5755
5756 /* Remove the pattern statement that this new pattern replaces. */
5757 gsi_remove (&gsi, false);
5758 }
5759 else
5760 vect_set_pattern_stmt (vinfo,
5761 pattern_stmt, orig_stmt_info, pattern_vectype);
5762
5763 /* Transfer reduction path info to the pattern. */
5764 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
5765 {
5766 gimple_match_op op;
5767 if (!gimple_extract_op (orig_stmt_info_saved->stmt, &op))
5768 gcc_unreachable ();
5769 tree lookfor = op.ops[STMT_VINFO_REDUC_IDX (orig_stmt_info)];
5770       /* Search the pattern def sequence and the main pattern stmt.  Note
5771          we may have inserted everything into a containing pattern def
5772          sequence, so the following is a bit awkward.  */
5773 gimple_stmt_iterator si;
5774 gimple *s;
5775 if (def_seq)
5776 {
5777 si = gsi_start (def_seq);
5778 s = gsi_stmt (si);
5779 gsi_next (&si);
5780 }
5781 else
5782 {
5783 si = gsi_none ();
5784 s = pattern_stmt;
5785 }
5786 do
5787 {
5788 bool found = false;
5789 if (gimple_extract_op (s, &op))
5790 for (unsigned i = 0; i < op.num_ops; ++i)
5791 if (op.ops[i] == lookfor)
5792 {
5793 STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i;
5794 lookfor = gimple_get_lhs (s);
5795 found = true;
5796 break;
5797 }
5798 if (s == pattern_stmt)
5799 {
5800 if (!found && dump_enabled_p ())
5801 dump_printf_loc (MSG_NOTE, vect_location,
5802 "failed to update reduction index.\n");
5803 break;
5804 }
5805 if (gsi_end_p (si))
5806 s = pattern_stmt;
5807 else
5808 {
5809 s = gsi_stmt (si);
5810 if (s == pattern_stmt)
5811 /* Found the end inside a bigger pattern def seq. */
5812 si = gsi_none ();
5813 else
5814 gsi_next (&si);
5815 }
5816 } while (1);
5817 }
5818 }
5819
5820 /* Function vect_pattern_recog_1
5821
5822 Input:
5823 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
5824 computation pattern.
5825 STMT_INFO: A stmt from which the pattern search should start.
5826
5827 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
5828 a sequence of statements that has the same functionality and can be
5829 used to replace STMT_INFO. It returns the last statement in the sequence
5830 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
5831 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
5832 statement, having first checked that the target supports the new operation
5833 in that type.
5834
5835 This function also does some bookkeeping, as explained in the documentation
5836    for vect_pattern_recog.  */
5837
5838 static void
5839 vect_pattern_recog_1 (vec_info *vinfo,
5840 vect_recog_func *recog_func, stmt_vec_info stmt_info)
5841 {
5842 gimple *pattern_stmt;
5843 loop_vec_info loop_vinfo;
5844 tree pattern_vectype;
5845
5846 /* If this statement has already been replaced with pattern statements,
5847 leave the original statement alone, since the first match wins.
5848 Instead try to match against the definition statements that feed
5849 the main pattern statement. */
5850 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
5851 {
5852 gimple_stmt_iterator gsi;
5853 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5854 !gsi_end_p (gsi); gsi_next (&gsi))
5855 vect_pattern_recog_1 (vinfo, recog_func,
5856 vinfo->lookup_stmt (gsi_stmt (gsi)));
5857 return;
5858 }
5859
5860 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
5861 pattern_stmt = recog_func->fn (vinfo, stmt_info, &pattern_vectype);
5862 if (!pattern_stmt)
5863 {
5864 /* Clear any half-formed pattern definition sequence. */
5865 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
5866 return;
5867 }
5868
5869 loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5870
5871 /* Found a vectorizable pattern. */
5872 if (dump_enabled_p ())
5873 dump_printf_loc (MSG_NOTE, vect_location,
5874 "%s pattern recognized: %G",
5875 recog_func->name, pattern_stmt);
5876
5877 /* Mark the stmts that are involved in the pattern. */
5878 vect_mark_pattern_stmts (vinfo, stmt_info, pattern_stmt, pattern_vectype);
5879
5880 /* Patterns cannot be vectorized using SLP, because they change the order of
5881 computation. */
5882 if (loop_vinfo)
5883 {
5884 unsigned ix, ix2;
5885 stmt_vec_info *elem_ptr;
5886 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
5887 elem_ptr, *elem_ptr == stmt_info);
5888 }
5889 }
5890
5891
5892 /* Function vect_pattern_recog
5893
5894 Input:
5895 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
5896 computation idioms.
5897
5898 Output - for each computation idiom that is detected we create a new stmt
5899 that provides the same functionality and that can be vectorized. We
5900 also record some information in the struct_stmt_info of the relevant
5901 stmts, as explained below:
5902
5903 At the entry to this function we have the following stmts, with the
5904 following initial value in the STMT_VINFO fields:
5905
5906 stmt in_pattern_p related_stmt vec_stmt
5907 S1: a_i = .... - - -
5908 S2: a_2 = ..use(a_i).. - - -
5909 S3: a_1 = ..use(a_2).. - - -
5910 S4: a_0 = ..use(a_1).. - - -
5911 S5: ... = ..use(a_0).. - - -
5912
5913 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
5914 represented by a single stmt. We then:
5915 - create a new stmt S6 equivalent to the pattern (the stmt is not
5916 inserted into the code)
5917 - fill in the STMT_VINFO fields as follows:
5918
5919 in_pattern_p related_stmt vec_stmt
5920 S1: a_i = .... - - -
5921 S2: a_2 = ..use(a_i).. - - -
5922 S3: a_1 = ..use(a_2).. - - -
5923 S4: a_0 = ..use(a_1).. true S6 -
5924 '---> S6: a_new = .... - S4 -
5925 S5: ... = ..use(a_0).. - - -
5926
5927 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
5928 to each other through the RELATED_STMT field).
5929
5930 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
5931 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
5932 remain irrelevant unless used by stmts other than S4.
5933
5934 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
5935 (because they are marked as irrelevant). It will vectorize S6, and record
5936 a pointer to the new vector stmt VS6 from S6 (as usual).
5937 S4 will be skipped, and S5 will be vectorized as usual:
5938
5939 in_pattern_p related_stmt vec_stmt
5940 S1: a_i = .... - - -
5941 S2: a_2 = ..use(a_i).. - - -
5942 S3: a_1 = ..use(a_2).. - - -
5943 > VS6: va_new = .... - - -
5944 S4: a_0 = ..use(a_1).. true S6 VS6
5945 '---> S6: a_new = .... - S4 VS6
5946 > VS5: ... = ..vuse(va_new).. - - -
5947 S5: ... = ..use(a_0).. - - -
5948
5949 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
5950 elsewhere), and we'll end up with:
5951
5952 VS6: va_new = ....
5953 VS5: ... = ..vuse(va_new)..
5954
5955    In case of more than one pattern statement, e.g., widen-mult with
5956 intermediate type:
5957
5958 S1 a_t = ;
5959 S2 a_T = (TYPE) a_t;
5960 '--> S3: a_it = (interm_type) a_t;
5961 S4 prod_T = a_T * CONST;
5962 '--> S5: prod_T' = a_it w* CONST;
5963
5964 there may be other users of a_T outside the pattern. In that case S2 will
5965 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
5966 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
5967 be recorded in S3. */
5968
5969 void
5970 vect_pattern_recog (vec_info *vinfo)
5971 {
5972 class loop *loop;
5973 basic_block *bbs;
5974 unsigned int nbbs;
5975 gimple_stmt_iterator si;
5976 unsigned int i, j;
5977
5978 vect_determine_precisions (vinfo);
5979
5980 DUMP_VECT_SCOPE ("vect_pattern_recog");
5981
5982 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
5983 {
5984 loop = LOOP_VINFO_LOOP (loop_vinfo);
5985 bbs = LOOP_VINFO_BBS (loop_vinfo);
5986 nbbs = loop->num_nodes;
5987
5988 /* Scan through the loop stmts, applying the pattern recognition
5989 functions starting at each stmt visited: */
5990 for (i = 0; i < nbbs; i++)
5991 {
5992 basic_block bb = bbs[i];
5993 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
5994 {
5995 if (is_gimple_debug (gsi_stmt (si)))
5996 continue;
5997 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
5998 /* Scan over all generic vect_recog_xxx_pattern functions. */
5999 for (j = 0; j < NUM_PATTERNS; j++)
6000 vect_pattern_recog_1 (vinfo, &vect_vect_recog_func_ptrs[j],
6001 stmt_info);
6002 }
6003 }
6004 }
6005 else
6006 {
6007 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
6008 for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
6009 for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[i]);
6010 !gsi_end_p (gsi); gsi_next (&gsi))
6011 {
6012 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (gsi_stmt (gsi));
6013 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info))
6014 continue;
6015
6016 /* Scan over all generic vect_recog_xxx_pattern functions. */
6017 for (j = 0; j < NUM_PATTERNS; j++)
6018 vect_pattern_recog_1 (vinfo,
6019 &vect_vect_recog_func_ptrs[j], stmt_info);
6020 }
6021 }
6022
6023 /* After this no more add_stmt calls are allowed. */
6024 vinfo->stmt_vec_info_ro = true;
6025 }
6026