1*e4b17023SJohn Marino /* Analysis Utilities for Loop Vectorization. 2*e4b17023SJohn Marino Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 3*e4b17023SJohn Marino Free Software Foundation, Inc. 4*e4b17023SJohn Marino Contributed by Dorit Nuzman <dorit@il.ibm.com> 5*e4b17023SJohn Marino 6*e4b17023SJohn Marino This file is part of GCC. 7*e4b17023SJohn Marino 8*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under 9*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free 10*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later 11*e4b17023SJohn Marino version. 12*e4b17023SJohn Marino 13*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY 14*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or 15*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16*e4b17023SJohn Marino for more details. 17*e4b17023SJohn Marino 18*e4b17023SJohn Marino You should have received a copy of the GNU General Public License 19*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see 20*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */ 21*e4b17023SJohn Marino 22*e4b17023SJohn Marino #include "config.h" 23*e4b17023SJohn Marino #include "system.h" 24*e4b17023SJohn Marino #include "coretypes.h" 25*e4b17023SJohn Marino #include "tm.h" 26*e4b17023SJohn Marino #include "ggc.h" 27*e4b17023SJohn Marino #include "tree.h" 28*e4b17023SJohn Marino #include "target.h" 29*e4b17023SJohn Marino #include "basic-block.h" 30*e4b17023SJohn Marino #include "gimple-pretty-print.h" 31*e4b17023SJohn Marino #include "tree-flow.h" 32*e4b17023SJohn Marino #include "tree-dump.h" 33*e4b17023SJohn Marino #include "cfgloop.h" 34*e4b17023SJohn Marino #include "expr.h" 35*e4b17023SJohn Marino #include "optabs.h" 36*e4b17023SJohn Marino #include "params.h" 37*e4b17023SJohn Marino #include "tree-data-ref.h" 38*e4b17023SJohn Marino #include "tree-vectorizer.h" 39*e4b17023SJohn Marino #include "recog.h" 40*e4b17023SJohn Marino #include "diagnostic-core.h" 41*e4b17023SJohn Marino 42*e4b17023SJohn Marino /* Pattern recognition functions */ 43*e4b17023SJohn Marino static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *, 44*e4b17023SJohn Marino tree *); 45*e4b17023SJohn Marino static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, 46*e4b17023SJohn Marino tree *); 47*e4b17023SJohn Marino static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, 48*e4b17023SJohn Marino tree *); 49*e4b17023SJohn Marino static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); 50*e4b17023SJohn Marino static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *, 51*e4b17023SJohn Marino tree *); 52*e4b17023SJohn Marino static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **, 53*e4b17023SJohn Marino tree *, tree *); 54*e4b17023SJohn Marino static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **, 55*e4b17023SJohn Marino tree *, tree *); 56*e4b17023SJohn Marino static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **, 57*e4b17023SJohn Marino tree *, tree *); 58*e4b17023SJohn Marino static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **, 59*e4b17023SJohn Marino tree *, tree *); 60*e4b17023SJohn Marino static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *); 61*e4b17023SJohn Marino static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { 62*e4b17023SJohn Marino vect_recog_widen_mult_pattern, 63*e4b17023SJohn Marino vect_recog_widen_sum_pattern, 64*e4b17023SJohn Marino vect_recog_dot_prod_pattern, 65*e4b17023SJohn Marino vect_recog_pow_pattern, 66*e4b17023SJohn Marino vect_recog_widen_shift_pattern, 67*e4b17023SJohn Marino vect_recog_over_widening_pattern, 68*e4b17023SJohn Marino vect_recog_vector_vector_shift_pattern, 69*e4b17023SJohn Marino vect_recog_sdivmod_pow2_pattern, 70*e4b17023SJohn Marino vect_recog_mixed_size_cond_pattern, 71*e4b17023SJohn Marino vect_recog_bool_pattern}; 72*e4b17023SJohn Marino 73*e4b17023SJohn Marino static inline void 74*e4b17023SJohn Marino append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt) 75*e4b17023SJohn Marino { 76*e4b17023SJohn Marino gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), 77*e4b17023SJohn Marino stmt); 78*e4b17023SJohn Marino } 79*e4b17023SJohn Marino 80*e4b17023SJohn Marino static inline void 81*e4b17023SJohn Marino new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt) 82*e4b17023SJohn Marino { 83*e4b17023SJohn Marino STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL; 84*e4b17023SJohn Marino append_pattern_def_seq (stmt_info, stmt); 85*e4b17023SJohn Marino } 86*e4b17023SJohn Marino 87*e4b17023SJohn Marino /* If the LHS of DEF_STMT has a single use, and that statement is 88*e4b17023SJohn Marino in the same loop, return it. */ 89*e4b17023SJohn Marino 90*e4b17023SJohn Marino static gimple 91*e4b17023SJohn Marino vect_single_imm_use (gimple def_stmt) 92*e4b17023SJohn Marino { 93*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (def_stmt); 94*e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 95*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 96*e4b17023SJohn Marino tree lhs = gimple_assign_lhs (def_stmt); 97*e4b17023SJohn Marino use_operand_p use_p; 98*e4b17023SJohn Marino gimple use_stmt; 99*e4b17023SJohn Marino 100*e4b17023SJohn Marino if (!single_imm_use (lhs, &use_p, &use_stmt)) 101*e4b17023SJohn Marino return NULL; 102*e4b17023SJohn Marino 103*e4b17023SJohn Marino if (!gimple_bb (use_stmt)) 104*e4b17023SJohn Marino return NULL; 105*e4b17023SJohn Marino 106*e4b17023SJohn Marino if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) 107*e4b17023SJohn Marino return NULL; 108*e4b17023SJohn Marino 109*e4b17023SJohn Marino gcc_assert (vinfo_for_stmt (use_stmt)); 110*e4b17023SJohn Marino return use_stmt; 111*e4b17023SJohn Marino } 112*e4b17023SJohn Marino 113*e4b17023SJohn Marino /* Function widened_name_p 114*e4b17023SJohn Marino 115*e4b17023SJohn Marino Check whether NAME, an ssa-name used in USE_STMT, 116*e4b17023SJohn Marino is a result of a type-promotion, such that: 117*e4b17023SJohn Marino DEF_STMT: NAME = NOP (name0) 118*e4b17023SJohn Marino where the type of name0 (HALF_TYPE) is smaller than the type of NAME. 119*e4b17023SJohn Marino If CHECK_SIGN is TRUE, check that either both types are signed or both are 120*e4b17023SJohn Marino unsigned. */ 121*e4b17023SJohn Marino 122*e4b17023SJohn Marino static bool 123*e4b17023SJohn Marino widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt, 124*e4b17023SJohn Marino bool check_sign) 125*e4b17023SJohn Marino { 126*e4b17023SJohn Marino tree dummy; 127*e4b17023SJohn Marino gimple dummy_gimple; 128*e4b17023SJohn Marino loop_vec_info loop_vinfo; 129*e4b17023SJohn Marino stmt_vec_info stmt_vinfo; 130*e4b17023SJohn Marino tree type = TREE_TYPE (name); 131*e4b17023SJohn Marino tree oprnd0; 132*e4b17023SJohn Marino enum vect_def_type dt; 133*e4b17023SJohn Marino tree def; 134*e4b17023SJohn Marino 135*e4b17023SJohn Marino stmt_vinfo = vinfo_for_stmt (use_stmt); 136*e4b17023SJohn Marino loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 137*e4b17023SJohn Marino 138*e4b17023SJohn Marino if (!vect_is_simple_use (name, use_stmt, loop_vinfo, NULL, def_stmt, &def, 139*e4b17023SJohn Marino &dt)) 140*e4b17023SJohn Marino return false; 141*e4b17023SJohn Marino 142*e4b17023SJohn Marino if (dt != vect_internal_def 143*e4b17023SJohn Marino && dt != vect_external_def && dt != vect_constant_def) 144*e4b17023SJohn Marino return false; 145*e4b17023SJohn Marino 146*e4b17023SJohn Marino if (! *def_stmt) 147*e4b17023SJohn Marino return false; 148*e4b17023SJohn Marino 149*e4b17023SJohn Marino if (!is_gimple_assign (*def_stmt)) 150*e4b17023SJohn Marino return false; 151*e4b17023SJohn Marino 152*e4b17023SJohn Marino if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR) 153*e4b17023SJohn Marino return false; 154*e4b17023SJohn Marino 155*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (*def_stmt); 156*e4b17023SJohn Marino 157*e4b17023SJohn Marino *half_type = TREE_TYPE (oprnd0); 158*e4b17023SJohn Marino if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type) 159*e4b17023SJohn Marino || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign) 160*e4b17023SJohn Marino || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2))) 161*e4b17023SJohn Marino return false; 162*e4b17023SJohn Marino 163*e4b17023SJohn Marino if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo, 164*e4b17023SJohn Marino NULL, &dummy_gimple, &dummy, &dt)) 165*e4b17023SJohn Marino return false; 166*e4b17023SJohn Marino 167*e4b17023SJohn Marino return true; 168*e4b17023SJohn Marino } 169*e4b17023SJohn Marino 170*e4b17023SJohn Marino /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT 171*e4b17023SJohn Marino is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */ 172*e4b17023SJohn Marino 173*e4b17023SJohn Marino static tree 174*e4b17023SJohn Marino vect_recog_temp_ssa_var (tree type, gimple stmt) 175*e4b17023SJohn Marino { 176*e4b17023SJohn Marino tree var = create_tmp_var (type, "patt"); 177*e4b17023SJohn Marino 178*e4b17023SJohn Marino add_referenced_var (var); 179*e4b17023SJohn Marino var = make_ssa_name (var, stmt); 180*e4b17023SJohn Marino return var; 181*e4b17023SJohn Marino } 182*e4b17023SJohn Marino 183*e4b17023SJohn Marino /* Function vect_recog_dot_prod_pattern 184*e4b17023SJohn Marino 185*e4b17023SJohn Marino Try to find the following pattern: 186*e4b17023SJohn Marino 187*e4b17023SJohn Marino type x_t, y_t; 188*e4b17023SJohn Marino TYPE1 prod; 189*e4b17023SJohn Marino TYPE2 sum = init; 190*e4b17023SJohn Marino loop: 191*e4b17023SJohn Marino sum_0 = phi <init, sum_1> 192*e4b17023SJohn Marino S1 x_t = ... 193*e4b17023SJohn Marino S2 y_t = ... 194*e4b17023SJohn Marino S3 x_T = (TYPE1) x_t; 195*e4b17023SJohn Marino S4 y_T = (TYPE1) y_t; 196*e4b17023SJohn Marino S5 prod = x_T * y_T; 197*e4b17023SJohn Marino [S6 prod = (TYPE2) prod; #optional] 198*e4b17023SJohn Marino S7 sum_1 = prod + sum_0; 199*e4b17023SJohn Marino 200*e4b17023SJohn Marino where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the 201*e4b17023SJohn Marino same size of 'TYPE1' or bigger. This is a special case of a reduction 202*e4b17023SJohn Marino computation. 203*e4b17023SJohn Marino 204*e4b17023SJohn Marino Input: 205*e4b17023SJohn Marino 206*e4b17023SJohn Marino * STMTS: Contains a stmt from which the pattern search begins. In the 207*e4b17023SJohn Marino example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} 208*e4b17023SJohn Marino will be detected. 209*e4b17023SJohn Marino 210*e4b17023SJohn Marino Output: 211*e4b17023SJohn Marino 212*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 213*e4b17023SJohn Marino 214*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 215*e4b17023SJohn Marino 216*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the sequence of 217*e4b17023SJohn Marino stmts that constitute the pattern. In this case it will be: 218*e4b17023SJohn Marino WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> 219*e4b17023SJohn Marino 220*e4b17023SJohn Marino Note: The dot-prod idiom is a widening reduction pattern that is 221*e4b17023SJohn Marino vectorized without preserving all the intermediate results. It 222*e4b17023SJohn Marino produces only N/2 (widened) results (by summing up pairs of 223*e4b17023SJohn Marino intermediate results) rather than all N results. Therefore, we 224*e4b17023SJohn Marino cannot allow this pattern when we want to get all the results and in 225*e4b17023SJohn Marino the correct order (as is the case when this computation is in an 226*e4b17023SJohn Marino inner-loop nested in an outer-loop that us being vectorized). */ 227*e4b17023SJohn Marino 228*e4b17023SJohn Marino static gimple 229*e4b17023SJohn Marino vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in, 230*e4b17023SJohn Marino tree *type_out) 231*e4b17023SJohn Marino { 232*e4b17023SJohn Marino gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); 233*e4b17023SJohn Marino tree oprnd0, oprnd1; 234*e4b17023SJohn Marino tree oprnd00, oprnd01; 235*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 236*e4b17023SJohn Marino tree type, half_type; 237*e4b17023SJohn Marino gimple pattern_stmt; 238*e4b17023SJohn Marino tree prod_type; 239*e4b17023SJohn Marino loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 240*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_info); 241*e4b17023SJohn Marino tree var; 242*e4b17023SJohn Marino 243*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt)) 244*e4b17023SJohn Marino return NULL; 245*e4b17023SJohn Marino 246*e4b17023SJohn Marino type = gimple_expr_type (last_stmt); 247*e4b17023SJohn Marino 248*e4b17023SJohn Marino /* Look for the following pattern 249*e4b17023SJohn Marino DX = (TYPE1) X; 250*e4b17023SJohn Marino DY = (TYPE1) Y; 251*e4b17023SJohn Marino DPROD = DX * DY; 252*e4b17023SJohn Marino DDPROD = (TYPE2) DPROD; 253*e4b17023SJohn Marino sum_1 = DDPROD + sum_0; 254*e4b17023SJohn Marino In which 255*e4b17023SJohn Marino - DX is double the size of X 256*e4b17023SJohn Marino - DY is double the size of Y 257*e4b17023SJohn Marino - DX, DY, DPROD all have the same type 258*e4b17023SJohn Marino - sum is the same size of DPROD or bigger 259*e4b17023SJohn Marino - sum has been recognized as a reduction variable. 260*e4b17023SJohn Marino 261*e4b17023SJohn Marino This is equivalent to: 262*e4b17023SJohn Marino DPROD = X w* Y; #widen mult 263*e4b17023SJohn Marino sum_1 = DPROD w+ sum_0; #widen summation 264*e4b17023SJohn Marino or 265*e4b17023SJohn Marino DPROD = X w* Y; #widen mult 266*e4b17023SJohn Marino sum_1 = DPROD + sum_0; #summation 267*e4b17023SJohn Marino */ 268*e4b17023SJohn Marino 269*e4b17023SJohn Marino /* Starting from LAST_STMT, follow the defs of its uses in search 270*e4b17023SJohn Marino of the above pattern. */ 271*e4b17023SJohn Marino 272*e4b17023SJohn Marino if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 273*e4b17023SJohn Marino return NULL; 274*e4b17023SJohn Marino 275*e4b17023SJohn Marino if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 276*e4b17023SJohn Marino { 277*e4b17023SJohn Marino /* Has been detected as widening-summation? */ 278*e4b17023SJohn Marino 279*e4b17023SJohn Marino stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 280*e4b17023SJohn Marino type = gimple_expr_type (stmt); 281*e4b17023SJohn Marino if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR) 282*e4b17023SJohn Marino return NULL; 283*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (stmt); 284*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (stmt); 285*e4b17023SJohn Marino half_type = TREE_TYPE (oprnd0); 286*e4b17023SJohn Marino } 287*e4b17023SJohn Marino else 288*e4b17023SJohn Marino { 289*e4b17023SJohn Marino gimple def_stmt; 290*e4b17023SJohn Marino 291*e4b17023SJohn Marino if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 292*e4b17023SJohn Marino return NULL; 293*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (last_stmt); 294*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (last_stmt); 295*e4b17023SJohn Marino if (!types_compatible_p (TREE_TYPE (oprnd0), type) 296*e4b17023SJohn Marino || !types_compatible_p (TREE_TYPE (oprnd1), type)) 297*e4b17023SJohn Marino return NULL; 298*e4b17023SJohn Marino stmt = last_stmt; 299*e4b17023SJohn Marino 300*e4b17023SJohn Marino if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true)) 301*e4b17023SJohn Marino { 302*e4b17023SJohn Marino stmt = def_stmt; 303*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (stmt); 304*e4b17023SJohn Marino } 305*e4b17023SJohn Marino else 306*e4b17023SJohn Marino half_type = type; 307*e4b17023SJohn Marino } 308*e4b17023SJohn Marino 309*e4b17023SJohn Marino /* So far so good. Since last_stmt was detected as a (summation) reduction, 310*e4b17023SJohn Marino we know that oprnd1 is the reduction variable (defined by a loop-header 311*e4b17023SJohn Marino phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 312*e4b17023SJohn Marino Left to check that oprnd0 is defined by a (widen_)mult_expr */ 313*e4b17023SJohn Marino if (TREE_CODE (oprnd0) != SSA_NAME) 314*e4b17023SJohn Marino return NULL; 315*e4b17023SJohn Marino 316*e4b17023SJohn Marino prod_type = half_type; 317*e4b17023SJohn Marino stmt = SSA_NAME_DEF_STMT (oprnd0); 318*e4b17023SJohn Marino 319*e4b17023SJohn Marino /* It could not be the dot_prod pattern if the stmt is outside the loop. */ 320*e4b17023SJohn Marino if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt))) 321*e4b17023SJohn Marino return NULL; 322*e4b17023SJohn Marino 323*e4b17023SJohn Marino /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi 324*e4b17023SJohn Marino inside the loop (in case we are analyzing an outer-loop). */ 325*e4b17023SJohn Marino if (!is_gimple_assign (stmt)) 326*e4b17023SJohn Marino return NULL; 327*e4b17023SJohn Marino stmt_vinfo = vinfo_for_stmt (stmt); 328*e4b17023SJohn Marino gcc_assert (stmt_vinfo); 329*e4b17023SJohn Marino if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 330*e4b17023SJohn Marino return NULL; 331*e4b17023SJohn Marino if (gimple_assign_rhs_code (stmt) != MULT_EXPR) 332*e4b17023SJohn Marino return NULL; 333*e4b17023SJohn Marino if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 334*e4b17023SJohn Marino { 335*e4b17023SJohn Marino /* Has been detected as a widening multiplication? */ 336*e4b17023SJohn Marino 337*e4b17023SJohn Marino stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 338*e4b17023SJohn Marino if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR) 339*e4b17023SJohn Marino return NULL; 340*e4b17023SJohn Marino stmt_vinfo = vinfo_for_stmt (stmt); 341*e4b17023SJohn Marino gcc_assert (stmt_vinfo); 342*e4b17023SJohn Marino gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def); 343*e4b17023SJohn Marino oprnd00 = gimple_assign_rhs1 (stmt); 344*e4b17023SJohn Marino oprnd01 = gimple_assign_rhs2 (stmt); 345*e4b17023SJohn Marino } 346*e4b17023SJohn Marino else 347*e4b17023SJohn Marino { 348*e4b17023SJohn Marino tree half_type0, half_type1; 349*e4b17023SJohn Marino gimple def_stmt; 350*e4b17023SJohn Marino tree oprnd0, oprnd1; 351*e4b17023SJohn Marino 352*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (stmt); 353*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (stmt); 354*e4b17023SJohn Marino if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type) 355*e4b17023SJohn Marino || !types_compatible_p (TREE_TYPE (oprnd1), prod_type)) 356*e4b17023SJohn Marino return NULL; 357*e4b17023SJohn Marino if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true)) 358*e4b17023SJohn Marino return NULL; 359*e4b17023SJohn Marino oprnd00 = gimple_assign_rhs1 (def_stmt); 360*e4b17023SJohn Marino if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true)) 361*e4b17023SJohn Marino return NULL; 362*e4b17023SJohn Marino oprnd01 = gimple_assign_rhs1 (def_stmt); 363*e4b17023SJohn Marino if (!types_compatible_p (half_type0, half_type1)) 364*e4b17023SJohn Marino return NULL; 365*e4b17023SJohn Marino if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2) 366*e4b17023SJohn Marino return NULL; 367*e4b17023SJohn Marino } 368*e4b17023SJohn Marino 369*e4b17023SJohn Marino half_type = TREE_TYPE (oprnd00); 370*e4b17023SJohn Marino *type_in = half_type; 371*e4b17023SJohn Marino *type_out = type; 372*e4b17023SJohn Marino 373*e4b17023SJohn Marino /* Pattern detected. Create a stmt to be used to replace the pattern: */ 374*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (type, NULL); 375*e4b17023SJohn Marino pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var, 376*e4b17023SJohn Marino oprnd00, oprnd01, oprnd1); 377*e4b17023SJohn Marino 378*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 379*e4b17023SJohn Marino { 380*e4b17023SJohn Marino fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: "); 381*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 382*e4b17023SJohn Marino } 383*e4b17023SJohn Marino 384*e4b17023SJohn Marino /* We don't allow changing the order of the computation in the inner-loop 385*e4b17023SJohn Marino when doing outer-loop vectorization. */ 386*e4b17023SJohn Marino gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 387*e4b17023SJohn Marino 388*e4b17023SJohn Marino return pattern_stmt; 389*e4b17023SJohn Marino } 390*e4b17023SJohn Marino 391*e4b17023SJohn Marino 392*e4b17023SJohn Marino /* Handle widening operation by a constant. At the moment we support MULT_EXPR 393*e4b17023SJohn Marino and LSHIFT_EXPR. 394*e4b17023SJohn Marino 395*e4b17023SJohn Marino For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR 396*e4b17023SJohn Marino we check that CONST_OPRND is less or equal to the size of HALF_TYPE. 397*e4b17023SJohn Marino 398*e4b17023SJohn Marino Otherwise, if the type of the result (TYPE) is at least 4 times bigger than 399*e4b17023SJohn Marino HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE) 400*e4b17023SJohn Marino that satisfies the above restrictions, we can perform a widening opeartion 401*e4b17023SJohn Marino from the intermediate type to TYPE and replace a_T = (TYPE) a_t; 402*e4b17023SJohn Marino with a_it = (interm_type) a_t; */ 403*e4b17023SJohn Marino 404*e4b17023SJohn Marino static bool 405*e4b17023SJohn Marino vect_handle_widen_op_by_const (gimple stmt, enum tree_code code, 406*e4b17023SJohn Marino tree const_oprnd, tree *oprnd, 407*e4b17023SJohn Marino VEC (gimple, heap) **stmts, tree type, 408*e4b17023SJohn Marino tree *half_type, gimple def_stmt) 409*e4b17023SJohn Marino { 410*e4b17023SJohn Marino tree new_type, new_oprnd, tmp; 411*e4b17023SJohn Marino gimple new_stmt; 412*e4b17023SJohn Marino loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); 413*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_info); 414*e4b17023SJohn Marino 415*e4b17023SJohn Marino if (code != MULT_EXPR && code != LSHIFT_EXPR) 416*e4b17023SJohn Marino return false; 417*e4b17023SJohn Marino 418*e4b17023SJohn Marino if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type)) 419*e4b17023SJohn Marino || (code == LSHIFT_EXPR 420*e4b17023SJohn Marino && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) 421*e4b17023SJohn Marino != 1)) 422*e4b17023SJohn Marino && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2)) 423*e4b17023SJohn Marino { 424*e4b17023SJohn Marino /* CONST_OPRND is a constant of HALF_TYPE. */ 425*e4b17023SJohn Marino *oprnd = gimple_assign_rhs1 (def_stmt); 426*e4b17023SJohn Marino return true; 427*e4b17023SJohn Marino } 428*e4b17023SJohn Marino 429*e4b17023SJohn Marino if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4) 430*e4b17023SJohn Marino || !gimple_bb (def_stmt) 431*e4b17023SJohn Marino || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) 432*e4b17023SJohn Marino || !vinfo_for_stmt (def_stmt)) 433*e4b17023SJohn Marino return false; 434*e4b17023SJohn Marino 435*e4b17023SJohn Marino /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for 436*e4b17023SJohn Marino a type 2 times bigger than HALF_TYPE. */ 437*e4b17023SJohn Marino new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2, 438*e4b17023SJohn Marino TYPE_UNSIGNED (type)); 439*e4b17023SJohn Marino if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type)) 440*e4b17023SJohn Marino || (code == LSHIFT_EXPR 441*e4b17023SJohn Marino && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1)) 442*e4b17023SJohn Marino return false; 443*e4b17023SJohn Marino 444*e4b17023SJohn Marino /* Use NEW_TYPE for widening operation. */ 445*e4b17023SJohn Marino if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) 446*e4b17023SJohn Marino { 447*e4b17023SJohn Marino new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); 448*e4b17023SJohn Marino /* Check if the already created pattern stmt is what we need. */ 449*e4b17023SJohn Marino if (!is_gimple_assign (new_stmt) 450*e4b17023SJohn Marino || gimple_assign_rhs_code (new_stmt) != NOP_EXPR 451*e4b17023SJohn Marino || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type) 452*e4b17023SJohn Marino return false; 453*e4b17023SJohn Marino 454*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, def_stmt); 455*e4b17023SJohn Marino *oprnd = gimple_assign_lhs (new_stmt); 456*e4b17023SJohn Marino } 457*e4b17023SJohn Marino else 458*e4b17023SJohn Marino { 459*e4b17023SJohn Marino /* Create a_T = (NEW_TYPE) a_t; */ 460*e4b17023SJohn Marino *oprnd = gimple_assign_rhs1 (def_stmt); 461*e4b17023SJohn Marino tmp = create_tmp_var (new_type, NULL); 462*e4b17023SJohn Marino add_referenced_var (tmp); 463*e4b17023SJohn Marino new_oprnd = make_ssa_name (tmp, NULL); 464*e4b17023SJohn Marino new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd, 465*e4b17023SJohn Marino NULL_TREE); 466*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; 467*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, def_stmt); 468*e4b17023SJohn Marino *oprnd = new_oprnd; 469*e4b17023SJohn Marino } 470*e4b17023SJohn Marino 471*e4b17023SJohn Marino *half_type = new_type; 472*e4b17023SJohn Marino return true; 473*e4b17023SJohn Marino } 474*e4b17023SJohn Marino 475*e4b17023SJohn Marino 476*e4b17023SJohn Marino /* Function vect_recog_widen_mult_pattern 477*e4b17023SJohn Marino 478*e4b17023SJohn Marino Try to find the following pattern: 479*e4b17023SJohn Marino 480*e4b17023SJohn Marino type a_t, b_t; 481*e4b17023SJohn Marino TYPE a_T, b_T, prod_T; 482*e4b17023SJohn Marino 483*e4b17023SJohn Marino S1 a_t = ; 484*e4b17023SJohn Marino S2 b_t = ; 485*e4b17023SJohn Marino S3 a_T = (TYPE) a_t; 486*e4b17023SJohn Marino S4 b_T = (TYPE) b_t; 487*e4b17023SJohn Marino S5 prod_T = a_T * b_T; 488*e4b17023SJohn Marino 489*e4b17023SJohn Marino where type 'TYPE' is at least double the size of type 'type'. 490*e4b17023SJohn Marino 491*e4b17023SJohn Marino Also detect unsgigned cases: 492*e4b17023SJohn Marino 493*e4b17023SJohn Marino unsigned type a_t, b_t; 494*e4b17023SJohn Marino unsigned TYPE u_prod_T; 495*e4b17023SJohn Marino TYPE a_T, b_T, prod_T; 496*e4b17023SJohn Marino 497*e4b17023SJohn Marino S1 a_t = ; 498*e4b17023SJohn Marino S2 b_t = ; 499*e4b17023SJohn Marino S3 a_T = (TYPE) a_t; 500*e4b17023SJohn Marino S4 b_T = (TYPE) b_t; 501*e4b17023SJohn Marino S5 prod_T = a_T * b_T; 502*e4b17023SJohn Marino S6 u_prod_T = (unsigned TYPE) prod_T; 503*e4b17023SJohn Marino 504*e4b17023SJohn Marino and multiplication by constants: 505*e4b17023SJohn Marino 506*e4b17023SJohn Marino type a_t; 507*e4b17023SJohn Marino TYPE a_T, prod_T; 508*e4b17023SJohn Marino 509*e4b17023SJohn Marino S1 a_t = ; 510*e4b17023SJohn Marino S3 a_T = (TYPE) a_t; 511*e4b17023SJohn Marino S5 prod_T = a_T * CONST; 512*e4b17023SJohn Marino 513*e4b17023SJohn Marino A special case of multiplication by constants is when 'TYPE' is 4 times 514*e4b17023SJohn Marino bigger than 'type', but CONST fits an intermediate type 2 times smaller 515*e4b17023SJohn Marino than 'TYPE'. In that case we create an additional pattern stmt for S3 516*e4b17023SJohn Marino to create a variable of the intermediate type, and perform widen-mult 517*e4b17023SJohn Marino on the intermediate type as well: 518*e4b17023SJohn Marino 519*e4b17023SJohn Marino type a_t; 520*e4b17023SJohn Marino interm_type a_it; 521*e4b17023SJohn Marino TYPE a_T, prod_T, prod_T'; 522*e4b17023SJohn Marino 523*e4b17023SJohn Marino S1 a_t = ; 524*e4b17023SJohn Marino S3 a_T = (TYPE) a_t; 525*e4b17023SJohn Marino '--> a_it = (interm_type) a_t; 526*e4b17023SJohn Marino S5 prod_T = a_T * CONST; 527*e4b17023SJohn Marino '--> prod_T' = a_it w* CONST; 528*e4b17023SJohn Marino 529*e4b17023SJohn Marino Input/Output: 530*e4b17023SJohn Marino 531*e4b17023SJohn Marino * STMTS: Contains a stmt from which the pattern search begins. In the 532*e4b17023SJohn Marino example, when this function is called with S5, the pattern {S3,S4,S5,(S6)} 533*e4b17023SJohn Marino is detected. In case of unsigned widen-mult, the original stmt (S5) is 534*e4b17023SJohn Marino replaced with S6 in STMTS. In case of multiplication by a constant 535*e4b17023SJohn Marino of an intermediate type (the last case above), STMTS also contains S3 536*e4b17023SJohn Marino (inserted before S5). 537*e4b17023SJohn Marino 538*e4b17023SJohn Marino Output: 539*e4b17023SJohn Marino 540*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 541*e4b17023SJohn Marino 542*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 543*e4b17023SJohn Marino 544*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the sequence of 545*e4b17023SJohn Marino stmts that constitute the pattern. In this case it will be: 546*e4b17023SJohn Marino WIDEN_MULT <a_t, b_t> 547*e4b17023SJohn Marino */ 548*e4b17023SJohn Marino 549*e4b17023SJohn Marino static gimple 550*e4b17023SJohn Marino vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts, 551*e4b17023SJohn Marino tree *type_in, tree *type_out) 552*e4b17023SJohn Marino { 553*e4b17023SJohn Marino gimple last_stmt = VEC_pop (gimple, *stmts); 554*e4b17023SJohn Marino gimple def_stmt0, def_stmt1; 555*e4b17023SJohn Marino tree oprnd0, oprnd1; 556*e4b17023SJohn Marino tree type, half_type0, half_type1; 557*e4b17023SJohn Marino gimple pattern_stmt; 558*e4b17023SJohn Marino tree vectype, vectype_out = NULL_TREE; 559*e4b17023SJohn Marino tree dummy; 560*e4b17023SJohn Marino tree var; 561*e4b17023SJohn Marino enum tree_code dummy_code; 562*e4b17023SJohn Marino int dummy_int; 563*e4b17023SJohn Marino VEC (tree, heap) *dummy_vec; 564*e4b17023SJohn Marino bool op1_ok; 565*e4b17023SJohn Marino 566*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt)) 567*e4b17023SJohn Marino return NULL; 568*e4b17023SJohn Marino 569*e4b17023SJohn Marino type = gimple_expr_type (last_stmt); 570*e4b17023SJohn Marino 571*e4b17023SJohn Marino /* Starting from LAST_STMT, follow the defs of its uses in search 572*e4b17023SJohn Marino of the above pattern. */ 573*e4b17023SJohn Marino 574*e4b17023SJohn Marino if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) 575*e4b17023SJohn Marino return NULL; 576*e4b17023SJohn Marino 577*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (last_stmt); 578*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (last_stmt); 579*e4b17023SJohn Marino if (!types_compatible_p (TREE_TYPE (oprnd0), type) 580*e4b17023SJohn Marino || !types_compatible_p (TREE_TYPE (oprnd1), type)) 581*e4b17023SJohn Marino return NULL; 582*e4b17023SJohn Marino 583*e4b17023SJohn Marino /* Check argument 0. */ 584*e4b17023SJohn Marino if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) 585*e4b17023SJohn Marino return NULL; 586*e4b17023SJohn Marino /* Check argument 1. */ 587*e4b17023SJohn Marino op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false); 588*e4b17023SJohn Marino 589*e4b17023SJohn Marino if (op1_ok) 590*e4b17023SJohn Marino { 591*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (def_stmt0); 592*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs1 (def_stmt1); 593*e4b17023SJohn Marino } 594*e4b17023SJohn Marino else 595*e4b17023SJohn Marino { 596*e4b17023SJohn Marino if (TREE_CODE (oprnd1) == INTEGER_CST 597*e4b17023SJohn Marino && TREE_CODE (half_type0) == INTEGER_TYPE 598*e4b17023SJohn Marino && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1, 599*e4b17023SJohn Marino &oprnd0, stmts, type, 600*e4b17023SJohn Marino &half_type0, def_stmt0)) 601*e4b17023SJohn Marino half_type1 = half_type0; 602*e4b17023SJohn Marino else 603*e4b17023SJohn Marino return NULL; 604*e4b17023SJohn Marino } 605*e4b17023SJohn Marino 606*e4b17023SJohn Marino /* Handle unsigned case. Look for 607*e4b17023SJohn Marino S6 u_prod_T = (unsigned TYPE) prod_T; 608*e4b17023SJohn Marino Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */ 609*e4b17023SJohn Marino if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0)) 610*e4b17023SJohn Marino { 611*e4b17023SJohn Marino gimple use_stmt; 612*e4b17023SJohn Marino tree use_lhs; 613*e4b17023SJohn Marino tree use_type; 614*e4b17023SJohn Marino 615*e4b17023SJohn Marino if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1)) 616*e4b17023SJohn Marino return NULL; 617*e4b17023SJohn Marino 618*e4b17023SJohn Marino use_stmt = vect_single_imm_use (last_stmt); 619*e4b17023SJohn Marino if (!use_stmt || !is_gimple_assign (use_stmt) 620*e4b17023SJohn Marino || gimple_assign_rhs_code (use_stmt) != NOP_EXPR) 621*e4b17023SJohn Marino return NULL; 622*e4b17023SJohn Marino 623*e4b17023SJohn Marino use_lhs = gimple_assign_lhs (use_stmt); 624*e4b17023SJohn Marino use_type = TREE_TYPE (use_lhs); 625*e4b17023SJohn Marino if (!INTEGRAL_TYPE_P (use_type) 626*e4b17023SJohn Marino || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type)) 627*e4b17023SJohn Marino || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type))) 628*e4b17023SJohn Marino return NULL; 629*e4b17023SJohn Marino 630*e4b17023SJohn Marino type = use_type; 631*e4b17023SJohn Marino last_stmt = use_stmt; 632*e4b17023SJohn Marino } 633*e4b17023SJohn Marino 634*e4b17023SJohn Marino if (!types_compatible_p (half_type0, half_type1)) 635*e4b17023SJohn Marino return NULL; 636*e4b17023SJohn Marino 637*e4b17023SJohn Marino /* Pattern detected. */ 638*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 639*e4b17023SJohn Marino fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: "); 640*e4b17023SJohn Marino 641*e4b17023SJohn Marino /* Check target support */ 642*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (half_type0); 643*e4b17023SJohn Marino vectype_out = get_vectype_for_scalar_type (type); 644*e4b17023SJohn Marino if (!vectype 645*e4b17023SJohn Marino || !vectype_out 646*e4b17023SJohn Marino || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, 647*e4b17023SJohn Marino vectype_out, vectype, 648*e4b17023SJohn Marino &dummy, &dummy, &dummy_code, 649*e4b17023SJohn Marino &dummy_code, &dummy_int, &dummy_vec)) 650*e4b17023SJohn Marino return NULL; 651*e4b17023SJohn Marino 652*e4b17023SJohn Marino *type_in = vectype; 653*e4b17023SJohn Marino *type_out = vectype_out; 654*e4b17023SJohn Marino 655*e4b17023SJohn Marino /* Pattern supported. Create a stmt to be used to replace the pattern: */ 656*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (type, NULL); 657*e4b17023SJohn Marino pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0, 658*e4b17023SJohn Marino oprnd1); 659*e4b17023SJohn Marino 660*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 661*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 662*e4b17023SJohn Marino 663*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, last_stmt); 664*e4b17023SJohn Marino return pattern_stmt; 665*e4b17023SJohn Marino } 666*e4b17023SJohn Marino 667*e4b17023SJohn Marino 668*e4b17023SJohn Marino /* Function vect_recog_pow_pattern 669*e4b17023SJohn Marino 670*e4b17023SJohn Marino Try to find the following pattern: 671*e4b17023SJohn Marino 672*e4b17023SJohn Marino x = POW (y, N); 673*e4b17023SJohn Marino 674*e4b17023SJohn Marino with POW being one of pow, powf, powi, powif and N being 675*e4b17023SJohn Marino either 2 or 0.5. 676*e4b17023SJohn Marino 677*e4b17023SJohn Marino Input: 678*e4b17023SJohn Marino 679*e4b17023SJohn Marino * LAST_STMT: A stmt from which the pattern search begins. 680*e4b17023SJohn Marino 681*e4b17023SJohn Marino Output: 682*e4b17023SJohn Marino 683*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 684*e4b17023SJohn Marino 685*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 686*e4b17023SJohn Marino 687*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the sequence of 688*e4b17023SJohn Marino stmts that constitute the pattern. In this case it will be: 689*e4b17023SJohn Marino x = x * x 690*e4b17023SJohn Marino or 691*e4b17023SJohn Marino x = sqrt (x) 692*e4b17023SJohn Marino */ 693*e4b17023SJohn Marino 694*e4b17023SJohn Marino static gimple 695*e4b17023SJohn Marino vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in, 696*e4b17023SJohn Marino tree *type_out) 697*e4b17023SJohn Marino { 698*e4b17023SJohn Marino gimple last_stmt = VEC_index (gimple, *stmts, 0); 699*e4b17023SJohn Marino tree fn, base, exp = NULL; 700*e4b17023SJohn Marino gimple stmt; 701*e4b17023SJohn Marino tree var; 702*e4b17023SJohn Marino 703*e4b17023SJohn Marino if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) 704*e4b17023SJohn Marino return NULL; 705*e4b17023SJohn Marino 706*e4b17023SJohn Marino fn = gimple_call_fndecl (last_stmt); 707*e4b17023SJohn Marino if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL) 708*e4b17023SJohn Marino return NULL; 709*e4b17023SJohn Marino 710*e4b17023SJohn Marino switch (DECL_FUNCTION_CODE (fn)) 711*e4b17023SJohn Marino { 712*e4b17023SJohn Marino case BUILT_IN_POWIF: 713*e4b17023SJohn Marino case BUILT_IN_POWI: 714*e4b17023SJohn Marino case BUILT_IN_POWF: 715*e4b17023SJohn Marino case BUILT_IN_POW: 716*e4b17023SJohn Marino base = gimple_call_arg (last_stmt, 0); 717*e4b17023SJohn Marino exp = gimple_call_arg (last_stmt, 1); 718*e4b17023SJohn Marino if (TREE_CODE (exp) != REAL_CST 719*e4b17023SJohn Marino && TREE_CODE (exp) != INTEGER_CST) 720*e4b17023SJohn Marino return NULL; 721*e4b17023SJohn Marino break; 722*e4b17023SJohn Marino 723*e4b17023SJohn Marino default: 724*e4b17023SJohn Marino return NULL; 725*e4b17023SJohn Marino } 726*e4b17023SJohn Marino 727*e4b17023SJohn Marino /* We now have a pow or powi builtin function call with a constant 728*e4b17023SJohn Marino exponent. */ 729*e4b17023SJohn Marino 730*e4b17023SJohn Marino *type_out = NULL_TREE; 731*e4b17023SJohn Marino 732*e4b17023SJohn Marino /* Catch squaring. */ 733*e4b17023SJohn Marino if ((host_integerp (exp, 0) 734*e4b17023SJohn Marino && tree_low_cst (exp, 0) == 2) 735*e4b17023SJohn Marino || (TREE_CODE (exp) == REAL_CST 736*e4b17023SJohn Marino && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2))) 737*e4b17023SJohn Marino { 738*e4b17023SJohn Marino *type_in = TREE_TYPE (base); 739*e4b17023SJohn Marino 740*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); 741*e4b17023SJohn Marino stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base); 742*e4b17023SJohn Marino return stmt; 743*e4b17023SJohn Marino } 744*e4b17023SJohn Marino 745*e4b17023SJohn Marino /* Catch square root. */ 746*e4b17023SJohn Marino if (TREE_CODE (exp) == REAL_CST 747*e4b17023SJohn Marino && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf)) 748*e4b17023SJohn Marino { 749*e4b17023SJohn Marino tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT); 750*e4b17023SJohn Marino *type_in = get_vectype_for_scalar_type (TREE_TYPE (base)); 751*e4b17023SJohn Marino if (*type_in) 752*e4b17023SJohn Marino { 753*e4b17023SJohn Marino gimple stmt = gimple_build_call (newfn, 1, base); 754*e4b17023SJohn Marino if (vectorizable_function (stmt, *type_in, *type_in) 755*e4b17023SJohn Marino != NULL_TREE) 756*e4b17023SJohn Marino { 757*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt); 758*e4b17023SJohn Marino gimple_call_set_lhs (stmt, var); 759*e4b17023SJohn Marino return stmt; 760*e4b17023SJohn Marino } 761*e4b17023SJohn Marino } 762*e4b17023SJohn Marino } 763*e4b17023SJohn Marino 764*e4b17023SJohn Marino return NULL; 765*e4b17023SJohn Marino } 766*e4b17023SJohn Marino 767*e4b17023SJohn Marino 768*e4b17023SJohn Marino /* Function vect_recog_widen_sum_pattern 769*e4b17023SJohn Marino 770*e4b17023SJohn Marino Try to find the following pattern: 771*e4b17023SJohn Marino 772*e4b17023SJohn Marino type x_t; 773*e4b17023SJohn Marino TYPE x_T, sum = init; 774*e4b17023SJohn Marino loop: 775*e4b17023SJohn Marino sum_0 = phi <init, sum_1> 776*e4b17023SJohn Marino S1 x_t = *p; 777*e4b17023SJohn Marino S2 x_T = (TYPE) x_t; 778*e4b17023SJohn Marino S3 sum_1 = x_T + sum_0; 779*e4b17023SJohn Marino 780*e4b17023SJohn Marino where type 'TYPE' is at least double the size of type 'type', i.e - we're 781*e4b17023SJohn Marino summing elements of type 'type' into an accumulator of type 'TYPE'. This is 782*e4b17023SJohn Marino a special case of a reduction computation. 783*e4b17023SJohn Marino 784*e4b17023SJohn Marino Input: 785*e4b17023SJohn Marino 786*e4b17023SJohn Marino * LAST_STMT: A stmt from which the pattern search begins. In the example, 787*e4b17023SJohn Marino when this function is called with S3, the pattern {S2,S3} will be detected. 788*e4b17023SJohn Marino 789*e4b17023SJohn Marino Output: 790*e4b17023SJohn Marino 791*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 792*e4b17023SJohn Marino 793*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 794*e4b17023SJohn Marino 795*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the sequence of 796*e4b17023SJohn Marino stmts that constitute the pattern. In this case it will be: 797*e4b17023SJohn Marino WIDEN_SUM <x_t, sum_0> 798*e4b17023SJohn Marino 799*e4b17023SJohn Marino Note: The widening-sum idiom is a widening reduction pattern that is 800*e4b17023SJohn Marino vectorized without preserving all the intermediate results. It 801*e4b17023SJohn Marino produces only N/2 (widened) results (by summing up pairs of 802*e4b17023SJohn Marino intermediate results) rather than all N results. Therefore, we 803*e4b17023SJohn Marino cannot allow this pattern when we want to get all the results and in 804*e4b17023SJohn Marino the correct order (as is the case when this computation is in an 805*e4b17023SJohn Marino inner-loop nested in an outer-loop that us being vectorized). */ 806*e4b17023SJohn Marino 807*e4b17023SJohn Marino static gimple 808*e4b17023SJohn Marino vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, 809*e4b17023SJohn Marino tree *type_out) 810*e4b17023SJohn Marino { 811*e4b17023SJohn Marino gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0); 812*e4b17023SJohn Marino tree oprnd0, oprnd1; 813*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 814*e4b17023SJohn Marino tree type, half_type; 815*e4b17023SJohn Marino gimple pattern_stmt; 816*e4b17023SJohn Marino loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 817*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_info); 818*e4b17023SJohn Marino tree var; 819*e4b17023SJohn Marino 820*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt)) 821*e4b17023SJohn Marino return NULL; 822*e4b17023SJohn Marino 823*e4b17023SJohn Marino type = gimple_expr_type (last_stmt); 824*e4b17023SJohn Marino 825*e4b17023SJohn Marino /* Look for the following pattern 826*e4b17023SJohn Marino DX = (TYPE) X; 827*e4b17023SJohn Marino sum_1 = DX + sum_0; 828*e4b17023SJohn Marino In which DX is at least double the size of X, and sum_1 has been 829*e4b17023SJohn Marino recognized as a reduction variable. 830*e4b17023SJohn Marino */ 831*e4b17023SJohn Marino 832*e4b17023SJohn Marino /* Starting from LAST_STMT, follow the defs of its uses in search 833*e4b17023SJohn Marino of the above pattern. */ 834*e4b17023SJohn Marino 835*e4b17023SJohn Marino if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR) 836*e4b17023SJohn Marino return NULL; 837*e4b17023SJohn Marino 838*e4b17023SJohn Marino if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def) 839*e4b17023SJohn Marino return NULL; 840*e4b17023SJohn Marino 841*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (last_stmt); 842*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (last_stmt); 843*e4b17023SJohn Marino if (!types_compatible_p (TREE_TYPE (oprnd0), type) 844*e4b17023SJohn Marino || !types_compatible_p (TREE_TYPE (oprnd1), type)) 845*e4b17023SJohn Marino return NULL; 846*e4b17023SJohn Marino 847*e4b17023SJohn Marino /* So far so good. Since last_stmt was detected as a (summation) reduction, 848*e4b17023SJohn Marino we know that oprnd1 is the reduction variable (defined by a loop-header 849*e4b17023SJohn Marino phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. 850*e4b17023SJohn Marino Left to check that oprnd0 is defined by a cast from type 'type' to type 851*e4b17023SJohn Marino 'TYPE'. */ 852*e4b17023SJohn Marino 853*e4b17023SJohn Marino if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true)) 854*e4b17023SJohn Marino return NULL; 855*e4b17023SJohn Marino 856*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (stmt); 857*e4b17023SJohn Marino *type_in = half_type; 858*e4b17023SJohn Marino *type_out = type; 859*e4b17023SJohn Marino 860*e4b17023SJohn Marino /* Pattern detected. Create a stmt to be used to replace the pattern: */ 861*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (type, NULL); 862*e4b17023SJohn Marino pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var, 863*e4b17023SJohn Marino oprnd0, oprnd1); 864*e4b17023SJohn Marino 865*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 866*e4b17023SJohn Marino { 867*e4b17023SJohn Marino fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: "); 868*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 869*e4b17023SJohn Marino } 870*e4b17023SJohn Marino 871*e4b17023SJohn Marino /* We don't allow changing the order of the computation in the inner-loop 872*e4b17023SJohn Marino when doing outer-loop vectorization. */ 873*e4b17023SJohn Marino gcc_assert (!nested_in_vect_loop_p (loop, last_stmt)); 874*e4b17023SJohn Marino 875*e4b17023SJohn Marino return pattern_stmt; 876*e4b17023SJohn Marino } 877*e4b17023SJohn Marino 878*e4b17023SJohn Marino 879*e4b17023SJohn Marino /* Return TRUE if the operation in STMT can be performed on a smaller type. 880*e4b17023SJohn Marino 881*e4b17023SJohn Marino Input: 882*e4b17023SJohn Marino STMT - a statement to check. 883*e4b17023SJohn Marino DEF - we support operations with two operands, one of which is constant. 884*e4b17023SJohn Marino The other operand can be defined by a demotion operation, or by a 885*e4b17023SJohn Marino previous statement in a sequence of over-promoted operations. In the 886*e4b17023SJohn Marino later case DEF is used to replace that operand. (It is defined by a 887*e4b17023SJohn Marino pattern statement we created for the previous statement in the 888*e4b17023SJohn Marino sequence). 889*e4b17023SJohn Marino 890*e4b17023SJohn Marino Input/output: 891*e4b17023SJohn Marino NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not 892*e4b17023SJohn Marino NULL, it's the type of DEF. 893*e4b17023SJohn Marino STMTS - additional pattern statements. If a pattern statement (type 894*e4b17023SJohn Marino conversion) is created in this function, its original statement is 895*e4b17023SJohn Marino added to STMTS. 896*e4b17023SJohn Marino 897*e4b17023SJohn Marino Output: 898*e4b17023SJohn Marino OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new 899*e4b17023SJohn Marino operands to use in the new pattern statement for STMT (will be created 900*e4b17023SJohn Marino in vect_recog_over_widening_pattern ()). 901*e4b17023SJohn Marino NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern 902*e4b17023SJohn Marino statements for STMT: the first one is a type promotion and the second 903*e4b17023SJohn Marino one is the operation itself. We return the type promotion statement 904*e4b17023SJohn Marino in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of 905*e4b17023SJohn Marino the second pattern statement. */ 906*e4b17023SJohn Marino 907*e4b17023SJohn Marino static bool 908*e4b17023SJohn Marino vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type, 909*e4b17023SJohn Marino tree *op0, tree *op1, gimple *new_def_stmt, 910*e4b17023SJohn Marino VEC (gimple, heap) **stmts) 911*e4b17023SJohn Marino { 912*e4b17023SJohn Marino enum tree_code code; 913*e4b17023SJohn Marino tree const_oprnd, oprnd; 914*e4b17023SJohn Marino tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type; 915*e4b17023SJohn Marino gimple def_stmt, new_stmt; 916*e4b17023SJohn Marino bool first = false; 917*e4b17023SJohn Marino loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)); 918*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_info); 919*e4b17023SJohn Marino 920*e4b17023SJohn Marino *op0 = NULL_TREE; 921*e4b17023SJohn Marino *op1 = NULL_TREE; 922*e4b17023SJohn Marino *new_def_stmt = NULL; 923*e4b17023SJohn Marino 924*e4b17023SJohn Marino if (!is_gimple_assign (stmt)) 925*e4b17023SJohn Marino return false; 926*e4b17023SJohn Marino 927*e4b17023SJohn Marino code = gimple_assign_rhs_code (stmt); 928*e4b17023SJohn Marino if (code != LSHIFT_EXPR && code != RSHIFT_EXPR 929*e4b17023SJohn Marino && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR) 930*e4b17023SJohn Marino return false; 931*e4b17023SJohn Marino 932*e4b17023SJohn Marino oprnd = gimple_assign_rhs1 (stmt); 933*e4b17023SJohn Marino const_oprnd = gimple_assign_rhs2 (stmt); 934*e4b17023SJohn Marino type = gimple_expr_type (stmt); 935*e4b17023SJohn Marino 936*e4b17023SJohn Marino if (TREE_CODE (oprnd) != SSA_NAME 937*e4b17023SJohn Marino || TREE_CODE (const_oprnd) != INTEGER_CST) 938*e4b17023SJohn Marino return false; 939*e4b17023SJohn Marino 940*e4b17023SJohn Marino /* If oprnd has other uses besides that in stmt we cannot mark it 941*e4b17023SJohn Marino as being part of a pattern only. */ 942*e4b17023SJohn Marino if (!has_single_use (oprnd)) 943*e4b17023SJohn Marino return false; 944*e4b17023SJohn Marino 945*e4b17023SJohn Marino /* If we are in the middle of a sequence, we use DEF from a previous 946*e4b17023SJohn Marino statement. Otherwise, OPRND has to be a result of type promotion. */ 947*e4b17023SJohn Marino if (*new_type) 948*e4b17023SJohn Marino { 949*e4b17023SJohn Marino half_type = *new_type; 950*e4b17023SJohn Marino oprnd = def; 951*e4b17023SJohn Marino } 952*e4b17023SJohn Marino else 953*e4b17023SJohn Marino { 954*e4b17023SJohn Marino first = true; 955*e4b17023SJohn Marino if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false) 956*e4b17023SJohn Marino || !gimple_bb (def_stmt) 957*e4b17023SJohn Marino || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) 958*e4b17023SJohn Marino || !vinfo_for_stmt (def_stmt)) 959*e4b17023SJohn Marino return false; 960*e4b17023SJohn Marino } 961*e4b17023SJohn Marino 962*e4b17023SJohn Marino /* Can we perform the operation on a smaller type? */ 963*e4b17023SJohn Marino switch (code) 964*e4b17023SJohn Marino { 965*e4b17023SJohn Marino case BIT_IOR_EXPR: 966*e4b17023SJohn Marino case BIT_XOR_EXPR: 967*e4b17023SJohn Marino case BIT_AND_EXPR: 968*e4b17023SJohn Marino if (!int_fits_type_p (const_oprnd, half_type)) 969*e4b17023SJohn Marino { 970*e4b17023SJohn Marino /* HALF_TYPE is not enough. Try a bigger type if possible. */ 971*e4b17023SJohn Marino if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 972*e4b17023SJohn Marino return false; 973*e4b17023SJohn Marino 974*e4b17023SJohn Marino interm_type = build_nonstandard_integer_type ( 975*e4b17023SJohn Marino TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 976*e4b17023SJohn Marino if (!int_fits_type_p (const_oprnd, interm_type)) 977*e4b17023SJohn Marino return false; 978*e4b17023SJohn Marino } 979*e4b17023SJohn Marino 980*e4b17023SJohn Marino break; 981*e4b17023SJohn Marino 982*e4b17023SJohn Marino case LSHIFT_EXPR: 983*e4b17023SJohn Marino /* Try intermediate type - HALF_TYPE is not enough for sure. */ 984*e4b17023SJohn Marino if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 985*e4b17023SJohn Marino return false; 986*e4b17023SJohn Marino 987*e4b17023SJohn Marino /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size. 988*e4b17023SJohn Marino (e.g., if the original value was char, the shift amount is at most 8 989*e4b17023SJohn Marino if we want to use short). */ 990*e4b17023SJohn Marino if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1) 991*e4b17023SJohn Marino return false; 992*e4b17023SJohn Marino 993*e4b17023SJohn Marino interm_type = build_nonstandard_integer_type ( 994*e4b17023SJohn Marino TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 995*e4b17023SJohn Marino 996*e4b17023SJohn Marino if (!vect_supportable_shift (code, interm_type)) 997*e4b17023SJohn Marino return false; 998*e4b17023SJohn Marino 999*e4b17023SJohn Marino break; 1000*e4b17023SJohn Marino 1001*e4b17023SJohn Marino case RSHIFT_EXPR: 1002*e4b17023SJohn Marino if (vect_supportable_shift (code, half_type)) 1003*e4b17023SJohn Marino break; 1004*e4b17023SJohn Marino 1005*e4b17023SJohn Marino /* Try intermediate type - HALF_TYPE is not supported. */ 1006*e4b17023SJohn Marino if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4)) 1007*e4b17023SJohn Marino return false; 1008*e4b17023SJohn Marino 1009*e4b17023SJohn Marino interm_type = build_nonstandard_integer_type ( 1010*e4b17023SJohn Marino TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type)); 1011*e4b17023SJohn Marino 1012*e4b17023SJohn Marino if (!vect_supportable_shift (code, interm_type)) 1013*e4b17023SJohn Marino return false; 1014*e4b17023SJohn Marino 1015*e4b17023SJohn Marino break; 1016*e4b17023SJohn Marino 1017*e4b17023SJohn Marino default: 1018*e4b17023SJohn Marino gcc_unreachable (); 1019*e4b17023SJohn Marino } 1020*e4b17023SJohn Marino 1021*e4b17023SJohn Marino /* There are four possible cases: 1022*e4b17023SJohn Marino 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's 1023*e4b17023SJohn Marino the first statement in the sequence) 1024*e4b17023SJohn Marino a. The original, HALF_TYPE, is not enough - we replace the promotion 1025*e4b17023SJohn Marino from HALF_TYPE to TYPE with a promotion to INTERM_TYPE. 1026*e4b17023SJohn Marino b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original 1027*e4b17023SJohn Marino promotion. 1028*e4b17023SJohn Marino 2. OPRND is defined by a pattern statement we created. 1029*e4b17023SJohn Marino a. Its type is not sufficient for the operation, we create a new stmt: 1030*e4b17023SJohn Marino a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store 1031*e4b17023SJohn Marino this statement in NEW_DEF_STMT, and it is later put in 1032*e4b17023SJohn Marino STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT. 1033*e4b17023SJohn Marino b. OPRND is good to use in the new statement. */ 1034*e4b17023SJohn Marino if (first) 1035*e4b17023SJohn Marino { 1036*e4b17023SJohn Marino if (interm_type) 1037*e4b17023SJohn Marino { 1038*e4b17023SJohn Marino /* Replace the original type conversion HALF_TYPE->TYPE with 1039*e4b17023SJohn Marino HALF_TYPE->INTERM_TYPE. */ 1040*e4b17023SJohn Marino if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt))) 1041*e4b17023SJohn Marino { 1042*e4b17023SJohn Marino new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); 1043*e4b17023SJohn Marino /* Check if the already created pattern stmt is what we need. */ 1044*e4b17023SJohn Marino if (!is_gimple_assign (new_stmt) 1045*e4b17023SJohn Marino || gimple_assign_rhs_code (new_stmt) != NOP_EXPR 1046*e4b17023SJohn Marino || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type) 1047*e4b17023SJohn Marino return false; 1048*e4b17023SJohn Marino 1049*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, def_stmt); 1050*e4b17023SJohn Marino oprnd = gimple_assign_lhs (new_stmt); 1051*e4b17023SJohn Marino } 1052*e4b17023SJohn Marino else 1053*e4b17023SJohn Marino { 1054*e4b17023SJohn Marino /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */ 1055*e4b17023SJohn Marino oprnd = gimple_assign_rhs1 (def_stmt); 1056*e4b17023SJohn Marino tmp = create_tmp_reg (interm_type, NULL); 1057*e4b17023SJohn Marino add_referenced_var (tmp); 1058*e4b17023SJohn Marino new_oprnd = make_ssa_name (tmp, NULL); 1059*e4b17023SJohn Marino new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, 1060*e4b17023SJohn Marino oprnd, NULL_TREE); 1061*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt; 1062*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, def_stmt); 1063*e4b17023SJohn Marino oprnd = new_oprnd; 1064*e4b17023SJohn Marino } 1065*e4b17023SJohn Marino } 1066*e4b17023SJohn Marino else 1067*e4b17023SJohn Marino { 1068*e4b17023SJohn Marino /* Retrieve the operand before the type promotion. */ 1069*e4b17023SJohn Marino oprnd = gimple_assign_rhs1 (def_stmt); 1070*e4b17023SJohn Marino } 1071*e4b17023SJohn Marino } 1072*e4b17023SJohn Marino else 1073*e4b17023SJohn Marino { 1074*e4b17023SJohn Marino if (interm_type) 1075*e4b17023SJohn Marino { 1076*e4b17023SJohn Marino /* Create a type conversion HALF_TYPE->INTERM_TYPE. */ 1077*e4b17023SJohn Marino tmp = create_tmp_reg (interm_type, NULL); 1078*e4b17023SJohn Marino add_referenced_var (tmp); 1079*e4b17023SJohn Marino new_oprnd = make_ssa_name (tmp, NULL); 1080*e4b17023SJohn Marino new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, 1081*e4b17023SJohn Marino oprnd, NULL_TREE); 1082*e4b17023SJohn Marino oprnd = new_oprnd; 1083*e4b17023SJohn Marino *new_def_stmt = new_stmt; 1084*e4b17023SJohn Marino } 1085*e4b17023SJohn Marino 1086*e4b17023SJohn Marino /* Otherwise, OPRND is already set. */ 1087*e4b17023SJohn Marino } 1088*e4b17023SJohn Marino 1089*e4b17023SJohn Marino if (interm_type) 1090*e4b17023SJohn Marino *new_type = interm_type; 1091*e4b17023SJohn Marino else 1092*e4b17023SJohn Marino *new_type = half_type; 1093*e4b17023SJohn Marino 1094*e4b17023SJohn Marino *op0 = oprnd; 1095*e4b17023SJohn Marino *op1 = fold_convert (*new_type, const_oprnd); 1096*e4b17023SJohn Marino 1097*e4b17023SJohn Marino return true; 1098*e4b17023SJohn Marino } 1099*e4b17023SJohn Marino 1100*e4b17023SJohn Marino 1101*e4b17023SJohn Marino /* Try to find a statement or a sequence of statements that can be performed 1102*e4b17023SJohn Marino on a smaller type: 1103*e4b17023SJohn Marino 1104*e4b17023SJohn Marino type x_t; 1105*e4b17023SJohn Marino TYPE x_T, res0_T, res1_T; 1106*e4b17023SJohn Marino loop: 1107*e4b17023SJohn Marino S1 x_t = *p; 1108*e4b17023SJohn Marino S2 x_T = (TYPE) x_t; 1109*e4b17023SJohn Marino S3 res0_T = op (x_T, C0); 1110*e4b17023SJohn Marino S4 res1_T = op (res0_T, C1); 1111*e4b17023SJohn Marino S5 ... = () res1_T; - type demotion 1112*e4b17023SJohn Marino 1113*e4b17023SJohn Marino where type 'TYPE' is at least double the size of type 'type', C0 and C1 are 1114*e4b17023SJohn Marino constants. 1115*e4b17023SJohn Marino Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either 1116*e4b17023SJohn Marino be 'type' or some intermediate type. For now, we expect S5 to be a type 1117*e4b17023SJohn Marino demotion operation. We also check that S3 and S4 have only one use. */ 1118*e4b17023SJohn Marino 1119*e4b17023SJohn Marino static gimple 1120*e4b17023SJohn Marino vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts, 1121*e4b17023SJohn Marino tree *type_in, tree *type_out) 1122*e4b17023SJohn Marino { 1123*e4b17023SJohn Marino gimple stmt = VEC_pop (gimple, *stmts); 1124*e4b17023SJohn Marino gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL; 1125*e4b17023SJohn Marino tree op0, op1, vectype = NULL_TREE, use_lhs, use_type; 1126*e4b17023SJohn Marino tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd; 1127*e4b17023SJohn Marino bool first; 1128*e4b17023SJohn Marino tree type = NULL; 1129*e4b17023SJohn Marino 1130*e4b17023SJohn Marino first = true; 1131*e4b17023SJohn Marino while (1) 1132*e4b17023SJohn Marino { 1133*e4b17023SJohn Marino if (!vinfo_for_stmt (stmt) 1134*e4b17023SJohn Marino || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt))) 1135*e4b17023SJohn Marino return NULL; 1136*e4b17023SJohn Marino 1137*e4b17023SJohn Marino new_def_stmt = NULL; 1138*e4b17023SJohn Marino if (!vect_operation_fits_smaller_type (stmt, var, &new_type, 1139*e4b17023SJohn Marino &op0, &op1, &new_def_stmt, 1140*e4b17023SJohn Marino stmts)) 1141*e4b17023SJohn Marino { 1142*e4b17023SJohn Marino if (first) 1143*e4b17023SJohn Marino return NULL; 1144*e4b17023SJohn Marino else 1145*e4b17023SJohn Marino break; 1146*e4b17023SJohn Marino } 1147*e4b17023SJohn Marino 1148*e4b17023SJohn Marino /* STMT can be performed on a smaller type. Check its uses. */ 1149*e4b17023SJohn Marino use_stmt = vect_single_imm_use (stmt); 1150*e4b17023SJohn Marino if (!use_stmt || !is_gimple_assign (use_stmt)) 1151*e4b17023SJohn Marino return NULL; 1152*e4b17023SJohn Marino 1153*e4b17023SJohn Marino /* Create pattern statement for STMT. */ 1154*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (new_type); 1155*e4b17023SJohn Marino if (!vectype) 1156*e4b17023SJohn Marino return NULL; 1157*e4b17023SJohn Marino 1158*e4b17023SJohn Marino /* We want to collect all the statements for which we create pattern 1159*e4b17023SJohn Marino statetments, except for the case when the last statement in the 1160*e4b17023SJohn Marino sequence doesn't have a corresponding pattern statement. In such 1161*e4b17023SJohn Marino case we associate the last pattern statement with the last statement 1162*e4b17023SJohn Marino in the sequence. Therefore, we only add the original statement to 1163*e4b17023SJohn Marino the list if we know that it is not the last. */ 1164*e4b17023SJohn Marino if (prev_stmt) 1165*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, prev_stmt); 1166*e4b17023SJohn Marino 1167*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (new_type, NULL); 1168*e4b17023SJohn Marino pattern_stmt 1169*e4b17023SJohn Marino = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var, 1170*e4b17023SJohn Marino op0, op1); 1171*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; 1172*e4b17023SJohn Marino new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt); 1173*e4b17023SJohn Marino 1174*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1175*e4b17023SJohn Marino { 1176*e4b17023SJohn Marino fprintf (vect_dump, "created pattern stmt: "); 1177*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1178*e4b17023SJohn Marino } 1179*e4b17023SJohn Marino 1180*e4b17023SJohn Marino type = gimple_expr_type (stmt); 1181*e4b17023SJohn Marino prev_stmt = stmt; 1182*e4b17023SJohn Marino stmt = use_stmt; 1183*e4b17023SJohn Marino 1184*e4b17023SJohn Marino first = false; 1185*e4b17023SJohn Marino } 1186*e4b17023SJohn Marino 1187*e4b17023SJohn Marino /* We got a sequence. We expect it to end with a type demotion operation. 1188*e4b17023SJohn Marino Otherwise, we quit (for now). There are three possible cases: the 1189*e4b17023SJohn Marino conversion is to NEW_TYPE (we don't do anything), the conversion is to 1190*e4b17023SJohn Marino a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and 1191*e4b17023SJohn Marino NEW_TYPE differs (we create a new conversion statement). */ 1192*e4b17023SJohn Marino if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))) 1193*e4b17023SJohn Marino { 1194*e4b17023SJohn Marino use_lhs = gimple_assign_lhs (use_stmt); 1195*e4b17023SJohn Marino use_type = TREE_TYPE (use_lhs); 1196*e4b17023SJohn Marino /* Support only type demotion or signedess change. */ 1197*e4b17023SJohn Marino if (!INTEGRAL_TYPE_P (use_type) 1198*e4b17023SJohn Marino || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type)) 1199*e4b17023SJohn Marino return NULL; 1200*e4b17023SJohn Marino 1201*e4b17023SJohn Marino /* Check that NEW_TYPE is not bigger than the conversion result. */ 1202*e4b17023SJohn Marino if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)) 1203*e4b17023SJohn Marino return NULL; 1204*e4b17023SJohn Marino 1205*e4b17023SJohn Marino if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type) 1206*e4b17023SJohn Marino || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type)) 1207*e4b17023SJohn Marino { 1208*e4b17023SJohn Marino /* Create NEW_TYPE->USE_TYPE conversion. */ 1209*e4b17023SJohn Marino tmp = create_tmp_reg (use_type, NULL); 1210*e4b17023SJohn Marino add_referenced_var (tmp); 1211*e4b17023SJohn Marino new_oprnd = make_ssa_name (tmp, NULL); 1212*e4b17023SJohn Marino pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, 1213*e4b17023SJohn Marino var, NULL_TREE); 1214*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt; 1215*e4b17023SJohn Marino 1216*e4b17023SJohn Marino *type_in = get_vectype_for_scalar_type (new_type); 1217*e4b17023SJohn Marino *type_out = get_vectype_for_scalar_type (use_type); 1218*e4b17023SJohn Marino 1219*e4b17023SJohn Marino /* We created a pattern statement for the last statement in the 1220*e4b17023SJohn Marino sequence, so we don't need to associate it with the pattern 1221*e4b17023SJohn Marino statement created for PREV_STMT. Therefore, we add PREV_STMT 1222*e4b17023SJohn Marino to the list in order to mark it later in vect_pattern_recog_1. */ 1223*e4b17023SJohn Marino if (prev_stmt) 1224*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, prev_stmt); 1225*e4b17023SJohn Marino } 1226*e4b17023SJohn Marino else 1227*e4b17023SJohn Marino { 1228*e4b17023SJohn Marino if (prev_stmt) 1229*e4b17023SJohn Marino STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt)) 1230*e4b17023SJohn Marino = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt)); 1231*e4b17023SJohn Marino 1232*e4b17023SJohn Marino *type_in = vectype; 1233*e4b17023SJohn Marino *type_out = NULL_TREE; 1234*e4b17023SJohn Marino } 1235*e4b17023SJohn Marino 1236*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, use_stmt); 1237*e4b17023SJohn Marino } 1238*e4b17023SJohn Marino else 1239*e4b17023SJohn Marino /* TODO: support general case, create a conversion to the correct type. */ 1240*e4b17023SJohn Marino return NULL; 1241*e4b17023SJohn Marino 1242*e4b17023SJohn Marino /* Pattern detected. */ 1243*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1244*e4b17023SJohn Marino { 1245*e4b17023SJohn Marino fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: "); 1246*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1247*e4b17023SJohn Marino } 1248*e4b17023SJohn Marino 1249*e4b17023SJohn Marino return pattern_stmt; 1250*e4b17023SJohn Marino } 1251*e4b17023SJohn Marino 1252*e4b17023SJohn Marino /* Detect widening shift pattern: 1253*e4b17023SJohn Marino 1254*e4b17023SJohn Marino type a_t; 1255*e4b17023SJohn Marino TYPE a_T, res_T; 1256*e4b17023SJohn Marino 1257*e4b17023SJohn Marino S1 a_t = ; 1258*e4b17023SJohn Marino S2 a_T = (TYPE) a_t; 1259*e4b17023SJohn Marino S3 res_T = a_T << CONST; 1260*e4b17023SJohn Marino 1261*e4b17023SJohn Marino where type 'TYPE' is at least double the size of type 'type'. 1262*e4b17023SJohn Marino 1263*e4b17023SJohn Marino Also detect cases where the shift result is immediately converted 1264*e4b17023SJohn Marino to another type 'result_type' that is no larger in size than 'TYPE'. 1265*e4b17023SJohn Marino In those cases we perform a widen-shift that directly results in 1266*e4b17023SJohn Marino 'result_type', to avoid a possible over-widening situation: 1267*e4b17023SJohn Marino 1268*e4b17023SJohn Marino type a_t; 1269*e4b17023SJohn Marino TYPE a_T, res_T; 1270*e4b17023SJohn Marino result_type res_result; 1271*e4b17023SJohn Marino 1272*e4b17023SJohn Marino S1 a_t = ; 1273*e4b17023SJohn Marino S2 a_T = (TYPE) a_t; 1274*e4b17023SJohn Marino S3 res_T = a_T << CONST; 1275*e4b17023SJohn Marino S4 res_result = (result_type) res_T; 1276*e4b17023SJohn Marino '--> res_result' = a_t w<< CONST; 1277*e4b17023SJohn Marino 1278*e4b17023SJohn Marino And a case when 'TYPE' is 4 times bigger than 'type'. In that case we 1279*e4b17023SJohn Marino create an additional pattern stmt for S2 to create a variable of an 1280*e4b17023SJohn Marino intermediate type, and perform widen-shift on the intermediate type: 1281*e4b17023SJohn Marino 1282*e4b17023SJohn Marino type a_t; 1283*e4b17023SJohn Marino interm_type a_it; 1284*e4b17023SJohn Marino TYPE a_T, res_T, res_T'; 1285*e4b17023SJohn Marino 1286*e4b17023SJohn Marino S1 a_t = ; 1287*e4b17023SJohn Marino S2 a_T = (TYPE) a_t; 1288*e4b17023SJohn Marino '--> a_it = (interm_type) a_t; 1289*e4b17023SJohn Marino S3 res_T = a_T << CONST; 1290*e4b17023SJohn Marino '--> res_T' = a_it <<* CONST; 1291*e4b17023SJohn Marino 1292*e4b17023SJohn Marino Input/Output: 1293*e4b17023SJohn Marino 1294*e4b17023SJohn Marino * STMTS: Contains a stmt from which the pattern search begins. 1295*e4b17023SJohn Marino In case of unsigned widen-shift, the original stmt (S3) is replaced with S4 1296*e4b17023SJohn Marino in STMTS. When an intermediate type is used and a pattern statement is 1297*e4b17023SJohn Marino created for S2, we also put S2 here (before S3). 1298*e4b17023SJohn Marino 1299*e4b17023SJohn Marino Output: 1300*e4b17023SJohn Marino 1301*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 1302*e4b17023SJohn Marino 1303*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 1304*e4b17023SJohn Marino 1305*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the sequence of 1306*e4b17023SJohn Marino stmts that constitute the pattern. In this case it will be: 1307*e4b17023SJohn Marino WIDEN_LSHIFT_EXPR <a_t, CONST>. */ 1308*e4b17023SJohn Marino 1309*e4b17023SJohn Marino static gimple 1310*e4b17023SJohn Marino vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts, 1311*e4b17023SJohn Marino tree *type_in, tree *type_out) 1312*e4b17023SJohn Marino { 1313*e4b17023SJohn Marino gimple last_stmt = VEC_pop (gimple, *stmts); 1314*e4b17023SJohn Marino gimple def_stmt0; 1315*e4b17023SJohn Marino tree oprnd0, oprnd1; 1316*e4b17023SJohn Marino tree type, half_type0; 1317*e4b17023SJohn Marino gimple pattern_stmt; 1318*e4b17023SJohn Marino tree vectype, vectype_out = NULL_TREE; 1319*e4b17023SJohn Marino tree dummy; 1320*e4b17023SJohn Marino tree var; 1321*e4b17023SJohn Marino enum tree_code dummy_code; 1322*e4b17023SJohn Marino int dummy_int; 1323*e4b17023SJohn Marino VEC (tree, heap) * dummy_vec; 1324*e4b17023SJohn Marino gimple use_stmt; 1325*e4b17023SJohn Marino 1326*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt)) 1327*e4b17023SJohn Marino return NULL; 1328*e4b17023SJohn Marino 1329*e4b17023SJohn Marino if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt))) 1330*e4b17023SJohn Marino return NULL; 1331*e4b17023SJohn Marino 1332*e4b17023SJohn Marino if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR) 1333*e4b17023SJohn Marino return NULL; 1334*e4b17023SJohn Marino 1335*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (last_stmt); 1336*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (last_stmt); 1337*e4b17023SJohn Marino if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST) 1338*e4b17023SJohn Marino return NULL; 1339*e4b17023SJohn Marino 1340*e4b17023SJohn Marino /* Check operand 0: it has to be defined by a type promotion. */ 1341*e4b17023SJohn Marino if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false)) 1342*e4b17023SJohn Marino return NULL; 1343*e4b17023SJohn Marino 1344*e4b17023SJohn Marino /* Check operand 1: has to be positive. We check that it fits the type 1345*e4b17023SJohn Marino in vect_handle_widen_op_by_const (). */ 1346*e4b17023SJohn Marino if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0) 1347*e4b17023SJohn Marino return NULL; 1348*e4b17023SJohn Marino 1349*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (def_stmt0); 1350*e4b17023SJohn Marino type = gimple_expr_type (last_stmt); 1351*e4b17023SJohn Marino 1352*e4b17023SJohn Marino /* Check for subsequent conversion to another type. */ 1353*e4b17023SJohn Marino use_stmt = vect_single_imm_use (last_stmt); 1354*e4b17023SJohn Marino if (use_stmt && is_gimple_assign (use_stmt) 1355*e4b17023SJohn Marino && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)) 1356*e4b17023SJohn Marino && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt))) 1357*e4b17023SJohn Marino { 1358*e4b17023SJohn Marino tree use_lhs = gimple_assign_lhs (use_stmt); 1359*e4b17023SJohn Marino tree use_type = TREE_TYPE (use_lhs); 1360*e4b17023SJohn Marino 1361*e4b17023SJohn Marino if (INTEGRAL_TYPE_P (use_type) 1362*e4b17023SJohn Marino && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type)) 1363*e4b17023SJohn Marino { 1364*e4b17023SJohn Marino last_stmt = use_stmt; 1365*e4b17023SJohn Marino type = use_type; 1366*e4b17023SJohn Marino } 1367*e4b17023SJohn Marino } 1368*e4b17023SJohn Marino 1369*e4b17023SJohn Marino /* Check if this a widening operation. */ 1370*e4b17023SJohn Marino if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1, 1371*e4b17023SJohn Marino &oprnd0, stmts, 1372*e4b17023SJohn Marino type, &half_type0, def_stmt0)) 1373*e4b17023SJohn Marino return NULL; 1374*e4b17023SJohn Marino 1375*e4b17023SJohn Marino /* Pattern detected. */ 1376*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1377*e4b17023SJohn Marino fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: "); 1378*e4b17023SJohn Marino 1379*e4b17023SJohn Marino /* Check target support. */ 1380*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (half_type0); 1381*e4b17023SJohn Marino vectype_out = get_vectype_for_scalar_type (type); 1382*e4b17023SJohn Marino 1383*e4b17023SJohn Marino if (!vectype 1384*e4b17023SJohn Marino || !vectype_out 1385*e4b17023SJohn Marino || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt, 1386*e4b17023SJohn Marino vectype_out, vectype, 1387*e4b17023SJohn Marino &dummy, &dummy, &dummy_code, 1388*e4b17023SJohn Marino &dummy_code, &dummy_int, 1389*e4b17023SJohn Marino &dummy_vec)) 1390*e4b17023SJohn Marino return NULL; 1391*e4b17023SJohn Marino 1392*e4b17023SJohn Marino *type_in = vectype; 1393*e4b17023SJohn Marino *type_out = vectype_out; 1394*e4b17023SJohn Marino 1395*e4b17023SJohn Marino /* Pattern supported. Create a stmt to be used to replace the pattern. */ 1396*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (type, NULL); 1397*e4b17023SJohn Marino pattern_stmt = 1398*e4b17023SJohn Marino gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1); 1399*e4b17023SJohn Marino 1400*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1401*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1402*e4b17023SJohn Marino 1403*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, last_stmt); 1404*e4b17023SJohn Marino return pattern_stmt; 1405*e4b17023SJohn Marino } 1406*e4b17023SJohn Marino 1407*e4b17023SJohn Marino /* Detect a vector by vector shift pattern that wouldn't be otherwise 1408*e4b17023SJohn Marino vectorized: 1409*e4b17023SJohn Marino 1410*e4b17023SJohn Marino type a_t; 1411*e4b17023SJohn Marino TYPE b_T, res_T; 1412*e4b17023SJohn Marino 1413*e4b17023SJohn Marino S1 a_t = ; 1414*e4b17023SJohn Marino S2 b_T = ; 1415*e4b17023SJohn Marino S3 res_T = b_T op a_t; 1416*e4b17023SJohn Marino 1417*e4b17023SJohn Marino where type 'TYPE' is a type with different size than 'type', 1418*e4b17023SJohn Marino and op is <<, >> or rotate. 1419*e4b17023SJohn Marino 1420*e4b17023SJohn Marino Also detect cases: 1421*e4b17023SJohn Marino 1422*e4b17023SJohn Marino type a_t; 1423*e4b17023SJohn Marino TYPE b_T, c_T, res_T; 1424*e4b17023SJohn Marino 1425*e4b17023SJohn Marino S0 c_T = ; 1426*e4b17023SJohn Marino S1 a_t = (type) c_T; 1427*e4b17023SJohn Marino S2 b_T = ; 1428*e4b17023SJohn Marino S3 res_T = b_T op a_t; 1429*e4b17023SJohn Marino 1430*e4b17023SJohn Marino Input/Output: 1431*e4b17023SJohn Marino 1432*e4b17023SJohn Marino * STMTS: Contains a stmt from which the pattern search begins, 1433*e4b17023SJohn Marino i.e. the shift/rotate stmt. The original stmt (S3) is replaced 1434*e4b17023SJohn Marino with a shift/rotate which has same type on both operands, in the 1435*e4b17023SJohn Marino second case just b_T op c_T, in the first case with added cast 1436*e4b17023SJohn Marino from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ. 1437*e4b17023SJohn Marino 1438*e4b17023SJohn Marino Output: 1439*e4b17023SJohn Marino 1440*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 1441*e4b17023SJohn Marino 1442*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 1443*e4b17023SJohn Marino 1444*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the shift/rotate 1445*e4b17023SJohn Marino S3 stmt. */ 1446*e4b17023SJohn Marino 1447*e4b17023SJohn Marino static gimple 1448*e4b17023SJohn Marino vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts, 1449*e4b17023SJohn Marino tree *type_in, tree *type_out) 1450*e4b17023SJohn Marino { 1451*e4b17023SJohn Marino gimple last_stmt = VEC_pop (gimple, *stmts); 1452*e4b17023SJohn Marino tree oprnd0, oprnd1, lhs, var; 1453*e4b17023SJohn Marino gimple pattern_stmt, def_stmt; 1454*e4b17023SJohn Marino enum tree_code rhs_code; 1455*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1456*e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1457*e4b17023SJohn Marino enum vect_def_type dt; 1458*e4b17023SJohn Marino tree def; 1459*e4b17023SJohn Marino 1460*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt)) 1461*e4b17023SJohn Marino return NULL; 1462*e4b17023SJohn Marino 1463*e4b17023SJohn Marino rhs_code = gimple_assign_rhs_code (last_stmt); 1464*e4b17023SJohn Marino switch (rhs_code) 1465*e4b17023SJohn Marino { 1466*e4b17023SJohn Marino case LSHIFT_EXPR: 1467*e4b17023SJohn Marino case RSHIFT_EXPR: 1468*e4b17023SJohn Marino case LROTATE_EXPR: 1469*e4b17023SJohn Marino case RROTATE_EXPR: 1470*e4b17023SJohn Marino break; 1471*e4b17023SJohn Marino default: 1472*e4b17023SJohn Marino return NULL; 1473*e4b17023SJohn Marino } 1474*e4b17023SJohn Marino 1475*e4b17023SJohn Marino if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 1476*e4b17023SJohn Marino return NULL; 1477*e4b17023SJohn Marino 1478*e4b17023SJohn Marino lhs = gimple_assign_lhs (last_stmt); 1479*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (last_stmt); 1480*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (last_stmt); 1481*e4b17023SJohn Marino if (TREE_CODE (oprnd0) != SSA_NAME 1482*e4b17023SJohn Marino || TREE_CODE (oprnd1) != SSA_NAME 1483*e4b17023SJohn Marino || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1)) 1484*e4b17023SJohn Marino || TYPE_PRECISION (TREE_TYPE (oprnd1)) 1485*e4b17023SJohn Marino != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1))) 1486*e4b17023SJohn Marino || TYPE_PRECISION (TREE_TYPE (lhs)) 1487*e4b17023SJohn Marino != TYPE_PRECISION (TREE_TYPE (oprnd0))) 1488*e4b17023SJohn Marino return NULL; 1489*e4b17023SJohn Marino 1490*e4b17023SJohn Marino if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, NULL, &def_stmt, 1491*e4b17023SJohn Marino &def, &dt)) 1492*e4b17023SJohn Marino return NULL; 1493*e4b17023SJohn Marino 1494*e4b17023SJohn Marino if (dt != vect_internal_def) 1495*e4b17023SJohn Marino return NULL; 1496*e4b17023SJohn Marino 1497*e4b17023SJohn Marino *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0)); 1498*e4b17023SJohn Marino *type_out = *type_in; 1499*e4b17023SJohn Marino if (*type_in == NULL_TREE) 1500*e4b17023SJohn Marino return NULL; 1501*e4b17023SJohn Marino 1502*e4b17023SJohn Marino def = NULL_TREE; 1503*e4b17023SJohn Marino if (gimple_assign_cast_p (def_stmt)) 1504*e4b17023SJohn Marino { 1505*e4b17023SJohn Marino tree rhs1 = gimple_assign_rhs1 (def_stmt); 1506*e4b17023SJohn Marino if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0)) 1507*e4b17023SJohn Marino && TYPE_PRECISION (TREE_TYPE (rhs1)) 1508*e4b17023SJohn Marino == TYPE_PRECISION (TREE_TYPE (oprnd0))) 1509*e4b17023SJohn Marino def = rhs1; 1510*e4b17023SJohn Marino } 1511*e4b17023SJohn Marino 1512*e4b17023SJohn Marino if (def == NULL_TREE) 1513*e4b17023SJohn Marino { 1514*e4b17023SJohn Marino def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 1515*e4b17023SJohn Marino def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1, 1516*e4b17023SJohn Marino NULL_TREE); 1517*e4b17023SJohn Marino new_pattern_def_seq (stmt_vinfo, def_stmt); 1518*e4b17023SJohn Marino } 1519*e4b17023SJohn Marino 1520*e4b17023SJohn Marino /* Pattern detected. */ 1521*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1522*e4b17023SJohn Marino fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: "); 1523*e4b17023SJohn Marino 1524*e4b17023SJohn Marino /* Pattern supported. Create a stmt to be used to replace the pattern. */ 1525*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); 1526*e4b17023SJohn Marino pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def); 1527*e4b17023SJohn Marino 1528*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1529*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1530*e4b17023SJohn Marino 1531*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, last_stmt); 1532*e4b17023SJohn Marino return pattern_stmt; 1533*e4b17023SJohn Marino } 1534*e4b17023SJohn Marino 1535*e4b17023SJohn Marino /* Detect a signed division by power of two constant that wouldn't be 1536*e4b17023SJohn Marino otherwise vectorized: 1537*e4b17023SJohn Marino 1538*e4b17023SJohn Marino type a_t, b_t; 1539*e4b17023SJohn Marino 1540*e4b17023SJohn Marino S1 a_t = b_t / N; 1541*e4b17023SJohn Marino 1542*e4b17023SJohn Marino where type 'type' is a signed integral type and N is a constant positive 1543*e4b17023SJohn Marino power of two. 1544*e4b17023SJohn Marino 1545*e4b17023SJohn Marino Similarly handle signed modulo by power of two constant: 1546*e4b17023SJohn Marino 1547*e4b17023SJohn Marino S4 a_t = b_t % N; 1548*e4b17023SJohn Marino 1549*e4b17023SJohn Marino Input/Output: 1550*e4b17023SJohn Marino 1551*e4b17023SJohn Marino * STMTS: Contains a stmt from which the pattern search begins, 1552*e4b17023SJohn Marino i.e. the division stmt. S1 is replaced by: 1553*e4b17023SJohn Marino S3 y_t = b_t < 0 ? N - 1 : 0; 1554*e4b17023SJohn Marino S2 x_t = b_t + y_t; 1555*e4b17023SJohn Marino S1' a_t = x_t >> log2 (N); 1556*e4b17023SJohn Marino 1557*e4b17023SJohn Marino S4 is replaced by (where *_T temporaries have unsigned type): 1558*e4b17023SJohn Marino S9 y_T = b_t < 0 ? -1U : 0U; 1559*e4b17023SJohn Marino S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N)); 1560*e4b17023SJohn Marino S7 z_t = (type) z_T; 1561*e4b17023SJohn Marino S6 w_t = b_t + z_t; 1562*e4b17023SJohn Marino S5 x_t = w_t & (N - 1); 1563*e4b17023SJohn Marino S4' a_t = x_t - z_t; 1564*e4b17023SJohn Marino 1565*e4b17023SJohn Marino Output: 1566*e4b17023SJohn Marino 1567*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 1568*e4b17023SJohn Marino 1569*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 1570*e4b17023SJohn Marino 1571*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the division 1572*e4b17023SJohn Marino S1 or modulo S4 stmt. */ 1573*e4b17023SJohn Marino 1574*e4b17023SJohn Marino static gimple 1575*e4b17023SJohn Marino vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts, 1576*e4b17023SJohn Marino tree *type_in, tree *type_out) 1577*e4b17023SJohn Marino { 1578*e4b17023SJohn Marino gimple last_stmt = VEC_pop (gimple, *stmts); 1579*e4b17023SJohn Marino tree oprnd0, oprnd1, vectype, itype, cond; 1580*e4b17023SJohn Marino gimple pattern_stmt, def_stmt; 1581*e4b17023SJohn Marino enum tree_code rhs_code; 1582*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 1583*e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1584*e4b17023SJohn Marino optab optab; 1585*e4b17023SJohn Marino 1586*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt)) 1587*e4b17023SJohn Marino return NULL; 1588*e4b17023SJohn Marino 1589*e4b17023SJohn Marino rhs_code = gimple_assign_rhs_code (last_stmt); 1590*e4b17023SJohn Marino switch (rhs_code) 1591*e4b17023SJohn Marino { 1592*e4b17023SJohn Marino case TRUNC_DIV_EXPR: 1593*e4b17023SJohn Marino case TRUNC_MOD_EXPR: 1594*e4b17023SJohn Marino break; 1595*e4b17023SJohn Marino default: 1596*e4b17023SJohn Marino return NULL; 1597*e4b17023SJohn Marino } 1598*e4b17023SJohn Marino 1599*e4b17023SJohn Marino if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) 1600*e4b17023SJohn Marino return NULL; 1601*e4b17023SJohn Marino 1602*e4b17023SJohn Marino oprnd0 = gimple_assign_rhs1 (last_stmt); 1603*e4b17023SJohn Marino oprnd1 = gimple_assign_rhs2 (last_stmt); 1604*e4b17023SJohn Marino itype = TREE_TYPE (oprnd0); 1605*e4b17023SJohn Marino if (TREE_CODE (oprnd0) != SSA_NAME 1606*e4b17023SJohn Marino || TREE_CODE (oprnd1) != INTEGER_CST 1607*e4b17023SJohn Marino || TREE_CODE (itype) != INTEGER_TYPE 1608*e4b17023SJohn Marino || TYPE_UNSIGNED (itype) 1609*e4b17023SJohn Marino || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)) 1610*e4b17023SJohn Marino || !integer_pow2p (oprnd1) 1611*e4b17023SJohn Marino || tree_int_cst_sgn (oprnd1) != 1) 1612*e4b17023SJohn Marino return NULL; 1613*e4b17023SJohn Marino 1614*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (itype); 1615*e4b17023SJohn Marino if (vectype == NULL_TREE) 1616*e4b17023SJohn Marino return NULL; 1617*e4b17023SJohn Marino 1618*e4b17023SJohn Marino /* If the target can handle vectorized division or modulo natively, 1619*e4b17023SJohn Marino don't attempt to optimize this. */ 1620*e4b17023SJohn Marino optab = optab_for_tree_code (rhs_code, vectype, optab_default); 1621*e4b17023SJohn Marino if (optab != NULL) 1622*e4b17023SJohn Marino { 1623*e4b17023SJohn Marino enum machine_mode vec_mode = TYPE_MODE (vectype); 1624*e4b17023SJohn Marino int icode = (int) optab_handler (optab, vec_mode); 1625*e4b17023SJohn Marino if (icode != CODE_FOR_nothing 1626*e4b17023SJohn Marino || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD) 1627*e4b17023SJohn Marino return NULL; 1628*e4b17023SJohn Marino } 1629*e4b17023SJohn Marino 1630*e4b17023SJohn Marino /* Pattern detected. */ 1631*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1632*e4b17023SJohn Marino fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: "); 1633*e4b17023SJohn Marino 1634*e4b17023SJohn Marino cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0)); 1635*e4b17023SJohn Marino if (rhs_code == TRUNC_DIV_EXPR) 1636*e4b17023SJohn Marino { 1637*e4b17023SJohn Marino tree var = vect_recog_temp_ssa_var (itype, NULL); 1638*e4b17023SJohn Marino def_stmt 1639*e4b17023SJohn Marino = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, 1640*e4b17023SJohn Marino fold_build2 (MINUS_EXPR, itype, 1641*e4b17023SJohn Marino oprnd1, 1642*e4b17023SJohn Marino build_int_cst (itype, 1643*e4b17023SJohn Marino 1)), 1644*e4b17023SJohn Marino build_int_cst (itype, 0)); 1645*e4b17023SJohn Marino new_pattern_def_seq (stmt_vinfo, def_stmt); 1646*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (itype, NULL); 1647*e4b17023SJohn Marino def_stmt 1648*e4b17023SJohn Marino = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0, 1649*e4b17023SJohn Marino gimple_assign_lhs (def_stmt)); 1650*e4b17023SJohn Marino append_pattern_def_seq (stmt_vinfo, def_stmt); 1651*e4b17023SJohn Marino 1652*e4b17023SJohn Marino pattern_stmt 1653*e4b17023SJohn Marino = gimple_build_assign_with_ops (RSHIFT_EXPR, 1654*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 1655*e4b17023SJohn Marino var, 1656*e4b17023SJohn Marino build_int_cst (itype, 1657*e4b17023SJohn Marino tree_log2 (oprnd1))); 1658*e4b17023SJohn Marino } 1659*e4b17023SJohn Marino else 1660*e4b17023SJohn Marino { 1661*e4b17023SJohn Marino tree signmask; 1662*e4b17023SJohn Marino STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; 1663*e4b17023SJohn Marino if (compare_tree_int (oprnd1, 2) == 0) 1664*e4b17023SJohn Marino { 1665*e4b17023SJohn Marino signmask = vect_recog_temp_ssa_var (itype, NULL); 1666*e4b17023SJohn Marino def_stmt 1667*e4b17023SJohn Marino = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond, 1668*e4b17023SJohn Marino build_int_cst (itype, 1), 1669*e4b17023SJohn Marino build_int_cst (itype, 0)); 1670*e4b17023SJohn Marino append_pattern_def_seq (stmt_vinfo, def_stmt); 1671*e4b17023SJohn Marino } 1672*e4b17023SJohn Marino else 1673*e4b17023SJohn Marino { 1674*e4b17023SJohn Marino tree utype 1675*e4b17023SJohn Marino = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1); 1676*e4b17023SJohn Marino tree vecutype = get_vectype_for_scalar_type (utype); 1677*e4b17023SJohn Marino tree shift 1678*e4b17023SJohn Marino = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype)) 1679*e4b17023SJohn Marino - tree_log2 (oprnd1)); 1680*e4b17023SJohn Marino tree var = vect_recog_temp_ssa_var (utype, NULL); 1681*e4b17023SJohn Marino stmt_vec_info def_stmt_vinfo; 1682*e4b17023SJohn Marino 1683*e4b17023SJohn Marino def_stmt 1684*e4b17023SJohn Marino = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, 1685*e4b17023SJohn Marino build_int_cst (utype, -1), 1686*e4b17023SJohn Marino build_int_cst (utype, 0)); 1687*e4b17023SJohn Marino def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 1688*e4b17023SJohn Marino set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 1689*e4b17023SJohn Marino STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 1690*e4b17023SJohn Marino append_pattern_def_seq (stmt_vinfo, def_stmt); 1691*e4b17023SJohn Marino var = vect_recog_temp_ssa_var (utype, NULL); 1692*e4b17023SJohn Marino def_stmt 1693*e4b17023SJohn Marino = gimple_build_assign_with_ops (RSHIFT_EXPR, var, 1694*e4b17023SJohn Marino gimple_assign_lhs (def_stmt), 1695*e4b17023SJohn Marino shift); 1696*e4b17023SJohn Marino def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 1697*e4b17023SJohn Marino set_vinfo_for_stmt (def_stmt, def_stmt_vinfo); 1698*e4b17023SJohn Marino STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype; 1699*e4b17023SJohn Marino append_pattern_def_seq (stmt_vinfo, def_stmt); 1700*e4b17023SJohn Marino signmask = vect_recog_temp_ssa_var (itype, NULL); 1701*e4b17023SJohn Marino def_stmt 1702*e4b17023SJohn Marino = gimple_build_assign_with_ops (NOP_EXPR, signmask, var, 1703*e4b17023SJohn Marino NULL_TREE); 1704*e4b17023SJohn Marino append_pattern_def_seq (stmt_vinfo, def_stmt); 1705*e4b17023SJohn Marino } 1706*e4b17023SJohn Marino def_stmt 1707*e4b17023SJohn Marino = gimple_build_assign_with_ops (PLUS_EXPR, 1708*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 1709*e4b17023SJohn Marino oprnd0, signmask); 1710*e4b17023SJohn Marino append_pattern_def_seq (stmt_vinfo, def_stmt); 1711*e4b17023SJohn Marino def_stmt 1712*e4b17023SJohn Marino = gimple_build_assign_with_ops (BIT_AND_EXPR, 1713*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 1714*e4b17023SJohn Marino gimple_assign_lhs (def_stmt), 1715*e4b17023SJohn Marino fold_build2 (MINUS_EXPR, itype, 1716*e4b17023SJohn Marino oprnd1, 1717*e4b17023SJohn Marino build_int_cst (itype, 1718*e4b17023SJohn Marino 1))); 1719*e4b17023SJohn Marino append_pattern_def_seq (stmt_vinfo, def_stmt); 1720*e4b17023SJohn Marino 1721*e4b17023SJohn Marino pattern_stmt 1722*e4b17023SJohn Marino = gimple_build_assign_with_ops (MINUS_EXPR, 1723*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 1724*e4b17023SJohn Marino gimple_assign_lhs (def_stmt), 1725*e4b17023SJohn Marino signmask); 1726*e4b17023SJohn Marino } 1727*e4b17023SJohn Marino 1728*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 1729*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 1730*e4b17023SJohn Marino 1731*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, last_stmt); 1732*e4b17023SJohn Marino 1733*e4b17023SJohn Marino *type_in = vectype; 1734*e4b17023SJohn Marino *type_out = vectype; 1735*e4b17023SJohn Marino return pattern_stmt; 1736*e4b17023SJohn Marino } 1737*e4b17023SJohn Marino 1738*e4b17023SJohn Marino /* Function vect_recog_mixed_size_cond_pattern 1739*e4b17023SJohn Marino 1740*e4b17023SJohn Marino Try to find the following pattern: 1741*e4b17023SJohn Marino 1742*e4b17023SJohn Marino type x_t, y_t; 1743*e4b17023SJohn Marino TYPE a_T, b_T, c_T; 1744*e4b17023SJohn Marino loop: 1745*e4b17023SJohn Marino S1 a_T = x_t CMP y_t ? b_T : c_T; 1746*e4b17023SJohn Marino 1747*e4b17023SJohn Marino where type 'TYPE' is an integral type which has different size 1748*e4b17023SJohn Marino from 'type'. b_T and c_T are constants and if 'TYPE' is wider 1749*e4b17023SJohn Marino than 'type', the constants need to fit into an integer type 1750*e4b17023SJohn Marino with the same width as 'type'. 1751*e4b17023SJohn Marino 1752*e4b17023SJohn Marino Input: 1753*e4b17023SJohn Marino 1754*e4b17023SJohn Marino * LAST_STMT: A stmt from which the pattern search begins. 1755*e4b17023SJohn Marino 1756*e4b17023SJohn Marino Output: 1757*e4b17023SJohn Marino 1758*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 1759*e4b17023SJohn Marino 1760*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 1761*e4b17023SJohn Marino 1762*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the pattern. 1763*e4b17023SJohn Marino Additionally a def_stmt is added. 1764*e4b17023SJohn Marino 1765*e4b17023SJohn Marino a_it = x_t CMP y_t ? b_it : c_it; 1766*e4b17023SJohn Marino a_T = (TYPE) a_it; */ 1767*e4b17023SJohn Marino 1768*e4b17023SJohn Marino static gimple 1769*e4b17023SJohn Marino vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in, 1770*e4b17023SJohn Marino tree *type_out) 1771*e4b17023SJohn Marino { 1772*e4b17023SJohn Marino gimple last_stmt = VEC_index (gimple, *stmts, 0); 1773*e4b17023SJohn Marino tree cond_expr, then_clause, else_clause; 1774*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info; 1775*e4b17023SJohn Marino tree type, vectype, comp_vectype, itype, vecitype; 1776*e4b17023SJohn Marino enum machine_mode cmpmode; 1777*e4b17023SJohn Marino gimple pattern_stmt, def_stmt; 1778*e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 1779*e4b17023SJohn Marino 1780*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt) 1781*e4b17023SJohn Marino || gimple_assign_rhs_code (last_stmt) != COND_EXPR 1782*e4b17023SJohn Marino || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def) 1783*e4b17023SJohn Marino return NULL; 1784*e4b17023SJohn Marino 1785*e4b17023SJohn Marino cond_expr = gimple_assign_rhs1 (last_stmt); 1786*e4b17023SJohn Marino then_clause = gimple_assign_rhs2 (last_stmt); 1787*e4b17023SJohn Marino else_clause = gimple_assign_rhs3 (last_stmt); 1788*e4b17023SJohn Marino 1789*e4b17023SJohn Marino if (TREE_CODE (then_clause) != INTEGER_CST 1790*e4b17023SJohn Marino || TREE_CODE (else_clause) != INTEGER_CST) 1791*e4b17023SJohn Marino return NULL; 1792*e4b17023SJohn Marino 1793*e4b17023SJohn Marino if (!COMPARISON_CLASS_P (cond_expr)) 1794*e4b17023SJohn Marino return NULL; 1795*e4b17023SJohn Marino 1796*e4b17023SJohn Marino comp_vectype 1797*e4b17023SJohn Marino = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0))); 1798*e4b17023SJohn Marino if (comp_vectype == NULL_TREE) 1799*e4b17023SJohn Marino return NULL; 1800*e4b17023SJohn Marino 1801*e4b17023SJohn Marino type = gimple_expr_type (last_stmt); 1802*e4b17023SJohn Marino cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype)); 1803*e4b17023SJohn Marino 1804*e4b17023SJohn Marino if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode)) 1805*e4b17023SJohn Marino return NULL; 1806*e4b17023SJohn Marino 1807*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (type); 1808*e4b17023SJohn Marino if (vectype == NULL_TREE) 1809*e4b17023SJohn Marino return NULL; 1810*e4b17023SJohn Marino 1811*e4b17023SJohn Marino if (expand_vec_cond_expr_p (vectype, comp_vectype)) 1812*e4b17023SJohn Marino return NULL; 1813*e4b17023SJohn Marino 1814*e4b17023SJohn Marino itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode), 1815*e4b17023SJohn Marino TYPE_UNSIGNED (type)); 1816*e4b17023SJohn Marino if (itype == NULL_TREE 1817*e4b17023SJohn Marino || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode)) 1818*e4b17023SJohn Marino return NULL; 1819*e4b17023SJohn Marino 1820*e4b17023SJohn Marino vecitype = get_vectype_for_scalar_type (itype); 1821*e4b17023SJohn Marino if (vecitype == NULL_TREE) 1822*e4b17023SJohn Marino return NULL; 1823*e4b17023SJohn Marino 1824*e4b17023SJohn Marino if (!expand_vec_cond_expr_p (vecitype, comp_vectype)) 1825*e4b17023SJohn Marino return NULL; 1826*e4b17023SJohn Marino 1827*e4b17023SJohn Marino if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode)) 1828*e4b17023SJohn Marino { 1829*e4b17023SJohn Marino if (!int_fits_type_p (then_clause, itype) 1830*e4b17023SJohn Marino || !int_fits_type_p (else_clause, itype)) 1831*e4b17023SJohn Marino return NULL; 1832*e4b17023SJohn Marino } 1833*e4b17023SJohn Marino 1834*e4b17023SJohn Marino def_stmt 1835*e4b17023SJohn Marino = gimple_build_assign_with_ops3 (COND_EXPR, 1836*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 1837*e4b17023SJohn Marino unshare_expr (cond_expr), 1838*e4b17023SJohn Marino fold_convert (itype, then_clause), 1839*e4b17023SJohn Marino fold_convert (itype, else_clause)); 1840*e4b17023SJohn Marino pattern_stmt 1841*e4b17023SJohn Marino = gimple_build_assign_with_ops (NOP_EXPR, 1842*e4b17023SJohn Marino vect_recog_temp_ssa_var (type, NULL), 1843*e4b17023SJohn Marino gimple_assign_lhs (def_stmt), NULL_TREE); 1844*e4b17023SJohn Marino 1845*e4b17023SJohn Marino new_pattern_def_seq (stmt_vinfo, def_stmt); 1846*e4b17023SJohn Marino def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 1847*e4b17023SJohn Marino set_vinfo_for_stmt (def_stmt, def_stmt_info); 1848*e4b17023SJohn Marino STMT_VINFO_VECTYPE (def_stmt_info) = vecitype; 1849*e4b17023SJohn Marino *type_in = vecitype; 1850*e4b17023SJohn Marino *type_out = vectype; 1851*e4b17023SJohn Marino 1852*e4b17023SJohn Marino return pattern_stmt; 1853*e4b17023SJohn Marino } 1854*e4b17023SJohn Marino 1855*e4b17023SJohn Marino 1856*e4b17023SJohn Marino /* Helper function of vect_recog_bool_pattern. Called recursively, return 1857*e4b17023SJohn Marino true if bool VAR can be optimized that way. */ 1858*e4b17023SJohn Marino 1859*e4b17023SJohn Marino static bool 1860*e4b17023SJohn Marino check_bool_pattern (tree var, loop_vec_info loop_vinfo) 1861*e4b17023SJohn Marino { 1862*e4b17023SJohn Marino gimple def_stmt; 1863*e4b17023SJohn Marino enum vect_def_type dt; 1864*e4b17023SJohn Marino tree def, rhs1; 1865*e4b17023SJohn Marino enum tree_code rhs_code; 1866*e4b17023SJohn Marino 1867*e4b17023SJohn Marino if (!vect_is_simple_use (var, NULL, loop_vinfo, NULL, &def_stmt, &def, &dt)) 1868*e4b17023SJohn Marino return false; 1869*e4b17023SJohn Marino 1870*e4b17023SJohn Marino if (dt != vect_internal_def) 1871*e4b17023SJohn Marino return false; 1872*e4b17023SJohn Marino 1873*e4b17023SJohn Marino if (!is_gimple_assign (def_stmt)) 1874*e4b17023SJohn Marino return false; 1875*e4b17023SJohn Marino 1876*e4b17023SJohn Marino if (!has_single_use (def)) 1877*e4b17023SJohn Marino return false; 1878*e4b17023SJohn Marino 1879*e4b17023SJohn Marino rhs1 = gimple_assign_rhs1 (def_stmt); 1880*e4b17023SJohn Marino rhs_code = gimple_assign_rhs_code (def_stmt); 1881*e4b17023SJohn Marino switch (rhs_code) 1882*e4b17023SJohn Marino { 1883*e4b17023SJohn Marino case SSA_NAME: 1884*e4b17023SJohn Marino return check_bool_pattern (rhs1, loop_vinfo); 1885*e4b17023SJohn Marino 1886*e4b17023SJohn Marino CASE_CONVERT: 1887*e4b17023SJohn Marino if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1 1888*e4b17023SJohn Marino || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) 1889*e4b17023SJohn Marino && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE) 1890*e4b17023SJohn Marino return false; 1891*e4b17023SJohn Marino return check_bool_pattern (rhs1, loop_vinfo); 1892*e4b17023SJohn Marino 1893*e4b17023SJohn Marino case BIT_NOT_EXPR: 1894*e4b17023SJohn Marino return check_bool_pattern (rhs1, loop_vinfo); 1895*e4b17023SJohn Marino 1896*e4b17023SJohn Marino case BIT_AND_EXPR: 1897*e4b17023SJohn Marino case BIT_IOR_EXPR: 1898*e4b17023SJohn Marino case BIT_XOR_EXPR: 1899*e4b17023SJohn Marino if (!check_bool_pattern (rhs1, loop_vinfo)) 1900*e4b17023SJohn Marino return false; 1901*e4b17023SJohn Marino return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo); 1902*e4b17023SJohn Marino 1903*e4b17023SJohn Marino default: 1904*e4b17023SJohn Marino if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) 1905*e4b17023SJohn Marino { 1906*e4b17023SJohn Marino tree vecitype, comp_vectype; 1907*e4b17023SJohn Marino 1908*e4b17023SJohn Marino /* If the comparison can throw, then is_gimple_condexpr will be 1909*e4b17023SJohn Marino false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */ 1910*e4b17023SJohn Marino if (stmt_could_throw_p (def_stmt)) 1911*e4b17023SJohn Marino return false; 1912*e4b17023SJohn Marino 1913*e4b17023SJohn Marino comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); 1914*e4b17023SJohn Marino if (comp_vectype == NULL_TREE) 1915*e4b17023SJohn Marino return false; 1916*e4b17023SJohn Marino 1917*e4b17023SJohn Marino if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE) 1918*e4b17023SJohn Marino { 1919*e4b17023SJohn Marino enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 1920*e4b17023SJohn Marino tree itype 1921*e4b17023SJohn Marino = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 1922*e4b17023SJohn Marino vecitype = get_vectype_for_scalar_type (itype); 1923*e4b17023SJohn Marino if (vecitype == NULL_TREE) 1924*e4b17023SJohn Marino return false; 1925*e4b17023SJohn Marino } 1926*e4b17023SJohn Marino else 1927*e4b17023SJohn Marino vecitype = comp_vectype; 1928*e4b17023SJohn Marino return expand_vec_cond_expr_p (vecitype, comp_vectype); 1929*e4b17023SJohn Marino } 1930*e4b17023SJohn Marino return false; 1931*e4b17023SJohn Marino } 1932*e4b17023SJohn Marino } 1933*e4b17023SJohn Marino 1934*e4b17023SJohn Marino 1935*e4b17023SJohn Marino /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous 1936*e4b17023SJohn Marino stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT 1937*e4b17023SJohn Marino to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */ 1938*e4b17023SJohn Marino 1939*e4b17023SJohn Marino static tree 1940*e4b17023SJohn Marino adjust_bool_pattern_cast (tree type, tree var) 1941*e4b17023SJohn Marino { 1942*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var)); 1943*e4b17023SJohn Marino gimple cast_stmt, pattern_stmt; 1944*e4b17023SJohn Marino 1945*e4b17023SJohn Marino gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)); 1946*e4b17023SJohn Marino pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); 1947*e4b17023SJohn Marino new_pattern_def_seq (stmt_vinfo, pattern_stmt); 1948*e4b17023SJohn Marino cast_stmt 1949*e4b17023SJohn Marino = gimple_build_assign_with_ops (NOP_EXPR, 1950*e4b17023SJohn Marino vect_recog_temp_ssa_var (type, NULL), 1951*e4b17023SJohn Marino gimple_assign_lhs (pattern_stmt), 1952*e4b17023SJohn Marino NULL_TREE); 1953*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt; 1954*e4b17023SJohn Marino return gimple_assign_lhs (cast_stmt); 1955*e4b17023SJohn Marino } 1956*e4b17023SJohn Marino 1957*e4b17023SJohn Marino 1958*e4b17023SJohn Marino /* Helper function of vect_recog_bool_pattern. Do the actual transformations, 1959*e4b17023SJohn Marino recursively. VAR is an SSA_NAME that should be transformed from bool 1960*e4b17023SJohn Marino to a wider integer type, OUT_TYPE is the desired final integer type of 1961*e4b17023SJohn Marino the whole pattern, TRUEVAL should be NULL unless optimizing 1962*e4b17023SJohn Marino BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands 1963*e4b17023SJohn Marino in the then_clause, STMTS is where statements with added pattern stmts 1964*e4b17023SJohn Marino should be pushed to. */ 1965*e4b17023SJohn Marino 1966*e4b17023SJohn Marino static tree 1967*e4b17023SJohn Marino adjust_bool_pattern (tree var, tree out_type, tree trueval, 1968*e4b17023SJohn Marino VEC (gimple, heap) **stmts) 1969*e4b17023SJohn Marino { 1970*e4b17023SJohn Marino gimple stmt = SSA_NAME_DEF_STMT (var); 1971*e4b17023SJohn Marino enum tree_code rhs_code, def_rhs_code; 1972*e4b17023SJohn Marino tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2; 1973*e4b17023SJohn Marino location_t loc; 1974*e4b17023SJohn Marino gimple pattern_stmt, def_stmt; 1975*e4b17023SJohn Marino 1976*e4b17023SJohn Marino rhs1 = gimple_assign_rhs1 (stmt); 1977*e4b17023SJohn Marino rhs2 = gimple_assign_rhs2 (stmt); 1978*e4b17023SJohn Marino rhs_code = gimple_assign_rhs_code (stmt); 1979*e4b17023SJohn Marino loc = gimple_location (stmt); 1980*e4b17023SJohn Marino switch (rhs_code) 1981*e4b17023SJohn Marino { 1982*e4b17023SJohn Marino case SSA_NAME: 1983*e4b17023SJohn Marino CASE_CONVERT: 1984*e4b17023SJohn Marino irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 1985*e4b17023SJohn Marino itype = TREE_TYPE (irhs1); 1986*e4b17023SJohn Marino pattern_stmt 1987*e4b17023SJohn Marino = gimple_build_assign_with_ops (SSA_NAME, 1988*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 1989*e4b17023SJohn Marino irhs1, NULL_TREE); 1990*e4b17023SJohn Marino break; 1991*e4b17023SJohn Marino 1992*e4b17023SJohn Marino case BIT_NOT_EXPR: 1993*e4b17023SJohn Marino irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 1994*e4b17023SJohn Marino itype = TREE_TYPE (irhs1); 1995*e4b17023SJohn Marino pattern_stmt 1996*e4b17023SJohn Marino = gimple_build_assign_with_ops (BIT_XOR_EXPR, 1997*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 1998*e4b17023SJohn Marino irhs1, build_int_cst (itype, 1)); 1999*e4b17023SJohn Marino break; 2000*e4b17023SJohn Marino 2001*e4b17023SJohn Marino case BIT_AND_EXPR: 2002*e4b17023SJohn Marino /* Try to optimize x = y & (a < b ? 1 : 0); into 2003*e4b17023SJohn Marino x = (a < b ? y : 0); 2004*e4b17023SJohn Marino 2005*e4b17023SJohn Marino E.g. for: 2006*e4b17023SJohn Marino bool a_b, b_b, c_b; 2007*e4b17023SJohn Marino TYPE d_T; 2008*e4b17023SJohn Marino 2009*e4b17023SJohn Marino S1 a_b = x1 CMP1 y1; 2010*e4b17023SJohn Marino S2 b_b = x2 CMP2 y2; 2011*e4b17023SJohn Marino S3 c_b = a_b & b_b; 2012*e4b17023SJohn Marino S4 d_T = (TYPE) c_b; 2013*e4b17023SJohn Marino 2014*e4b17023SJohn Marino we would normally emit: 2015*e4b17023SJohn Marino 2016*e4b17023SJohn Marino S1' a_T = x1 CMP1 y1 ? 1 : 0; 2017*e4b17023SJohn Marino S2' b_T = x2 CMP2 y2 ? 1 : 0; 2018*e4b17023SJohn Marino S3' c_T = a_T & b_T; 2019*e4b17023SJohn Marino S4' d_T = c_T; 2020*e4b17023SJohn Marino 2021*e4b17023SJohn Marino but we can save one stmt by using the 2022*e4b17023SJohn Marino result of one of the COND_EXPRs in the other COND_EXPR and leave 2023*e4b17023SJohn Marino BIT_AND_EXPR stmt out: 2024*e4b17023SJohn Marino 2025*e4b17023SJohn Marino S1' a_T = x1 CMP1 y1 ? 1 : 0; 2026*e4b17023SJohn Marino S3' c_T = x2 CMP2 y2 ? a_T : 0; 2027*e4b17023SJohn Marino S4' f_T = c_T; 2028*e4b17023SJohn Marino 2029*e4b17023SJohn Marino At least when VEC_COND_EXPR is implemented using masks 2030*e4b17023SJohn Marino cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it 2031*e4b17023SJohn Marino computes the comparison masks and ands it, in one case with 2032*e4b17023SJohn Marino all ones vector, in the other case with a vector register. 2033*e4b17023SJohn Marino Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is 2034*e4b17023SJohn Marino often more expensive. */ 2035*e4b17023SJohn Marino def_stmt = SSA_NAME_DEF_STMT (rhs2); 2036*e4b17023SJohn Marino def_rhs_code = gimple_assign_rhs_code (def_stmt); 2037*e4b17023SJohn Marino if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 2038*e4b17023SJohn Marino { 2039*e4b17023SJohn Marino tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 2040*e4b17023SJohn Marino irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2041*e4b17023SJohn Marino if (TYPE_PRECISION (TREE_TYPE (irhs1)) 2042*e4b17023SJohn Marino == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 2043*e4b17023SJohn Marino { 2044*e4b17023SJohn Marino gimple tstmt; 2045*e4b17023SJohn Marino stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); 2046*e4b17023SJohn Marino irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts); 2047*e4b17023SJohn Marino tstmt = VEC_pop (gimple, *stmts); 2048*e4b17023SJohn Marino gcc_assert (tstmt == def_stmt); 2049*e4b17023SJohn Marino VEC_quick_push (gimple, *stmts, stmt); 2050*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) 2051*e4b17023SJohn Marino = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); 2052*e4b17023SJohn Marino gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo)); 2053*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; 2054*e4b17023SJohn Marino return irhs2; 2055*e4b17023SJohn Marino } 2056*e4b17023SJohn Marino else 2057*e4b17023SJohn Marino irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 2058*e4b17023SJohn Marino goto and_ior_xor; 2059*e4b17023SJohn Marino } 2060*e4b17023SJohn Marino def_stmt = SSA_NAME_DEF_STMT (rhs1); 2061*e4b17023SJohn Marino def_rhs_code = gimple_assign_rhs_code (def_stmt); 2062*e4b17023SJohn Marino if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison) 2063*e4b17023SJohn Marino { 2064*e4b17023SJohn Marino tree def_rhs1 = gimple_assign_rhs1 (def_stmt); 2065*e4b17023SJohn Marino irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 2066*e4b17023SJohn Marino if (TYPE_PRECISION (TREE_TYPE (irhs2)) 2067*e4b17023SJohn Marino == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1)))) 2068*e4b17023SJohn Marino { 2069*e4b17023SJohn Marino gimple tstmt; 2070*e4b17023SJohn Marino stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt); 2071*e4b17023SJohn Marino irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts); 2072*e4b17023SJohn Marino tstmt = VEC_pop (gimple, *stmts); 2073*e4b17023SJohn Marino gcc_assert (tstmt == def_stmt); 2074*e4b17023SJohn Marino VEC_quick_push (gimple, *stmts, stmt); 2075*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) 2076*e4b17023SJohn Marino = STMT_VINFO_RELATED_STMT (stmt_def_vinfo); 2077*e4b17023SJohn Marino gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo)); 2078*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL; 2079*e4b17023SJohn Marino return irhs1; 2080*e4b17023SJohn Marino } 2081*e4b17023SJohn Marino else 2082*e4b17023SJohn Marino irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2083*e4b17023SJohn Marino goto and_ior_xor; 2084*e4b17023SJohn Marino } 2085*e4b17023SJohn Marino /* FALLTHRU */ 2086*e4b17023SJohn Marino case BIT_IOR_EXPR: 2087*e4b17023SJohn Marino case BIT_XOR_EXPR: 2088*e4b17023SJohn Marino irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts); 2089*e4b17023SJohn Marino irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts); 2090*e4b17023SJohn Marino and_ior_xor: 2091*e4b17023SJohn Marino if (TYPE_PRECISION (TREE_TYPE (irhs1)) 2092*e4b17023SJohn Marino != TYPE_PRECISION (TREE_TYPE (irhs2))) 2093*e4b17023SJohn Marino { 2094*e4b17023SJohn Marino int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1)); 2095*e4b17023SJohn Marino int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2)); 2096*e4b17023SJohn Marino int out_prec = TYPE_PRECISION (out_type); 2097*e4b17023SJohn Marino if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2)) 2098*e4b17023SJohn Marino irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2); 2099*e4b17023SJohn Marino else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2)) 2100*e4b17023SJohn Marino irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1); 2101*e4b17023SJohn Marino else 2102*e4b17023SJohn Marino { 2103*e4b17023SJohn Marino irhs1 = adjust_bool_pattern_cast (out_type, rhs1); 2104*e4b17023SJohn Marino irhs2 = adjust_bool_pattern_cast (out_type, rhs2); 2105*e4b17023SJohn Marino } 2106*e4b17023SJohn Marino } 2107*e4b17023SJohn Marino itype = TREE_TYPE (irhs1); 2108*e4b17023SJohn Marino pattern_stmt 2109*e4b17023SJohn Marino = gimple_build_assign_with_ops (rhs_code, 2110*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 2111*e4b17023SJohn Marino irhs1, irhs2); 2112*e4b17023SJohn Marino break; 2113*e4b17023SJohn Marino 2114*e4b17023SJohn Marino default: 2115*e4b17023SJohn Marino gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison); 2116*e4b17023SJohn Marino if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE 2117*e4b17023SJohn Marino || !TYPE_UNSIGNED (TREE_TYPE (rhs1)) 2118*e4b17023SJohn Marino || (TYPE_PRECISION (TREE_TYPE (rhs1)) 2119*e4b17023SJohn Marino != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1))))) 2120*e4b17023SJohn Marino { 2121*e4b17023SJohn Marino enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1)); 2122*e4b17023SJohn Marino itype 2123*e4b17023SJohn Marino = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); 2124*e4b17023SJohn Marino } 2125*e4b17023SJohn Marino else 2126*e4b17023SJohn Marino itype = TREE_TYPE (rhs1); 2127*e4b17023SJohn Marino cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2); 2128*e4b17023SJohn Marino if (trueval == NULL_TREE) 2129*e4b17023SJohn Marino trueval = build_int_cst (itype, 1); 2130*e4b17023SJohn Marino else 2131*e4b17023SJohn Marino gcc_checking_assert (useless_type_conversion_p (itype, 2132*e4b17023SJohn Marino TREE_TYPE (trueval))); 2133*e4b17023SJohn Marino pattern_stmt 2134*e4b17023SJohn Marino = gimple_build_assign_with_ops3 (COND_EXPR, 2135*e4b17023SJohn Marino vect_recog_temp_ssa_var (itype, NULL), 2136*e4b17023SJohn Marino cond_expr, trueval, 2137*e4b17023SJohn Marino build_int_cst (itype, 0)); 2138*e4b17023SJohn Marino break; 2139*e4b17023SJohn Marino } 2140*e4b17023SJohn Marino 2141*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, stmt); 2142*e4b17023SJohn Marino gimple_set_location (pattern_stmt, loc); 2143*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt; 2144*e4b17023SJohn Marino return gimple_assign_lhs (pattern_stmt); 2145*e4b17023SJohn Marino } 2146*e4b17023SJohn Marino 2147*e4b17023SJohn Marino 2148*e4b17023SJohn Marino /* Function vect_recog_bool_pattern 2149*e4b17023SJohn Marino 2150*e4b17023SJohn Marino Try to find pattern like following: 2151*e4b17023SJohn Marino 2152*e4b17023SJohn Marino bool a_b, b_b, c_b, d_b, e_b; 2153*e4b17023SJohn Marino TYPE f_T; 2154*e4b17023SJohn Marino loop: 2155*e4b17023SJohn Marino S1 a_b = x1 CMP1 y1; 2156*e4b17023SJohn Marino S2 b_b = x2 CMP2 y2; 2157*e4b17023SJohn Marino S3 c_b = a_b & b_b; 2158*e4b17023SJohn Marino S4 d_b = x3 CMP3 y3; 2159*e4b17023SJohn Marino S5 e_b = c_b | d_b; 2160*e4b17023SJohn Marino S6 f_T = (TYPE) e_b; 2161*e4b17023SJohn Marino 2162*e4b17023SJohn Marino where type 'TYPE' is an integral type. 2163*e4b17023SJohn Marino 2164*e4b17023SJohn Marino Input: 2165*e4b17023SJohn Marino 2166*e4b17023SJohn Marino * LAST_STMT: A stmt at the end from which the pattern 2167*e4b17023SJohn Marino search begins, i.e. cast of a bool to 2168*e4b17023SJohn Marino an integer type. 2169*e4b17023SJohn Marino 2170*e4b17023SJohn Marino Output: 2171*e4b17023SJohn Marino 2172*e4b17023SJohn Marino * TYPE_IN: The type of the input arguments to the pattern. 2173*e4b17023SJohn Marino 2174*e4b17023SJohn Marino * TYPE_OUT: The type of the output of this pattern. 2175*e4b17023SJohn Marino 2176*e4b17023SJohn Marino * Return value: A new stmt that will be used to replace the pattern. 2177*e4b17023SJohn Marino 2178*e4b17023SJohn Marino Assuming size of TYPE is the same as size of all comparisons 2179*e4b17023SJohn Marino (otherwise some casts would be added where needed), the above 2180*e4b17023SJohn Marino sequence we create related pattern stmts: 2181*e4b17023SJohn Marino S1' a_T = x1 CMP1 y1 ? 1 : 0; 2182*e4b17023SJohn Marino S3' c_T = x2 CMP2 y2 ? a_T : 0; 2183*e4b17023SJohn Marino S4' d_T = x3 CMP3 y3 ? 1 : 0; 2184*e4b17023SJohn Marino S5' e_T = c_T | d_T; 2185*e4b17023SJohn Marino S6' f_T = e_T; 2186*e4b17023SJohn Marino 2187*e4b17023SJohn Marino Instead of the above S3' we could emit: 2188*e4b17023SJohn Marino S2' b_T = x2 CMP2 y2 ? 1 : 0; 2189*e4b17023SJohn Marino S3' c_T = a_T | b_T; 2190*e4b17023SJohn Marino but the above is more efficient. */ 2191*e4b17023SJohn Marino 2192*e4b17023SJohn Marino static gimple 2193*e4b17023SJohn Marino vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in, 2194*e4b17023SJohn Marino tree *type_out) 2195*e4b17023SJohn Marino { 2196*e4b17023SJohn Marino gimple last_stmt = VEC_pop (gimple, *stmts); 2197*e4b17023SJohn Marino enum tree_code rhs_code; 2198*e4b17023SJohn Marino tree var, lhs, rhs, vectype; 2199*e4b17023SJohn Marino stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); 2200*e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 2201*e4b17023SJohn Marino gimple pattern_stmt; 2202*e4b17023SJohn Marino 2203*e4b17023SJohn Marino if (!is_gimple_assign (last_stmt)) 2204*e4b17023SJohn Marino return NULL; 2205*e4b17023SJohn Marino 2206*e4b17023SJohn Marino var = gimple_assign_rhs1 (last_stmt); 2207*e4b17023SJohn Marino lhs = gimple_assign_lhs (last_stmt); 2208*e4b17023SJohn Marino 2209*e4b17023SJohn Marino if ((TYPE_PRECISION (TREE_TYPE (var)) != 1 2210*e4b17023SJohn Marino || !TYPE_UNSIGNED (TREE_TYPE (var))) 2211*e4b17023SJohn Marino && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE) 2212*e4b17023SJohn Marino return NULL; 2213*e4b17023SJohn Marino 2214*e4b17023SJohn Marino rhs_code = gimple_assign_rhs_code (last_stmt); 2215*e4b17023SJohn Marino if (CONVERT_EXPR_CODE_P (rhs_code)) 2216*e4b17023SJohn Marino { 2217*e4b17023SJohn Marino if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE 2218*e4b17023SJohn Marino || TYPE_PRECISION (TREE_TYPE (lhs)) == 1) 2219*e4b17023SJohn Marino return NULL; 2220*e4b17023SJohn Marino vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); 2221*e4b17023SJohn Marino if (vectype == NULL_TREE) 2222*e4b17023SJohn Marino return NULL; 2223*e4b17023SJohn Marino 2224*e4b17023SJohn Marino if (!check_bool_pattern (var, loop_vinfo)) 2225*e4b17023SJohn Marino return NULL; 2226*e4b17023SJohn Marino 2227*e4b17023SJohn Marino rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts); 2228*e4b17023SJohn Marino lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 2229*e4b17023SJohn Marino if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 2230*e4b17023SJohn Marino pattern_stmt 2231*e4b17023SJohn Marino = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE); 2232*e4b17023SJohn Marino else 2233*e4b17023SJohn Marino pattern_stmt 2234*e4b17023SJohn Marino = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE); 2235*e4b17023SJohn Marino *type_out = vectype; 2236*e4b17023SJohn Marino *type_in = vectype; 2237*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, last_stmt); 2238*e4b17023SJohn Marino return pattern_stmt; 2239*e4b17023SJohn Marino } 2240*e4b17023SJohn Marino else if (rhs_code == SSA_NAME 2241*e4b17023SJohn Marino && STMT_VINFO_DATA_REF (stmt_vinfo)) 2242*e4b17023SJohn Marino { 2243*e4b17023SJohn Marino stmt_vec_info pattern_stmt_info; 2244*e4b17023SJohn Marino vectype = STMT_VINFO_VECTYPE (stmt_vinfo); 2245*e4b17023SJohn Marino gcc_assert (vectype != NULL_TREE); 2246*e4b17023SJohn Marino if (!VECTOR_MODE_P (TYPE_MODE (vectype))) 2247*e4b17023SJohn Marino return NULL; 2248*e4b17023SJohn Marino if (!check_bool_pattern (var, loop_vinfo)) 2249*e4b17023SJohn Marino return NULL; 2250*e4b17023SJohn Marino 2251*e4b17023SJohn Marino rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts); 2252*e4b17023SJohn Marino lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs); 2253*e4b17023SJohn Marino if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs))) 2254*e4b17023SJohn Marino { 2255*e4b17023SJohn Marino tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); 2256*e4b17023SJohn Marino gimple cast_stmt 2257*e4b17023SJohn Marino = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE); 2258*e4b17023SJohn Marino new_pattern_def_seq (stmt_vinfo, cast_stmt); 2259*e4b17023SJohn Marino rhs = rhs2; 2260*e4b17023SJohn Marino } 2261*e4b17023SJohn Marino pattern_stmt 2262*e4b17023SJohn Marino = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE); 2263*e4b17023SJohn Marino pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL); 2264*e4b17023SJohn Marino set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 2265*e4b17023SJohn Marino STMT_VINFO_DATA_REF (pattern_stmt_info) 2266*e4b17023SJohn Marino = STMT_VINFO_DATA_REF (stmt_vinfo); 2267*e4b17023SJohn Marino STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info) 2268*e4b17023SJohn Marino = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo); 2269*e4b17023SJohn Marino STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo); 2270*e4b17023SJohn Marino STMT_VINFO_DR_OFFSET (pattern_stmt_info) 2271*e4b17023SJohn Marino = STMT_VINFO_DR_OFFSET (stmt_vinfo); 2272*e4b17023SJohn Marino STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo); 2273*e4b17023SJohn Marino STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info) 2274*e4b17023SJohn Marino = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo); 2275*e4b17023SJohn Marino DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt; 2276*e4b17023SJohn Marino *type_out = vectype; 2277*e4b17023SJohn Marino *type_in = vectype; 2278*e4b17023SJohn Marino VEC_safe_push (gimple, heap, *stmts, last_stmt); 2279*e4b17023SJohn Marino return pattern_stmt; 2280*e4b17023SJohn Marino } 2281*e4b17023SJohn Marino else 2282*e4b17023SJohn Marino return NULL; 2283*e4b17023SJohn Marino } 2284*e4b17023SJohn Marino 2285*e4b17023SJohn Marino 2286*e4b17023SJohn Marino /* Mark statements that are involved in a pattern. */ 2287*e4b17023SJohn Marino 2288*e4b17023SJohn Marino static inline void 2289*e4b17023SJohn Marino vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt, 2290*e4b17023SJohn Marino tree pattern_vectype) 2291*e4b17023SJohn Marino { 2292*e4b17023SJohn Marino stmt_vec_info pattern_stmt_info, def_stmt_info; 2293*e4b17023SJohn Marino stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt); 2294*e4b17023SJohn Marino loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info); 2295*e4b17023SJohn Marino gimple def_stmt; 2296*e4b17023SJohn Marino 2297*e4b17023SJohn Marino pattern_stmt_info = vinfo_for_stmt (pattern_stmt); 2298*e4b17023SJohn Marino if (pattern_stmt_info == NULL) 2299*e4b17023SJohn Marino { 2300*e4b17023SJohn Marino pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL); 2301*e4b17023SJohn Marino set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info); 2302*e4b17023SJohn Marino } 2303*e4b17023SJohn Marino gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt)); 2304*e4b17023SJohn Marino 2305*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt; 2306*e4b17023SJohn Marino STMT_VINFO_DEF_TYPE (pattern_stmt_info) 2307*e4b17023SJohn Marino = STMT_VINFO_DEF_TYPE (orig_stmt_info); 2308*e4b17023SJohn Marino STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype; 2309*e4b17023SJohn Marino STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; 2310*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt; 2311*e4b17023SJohn Marino STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info) 2312*e4b17023SJohn Marino = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info); 2313*e4b17023SJohn Marino if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)) 2314*e4b17023SJohn Marino { 2315*e4b17023SJohn Marino gimple_stmt_iterator si; 2316*e4b17023SJohn Marino for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)); 2317*e4b17023SJohn Marino !gsi_end_p (si); gsi_next (&si)) 2318*e4b17023SJohn Marino { 2319*e4b17023SJohn Marino def_stmt = gsi_stmt (si); 2320*e4b17023SJohn Marino def_stmt_info = vinfo_for_stmt (def_stmt); 2321*e4b17023SJohn Marino if (def_stmt_info == NULL) 2322*e4b17023SJohn Marino { 2323*e4b17023SJohn Marino def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL); 2324*e4b17023SJohn Marino set_vinfo_for_stmt (def_stmt, def_stmt_info); 2325*e4b17023SJohn Marino } 2326*e4b17023SJohn Marino gimple_set_bb (def_stmt, gimple_bb (orig_stmt)); 2327*e4b17023SJohn Marino STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt; 2328*e4b17023SJohn Marino STMT_VINFO_DEF_TYPE (def_stmt_info) 2329*e4b17023SJohn Marino = STMT_VINFO_DEF_TYPE (orig_stmt_info); 2330*e4b17023SJohn Marino if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE) 2331*e4b17023SJohn Marino STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype; 2332*e4b17023SJohn Marino } 2333*e4b17023SJohn Marino } 2334*e4b17023SJohn Marino } 2335*e4b17023SJohn Marino 2336*e4b17023SJohn Marino /* Function vect_pattern_recog_1 2337*e4b17023SJohn Marino 2338*e4b17023SJohn Marino Input: 2339*e4b17023SJohn Marino PATTERN_RECOG_FUNC: A pointer to a function that detects a certain 2340*e4b17023SJohn Marino computation pattern. 2341*e4b17023SJohn Marino STMT: A stmt from which the pattern search should start. 2342*e4b17023SJohn Marino 2343*e4b17023SJohn Marino If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an 2344*e4b17023SJohn Marino expression that computes the same functionality and can be used to 2345*e4b17023SJohn Marino replace the sequence of stmts that are involved in the pattern. 2346*e4b17023SJohn Marino 2347*e4b17023SJohn Marino Output: 2348*e4b17023SJohn Marino This function checks if the expression returned by PATTERN_RECOG_FUNC is 2349*e4b17023SJohn Marino supported in vector form by the target. We use 'TYPE_IN' to obtain the 2350*e4b17023SJohn Marino relevant vector type. If 'TYPE_IN' is already a vector type, then this 2351*e4b17023SJohn Marino indicates that target support had already been checked by PATTERN_RECOG_FUNC. 2352*e4b17023SJohn Marino If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits 2353*e4b17023SJohn Marino to the available target pattern. 2354*e4b17023SJohn Marino 2355*e4b17023SJohn Marino This function also does some bookkeeping, as explained in the documentation 2356*e4b17023SJohn Marino for vect_recog_pattern. */ 2357*e4b17023SJohn Marino 2358*e4b17023SJohn Marino static void 2359*e4b17023SJohn Marino vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func, 2360*e4b17023SJohn Marino gimple_stmt_iterator si, 2361*e4b17023SJohn Marino VEC (gimple, heap) **stmts_to_replace) 2362*e4b17023SJohn Marino { 2363*e4b17023SJohn Marino gimple stmt = gsi_stmt (si), pattern_stmt; 2364*e4b17023SJohn Marino stmt_vec_info stmt_info; 2365*e4b17023SJohn Marino loop_vec_info loop_vinfo; 2366*e4b17023SJohn Marino tree pattern_vectype; 2367*e4b17023SJohn Marino tree type_in, type_out; 2368*e4b17023SJohn Marino enum tree_code code; 2369*e4b17023SJohn Marino int i; 2370*e4b17023SJohn Marino gimple next; 2371*e4b17023SJohn Marino 2372*e4b17023SJohn Marino VEC_truncate (gimple, *stmts_to_replace, 0); 2373*e4b17023SJohn Marino VEC_quick_push (gimple, *stmts_to_replace, stmt); 2374*e4b17023SJohn Marino pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out); 2375*e4b17023SJohn Marino if (!pattern_stmt) 2376*e4b17023SJohn Marino return; 2377*e4b17023SJohn Marino 2378*e4b17023SJohn Marino stmt = VEC_last (gimple, *stmts_to_replace); 2379*e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt); 2380*e4b17023SJohn Marino loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2381*e4b17023SJohn Marino 2382*e4b17023SJohn Marino if (VECTOR_MODE_P (TYPE_MODE (type_in))) 2383*e4b17023SJohn Marino { 2384*e4b17023SJohn Marino /* No need to check target support (already checked by the pattern 2385*e4b17023SJohn Marino recognition function). */ 2386*e4b17023SJohn Marino pattern_vectype = type_out ? type_out : type_in; 2387*e4b17023SJohn Marino } 2388*e4b17023SJohn Marino else 2389*e4b17023SJohn Marino { 2390*e4b17023SJohn Marino enum machine_mode vec_mode; 2391*e4b17023SJohn Marino enum insn_code icode; 2392*e4b17023SJohn Marino optab optab; 2393*e4b17023SJohn Marino 2394*e4b17023SJohn Marino /* Check target support */ 2395*e4b17023SJohn Marino type_in = get_vectype_for_scalar_type (type_in); 2396*e4b17023SJohn Marino if (!type_in) 2397*e4b17023SJohn Marino return; 2398*e4b17023SJohn Marino if (type_out) 2399*e4b17023SJohn Marino type_out = get_vectype_for_scalar_type (type_out); 2400*e4b17023SJohn Marino else 2401*e4b17023SJohn Marino type_out = type_in; 2402*e4b17023SJohn Marino if (!type_out) 2403*e4b17023SJohn Marino return; 2404*e4b17023SJohn Marino pattern_vectype = type_out; 2405*e4b17023SJohn Marino 2406*e4b17023SJohn Marino if (is_gimple_assign (pattern_stmt)) 2407*e4b17023SJohn Marino code = gimple_assign_rhs_code (pattern_stmt); 2408*e4b17023SJohn Marino else 2409*e4b17023SJohn Marino { 2410*e4b17023SJohn Marino gcc_assert (is_gimple_call (pattern_stmt)); 2411*e4b17023SJohn Marino code = CALL_EXPR; 2412*e4b17023SJohn Marino } 2413*e4b17023SJohn Marino 2414*e4b17023SJohn Marino optab = optab_for_tree_code (code, type_in, optab_default); 2415*e4b17023SJohn Marino vec_mode = TYPE_MODE (type_in); 2416*e4b17023SJohn Marino if (!optab 2417*e4b17023SJohn Marino || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing 2418*e4b17023SJohn Marino || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out))) 2419*e4b17023SJohn Marino return; 2420*e4b17023SJohn Marino } 2421*e4b17023SJohn Marino 2422*e4b17023SJohn Marino /* Found a vectorizable pattern. */ 2423*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 2424*e4b17023SJohn Marino { 2425*e4b17023SJohn Marino fprintf (vect_dump, "pattern recognized: "); 2426*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 2427*e4b17023SJohn Marino } 2428*e4b17023SJohn Marino 2429*e4b17023SJohn Marino /* Mark the stmts that are involved in the pattern. */ 2430*e4b17023SJohn Marino vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype); 2431*e4b17023SJohn Marino 2432*e4b17023SJohn Marino /* Patterns cannot be vectorized using SLP, because they change the order of 2433*e4b17023SJohn Marino computation. */ 2434*e4b17023SJohn Marino FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next) 2435*e4b17023SJohn Marino if (next == stmt) 2436*e4b17023SJohn Marino VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); 2437*e4b17023SJohn Marino 2438*e4b17023SJohn Marino /* It is possible that additional pattern stmts are created and inserted in 2439*e4b17023SJohn Marino STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the 2440*e4b17023SJohn Marino relevant statements. */ 2441*e4b17023SJohn Marino for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt) 2442*e4b17023SJohn Marino && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1); 2443*e4b17023SJohn Marino i++) 2444*e4b17023SJohn Marino { 2445*e4b17023SJohn Marino stmt_info = vinfo_for_stmt (stmt); 2446*e4b17023SJohn Marino pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 2447*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 2448*e4b17023SJohn Marino { 2449*e4b17023SJohn Marino fprintf (vect_dump, "additional pattern stmt: "); 2450*e4b17023SJohn Marino print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); 2451*e4b17023SJohn Marino } 2452*e4b17023SJohn Marino 2453*e4b17023SJohn Marino vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE); 2454*e4b17023SJohn Marino } 2455*e4b17023SJohn Marino } 2456*e4b17023SJohn Marino 2457*e4b17023SJohn Marino 2458*e4b17023SJohn Marino /* Function vect_pattern_recog 2459*e4b17023SJohn Marino 2460*e4b17023SJohn Marino Input: 2461*e4b17023SJohn Marino LOOP_VINFO - a struct_loop_info of a loop in which we want to look for 2462*e4b17023SJohn Marino computation idioms. 2463*e4b17023SJohn Marino 2464*e4b17023SJohn Marino Output - for each computation idiom that is detected we create a new stmt 2465*e4b17023SJohn Marino that provides the same functionality and that can be vectorized. We 2466*e4b17023SJohn Marino also record some information in the struct_stmt_info of the relevant 2467*e4b17023SJohn Marino stmts, as explained below: 2468*e4b17023SJohn Marino 2469*e4b17023SJohn Marino At the entry to this function we have the following stmts, with the 2470*e4b17023SJohn Marino following initial value in the STMT_VINFO fields: 2471*e4b17023SJohn Marino 2472*e4b17023SJohn Marino stmt in_pattern_p related_stmt vec_stmt 2473*e4b17023SJohn Marino S1: a_i = .... - - - 2474*e4b17023SJohn Marino S2: a_2 = ..use(a_i).. - - - 2475*e4b17023SJohn Marino S3: a_1 = ..use(a_2).. - - - 2476*e4b17023SJohn Marino S4: a_0 = ..use(a_1).. - - - 2477*e4b17023SJohn Marino S5: ... = ..use(a_0).. - - - 2478*e4b17023SJohn Marino 2479*e4b17023SJohn Marino Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be 2480*e4b17023SJohn Marino represented by a single stmt. We then: 2481*e4b17023SJohn Marino - create a new stmt S6 equivalent to the pattern (the stmt is not 2482*e4b17023SJohn Marino inserted into the code) 2483*e4b17023SJohn Marino - fill in the STMT_VINFO fields as follows: 2484*e4b17023SJohn Marino 2485*e4b17023SJohn Marino in_pattern_p related_stmt vec_stmt 2486*e4b17023SJohn Marino S1: a_i = .... - - - 2487*e4b17023SJohn Marino S2: a_2 = ..use(a_i).. - - - 2488*e4b17023SJohn Marino S3: a_1 = ..use(a_2).. - - - 2489*e4b17023SJohn Marino S4: a_0 = ..use(a_1).. true S6 - 2490*e4b17023SJohn Marino '---> S6: a_new = .... - S4 - 2491*e4b17023SJohn Marino S5: ... = ..use(a_0).. - - - 2492*e4b17023SJohn Marino 2493*e4b17023SJohn Marino (the last stmt in the pattern (S4) and the new pattern stmt (S6) point 2494*e4b17023SJohn Marino to each other through the RELATED_STMT field). 2495*e4b17023SJohn Marino 2496*e4b17023SJohn Marino S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead 2497*e4b17023SJohn Marino of S4 because it will replace all its uses. Stmts {S1,S2,S3} will 2498*e4b17023SJohn Marino remain irrelevant unless used by stmts other than S4. 2499*e4b17023SJohn Marino 2500*e4b17023SJohn Marino If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3} 2501*e4b17023SJohn Marino (because they are marked as irrelevant). It will vectorize S6, and record 2502*e4b17023SJohn Marino a pointer to the new vector stmt VS6 from S6 (as usual). 2503*e4b17023SJohn Marino S4 will be skipped, and S5 will be vectorized as usual: 2504*e4b17023SJohn Marino 2505*e4b17023SJohn Marino in_pattern_p related_stmt vec_stmt 2506*e4b17023SJohn Marino S1: a_i = .... - - - 2507*e4b17023SJohn Marino S2: a_2 = ..use(a_i).. - - - 2508*e4b17023SJohn Marino S3: a_1 = ..use(a_2).. - - - 2509*e4b17023SJohn Marino > VS6: va_new = .... - - - 2510*e4b17023SJohn Marino S4: a_0 = ..use(a_1).. true S6 VS6 2511*e4b17023SJohn Marino '---> S6: a_new = .... - S4 VS6 2512*e4b17023SJohn Marino > VS5: ... = ..vuse(va_new).. - - - 2513*e4b17023SJohn Marino S5: ... = ..use(a_0).. - - - 2514*e4b17023SJohn Marino 2515*e4b17023SJohn Marino DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used 2516*e4b17023SJohn Marino elsewhere), and we'll end up with: 2517*e4b17023SJohn Marino 2518*e4b17023SJohn Marino VS6: va_new = .... 2519*e4b17023SJohn Marino VS5: ... = ..vuse(va_new).. 2520*e4b17023SJohn Marino 2521*e4b17023SJohn Marino In case of more than one pattern statements, e.g., widen-mult with 2522*e4b17023SJohn Marino intermediate type: 2523*e4b17023SJohn Marino 2524*e4b17023SJohn Marino S1 a_t = ; 2525*e4b17023SJohn Marino S2 a_T = (TYPE) a_t; 2526*e4b17023SJohn Marino '--> S3: a_it = (interm_type) a_t; 2527*e4b17023SJohn Marino S4 prod_T = a_T * CONST; 2528*e4b17023SJohn Marino '--> S5: prod_T' = a_it w* CONST; 2529*e4b17023SJohn Marino 2530*e4b17023SJohn Marino there may be other users of a_T outside the pattern. In that case S2 will 2531*e4b17023SJohn Marino be marked as relevant (as well as S3), and both S2 and S3 will be analyzed 2532*e4b17023SJohn Marino and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will 2533*e4b17023SJohn Marino be recorded in S3. */ 2534*e4b17023SJohn Marino 2535*e4b17023SJohn Marino void 2536*e4b17023SJohn Marino vect_pattern_recog (loop_vec_info loop_vinfo) 2537*e4b17023SJohn Marino { 2538*e4b17023SJohn Marino struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2539*e4b17023SJohn Marino basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 2540*e4b17023SJohn Marino unsigned int nbbs = loop->num_nodes; 2541*e4b17023SJohn Marino gimple_stmt_iterator si; 2542*e4b17023SJohn Marino unsigned int i, j; 2543*e4b17023SJohn Marino vect_recog_func_ptr vect_recog_func; 2544*e4b17023SJohn Marino VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1); 2545*e4b17023SJohn Marino 2546*e4b17023SJohn Marino if (vect_print_dump_info (REPORT_DETAILS)) 2547*e4b17023SJohn Marino fprintf (vect_dump, "=== vect_pattern_recog ==="); 2548*e4b17023SJohn Marino 2549*e4b17023SJohn Marino /* Scan through the loop stmts, applying the pattern recognition 2550*e4b17023SJohn Marino functions starting at each stmt visited: */ 2551*e4b17023SJohn Marino for (i = 0; i < nbbs; i++) 2552*e4b17023SJohn Marino { 2553*e4b17023SJohn Marino basic_block bb = bbs[i]; 2554*e4b17023SJohn Marino for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 2555*e4b17023SJohn Marino { 2556*e4b17023SJohn Marino /* Scan over all generic vect_recog_xxx_pattern functions. */ 2557*e4b17023SJohn Marino for (j = 0; j < NUM_PATTERNS; j++) 2558*e4b17023SJohn Marino { 2559*e4b17023SJohn Marino vect_recog_func = vect_vect_recog_func_ptrs[j]; 2560*e4b17023SJohn Marino vect_pattern_recog_1 (vect_recog_func, si, 2561*e4b17023SJohn Marino &stmts_to_replace); 2562*e4b17023SJohn Marino } 2563*e4b17023SJohn Marino } 2564*e4b17023SJohn Marino } 2565*e4b17023SJohn Marino 2566*e4b17023SJohn Marino VEC_free (gimple, heap, stmts_to_replace); 2567*e4b17023SJohn Marino } 2568