1 /* Data References Analysis and Manipulation Utilities for Vectorization. 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 3 Free Software Foundation, Inc. 4 Contributed by Dorit Naishlos <dorit@il.ibm.com> 5 and Ira Rosen <irar@il.ibm.com> 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free 11 Software Foundation; either version 3, or (at your option) any later 12 version. 13 14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 15 WARRANTY; without even the implied warranty of MERCHANTABILITY or 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 17 for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 #include "config.h" 24 #include "system.h" 25 #include "coretypes.h" 26 #include "tm.h" 27 #include "ggc.h" 28 #include "tree.h" 29 #include "target.h" 30 #include "basic-block.h" 31 #include "diagnostic.h" 32 #include "tree-flow.h" 33 #include "tree-dump.h" 34 #include "cfgloop.h" 35 #include "expr.h" 36 #include "optabs.h" 37 #include "tree-chrec.h" 38 #include "tree-scalar-evolution.h" 39 #include "tree-vectorizer.h" 40 #include "toplev.h" 41 42 43 /* Return the smallest scalar part of STMT. 44 This is used to determine the vectype of the stmt. We generally set the 45 vectype according to the type of the result (lhs). For stmts whose 46 result-type is different than the type of the arguments (e.g., demotion, 47 promotion), vectype will be reset appropriately (later). Note that we have 48 to visit the smallest datatype in this function, because that determines the 49 VF. If the smallest datatype in the loop is present only as the rhs of a 50 promotion operation - we'd miss it. 51 Such a case, where a variable of this datatype does not appear in the lhs 52 anywhere in the loop, can only occur if it's an invariant: e.g.: 53 'int_x = (int) short_inv', which we'd expect to have been optimized away by 54 invariant motion. However, we cannot rely on invariant motion to always take 55 invariants out of the loop, and so in the case of promotion we also have to 56 check the rhs. 57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding 58 types. */ 59 60 tree 61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit, 62 HOST_WIDE_INT *rhs_size_unit) 63 { 64 tree scalar_type = gimple_expr_type (stmt); 65 HOST_WIDE_INT lhs, rhs; 66 67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)); 68 69 if (is_gimple_assign (stmt) 70 && (gimple_assign_cast_p (stmt) 71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR 72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR)) 73 { 74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); 75 76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)); 77 if (rhs < lhs) 78 scalar_type = rhs_type; 79 } 80 81 *lhs_size_unit = lhs; 82 *rhs_size_unit = rhs; 83 return scalar_type; 84 } 85 86 87 /* Find the place of the data-ref in STMT in the interleaving chain that starts 88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. 
*/ 89 90 int 91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt) 92 { 93 gimple next_stmt = first_stmt; 94 int result = 0; 95 96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt))) 97 return -1; 98 99 while (next_stmt && next_stmt != stmt) 100 { 101 result++; 102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt)); 103 } 104 105 if (next_stmt) 106 return result; 107 else 108 return -1; 109 } 110 111 112 /* Function vect_insert_into_interleaving_chain. 113 114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */ 115 116 static void 117 vect_insert_into_interleaving_chain (struct data_reference *dra, 118 struct data_reference *drb) 119 { 120 gimple prev, next; 121 tree next_init; 122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 124 125 prev = DR_GROUP_FIRST_DR (stmtinfo_b); 126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)); 127 while (next) 128 { 129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); 130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0) 131 { 132 /* Insert here. */ 133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra); 134 DR_GROUP_NEXT_DR (stmtinfo_a) = next; 135 return; 136 } 137 prev = next; 138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)); 139 } 140 141 /* We got to the end of the list. Insert here. */ 142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra); 143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL; 144 } 145 146 147 /* Function vect_update_interleaving_chain. 148 149 For two data-refs DRA and DRB that are a part of a chain interleaved data 150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's. 151 152 There are four possible cases: 153 1. New stmts - both DRA and DRB are not a part of any chain: 154 FIRST_DR = DRB 155 NEXT_DR (DRB) = DRA 156 2. DRB is a part of a chain and DRA is not: 157 no need to update FIRST_DR 158 no need to insert DRB 159 insert DRA according to init 160 3. DRA is a part of a chain and DRB is not: 161 if (init of FIRST_DR > init of DRB) 162 FIRST_DR = DRB 163 NEXT(FIRST_DR) = previous FIRST_DR 164 else 165 insert DRB according to its init 166 4. both DRA and DRB are in some interleaving chains: 167 choose the chain with the smallest init of FIRST_DR 168 insert the nodes of the second chain into the first one. */ 169 170 static void 171 vect_update_interleaving_chain (struct data_reference *drb, 172 struct data_reference *dra) 173 { 174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra)); 175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb)); 176 tree next_init, init_dra_chain, init_drb_chain; 177 gimple first_a, first_b; 178 tree node_init; 179 gimple node, prev, next, first_stmt; 180 181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */ 182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b)) 183 { 184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb); 185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb); 186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra); 187 return; 188 } 189 190 /* 2. DRB is a part of a chain and DRA is not. */ 191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b)) 192 { 193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b); 194 /* Insert DRA into the chain of DRB. */ 195 vect_insert_into_interleaving_chain (dra, drb); 196 return; 197 } 198 199 /* 3. DRA is a part of a chain and DRB is not. 
*/ 200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b)) 201 { 202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a); 203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt ( 204 old_first_stmt))); 205 gimple tmp; 206 207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0) 208 { 209 /* DRB's init is smaller than the init of the stmt previously marked 210 as the first stmt of the interleaving chain of DRA. Therefore, we 211 update FIRST_STMT and put DRB in the head of the list. */ 212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb); 213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt; 214 215 /* Update all the stmts in the list to point to the new FIRST_STMT. */ 216 tmp = old_first_stmt; 217 while (tmp) 218 { 219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb); 220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp)); 221 } 222 } 223 else 224 { 225 /* Insert DRB in the list of DRA. */ 226 vect_insert_into_interleaving_chain (drb, dra); 227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a); 228 } 229 return; 230 } 231 232 /* 4. both DRA and DRB are in some interleaving chains. */ 233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a); 234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b); 235 if (first_a == first_b) 236 return; 237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a))); 238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b))); 239 240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0) 241 { 242 /* Insert the nodes of DRA chain into the DRB chain. 243 After inserting a node, continue from this node of the DRB chain (don't 244 start from the beginning. */ 245 node = DR_GROUP_FIRST_DR (stmtinfo_a); 246 prev = DR_GROUP_FIRST_DR (stmtinfo_b); 247 first_stmt = first_b; 248 } 249 else 250 { 251 /* Insert the nodes of DRB chain into the DRA chain. 252 After inserting a node, continue from this node of the DRA chain (don't 253 start from the beginning. */ 254 node = DR_GROUP_FIRST_DR (stmtinfo_b); 255 prev = DR_GROUP_FIRST_DR (stmtinfo_a); 256 first_stmt = first_a; 257 } 258 259 while (node) 260 { 261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node))); 262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)); 263 while (next) 264 { 265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); 266 if (tree_int_cst_compare (next_init, node_init) > 0) 267 { 268 /* Insert here. */ 269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node; 270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next; 271 prev = node; 272 break; 273 } 274 prev = next; 275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)); 276 } 277 if (!next) 278 { 279 /* We got to the end of the list. Insert here. */ 280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node; 281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL; 282 prev = node; 283 } 284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt; 285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node)); 286 } 287 } 288 289 290 /* Function vect_equal_offsets. 291 292 Check if OFFSET1 and OFFSET2 are identical expressions. 
*/ 293 294 static bool 295 vect_equal_offsets (tree offset1, tree offset2) 296 { 297 bool res; 298 299 STRIP_NOPS (offset1); 300 STRIP_NOPS (offset2); 301 302 if (offset1 == offset2) 303 return true; 304 305 if (TREE_CODE (offset1) != TREE_CODE (offset2) 306 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) 307 return false; 308 309 res = vect_equal_offsets (TREE_OPERAND (offset1, 0), 310 TREE_OPERAND (offset2, 0)); 311 312 if (!res || !BINARY_CLASS_P (offset1)) 313 return res; 314 315 res = vect_equal_offsets (TREE_OPERAND (offset1, 1), 316 TREE_OPERAND (offset2, 1)); 317 318 return res; 319 } 320 321 322 /* Function vect_check_interleaving. 323 324 Check if DRA and DRB are a part of interleaving. In case they are, insert 325 DRA and DRB in an interleaving chain. */ 326 327 static bool 328 vect_check_interleaving (struct data_reference *dra, 329 struct data_reference *drb) 330 { 331 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b; 332 333 /* Check that the data-refs have same first location (except init) and they 334 are both either store or load (not load and store). */ 335 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb) 336 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR 337 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR 338 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0) 339 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0))) 340 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)) 341 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) 342 || DR_IS_READ (dra) != DR_IS_READ (drb)) 343 return false; 344 345 /* Check: 346 1. data-refs are of the same type 347 2. their steps are equal 348 3. the step (if greater than zero) is greater than the difference between 349 data-refs' inits. */ 350 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)))); 351 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)))); 352 353 if (type_size_a != type_size_b 354 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)) 355 || !types_compatible_p (TREE_TYPE (DR_REF (dra)), 356 TREE_TYPE (DR_REF (drb)))) 357 return false; 358 359 init_a = TREE_INT_CST_LOW (DR_INIT (dra)); 360 init_b = TREE_INT_CST_LOW (DR_INIT (drb)); 361 step = TREE_INT_CST_LOW (DR_STEP (dra)); 362 363 if (init_a > init_b) 364 { 365 /* If init_a == init_b + the size of the type * k, we have an interleaving, 366 and DRB is accessed before DRA. */ 367 diff_mod_size = (init_a - init_b) % type_size_a; 368 369 if (step && (init_a - init_b) > step) 370 return false; 371 372 if (diff_mod_size == 0) 373 { 374 vect_update_interleaving_chain (drb, dra); 375 if (vect_print_dump_info (REPORT_DR_DETAILS)) 376 { 377 fprintf (vect_dump, "Detected interleaving "); 378 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 379 fprintf (vect_dump, " and "); 380 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 381 } 382 return true; 383 } 384 } 385 else 386 { 387 /* If init_b == init_a + the size of the type * k, we have an 388 interleaving, and DRA is accessed before DRB. 
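         E.g. (a purely illustrative access pattern; the array name and
         element size are arbitrary), with 4-byte elements:

           for (i = 0; i < N; i++)
             {
               x += a[4*i];      # DRA: DR_INIT = 0, DR_STEP = 16
               y += a[4*i + 2];  # DRB: DR_INIT = 8, DR_STEP = 16
             }

         Here init_b - init_a = 8 is a multiple of the type size (k = 2)
         and does not exceed the step, so both references are placed in
         the same interleaving chain, with DRA first.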
*/ 389 diff_mod_size = (init_b - init_a) % type_size_a; 390 391 if (step && (init_b - init_a) > step) 392 return false; 393 394 if (diff_mod_size == 0) 395 { 396 vect_update_interleaving_chain (dra, drb); 397 if (vect_print_dump_info (REPORT_DR_DETAILS)) 398 { 399 fprintf (vect_dump, "Detected interleaving "); 400 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 401 fprintf (vect_dump, " and "); 402 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 403 } 404 return true; 405 } 406 } 407 408 return false; 409 } 410 411 /* Check if data references pointed by DR_I and DR_J are same or 412 belong to same interleaving group. Return FALSE if drs are 413 different, otherwise return TRUE. */ 414 415 static bool 416 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j) 417 { 418 gimple stmt_i = DR_STMT (dr_i); 419 gimple stmt_j = DR_STMT (dr_j); 420 421 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0) 422 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i)) 423 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)) 424 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i)) 425 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))))) 426 return true; 427 else 428 return false; 429 } 430 431 /* If address ranges represented by DDR_I and DDR_J are equal, 432 return TRUE, otherwise return FALSE. */ 433 434 static bool 435 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j) 436 { 437 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j)) 438 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j))) 439 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j)) 440 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j)))) 441 return true; 442 else 443 return false; 444 } 445 446 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be 447 tested at run-time. Return TRUE if DDR was successfully inserted. 448 Return false if versioning is not supported. */ 449 450 static bool 451 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) 452 { 453 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 454 455 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0) 456 return false; 457 458 if (vect_print_dump_info (REPORT_DR_DETAILS)) 459 { 460 fprintf (vect_dump, "mark for run-time aliasing test between "); 461 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM); 462 fprintf (vect_dump, " and "); 463 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM); 464 } 465 466 if (optimize_loop_nest_for_size_p (loop)) 467 { 468 if (vect_print_dump_info (REPORT_DR_DETAILS)) 469 fprintf (vect_dump, "versioning not supported when optimizing for size."); 470 return false; 471 } 472 473 /* FORNOW: We don't support versioning with outer-loop vectorization. */ 474 if (loop->inner) 475 { 476 if (vect_print_dump_info (REPORT_DR_DETAILS)) 477 fprintf (vect_dump, "versioning not yet supported for outer-loops."); 478 return false; 479 } 480 481 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr); 482 return true; 483 } 484 485 486 /* Function vect_analyze_data_ref_dependence. 487 488 Return TRUE if there (might) exist a dependence between a memory-reference 489 DRA and a memory-reference DRB. When versioning for alias may check a 490 dependence at run-time, return FALSE. 
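
   For illustration only (assuming a vectorization factor of 4 and 4-byte
   elements; the loops below are not taken from any particular test):

     for (i = 4; i < N; i++)
       a[i] = a[i-4] + 1;   # distance 4: a multiple of VF, no blocking
                            # dependence for vectorization

     for (i = 1; i < N; i++)
       a[i] = a[i-1] + 1;   # distance 1 < VF: the dependence prevents
                            # vectorization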
*/

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = NULL;
  int vectorization_factor = 0;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                       "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block, an unknown dependence can still
         mean a strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is inapplicable; hence, in case of a known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.
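         The test that is eventually generated from this list is, roughly,
         a pairwise check that the accessed segments do not overlap (a
         sketch only, not the exact code the vectorizer emits):

           if (addr_a + segment_length_a <= addr_b
               || addr_b + segment_length_b <= addr_a)
             ... vectorized loop ...
           else
             ... scalar loop ...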
*/ 587 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo); 588 } 589 590 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr)); 591 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++) 592 { 593 int dist = dist_v[loop_depth]; 594 595 if (vect_print_dump_info (REPORT_DR_DETAILS)) 596 fprintf (vect_dump, "dependence distance = %d.", dist); 597 598 /* Same loop iteration. */ 599 if (dist % vectorization_factor == 0 && dra_size == drb_size) 600 { 601 /* Two references with distance zero have the same alignment. */ 602 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb); 603 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra); 604 if (vect_print_dump_info (REPORT_ALIGNMENT)) 605 fprintf (vect_dump, "accesses have the same alignment."); 606 if (vect_print_dump_info (REPORT_DR_DETAILS)) 607 { 608 fprintf (vect_dump, "dependence distance modulo vf == 0 between "); 609 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 610 fprintf (vect_dump, " and "); 611 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 612 } 613 614 /* For interleaving, mark that there is a read-write dependency if 615 necessary. We check before that one of the data-refs is store. */ 616 if (DR_IS_READ (dra)) 617 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true; 618 else 619 { 620 if (DR_IS_READ (drb)) 621 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true; 622 } 623 624 continue; 625 } 626 627 if (abs (dist) >= vectorization_factor 628 || (dist > 0 && DDR_REVERSED_P (ddr))) 629 { 630 /* Dependence distance does not create dependence, as far as 631 vectorization is concerned, in this case. If DDR_REVERSED_P the 632 order of the data-refs in DDR was reversed (to make distance 633 vector positive), and the actual distance is negative. */ 634 if (vect_print_dump_info (REPORT_DR_DETAILS)) 635 fprintf (vect_dump, "dependence distance >= VF or negative."); 636 continue; 637 } 638 639 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 640 { 641 fprintf (vect_dump, "not vectorized, possible dependence " 642 "between data-refs "); 643 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM); 644 fprintf (vect_dump, " and "); 645 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM); 646 } 647 648 return true; 649 } 650 651 return false; 652 } 653 654 /* Function vect_analyze_data_ref_dependences. 655 656 Examine all the data references in the loop, and make sure there do not 657 exist any data dependences between them. */ 658 659 bool 660 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, 661 bb_vec_info bb_vinfo) 662 { 663 unsigned int i; 664 VEC (ddr_p, heap) *ddrs = NULL; 665 struct data_dependence_relation *ddr; 666 667 if (vect_print_dump_info (REPORT_DETAILS)) 668 fprintf (vect_dump, "=== vect_analyze_dependences ==="); 669 670 if (loop_vinfo) 671 ddrs = LOOP_VINFO_DDRS (loop_vinfo); 672 else 673 ddrs = BB_VINFO_DDRS (bb_vinfo); 674 675 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++) 676 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo)) 677 return false; 678 679 return true; 680 } 681 682 683 /* Function vect_compute_data_ref_alignment 684 685 Compute the misalignment of the data reference DR. 686 687 Output: 688 1. If during the misalignment computation it is found that the data reference 689 cannot be vectorized then false is returned. 690 2. DR_MISALIGNMENT (DR) is defined. 691 692 FOR NOW: No analysis is actually performed. Misalignment is calculated 693 only for trivial cases. TODO. 
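
   In the trivial case the misalignment is simply the constant part of the
   data-ref's address (DR_INIT) modulo the vector alignment.  E.g.
   (illustrative numbers, assuming a 16-byte vector type and a base that is
   itself 16-byte aligned):

     DR_INIT = 0   ->  DR_MISALIGNMENT = 0
     DR_INIT = 4   ->  DR_MISALIGNMENT = 4
     DR_INIT = 20  ->  DR_MISALIGNMENT = 4   (20 mod 16)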
*/ 694 695 static bool 696 vect_compute_data_ref_alignment (struct data_reference *dr) 697 { 698 gimple stmt = DR_STMT (dr); 699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 701 struct loop *loop = NULL; 702 tree ref = DR_REF (dr); 703 tree vectype; 704 tree base, base_addr; 705 bool base_aligned; 706 tree misalign; 707 tree aligned_to, alignment; 708 709 if (vect_print_dump_info (REPORT_DETAILS)) 710 fprintf (vect_dump, "vect_compute_data_ref_alignment:"); 711 712 if (loop_vinfo) 713 loop = LOOP_VINFO_LOOP (loop_vinfo); 714 715 /* Initialize misalignment to unknown. */ 716 SET_DR_MISALIGNMENT (dr, -1); 717 718 misalign = DR_INIT (dr); 719 aligned_to = DR_ALIGNED_TO (dr); 720 base_addr = DR_BASE_ADDRESS (dr); 721 vectype = STMT_VINFO_VECTYPE (stmt_info); 722 723 /* In case the dataref is in an inner-loop of the loop that is being 724 vectorized (LOOP), we use the base and misalignment information 725 relative to the outer-loop (LOOP). This is ok only if the misalignment 726 stays the same throughout the execution of the inner-loop, which is why 727 we have to check that the stride of the dataref in the inner-loop evenly 728 divides by the vector size. */ 729 if (loop && nested_in_vect_loop_p (loop, stmt)) 730 { 731 tree step = DR_STEP (dr); 732 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); 733 734 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0) 735 { 736 if (vect_print_dump_info (REPORT_ALIGNMENT)) 737 fprintf (vect_dump, "inner step divides the vector-size."); 738 misalign = STMT_VINFO_DR_INIT (stmt_info); 739 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info); 740 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info); 741 } 742 else 743 { 744 if (vect_print_dump_info (REPORT_ALIGNMENT)) 745 fprintf (vect_dump, "inner step doesn't divide the vector-size."); 746 misalign = NULL_TREE; 747 } 748 } 749 750 base = build_fold_indirect_ref (base_addr); 751 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT); 752 753 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0) 754 || !misalign) 755 { 756 if (vect_print_dump_info (REPORT_ALIGNMENT)) 757 { 758 fprintf (vect_dump, "Unknown alignment for access: "); 759 print_generic_expr (vect_dump, base, TDF_SLIM); 760 } 761 return true; 762 } 763 764 if ((DECL_P (base) 765 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)), 766 alignment) >= 0) 767 || (TREE_CODE (base_addr) == SSA_NAME 768 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE ( 769 TREE_TYPE (base_addr)))), 770 alignment) >= 0)) 771 base_aligned = true; 772 else 773 base_aligned = false; 774 775 if (!base_aligned) 776 { 777 /* Do not change the alignment of global variables if 778 flag_section_anchors is enabled. */ 779 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)) 780 || (TREE_STATIC (base) && flag_section_anchors)) 781 { 782 if (vect_print_dump_info (REPORT_DETAILS)) 783 { 784 fprintf (vect_dump, "can't force alignment of ref: "); 785 print_generic_expr (vect_dump, ref, TDF_SLIM); 786 } 787 return true; 788 } 789 790 /* Force the alignment of the decl. 791 NOTE: This is the only change to the code we make during 792 the analysis phase, before deciding to vectorize the loop. */ 793 if (vect_print_dump_info (REPORT_DETAILS)) 794 fprintf (vect_dump, "force alignment"); 795 DECL_ALIGN (base) = TYPE_ALIGN (vectype); 796 DECL_USER_ALIGN (base) = 1; 797 } 798 799 /* At this point we assume that the base is aligned. 
*/ 800 gcc_assert (base_aligned 801 || (TREE_CODE (base) == VAR_DECL 802 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype))); 803 804 /* Modulo alignment. */ 805 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment); 806 807 if (!host_integerp (misalign, 1)) 808 { 809 /* Negative or overflowed misalignment value. */ 810 if (vect_print_dump_info (REPORT_DETAILS)) 811 fprintf (vect_dump, "unexpected misalign value"); 812 return false; 813 } 814 815 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign)); 816 817 if (vect_print_dump_info (REPORT_DETAILS)) 818 { 819 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr)); 820 print_generic_expr (vect_dump, ref, TDF_SLIM); 821 } 822 823 return true; 824 } 825 826 827 /* Function vect_compute_data_refs_alignment 828 829 Compute the misalignment of data references in the loop. 830 Return FALSE if a data reference is found that cannot be vectorized. */ 831 832 static bool 833 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo, 834 bb_vec_info bb_vinfo) 835 { 836 VEC (data_reference_p, heap) *datarefs; 837 struct data_reference *dr; 838 unsigned int i; 839 840 if (loop_vinfo) 841 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 842 else 843 datarefs = BB_VINFO_DATAREFS (bb_vinfo); 844 845 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 846 if (!vect_compute_data_ref_alignment (dr)) 847 return false; 848 849 return true; 850 } 851 852 853 /* Function vect_update_misalignment_for_peel 854 855 DR - the data reference whose misalignment is to be adjusted. 856 DR_PEEL - the data reference whose misalignment is being made 857 zero in the vector loop by the peel. 858 NPEEL - the number of iterations in the peel loop if the misalignment 859 of DR_PEEL is known at compile time. */ 860 861 static void 862 vect_update_misalignment_for_peel (struct data_reference *dr, 863 struct data_reference *dr_peel, int npeel) 864 { 865 unsigned int i; 866 VEC(dr_p,heap) *same_align_drs; 867 struct data_reference *current_dr; 868 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); 869 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel)))); 870 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr)); 871 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel)); 872 873 /* For interleaved data accesses the step in the loop must be multiplied by 874 the size of the interleaving group. */ 875 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 876 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info))); 877 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info)) 878 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info); 879 880 /* It can be assumed that the data refs with the same alignment as dr_peel 881 are aligned in the vector loop. 
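     For the remaining data refs, when both misalignments are known at
     compile time, the misalignment after peeling NPEEL iterations is
     (DR_MISALIGNMENT (dr) + npeel * dr_size) modulo the vector size.
     E.g. (illustrative numbers): with a 16-byte vector, 4-byte elements,
     DR_MISALIGNMENT (dr) = 8 and npeel = 2, the peel advances DR by 8
     bytes and the updated misalignment is (8 + 2*4) % 16 = 0.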
*/ 882 same_align_drs 883 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel))); 884 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++) 885 { 886 if (current_dr != dr) 887 continue; 888 gcc_assert (DR_MISALIGNMENT (dr) / dr_size == 889 DR_MISALIGNMENT (dr_peel) / dr_peel_size); 890 SET_DR_MISALIGNMENT (dr, 0); 891 return; 892 } 893 894 if (known_alignment_for_access_p (dr) 895 && known_alignment_for_access_p (dr_peel)) 896 { 897 int misal = DR_MISALIGNMENT (dr); 898 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 899 misal += npeel * dr_size; 900 misal %= GET_MODE_SIZE (TYPE_MODE (vectype)); 901 SET_DR_MISALIGNMENT (dr, misal); 902 return; 903 } 904 905 if (vect_print_dump_info (REPORT_DETAILS)) 906 fprintf (vect_dump, "Setting misalignment to -1."); 907 SET_DR_MISALIGNMENT (dr, -1); 908 } 909 910 911 /* Function vect_verify_datarefs_alignment 912 913 Return TRUE if all data references in the loop can be 914 handled with respect to alignment. */ 915 916 bool 917 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 918 { 919 VEC (data_reference_p, heap) *datarefs; 920 struct data_reference *dr; 921 enum dr_alignment_support supportable_dr_alignment; 922 unsigned int i; 923 924 if (loop_vinfo) 925 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 926 else 927 datarefs = BB_VINFO_DATAREFS (bb_vinfo); 928 929 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 930 { 931 gimple stmt = DR_STMT (dr); 932 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 933 934 /* For interleaving, only the alignment of the first access matters. */ 935 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 936 && DR_GROUP_FIRST_DR (stmt_info) != stmt) 937 continue; 938 939 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 940 if (!supportable_dr_alignment) 941 { 942 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 943 { 944 if (DR_IS_READ (dr)) 945 fprintf (vect_dump, 946 "not vectorized: unsupported unaligned load."); 947 else 948 fprintf (vect_dump, 949 "not vectorized: unsupported unaligned store."); 950 } 951 return false; 952 } 953 if (supportable_dr_alignment != dr_aligned 954 && vect_print_dump_info (REPORT_ALIGNMENT)) 955 fprintf (vect_dump, "Vectorizing an unaligned access."); 956 } 957 return true; 958 } 959 960 961 /* Function vector_alignment_reachable_p 962 963 Return true if vector alignment for DR is reachable by peeling 964 a few loop iterations. Return false otherwise. */ 965 966 static bool 967 vector_alignment_reachable_p (struct data_reference *dr) 968 { 969 gimple stmt = DR_STMT (dr); 970 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 971 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 972 973 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 974 { 975 /* For interleaved access we peel only if number of iterations in 976 the prolog loop ({VF - misalignment}), is a multiple of the 977 number of the interleaved accesses. */ 978 int elem_size, mis_in_elements; 979 int nelements = TYPE_VECTOR_SUBPARTS (vectype); 980 981 /* FORNOW: handle only known alignment. */ 982 if (!known_alignment_for_access_p (dr)) 983 return false; 984 985 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements; 986 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size; 987 988 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info)) 989 return false; 990 } 991 992 /* If misalignment is known at the compile time then allow peeling 993 only if natural alignment is reachable through peeling. 
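     E.g. (illustrative numbers): with 4-byte elements, a misalignment of
     8 bytes disappears after peeling two scalar iterations, but a
     misalignment of 6 bytes can never become 0 by peeling whole elements,
     because 6 % 4 != 0.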
*/
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
        int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses is as follows:
   choose one memory access ('p') whose alignment you want to force by
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).
1060 1061 Here are the general steps involved in alignment enhancements: 1062 1063 -- original loop, before alignment analysis: 1064 for (i=0; i<N; i++){ 1065 x = q[i]; # DR_MISALIGNMENT(q) = unknown 1066 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1067 } 1068 1069 -- After vect_compute_data_refs_alignment: 1070 for (i=0; i<N; i++){ 1071 x = q[i]; # DR_MISALIGNMENT(q) = 3 1072 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1073 } 1074 1075 -- Possibility 1: we do loop versioning: 1076 if (p is aligned) { 1077 for (i=0; i<N; i++){ # loop 1A 1078 x = q[i]; # DR_MISALIGNMENT(q) = 3 1079 p[i] = y; # DR_MISALIGNMENT(p) = 0 1080 } 1081 } 1082 else { 1083 for (i=0; i<N; i++){ # loop 1B 1084 x = q[i]; # DR_MISALIGNMENT(q) = 3 1085 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 1086 } 1087 } 1088 1089 -- Possibility 2: we do loop peeling: 1090 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 1091 x = q[i]; 1092 p[i] = y; 1093 } 1094 for (i = 3; i < N; i++){ # loop 2A 1095 x = q[i]; # DR_MISALIGNMENT(q) = 0 1096 p[i] = y; # DR_MISALIGNMENT(p) = unknown 1097 } 1098 1099 -- Possibility 3: combination of loop peeling and versioning: 1100 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized). 1101 x = q[i]; 1102 p[i] = y; 1103 } 1104 if (p is aligned) { 1105 for (i = 3; i<N; i++){ # loop 3A 1106 x = q[i]; # DR_MISALIGNMENT(q) = 0 1107 p[i] = y; # DR_MISALIGNMENT(p) = 0 1108 } 1109 } 1110 else { 1111 for (i = 3; i<N; i++){ # loop 3B 1112 x = q[i]; # DR_MISALIGNMENT(q) = 0 1113 p[i] = y; # DR_MISALIGNMENT(p) = unaligned 1114 } 1115 } 1116 1117 These loops are later passed to loop_transform to be vectorized. The 1118 vectorizer will use the alignment information to guide the transformation 1119 (whether to generate regular loads/stores, or with special handling for 1120 misalignment). */ 1121 1122 bool 1123 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) 1124 { 1125 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1126 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1127 enum dr_alignment_support supportable_dr_alignment; 1128 struct data_reference *dr0 = NULL; 1129 struct data_reference *dr; 1130 unsigned int i; 1131 bool do_peeling = false; 1132 bool do_versioning = false; 1133 bool stat; 1134 gimple stmt; 1135 stmt_vec_info stmt_info; 1136 int vect_versioning_for_alias_required; 1137 1138 if (vect_print_dump_info (REPORT_DETAILS)) 1139 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ==="); 1140 1141 /* While cost model enhancements are expected in the future, the high level 1142 view of the code at this time is as follows: 1143 1144 A) If there is a misaligned access then see if peeling to align 1145 this access can make all data references satisfy 1146 vect_supportable_dr_alignment. If so, update data structures 1147 as needed and return true. 1148 1149 B) If peeling wasn't possible and there is a data reference with an 1150 unknown misalignment that does not satisfy vect_supportable_dr_alignment 1151 then see if loop versioning checks can be used to make all data 1152 references satisfy vect_supportable_dr_alignment. If so, update 1153 data structures as needed and return true. 1154 1155 C) If neither peeling nor versioning were successful then return false if 1156 any data reference does not satisfy vect_supportable_dr_alignment. 1157 1158 D) Return true (all data references satisfy vect_supportable_dr_alignment). 1159 1160 Note, Possibility 3 above (which is peeling and versioning together) is not 1161 being done at this time. 
*/ 1162 1163 /* (1) Peeling to force alignment. */ 1164 1165 /* (1.1) Decide whether to perform peeling, and how many iterations to peel: 1166 Considerations: 1167 + How many accesses will become aligned due to the peeling 1168 - How many accesses will become unaligned due to the peeling, 1169 and the cost of misaligned accesses. 1170 - The cost of peeling (the extra runtime checks, the increase 1171 in code size). 1172 1173 The scheme we use FORNOW: peel to force the alignment of the first 1174 unsupported misaligned access in the loop. 1175 1176 TODO: Use a cost model. */ 1177 1178 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1179 { 1180 stmt = DR_STMT (dr); 1181 stmt_info = vinfo_for_stmt (stmt); 1182 1183 /* For interleaving, only the alignment of the first access 1184 matters. */ 1185 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 1186 && DR_GROUP_FIRST_DR (stmt_info) != stmt) 1187 continue; 1188 1189 if (!DR_IS_READ (dr) && !aligned_access_p (dr)) 1190 { 1191 do_peeling = vector_alignment_reachable_p (dr); 1192 if (do_peeling) 1193 dr0 = dr; 1194 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS)) 1195 fprintf (vect_dump, "vector alignment may not be reachable"); 1196 break; 1197 } 1198 } 1199 1200 vect_versioning_for_alias_required 1201 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); 1202 1203 /* Temporarily, if versioning for alias is required, we disable peeling 1204 until we support peeling and versioning. Often peeling for alignment 1205 will require peeling for loop-bound, which in turn requires that we 1206 know how to adjust the loop ivs after the loop. */ 1207 if (vect_versioning_for_alias_required 1208 || !vect_can_advance_ivs_p (loop_vinfo) 1209 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) 1210 do_peeling = false; 1211 1212 if (do_peeling) 1213 { 1214 int mis; 1215 int npeel = 0; 1216 gimple stmt = DR_STMT (dr0); 1217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1218 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 1219 int nelements = TYPE_VECTOR_SUBPARTS (vectype); 1220 1221 if (known_alignment_for_access_p (dr0)) 1222 { 1223 /* Since it's known at compile time, compute the number of iterations 1224 in the peeled loop (the peeling factor) for use in updating 1225 DR_MISALIGNMENT values. The peeling factor is the vectorization 1226 factor minus the misalignment as an element count. */ 1227 mis = DR_MISALIGNMENT (dr0); 1228 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); 1229 npeel = nelements - mis; 1230 1231 /* For interleaved data access every iteration accesses all the 1232 members of the group, therefore we divide the number of iterations 1233 by the group size. */ 1234 stmt_info = vinfo_for_stmt (DR_STMT (dr0)); 1235 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) 1236 npeel /= DR_GROUP_SIZE (stmt_info); 1237 1238 if (vect_print_dump_info (REPORT_DETAILS)) 1239 fprintf (vect_dump, "Try peeling by %d", npeel); 1240 } 1241 1242 /* Ensure that all data refs can be vectorized after the peel. */ 1243 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1244 { 1245 int save_misalignment; 1246 1247 if (dr == dr0) 1248 continue; 1249 1250 stmt = DR_STMT (dr); 1251 stmt_info = vinfo_for_stmt (stmt); 1252 /* For interleaving, only the alignment of the first access 1253 matters. 
*/ 1254 if (STMT_VINFO_STRIDED_ACCESS (stmt_info) 1255 && DR_GROUP_FIRST_DR (stmt_info) != stmt) 1256 continue; 1257 1258 save_misalignment = DR_MISALIGNMENT (dr); 1259 vect_update_misalignment_for_peel (dr, dr0, npeel); 1260 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 1261 SET_DR_MISALIGNMENT (dr, save_misalignment); 1262 1263 if (!supportable_dr_alignment) 1264 { 1265 do_peeling = false; 1266 break; 1267 } 1268 } 1269 1270 if (do_peeling) 1271 { 1272 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i. 1273 If the misalignment of DR_i is identical to that of dr0 then set 1274 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and 1275 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i) 1276 by the peeling factor times the element size of DR_i (MOD the 1277 vectorization factor times the size). Otherwise, the 1278 misalignment of DR_i must be set to unknown. */ 1279 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1280 if (dr != dr0) 1281 vect_update_misalignment_for_peel (dr, dr0, npeel); 1282 1283 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0; 1284 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0); 1285 SET_DR_MISALIGNMENT (dr0, 0); 1286 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1287 fprintf (vect_dump, "Alignment of access forced using peeling."); 1288 1289 if (vect_print_dump_info (REPORT_DETAILS)) 1290 fprintf (vect_dump, "Peeling for alignment will be applied."); 1291 1292 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); 1293 gcc_assert (stat); 1294 return stat; 1295 } 1296 } 1297 1298 1299 /* (2) Versioning to force alignment. */ 1300 1301 /* Try versioning if: 1302 1) flag_tree_vect_loop_version is TRUE 1303 2) optimize loop for speed 1304 3) there is at least one unsupported misaligned data ref with an unknown 1305 misalignment, and 1306 4) all misaligned data refs with a known misalignment are supported, and 1307 5) the number of runtime alignment checks is within reason. */ 1308 1309 do_versioning = 1310 flag_tree_vect_loop_version 1311 && optimize_loop_nest_for_speed_p (loop) 1312 && (!loop->inner); /* FORNOW */ 1313 1314 if (do_versioning) 1315 { 1316 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1317 { 1318 stmt = DR_STMT (dr); 1319 stmt_info = vinfo_for_stmt (stmt); 1320 1321 /* For interleaving, only the alignment of the first access 1322 matters. */ 1323 if (aligned_access_p (dr) 1324 || (STMT_VINFO_STRIDED_ACCESS (stmt_info) 1325 && DR_GROUP_FIRST_DR (stmt_info) != stmt)) 1326 continue; 1327 1328 supportable_dr_alignment = vect_supportable_dr_alignment (dr); 1329 1330 if (!supportable_dr_alignment) 1331 { 1332 gimple stmt; 1333 int mask; 1334 tree vectype; 1335 1336 if (known_alignment_for_access_p (dr) 1337 || VEC_length (gimple, 1338 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) 1339 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS)) 1340 { 1341 do_versioning = false; 1342 break; 1343 } 1344 1345 stmt = DR_STMT (dr); 1346 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 1347 gcc_assert (vectype); 1348 1349 /* The rightmost bits of an aligned address must be zeros. 1350 Construct the mask needed for this test. For example, 1351 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the 1352 mask must be 15 = 0xf. */ 1353 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1; 1354 1355 /* FORNOW: use the same mask to test all potentially unaligned 1356 references in the loop. 
The vectorizer currently supports 1357 a single vector size, see the reference to 1358 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the 1359 vectorization factor is computed. */ 1360 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo) 1361 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask); 1362 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask; 1363 VEC_safe_push (gimple, heap, 1364 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 1365 DR_STMT (dr)); 1366 } 1367 } 1368 1369 /* Versioning requires at least one misaligned data reference. */ 1370 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)) 1371 do_versioning = false; 1372 else if (!do_versioning) 1373 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0); 1374 } 1375 1376 if (do_versioning) 1377 { 1378 VEC(gimple,heap) *may_misalign_stmts 1379 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); 1380 gimple stmt; 1381 1382 /* It can now be assumed that the data references in the statements 1383 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version 1384 of the loop being vectorized. */ 1385 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++) 1386 { 1387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1388 dr = STMT_VINFO_DATA_REF (stmt_info); 1389 SET_DR_MISALIGNMENT (dr, 0); 1390 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1391 fprintf (vect_dump, "Alignment of access forced using versioning."); 1392 } 1393 1394 if (vect_print_dump_info (REPORT_DETAILS)) 1395 fprintf (vect_dump, "Versioning for alignment will be applied."); 1396 1397 /* Peeling and versioning can't be done together at this time. */ 1398 gcc_assert (! (do_peeling && do_versioning)); 1399 1400 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); 1401 gcc_assert (stat); 1402 return stat; 1403 } 1404 1405 /* This point is reached if neither peeling nor versioning is being done. */ 1406 gcc_assert (! (do_peeling || do_versioning)); 1407 1408 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL); 1409 return stat; 1410 } 1411 1412 1413 /* Function vect_analyze_data_refs_alignment 1414 1415 Analyze the alignment of the data-references in the loop. 1416 Return FALSE if a data reference is found that cannot be vectorized. */ 1417 1418 bool 1419 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo, 1420 bb_vec_info bb_vinfo) 1421 { 1422 if (vect_print_dump_info (REPORT_DETAILS)) 1423 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ==="); 1424 1425 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo)) 1426 { 1427 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 1428 fprintf (vect_dump, 1429 "not vectorized: can't calculate alignment for data ref."); 1430 return false; 1431 } 1432 1433 return true; 1434 } 1435 1436 1437 /* Analyze groups of strided accesses: check that DR belongs to a group of 1438 strided accesses of legal size, step, etc. Detect gaps, single element 1439 interleaving, and other special cases. Set strided access info. 1440 Collect groups of strided stores for further use in SLP analysis. 
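
   A typical group that passes these checks looks like (an illustration
   only; the array names and the group size of 2 are arbitrary):

     for (i = 0; i < N; i++)
       {
         out[2*i]     = a[i];   # first DR of the chain
         out[2*i + 1] = b[i];   # second DR of the chain
       }

   Each store has a step of twice the element size, so STRIDE (the step
   counted in elements) is 2, the two data-refs cover the whole step with
   no gaps, and the group is recorded as a strided store group of size 2.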
*/

static bool
vect_analyze_group_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  HOST_WIDE_INT stride, last_accessed_element = 1;
  bool slp_impossible = false;

  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of
     the interleaving group (including gaps).  */
  stride = dr_step / type_size;

  /* A non-consecutive access is possible only if it is part of an
     interleaving group.  */
  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    {
      /* Check whether this DR is part of an interleaving group, and is the
         single element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the
         type size.  The size of the group must be a power of 2.  */
      if (DR_IS_READ (dr)
          && (dr_step % type_size) == 0
          && stride > 0
          && exact_log2 (stride) != -1)
        {
          DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
          DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected single element interleaving ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
              fprintf (vect_dump, " step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
            }

          if (loop_vinfo)
            {
              LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;

              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Data access with gaps requires scalar "
                         "epilogue loop");
            }

          return true;
        }
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }

  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree next_step;
      tree prev_init = DR_INIT (data_ref);
      gimple prev = stmt;
      HOST_WIDE_INT diff, count_in_bytes, gaps = 0;

      while (next)
        {
          /* Skip same data-refs.  In case two or more stmts share a data-ref
             (supported only for loads), we vectorize only the first stmt, and
             the rest get their vectorized loads from the first one.  */
          if (!tree_int_cst_compare (DR_INIT (data_ref),
                                     DR_INIT (STMT_VINFO_DATA_REF (
                                                  vinfo_for_stmt (next)))))
            {
              if (!DR_IS_READ (data_ref))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Two store stmts share the same dr.");
                  return false;
                }

              /* Check that there are no load-store dependencies for these
                 loads, to prevent a load-store-load sequence to the same
                 location.  */
              if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
                  || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "READ_WRITE dependence in interleaving.");
                  return false;
                }

              /* For loads, reuse the same data-ref load.
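                 E.g. (illustration only), in

                   for (i = 0; i < N; i++)
                     {
                       x += a[2*i];
                       y += a[2*i];      # same data-ref as the load above
                       z += a[2*i + 1];
                     }

                 the second load of a[2*i] is not vectorized separately; it
                 reuses the vector load created for the first one.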
*/ 1538 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev; 1539 1540 prev = next; 1541 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); 1542 continue; 1543 } 1544 1545 prev = next; 1546 1547 /* Check that all the accesses have the same STEP. */ 1548 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next))); 1549 if (tree_int_cst_compare (step, next_step)) 1550 { 1551 if (vect_print_dump_info (REPORT_DETAILS)) 1552 fprintf (vect_dump, "not consecutive access in interleaving"); 1553 return false; 1554 } 1555 1556 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next)); 1557 /* Check that the distance between two accesses is equal to the type 1558 size. Otherwise, we have gaps. */ 1559 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref)) 1560 - TREE_INT_CST_LOW (prev_init)) / type_size; 1561 if (diff != 1) 1562 { 1563 /* FORNOW: SLP of accesses with gaps is not supported. */ 1564 slp_impossible = true; 1565 if (!DR_IS_READ (data_ref)) 1566 { 1567 if (vect_print_dump_info (REPORT_DETAILS)) 1568 fprintf (vect_dump, "interleaved store with gaps"); 1569 return false; 1570 } 1571 1572 gaps += diff - 1; 1573 } 1574 1575 last_accessed_element += diff; 1576 1577 /* Store the gap from the previous member of the group. If there is no 1578 gap in the access, DR_GROUP_GAP is always 1. */ 1579 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff; 1580 1581 prev_init = DR_INIT (data_ref); 1582 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next)); 1583 /* Count the number of data-refs in the chain. */ 1584 count++; 1585 } 1586 1587 /* COUNT is the number of accesses found, we multiply it by the size of 1588 the type to get COUNT_IN_BYTES. */ 1589 count_in_bytes = type_size * count; 1590 1591 /* Check that the size of the interleaving (including gaps) is not 1592 greater than STEP. */ 1593 if (dr_step && dr_step < count_in_bytes + gaps * type_size) 1594 { 1595 if (vect_print_dump_info (REPORT_DETAILS)) 1596 { 1597 fprintf (vect_dump, "interleaving size is greater than step for "); 1598 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM); 1599 } 1600 return false; 1601 } 1602 1603 /* Check that the size of the interleaving is equal to STEP for stores, 1604 i.e., that there are no gaps. */ 1605 if (dr_step && dr_step != count_in_bytes) 1606 { 1607 if (DR_IS_READ (dr)) 1608 { 1609 slp_impossible = true; 1610 /* There is a gap after the last load in the group. This gap is a 1611 difference between the stride and the number of elements. When 1612 there is no gap, this difference should be 0. */ 1613 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count; 1614 } 1615 else 1616 { 1617 if (vect_print_dump_info (REPORT_DETAILS)) 1618 fprintf (vect_dump, "interleaved store with gaps"); 1619 return false; 1620 } 1621 } 1622 1623 /* Check that STEP is a multiple of type size. */ 1624 if (dr_step && (dr_step % type_size) != 0) 1625 { 1626 if (vect_print_dump_info (REPORT_DETAILS)) 1627 { 1628 fprintf (vect_dump, "step is not a multiple of type size: step "); 1629 print_generic_expr (vect_dump, step, TDF_SLIM); 1630 fprintf (vect_dump, " size "); 1631 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type), 1632 TDF_SLIM); 1633 } 1634 return false; 1635 } 1636 1637 /* FORNOW: we handle only interleaving that is a power of 2. 1638 We don't fail here if it may be still possible to vectorize the 1639 group using SLP. If not, the size of the group will be checked in 1640 vect_analyze_operations, and the vectorization will fail. 
*/ 1641 if (exact_log2 (stride) == -1) 1642 { 1643 if (vect_print_dump_info (REPORT_DETAILS)) 1644 fprintf (vect_dump, "interleaving is not a power of 2"); 1645 1646 if (slp_impossible) 1647 return false; 1648 } 1649 1650 if (stride == 0) 1651 stride = count; 1652 1653 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride; 1654 if (vect_print_dump_info (REPORT_DETAILS)) 1655 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride); 1656 1657 /* SLP: create an SLP data structure for every interleaving group of 1658 stores for further analysis in vect_analyse_slp. */ 1659 if (!DR_IS_READ (dr) && !slp_impossible) 1660 { 1661 if (loop_vinfo) 1662 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), 1663 stmt); 1664 if (bb_vinfo) 1665 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo), 1666 stmt); 1667 } 1668 1669 /* There is a gap in the end of the group. */ 1670 if (stride - last_accessed_element > 0 && loop_vinfo) 1671 { 1672 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true; 1673 if (vect_print_dump_info (REPORT_DETAILS)) 1674 fprintf (vect_dump, "Data access with gaps requires scalar " 1675 "epilogue loop"); 1676 } 1677 } 1678 1679 return true; 1680 } 1681 1682 1683 /* Analyze the access pattern of the data-reference DR. 1684 In case of non-consecutive accesses call vect_analyze_group_access() to 1685 analyze groups of strided accesses. */ 1686 1687 static bool 1688 vect_analyze_data_ref_access (struct data_reference *dr) 1689 { 1690 tree step = DR_STEP (dr); 1691 tree scalar_type = TREE_TYPE (DR_REF (dr)); 1692 gimple stmt = DR_STMT (dr); 1693 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1694 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1695 struct loop *loop = NULL; 1696 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step); 1697 1698 if (loop_vinfo) 1699 loop = LOOP_VINFO_LOOP (loop_vinfo); 1700 1701 if (loop_vinfo && !step) 1702 { 1703 if (vect_print_dump_info (REPORT_DETAILS)) 1704 fprintf (vect_dump, "bad data-ref access in loop"); 1705 return false; 1706 } 1707 1708 /* Don't allow invariant accesses in loops. */ 1709 if (loop_vinfo && dr_step == 0) 1710 return false; 1711 1712 if (loop && nested_in_vect_loop_p (loop, stmt)) 1713 { 1714 /* Interleaved accesses are not yet supported within outer-loop 1715 vectorization for references in the inner-loop. */ 1716 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL; 1717 1718 /* For the rest of the analysis we use the outer-loop step. */ 1719 step = STMT_VINFO_DR_STEP (stmt_info); 1720 dr_step = TREE_INT_CST_LOW (step); 1721 1722 if (dr_step == 0) 1723 { 1724 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1725 fprintf (vect_dump, "zero step in outer loop."); 1726 if (DR_IS_READ (dr)) 1727 return true; 1728 else 1729 return false; 1730 } 1731 } 1732 1733 /* Consecutive? */ 1734 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) 1735 { 1736 /* Mark that it is not interleaving. */ 1737 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL; 1738 return true; 1739 } 1740 1741 if (loop && nested_in_vect_loop_p (loop, stmt)) 1742 { 1743 if (vect_print_dump_info (REPORT_ALIGNMENT)) 1744 fprintf (vect_dump, "strided access in outer loop."); 1745 return false; 1746 } 1747 1748 /* Not consecutive access - check if it's a part of interleaving group. */ 1749 return vect_analyze_group_access (dr); 1750 } 1751 1752 1753 /* Function vect_analyze_data_ref_accesses. 1754 1755 Analyze the access pattern of all the data references in the loop. 
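
   E.g., the accesses to a and b in

     for (i = 0; i < N; i++)
       a[i] = b[i] + 1;

   are both "step 1" (consecutive) accesses: the step of each data-ref
   equals the size of the accessed element.  (This is an illustrative
   loop, not one taken from the testsuite.)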
1756 1757 FORNOW: the only access pattern that is considered vectorizable is a 1758 simple step 1 (consecutive) access. 1759 1760 FORNOW: handle only arrays and pointer accesses. */ 1761 1762 bool 1763 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo) 1764 { 1765 unsigned int i; 1766 VEC (data_reference_p, heap) *datarefs; 1767 struct data_reference *dr; 1768 1769 if (vect_print_dump_info (REPORT_DETAILS)) 1770 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ==="); 1771 1772 if (loop_vinfo) 1773 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); 1774 else 1775 datarefs = BB_VINFO_DATAREFS (bb_vinfo); 1776 1777 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 1778 if (!vect_analyze_data_ref_access (dr)) 1779 { 1780 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 1781 fprintf (vect_dump, "not vectorized: complicated access pattern."); 1782 return false; 1783 } 1784 1785 return true; 1786 } 1787 1788 /* Function vect_prune_runtime_alias_test_list. 1789 1790 Prune a list of ddrs to be tested at run-time by versioning for alias. 1791 Return FALSE if resulting list of ddrs is longer then allowed by 1792 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */ 1793 1794 bool 1795 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) 1796 { 1797 VEC (ddr_p, heap) * ddrs = 1798 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo); 1799 unsigned i, j; 1800 1801 if (vect_print_dump_info (REPORT_DETAILS)) 1802 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ==="); 1803 1804 for (i = 0; i < VEC_length (ddr_p, ddrs); ) 1805 { 1806 bool found; 1807 ddr_p ddr_i; 1808 1809 ddr_i = VEC_index (ddr_p, ddrs, i); 1810 found = false; 1811 1812 for (j = 0; j < i; j++) 1813 { 1814 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j); 1815 1816 if (vect_vfa_range_equal (ddr_i, ddr_j)) 1817 { 1818 if (vect_print_dump_info (REPORT_DR_DETAILS)) 1819 { 1820 fprintf (vect_dump, "found equal ranges "); 1821 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM); 1822 fprintf (vect_dump, ", "); 1823 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM); 1824 fprintf (vect_dump, " and "); 1825 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM); 1826 fprintf (vect_dump, ", "); 1827 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM); 1828 } 1829 found = true; 1830 break; 1831 } 1832 } 1833 1834 if (found) 1835 { 1836 VEC_ordered_remove (ddr_p, ddrs, i); 1837 continue; 1838 } 1839 i++; 1840 } 1841 1842 if (VEC_length (ddr_p, ddrs) > 1843 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS)) 1844 { 1845 if (vect_print_dump_info (REPORT_DR_DETAILS)) 1846 { 1847 fprintf (vect_dump, 1848 "disable versioning for alias - max number of generated " 1849 "checks exceeded."); 1850 } 1851 1852 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0); 1853 1854 return false; 1855 } 1856 1857 return true; 1858 } 1859 1860 1861 /* Function vect_analyze_data_refs. 1862 1863 Find all the data references in the loop or basic block. 1864 1865 The general structure of the analysis of data refs in the vectorizer is as 1866 follows: 1867 1- vect_analyze_data_refs(loop/bb): call 1868 compute_data_dependences_for_loop/bb to find and analyze all data-refs 1869 in the loop/bb and their dependences. 1870 2- vect_analyze_dependences(): apply dependence testing using ddrs. 1871 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok. 1872 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. 
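   As a purely illustrative example (hypothetical, not from a testcase): for
   a reference a[i] in a loop with induction variable i and 4-byte elements,
   step 1 above yields roughly

        DR_BASE_ADDRESS = &a,  DR_OFFSET = 0,  DR_INIT = 0,  DR_STEP = 4

   and these are exactly the fields that the code below requires to be
   available before a data-ref is accepted.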
1873
1874 */
1875
1876 bool
1877 vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1878 {
1879 struct loop *loop = NULL;
1880 basic_block bb = NULL;
1881 unsigned int i;
1882 VEC (data_reference_p, heap) *datarefs;
1883 struct data_reference *dr;
1884 tree scalar_type;
1885 bool res;
1886
1887 if (vect_print_dump_info (REPORT_DETAILS))
1888 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1889
1890 if (loop_vinfo)
1891 {
1892 loop = LOOP_VINFO_LOOP (loop_vinfo);
1893 res = compute_data_dependences_for_loop
1894 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
1895 &LOOP_VINFO_DDRS (loop_vinfo));
1896
1897 if (!res)
1898 {
1899 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1900 fprintf (vect_dump, "not vectorized: loop contains function calls"
1901 " or data references that cannot be analyzed");
1902 return false;
1903 }
1904
1905 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1906 }
1907 else
1908 {
1909 bb = BB_VINFO_BB (bb_vinfo);
1910 res = compute_data_dependences_for_bb (bb, true,
1911 &BB_VINFO_DATAREFS (bb_vinfo),
1912 &BB_VINFO_DDRS (bb_vinfo));
1913 if (!res)
1914 {
1915 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1916 fprintf (vect_dump, "not vectorized: basic block contains function"
1917 " calls or data references that cannot be analyzed");
1918 return false;
1919 }
1920
1921 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1922 }
1923
1924 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1925 from stmt_vec_info struct to DR and vectype. */
1926
1927 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1928 {
1929 gimple stmt;
1930 stmt_vec_info stmt_info;
1931 tree base, offset, init;
1932
1933 if (!dr || !DR_REF (dr))
1934 {
1935 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1936 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1937 return false;
1938 }
1939
1940 stmt = DR_STMT (dr);
1941 stmt_info = vinfo_for_stmt (stmt);
1942
1943 /* Check that analysis of the data-ref succeeded. */
1944 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1945 || !DR_STEP (dr))
1946 {
1947 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1948 {
1949 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1950 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1951 }
1952 return false;
1953 }
1954
1955 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1956 {
1957 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1958 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1959 "constant");
1960 return false;
1961 }
1962
1963 base = unshare_expr (DR_BASE_ADDRESS (dr));
1964 offset = unshare_expr (DR_OFFSET (dr));
1965 init = unshare_expr (DR_INIT (dr));
1966
1967 /* Update DR field in stmt_vec_info struct. */
1968
1969 /* If the dataref is in an inner-loop of the loop that is considered for
1970 vectorization, we also want to analyze the access relative to
1971 the outer-loop (DR contains information only relative to the
1972 inner-most enclosing loop). We do that by building a reference to the
1973 first location accessed by the inner-loop, and analyze it relative to
1974 the outer-loop.
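        For illustration only (hypothetical access, 'in' points to short, as
        in the example used further below): for in[i+j] in a j-loop nested in
        the i-loop being vectorized, DR describes the access relative to the
        j-loop (step 2B), while the code below analyzes the evolution of
        &in[i] - the first location accessed by the j-loop - relative to the
        i-loop and records the outer base/offset/init/step in the
        STMT_VINFO_DR_* fields.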
*/ 1975 if (loop && nested_in_vect_loop_p (loop, stmt)) 1976 { 1977 tree outer_step, outer_base, outer_init; 1978 HOST_WIDE_INT pbitsize, pbitpos; 1979 tree poffset; 1980 enum machine_mode pmode; 1981 int punsignedp, pvolatilep; 1982 affine_iv base_iv, offset_iv; 1983 tree dinit; 1984 1985 /* Build a reference to the first location accessed by the 1986 inner-loop: *(BASE+INIT). (The first location is actually 1987 BASE+INIT+OFFSET, but we add OFFSET separately later). */ 1988 tree inner_base = build_fold_indirect_ref 1989 (fold_build2 (POINTER_PLUS_EXPR, 1990 TREE_TYPE (base), base, 1991 fold_convert (sizetype, init))); 1992 1993 if (vect_print_dump_info (REPORT_DETAILS)) 1994 { 1995 fprintf (vect_dump, "analyze in outer-loop: "); 1996 print_generic_expr (vect_dump, inner_base, TDF_SLIM); 1997 } 1998 1999 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos, 2000 &poffset, &pmode, &punsignedp, &pvolatilep, false); 2001 gcc_assert (outer_base != NULL_TREE); 2002 2003 if (pbitpos % BITS_PER_UNIT != 0) 2004 { 2005 if (vect_print_dump_info (REPORT_DETAILS)) 2006 fprintf (vect_dump, "failed: bit offset alignment.\n"); 2007 return false; 2008 } 2009 2010 outer_base = build_fold_addr_expr (outer_base); 2011 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base, 2012 &base_iv, false)) 2013 { 2014 if (vect_print_dump_info (REPORT_DETAILS)) 2015 fprintf (vect_dump, "failed: evolution of base is not affine.\n"); 2016 return false; 2017 } 2018 2019 if (offset) 2020 { 2021 if (poffset) 2022 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, 2023 poffset); 2024 else 2025 poffset = offset; 2026 } 2027 2028 if (!poffset) 2029 { 2030 offset_iv.base = ssize_int (0); 2031 offset_iv.step = ssize_int (0); 2032 } 2033 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset, 2034 &offset_iv, false)) 2035 { 2036 if (vect_print_dump_info (REPORT_DETAILS)) 2037 fprintf (vect_dump, "evolution of offset is not affine.\n"); 2038 return false; 2039 } 2040 2041 outer_init = ssize_int (pbitpos / BITS_PER_UNIT); 2042 split_constant_offset (base_iv.base, &base_iv.base, &dinit); 2043 outer_init = size_binop (PLUS_EXPR, outer_init, dinit); 2044 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); 2045 outer_init = size_binop (PLUS_EXPR, outer_init, dinit); 2046 2047 outer_step = size_binop (PLUS_EXPR, 2048 fold_convert (ssizetype, base_iv.step), 2049 fold_convert (ssizetype, offset_iv.step)); 2050 2051 STMT_VINFO_DR_STEP (stmt_info) = outer_step; 2052 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */ 2053 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base; 2054 STMT_VINFO_DR_INIT (stmt_info) = outer_init; 2055 STMT_VINFO_DR_OFFSET (stmt_info) = 2056 fold_convert (ssizetype, offset_iv.base); 2057 STMT_VINFO_DR_ALIGNED_TO (stmt_info) = 2058 size_int (highest_pow2_factor (offset_iv.base)); 2059 2060 if (vect_print_dump_info (REPORT_DETAILS)) 2061 { 2062 fprintf (vect_dump, "\touter base_address: "); 2063 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM); 2064 fprintf (vect_dump, "\n\touter offset from base address: "); 2065 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM); 2066 fprintf (vect_dump, "\n\touter constant offset from base address: "); 2067 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM); 2068 fprintf (vect_dump, "\n\touter step: "); 2069 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM); 2070 fprintf (vect_dump, "\n\touter aligned to: "); 2071 
print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM); 2072 } 2073 } 2074 2075 if (STMT_VINFO_DATA_REF (stmt_info)) 2076 { 2077 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2078 { 2079 fprintf (vect_dump, 2080 "not vectorized: more than one data ref in stmt: "); 2081 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2082 } 2083 return false; 2084 } 2085 2086 STMT_VINFO_DATA_REF (stmt_info) = dr; 2087 2088 /* Set vectype for STMT. */ 2089 scalar_type = TREE_TYPE (DR_REF (dr)); 2090 STMT_VINFO_VECTYPE (stmt_info) = 2091 get_vectype_for_scalar_type (scalar_type); 2092 if (!STMT_VINFO_VECTYPE (stmt_info)) 2093 { 2094 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) 2095 { 2096 fprintf (vect_dump, 2097 "not vectorized: no vectype for stmt: "); 2098 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM); 2099 fprintf (vect_dump, " scalar_type: "); 2100 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS); 2101 } 2102 return false; 2103 } 2104 } 2105 2106 return true; 2107 } 2108 2109 2110 /* Function vect_get_new_vect_var. 2111 2112 Returns a name for a new variable. The current naming scheme appends the 2113 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to 2114 the name of vectorizer generated variables, and appends that to NAME if 2115 provided. */ 2116 2117 tree 2118 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name) 2119 { 2120 const char *prefix; 2121 tree new_vect_var; 2122 2123 switch (var_kind) 2124 { 2125 case vect_simple_var: 2126 prefix = "vect_"; 2127 break; 2128 case vect_scalar_var: 2129 prefix = "stmp_"; 2130 break; 2131 case vect_pointer_var: 2132 prefix = "vect_p"; 2133 break; 2134 default: 2135 gcc_unreachable (); 2136 } 2137 2138 if (name) 2139 { 2140 char* tmp = concat (prefix, name, NULL); 2141 new_vect_var = create_tmp_var (type, tmp); 2142 free (tmp); 2143 } 2144 else 2145 new_vect_var = create_tmp_var (type, prefix); 2146 2147 /* Mark vector typed variable as a gimple register variable. */ 2148 if (TREE_CODE (type) == VECTOR_TYPE) 2149 DECL_GIMPLE_REG_P (new_vect_var) = true; 2150 2151 return new_vect_var; 2152 } 2153 2154 2155 /* Function vect_create_addr_base_for_vector_ref. 2156 2157 Create an expression that computes the address of the first memory location 2158 that will be accessed for a data reference. 2159 2160 Input: 2161 STMT: The statement containing the data reference. 2162 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list. 2163 OFFSET: Optional. If supplied, it is be added to the initial address. 2164 LOOP: Specify relative to which loop-nest should the address be computed. 2165 For example, when the dataref is in an inner-loop nested in an 2166 outer-loop that is now being vectorized, LOOP can be either the 2167 outer-loop, or the inner-loop. The first memory location accessed 2168 by the following dataref ('in' points to short): 2169 2170 for (i=0; i<N; i++) 2171 for (j=0; j<M; j++) 2172 s += in[i+j] 2173 2174 is as follows: 2175 if LOOP=i_loop: &in (relative to i_loop) 2176 if LOOP=j_loop: &in+i*2B (relative to j_loop) 2177 2178 Output: 2179 1. Return an SSA_NAME whose value is the address of the memory location of 2180 the first vector of the data reference. 2181 2. If new_stmt_list is not NULL_TREE after return then the caller must insert 2182 these statement(s) which define the returned SSA_NAME. 2183 2184 FORNOW: We are only handling array accesses with step 1. 
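   A sketch of the code this function emits in the simple (loop) case, for
   illustration only (the "batmp", "base_off" and "vect_p" names follow the
   prefixes used in the code below; the SSA version numbers are made up):

       batmp.1    = <DR_BASE_ADDRESS>;
       base_off.2 = (sizetype) <DR_OFFSET> + (sizetype) <DR_INIT>
                    [ + OFFSET * step ];
       vect_p.3   = (vectype *) (batmp.1 + base_off.2);

   vect_p.3 is the SSA_NAME that is returned, and its defining statements
   are appended to NEW_STMT_LIST.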
*/ 2185 2186 tree 2187 vect_create_addr_base_for_vector_ref (gimple stmt, 2188 gimple_seq *new_stmt_list, 2189 tree offset, 2190 struct loop *loop) 2191 { 2192 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2193 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 2194 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr)); 2195 tree base_name; 2196 tree data_ref_base_var; 2197 tree vec_stmt; 2198 tree addr_base, addr_expr; 2199 tree dest; 2200 gimple_seq seq = NULL; 2201 tree base_offset = unshare_expr (DR_OFFSET (dr)); 2202 tree init = unshare_expr (DR_INIT (dr)); 2203 tree vect_ptr_type; 2204 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); 2205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2206 2207 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father) 2208 { 2209 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo); 2210 2211 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt)); 2212 2213 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info)); 2214 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info)); 2215 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info)); 2216 } 2217 2218 if (loop_vinfo) 2219 base_name = build_fold_indirect_ref (data_ref_base); 2220 else 2221 { 2222 base_offset = ssize_int (0); 2223 init = ssize_int (0); 2224 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr))); 2225 } 2226 2227 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp"); 2228 add_referenced_var (data_ref_base_var); 2229 data_ref_base = force_gimple_operand (data_ref_base, &seq, true, 2230 data_ref_base_var); 2231 gimple_seq_add_seq (new_stmt_list, seq); 2232 2233 /* Create base_offset */ 2234 base_offset = size_binop (PLUS_EXPR, 2235 fold_convert (sizetype, base_offset), 2236 fold_convert (sizetype, init)); 2237 dest = create_tmp_var (sizetype, "base_off"); 2238 add_referenced_var (dest); 2239 base_offset = force_gimple_operand (base_offset, &seq, true, dest); 2240 gimple_seq_add_seq (new_stmt_list, seq); 2241 2242 if (offset) 2243 { 2244 tree tmp = create_tmp_var (sizetype, "offset"); 2245 2246 add_referenced_var (tmp); 2247 offset = fold_build2 (MULT_EXPR, sizetype, 2248 fold_convert (sizetype, offset), step); 2249 base_offset = fold_build2 (PLUS_EXPR, sizetype, 2250 base_offset, offset); 2251 base_offset = force_gimple_operand (base_offset, &seq, false, tmp); 2252 gimple_seq_add_seq (new_stmt_list, seq); 2253 } 2254 2255 /* base + base_offset */ 2256 if (loop_vinfo) 2257 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), 2258 data_ref_base, base_offset); 2259 else 2260 { 2261 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF) 2262 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0)); 2263 else 2264 addr_base = build1 (ADDR_EXPR, 2265 build_pointer_type (TREE_TYPE (DR_REF (dr))), 2266 unshare_expr (DR_REF (dr))); 2267 } 2268 2269 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info)); 2270 2271 vec_stmt = fold_convert (vect_ptr_type, addr_base); 2272 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 2273 get_name (base_name)); 2274 add_referenced_var (addr_expr); 2275 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr); 2276 gimple_seq_add_seq (new_stmt_list, seq); 2277 2278 if (vect_print_dump_info (REPORT_DETAILS)) 2279 { 2280 fprintf (vect_dump, "created "); 2281 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM); 2282 } 2283 2284 return vec_stmt; 2285 } 2286 2287 2288 /* Function vect_create_data_ref_ptr. 
2289
2290 Create a new pointer to vector type (vp), that points to the first location
2291 accessed in the loop by STMT, along with the def-use update chain to
2292 appropriately advance the pointer through the loop iterations. Also set
2293 aliasing information for the pointer. This vector pointer is used by the
2294 callers to this function to create a memory reference expression for vector
2295 load/store access.
2296
2297 Input:
2298 1. STMT: a stmt that references memory. Expected to be of the form
2299 GIMPLE_ASSIGN <name, data-ref> or
2300 GIMPLE_ASSIGN <data-ref, name>.
2301 2. AT_LOOP: the loop where the vector memref is to be created.
2302 3. OFFSET (optional): an offset to be added to the initial address accessed
2303 by the data-ref in STMT.
2304 4. ONLY_INIT: indicate whether vp is to be updated in the loop, or remain
2305 pointing to the initial address.
2306 5. TYPE: if not NULL indicates the required type of the data-ref.
2307
2308 Output:
2309 1. Declare a new ptr to vector_type, and have it point to the base of the
2310 data reference (initial address accessed by the data reference).
2311 For example, for vector of type V8HI, the following code is generated:
2312
2313 v8hi *vp;
2314 vp = (v8hi *)initial_address;
2315
2316 if OFFSET is not supplied:
2317 initial_address = &a[init];
2318 if OFFSET is supplied:
2319 initial_address = &a[init + OFFSET];
2320
2321 Return the initial_address in INITIAL_ADDRESS.
2322
2323 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2324 update the pointer in each iteration of the loop.
2325
2326 Return the increment stmt that updates the pointer in PTR_INCR.
2327
2328 3. Set INV_P to true if the access pattern of the data reference in the
2329 vectorized loop is invariant. Set it to false otherwise.
2330
2331 4. Return the pointer. */
2332
2333 tree
2334 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2335 tree offset, tree *initial_address, gimple *ptr_incr,
2336 bool only_init, bool *inv_p)
2337 {
2338 tree base_name;
2339 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2340 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2341 struct loop *loop = NULL;
2342 bool nested_in_vect_loop = false;
2343 struct loop *containing_loop = NULL;
2344 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2345 tree vect_ptr_type;
2346 tree vect_ptr;
2347 tree new_temp;
2348 gimple vec_stmt;
2349 gimple_seq new_stmt_list = NULL;
2350 edge pe = NULL;
2351 basic_block new_bb;
2352 tree vect_ptr_init;
2353 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2354 tree vptr;
2355 gimple_stmt_iterator incr_gsi;
2356 bool insert_after;
2357 tree indx_before_incr, indx_after_incr;
2358 gimple incr;
2359 tree step;
2360 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2361 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2362
2363 if (loop_vinfo)
2364 {
2365 loop = LOOP_VINFO_LOOP (loop_vinfo);
2366 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2367 containing_loop = (gimple_bb (stmt))->loop_father;
2368 pe = loop_preheader_edge (loop);
2369 }
2370 else
2371 {
2372 gcc_assert (bb_vinfo);
2373 only_init = true;
2374 *ptr_incr = NULL;
2375 }
2376
2377 /* Check the step (evolution) of the load in LOOP, and record
2378 whether it's invariant.
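     (E.g., a hypothetical load whose address does not advance across
     iterations, such as a use of x[0] inside the loop, has a zero step
     and *INV_P will be set to true.)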
*/ 2379 if (nested_in_vect_loop) 2380 step = STMT_VINFO_DR_STEP (stmt_info); 2381 else 2382 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info)); 2383 2384 if (tree_int_cst_compare (step, size_zero_node) == 0) 2385 *inv_p = true; 2386 else 2387 *inv_p = false; 2388 2389 /* Create an expression for the first address accessed by this load 2390 in LOOP. */ 2391 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr))); 2392 2393 if (vect_print_dump_info (REPORT_DETAILS)) 2394 { 2395 tree data_ref_base = base_name; 2396 fprintf (vect_dump, "create vector-pointer variable to type: "); 2397 print_generic_expr (vect_dump, vectype, TDF_SLIM); 2398 if (TREE_CODE (data_ref_base) == VAR_DECL 2399 || TREE_CODE (data_ref_base) == ARRAY_REF) 2400 fprintf (vect_dump, " vectorizing an array ref: "); 2401 else if (TREE_CODE (data_ref_base) == COMPONENT_REF) 2402 fprintf (vect_dump, " vectorizing a record based array ref: "); 2403 else if (TREE_CODE (data_ref_base) == SSA_NAME) 2404 fprintf (vect_dump, " vectorizing a pointer ref: "); 2405 print_generic_expr (vect_dump, base_name, TDF_SLIM); 2406 } 2407 2408 /** (1) Create the new vector-pointer variable: **/ 2409 vect_ptr_type = build_pointer_type (vectype); 2410 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 2411 get_name (base_name)); 2412 2413 /* Vector types inherit the alias set of their component type by default so 2414 we need to use a ref-all pointer if the data reference does not conflict 2415 with the created vector data reference because it is not addressable. */ 2416 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), 2417 get_alias_set (DR_REF (dr)))) 2418 { 2419 vect_ptr_type 2420 = build_pointer_type_for_mode (vectype, 2421 TYPE_MODE (vect_ptr_type), true); 2422 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 2423 get_name (base_name)); 2424 } 2425 2426 /* Likewise for any of the data references in the stmt group. */ 2427 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1) 2428 { 2429 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info); 2430 do 2431 { 2432 tree lhs = gimple_assign_lhs (orig_stmt); 2433 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr), 2434 get_alias_set (lhs))) 2435 { 2436 vect_ptr_type 2437 = build_pointer_type_for_mode (vectype, 2438 TYPE_MODE (vect_ptr_type), true); 2439 vect_ptr 2440 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, 2441 get_name (base_name)); 2442 break; 2443 } 2444 2445 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt)); 2446 } 2447 while (orig_stmt); 2448 } 2449 2450 add_referenced_var (vect_ptr); 2451 2452 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are 2453 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two 2454 def-use update cycles for the pointer: One relative to the outer-loop 2455 (LOOP), which is what steps (3) and (4) below do. The other is relative 2456 to the inner-loop (which is the inner-most loop containing the dataref), 2457 and this is done be step (5) below. 2458 2459 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the 2460 inner-most loop, and so steps (3),(4) work the same, and step (5) is 2461 redundant. Steps (3),(4) create the following: 2462 2463 vp0 = &base_addr; 2464 LOOP: vp1 = phi(vp0,vp2) 2465 ... 2466 ... 
2467 vp2 = vp1 + step 2468 goto LOOP 2469 2470 If there is an inner-loop nested in loop, then step (5) will also be 2471 applied, and an additional update in the inner-loop will be created: 2472 2473 vp0 = &base_addr; 2474 LOOP: vp1 = phi(vp0,vp2) 2475 ... 2476 inner: vp3 = phi(vp1,vp4) 2477 vp4 = vp3 + inner_step 2478 if () goto inner 2479 ... 2480 vp2 = vp1 + step 2481 if () goto LOOP */ 2482 2483 /** (3) Calculate the initial address the vector-pointer, and set 2484 the vector-pointer to point to it before the loop: **/ 2485 2486 /* Create: (&(base[init_val+offset]) in the loop preheader. */ 2487 2488 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list, 2489 offset, loop); 2490 if (new_stmt_list) 2491 { 2492 if (pe) 2493 { 2494 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list); 2495 gcc_assert (!new_bb); 2496 } 2497 else 2498 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT); 2499 } 2500 2501 *initial_address = new_temp; 2502 2503 /* Create: p = (vectype *) initial_base */ 2504 vec_stmt = gimple_build_assign (vect_ptr, 2505 fold_convert (vect_ptr_type, new_temp)); 2506 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt); 2507 gimple_assign_set_lhs (vec_stmt, vect_ptr_init); 2508 if (pe) 2509 { 2510 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt); 2511 gcc_assert (!new_bb); 2512 } 2513 else 2514 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT); 2515 2516 /** (4) Handle the updating of the vector-pointer inside the loop. 2517 This is needed when ONLY_INIT is false, and also when AT_LOOP 2518 is the inner-loop nested in LOOP (during outer-loop vectorization). 2519 **/ 2520 2521 /* No update in loop is required. */ 2522 if (only_init && (!loop_vinfo || at_loop == loop)) 2523 { 2524 /* Copy the points-to information if it exists. */ 2525 if (DR_PTR_INFO (dr)) 2526 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr)); 2527 vptr = vect_ptr_init; 2528 } 2529 else 2530 { 2531 /* The step of the vector pointer is the Vector Size. */ 2532 tree step = TYPE_SIZE_UNIT (vectype); 2533 /* One exception to the above is when the scalar step of the load in 2534 LOOP is zero. In this case the step here is also zero. */ 2535 if (*inv_p) 2536 step = size_zero_node; 2537 2538 standard_iv_increment_position (loop, &incr_gsi, &insert_after); 2539 2540 create_iv (vect_ptr_init, 2541 fold_convert (vect_ptr_type, step), 2542 vect_ptr, loop, &incr_gsi, insert_after, 2543 &indx_before_incr, &indx_after_incr); 2544 incr = gsi_stmt (incr_gsi); 2545 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); 2546 2547 /* Copy the points-to information if it exists. 
*/ 2548 if (DR_PTR_INFO (dr)) 2549 { 2550 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr)); 2551 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr)); 2552 } 2553 if (ptr_incr) 2554 *ptr_incr = incr; 2555 2556 vptr = indx_before_incr; 2557 } 2558 2559 if (!nested_in_vect_loop || only_init) 2560 return vptr; 2561 2562 2563 /** (5) Handle the updating of the vector-pointer inside the inner-loop 2564 nested in LOOP, if exists: **/ 2565 2566 gcc_assert (nested_in_vect_loop); 2567 if (!only_init) 2568 { 2569 standard_iv_increment_position (containing_loop, &incr_gsi, 2570 &insert_after); 2571 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr, 2572 containing_loop, &incr_gsi, insert_after, &indx_before_incr, 2573 &indx_after_incr); 2574 incr = gsi_stmt (incr_gsi); 2575 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL)); 2576 2577 /* Copy the points-to information if it exists. */ 2578 if (DR_PTR_INFO (dr)) 2579 { 2580 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr)); 2581 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr)); 2582 } 2583 if (ptr_incr) 2584 *ptr_incr = incr; 2585 2586 return indx_before_incr; 2587 } 2588 else 2589 gcc_unreachable (); 2590 } 2591 2592 2593 /* Function bump_vector_ptr 2594 2595 Increment a pointer (to a vector type) by vector-size. If requested, 2596 i.e. if PTR-INCR is given, then also connect the new increment stmt 2597 to the existing def-use update-chain of the pointer, by modifying 2598 the PTR_INCR as illustrated below: 2599 2600 The pointer def-use update-chain before this function: 2601 DATAREF_PTR = phi (p_0, p_2) 2602 .... 2603 PTR_INCR: p_2 = DATAREF_PTR + step 2604 2605 The pointer def-use update-chain after this function: 2606 DATAREF_PTR = phi (p_0, p_2) 2607 .... 2608 NEW_DATAREF_PTR = DATAREF_PTR + BUMP 2609 .... 2610 PTR_INCR: p_2 = NEW_DATAREF_PTR + step 2611 2612 Input: 2613 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated 2614 in the loop. 2615 PTR_INCR - optional. The stmt that updates the pointer in each iteration of 2616 the loop. The increment amount across iterations is expected 2617 to be vector_size. 2618 BSI - location where the new update stmt is to be placed. 2619 STMT - the original scalar memory-access stmt that is being vectorized. 2620 BUMP - optional. The offset by which to bump the pointer. If not given, 2621 the offset is assumed to be vector_size. 2622 2623 Output: Return NEW_DATAREF_PTR as illustrated above. 2624 2625 */ 2626 2627 tree 2628 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi, 2629 gimple stmt, tree bump) 2630 { 2631 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2632 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); 2633 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2634 tree ptr_var = SSA_NAME_VAR (dataref_ptr); 2635 tree update = TYPE_SIZE_UNIT (vectype); 2636 gimple incr_stmt; 2637 ssa_op_iter iter; 2638 use_operand_p use_p; 2639 tree new_dataref_ptr; 2640 2641 if (bump) 2642 update = bump; 2643 2644 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var, 2645 dataref_ptr, update); 2646 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt); 2647 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr); 2648 vect_finish_stmt_generation (stmt, incr_stmt, gsi); 2649 2650 /* Copy the points-to information if it exists. 
*/
2651 if (DR_PTR_INFO (dr))
2652 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2653
2654 if (!ptr_incr)
2655 return new_dataref_ptr;
2656
2657 /* Update the vector-pointer's cross-iteration increment. */
2658 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2659 {
2660 tree use = USE_FROM_PTR (use_p);
2661
2662 if (use == dataref_ptr)
2663 SET_USE (use_p, new_dataref_ptr);
2664 else
2665 gcc_assert (tree_int_cst_compare (use, update) == 0);
2666 }
2667
2668 return new_dataref_ptr;
2669 }
2670
2671
2672 /* Function vect_create_destination_var.
2673
2674 Create a new temporary of type VECTYPE. */
2675
2676 tree
2677 vect_create_destination_var (tree scalar_dest, tree vectype)
2678 {
2679 tree vec_dest;
2680 const char *new_name;
2681 tree type;
2682 enum vect_var_kind kind;
2683
2684 kind = vectype ? vect_simple_var : vect_scalar_var;
2685 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2686
2687 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2688
2689 new_name = get_name (scalar_dest);
2690 if (!new_name)
2691 new_name = "var_";
2692 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2693 add_referenced_var (vec_dest);
2694
2695 return vec_dest;
2696 }
2697
2698 /* Function vect_strided_store_supported.
2699
2700 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2701 and FALSE otherwise. */
2702
2703 bool
2704 vect_strided_store_supported (tree vectype)
2705 {
2706 optab interleave_high_optab, interleave_low_optab;
2707 int mode;
2708
2709 mode = (int) TYPE_MODE (vectype);
2710
2711 /* Check that the operation is supported. */
2712 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2713 vectype, optab_default);
2714 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2715 vectype, optab_default);
2716 if (!interleave_high_optab || !interleave_low_optab)
2717 {
2718 if (vect_print_dump_info (REPORT_DETAILS))
2719 fprintf (vect_dump, "no optab for interleave.");
2720 return false;
2721 }
2722
2723 if (optab_handler (interleave_high_optab, mode)->insn_code
2724 == CODE_FOR_nothing
2725 || optab_handler (interleave_low_optab, mode)->insn_code
2726 == CODE_FOR_nothing)
2727 {
2728 if (vect_print_dump_info (REPORT_DETAILS))
2729 fprintf (vect_dump, "interleave op not supported by target.");
2730 return false;
2731 }
2732
2733 return true;
2734 }
2735
2736
2737 /* Function vect_permute_store_chain.
2738
2739 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2740 a power of 2, generate interleave_high/low stmts to reorder the data
2741 correctly for the stores. Return the final references for stores in
2742 RESULT_CHAIN.
2743
2744 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2745 The input is 4 vectors each containing 8 elements. We assign a number to each
2746 element, the input sequence is:
2747
2748 1st vec: 0 1 2 3 4 5 6 7
2749 2nd vec: 8 9 10 11 12 13 14 15
2750 3rd vec: 16 17 18 19 20 21 22 23
2751 4th vec: 24 25 26 27 28 29 30 31
2752
2753 The output sequence should be:
2754
2755 1st vec: 0 8 16 24 1 9 17 25
2756 2nd vec: 2 10 18 26 3 11 19 27
2757 3rd vec: 4 12 20 28 5 13 21 29
2758 4th vec: 6 14 22 30 7 15 23 31
2759
2760 i.e., we interleave the contents of the four vectors in their order.
2761
2762 We use interleave_high/low instructions to create such output.
The input of
2763 each interleave_high/low operation is two vectors:
2764 1st vec 2nd vec
2765 0 1 2 3 4 5 6 7
2766 the even elements of the result vector are obtained left-to-right from the
2767 high/low elements of the first vector. The odd elements of the result are
2768 obtained left-to-right from the high/low elements of the second vector.
2769 The output of interleave_high will be: 0 4 1 5
2770 and of interleave_low: 2 6 3 7
2771
2772
2773 The permutation is done in log LENGTH stages. In each stage interleave_high
2774 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2775 where the first argument is taken from the first half of DR_CHAIN and the
2776 second argument from its second half.
2777 In our example,
2778
2779 I1: interleave_high (1st vec, 3rd vec)
2780 I2: interleave_low (1st vec, 3rd vec)
2781 I3: interleave_high (2nd vec, 4th vec)
2782 I4: interleave_low (2nd vec, 4th vec)
2783
2784 The output for the first stage is:
2785
2786 I1: 0 16 1 17 2 18 3 19
2787 I2: 4 20 5 21 6 22 7 23
2788 I3: 8 24 9 25 10 26 11 27
2789 I4: 12 28 13 29 14 30 15 31
2790
2791 The output of the second stage, i.e. the final result is:
2792
2793 I1: 0 8 16 24 1 9 17 25
2794 I2: 2 10 18 26 3 11 19 27
2795 I3: 4 12 20 28 5 13 21 29
2796 I4: 6 14 22 30 7 15 23 31. */
2797
2798 bool
2799 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2800 unsigned int length,
2801 gimple stmt,
2802 gimple_stmt_iterator *gsi,
2803 VEC(tree,heap) **result_chain)
2804 {
2805 tree perm_dest, vect1, vect2, high, low;
2806 gimple perm_stmt;
2807 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2808 int i;
2809 unsigned int j;
2810 enum tree_code high_code, low_code;
2811
2812 /* Check that the operation is supported. */
2813 if (!vect_strided_store_supported (vectype))
2814 return false;
2815
2816 *result_chain = VEC_copy (tree, heap, dr_chain);
2817
2818 for (i = 0; i < exact_log2 (length); i++)
2819 {
2820 for (j = 0; j < length/2; j++)
2821 {
2822 vect1 = VEC_index (tree, dr_chain, j);
2823 vect2 = VEC_index (tree, dr_chain, j+length/2);
2824
2825 /* Create interleaving stmt:
2826 in the case of big endian:
2827 high = interleave_high (vect1, vect2)
2828 and in the case of little endian:
2829 high = interleave_low (vect1, vect2). */
2830 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2831 DECL_GIMPLE_REG_P (perm_dest) = 1;
2832 add_referenced_var (perm_dest);
2833 if (BYTES_BIG_ENDIAN)
2834 {
2835 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2836 low_code = VEC_INTERLEAVE_LOW_EXPR;
2837 }
2838 else
2839 {
2840 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2841 high_code = VEC_INTERLEAVE_LOW_EXPR;
2842 }
2843 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2844 vect1, vect2);
2845 high = make_ssa_name (perm_dest, perm_stmt);
2846 gimple_assign_set_lhs (perm_stmt, high);
2847 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2848 VEC_replace (tree, *result_chain, 2*j, high);
2849
2850 /* Create interleaving stmt:
2851 in the case of big endian:
2852 low = interleave_low (vect1, vect2)
2853 and in the case of little endian:
2854 low = interleave_high (vect1, vect2).
*/ 2855 perm_dest = create_tmp_var (vectype, "vect_inter_low"); 2856 DECL_GIMPLE_REG_P (perm_dest) = 1; 2857 add_referenced_var (perm_dest); 2858 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest, 2859 vect1, vect2); 2860 low = make_ssa_name (perm_dest, perm_stmt); 2861 gimple_assign_set_lhs (perm_stmt, low); 2862 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 2863 VEC_replace (tree, *result_chain, 2*j+1, low); 2864 } 2865 dr_chain = VEC_copy (tree, heap, *result_chain); 2866 } 2867 return true; 2868 } 2869 2870 /* Function vect_setup_realignment 2871 2872 This function is called when vectorizing an unaligned load using 2873 the dr_explicit_realign[_optimized] scheme. 2874 This function generates the following code at the loop prolog: 2875 2876 p = initial_addr; 2877 x msq_init = *(floor(p)); # prolog load 2878 realignment_token = call target_builtin; 2879 loop: 2880 x msq = phi (msq_init, ---) 2881 2882 The stmts marked with x are generated only for the case of 2883 dr_explicit_realign_optimized. 2884 2885 The code above sets up a new (vector) pointer, pointing to the first 2886 location accessed by STMT, and a "floor-aligned" load using that pointer. 2887 It also generates code to compute the "realignment-token" (if the relevant 2888 target hook was defined), and creates a phi-node at the loop-header bb 2889 whose arguments are the result of the prolog-load (created by this 2890 function) and the result of a load that takes place in the loop (to be 2891 created by the caller to this function). 2892 2893 For the case of dr_explicit_realign_optimized: 2894 The caller to this function uses the phi-result (msq) to create the 2895 realignment code inside the loop, and sets up the missing phi argument, 2896 as follows: 2897 loop: 2898 msq = phi (msq_init, lsq) 2899 lsq = *(floor(p')); # load in loop 2900 result = realign_load (msq, lsq, realignment_token); 2901 2902 For the case of dr_explicit_realign: 2903 loop: 2904 msq = *(floor(p)); # load in loop 2905 p' = p + (VS-1); 2906 lsq = *(floor(p')); # load in loop 2907 result = realign_load (msq, lsq, realignment_token); 2908 2909 Input: 2910 STMT - (scalar) load stmt to be vectorized. This load accesses 2911 a memory location that may be unaligned. 2912 BSI - place where new code is to be inserted. 2913 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes 2914 is used. 2915 2916 Output: 2917 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load 2918 target hook, if defined. 2919 Return value - the result of the loop-header phi node. 
*/ 2920 2921 tree 2922 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi, 2923 tree *realignment_token, 2924 enum dr_alignment_support alignment_support_scheme, 2925 tree init_addr, 2926 struct loop **at_loop) 2927 { 2928 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2929 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2930 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 2931 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 2932 edge pe; 2933 tree scalar_dest = gimple_assign_lhs (stmt); 2934 tree vec_dest; 2935 gimple inc; 2936 tree ptr; 2937 tree data_ref; 2938 gimple new_stmt; 2939 basic_block new_bb; 2940 tree msq_init = NULL_TREE; 2941 tree new_temp; 2942 gimple phi_stmt; 2943 tree msq = NULL_TREE; 2944 gimple_seq stmts = NULL; 2945 bool inv_p; 2946 bool compute_in_loop = false; 2947 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt); 2948 struct loop *containing_loop = (gimple_bb (stmt))->loop_father; 2949 struct loop *loop_for_initial_load; 2950 2951 gcc_assert (alignment_support_scheme == dr_explicit_realign 2952 || alignment_support_scheme == dr_explicit_realign_optimized); 2953 2954 /* We need to generate three things: 2955 1. the misalignment computation 2956 2. the extra vector load (for the optimized realignment scheme). 2957 3. the phi node for the two vectors from which the realignment is 2958 done (for the optimized realignment scheme). 2959 */ 2960 2961 /* 1. Determine where to generate the misalignment computation. 2962 2963 If INIT_ADDR is NULL_TREE, this indicates that the misalignment 2964 calculation will be generated by this function, outside the loop (in the 2965 preheader). Otherwise, INIT_ADDR had already been computed for us by the 2966 caller, inside the loop. 2967 2968 Background: If the misalignment remains fixed throughout the iterations of 2969 the loop, then both realignment schemes are applicable, and also the 2970 misalignment computation can be done outside LOOP. This is because we are 2971 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that 2972 are a multiple of VS (the Vector Size), and therefore the misalignment in 2973 different vectorized LOOP iterations is always the same. 2974 The problem arises only if the memory access is in an inner-loop nested 2975 inside LOOP, which is now being vectorized using outer-loop vectorization. 2976 This is the only case when the misalignment of the memory access may not 2977 remain fixed throughout the iterations of the inner-loop (as explained in 2978 detail in vect_supportable_dr_alignment). In this case, not only is the 2979 optimized realignment scheme not applicable, but also the misalignment 2980 computation (and generation of the realignment token that is passed to 2981 REALIGN_LOAD) have to be done inside the loop. 2982 2983 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode 2984 or not, which in turn determines if the misalignment is computed inside 2985 the inner-loop, or outside LOOP. */ 2986 2987 if (init_addr != NULL_TREE) 2988 { 2989 compute_in_loop = true; 2990 gcc_assert (alignment_support_scheme == dr_explicit_realign); 2991 } 2992 2993 2994 /* 2. Determine where to generate the extra vector load. 2995 2996 For the optimized realignment scheme, instead of generating two vector 2997 loads in each iteration, we generate a single extra vector load in the 2998 preheader of the loop, and in each iteration reuse the result of the 2999 vector load from the previous iteration. 
In case the memory access is in 3000 an inner-loop nested inside LOOP, which is now being vectorized using 3001 outer-loop vectorization, we need to determine whether this initial vector 3002 load should be generated at the preheader of the inner-loop, or can be 3003 generated at the preheader of LOOP. If the memory access has no evolution 3004 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has 3005 to be generated inside LOOP (in the preheader of the inner-loop). */ 3006 3007 if (nested_in_vect_loop) 3008 { 3009 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info); 3010 bool invariant_in_outerloop = 3011 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0); 3012 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner); 3013 } 3014 else 3015 loop_for_initial_load = loop; 3016 if (at_loop) 3017 *at_loop = loop_for_initial_load; 3018 3019 /* 3. For the case of the optimized realignment, create the first vector 3020 load at the loop preheader. */ 3021 3022 if (alignment_support_scheme == dr_explicit_realign_optimized) 3023 { 3024 /* Create msq_init = *(floor(p1)) in the loop preheader */ 3025 3026 gcc_assert (!compute_in_loop); 3027 pe = loop_preheader_edge (loop_for_initial_load); 3028 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3029 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE, 3030 &init_addr, &inc, true, &inv_p); 3031 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr); 3032 new_stmt = gimple_build_assign (vec_dest, data_ref); 3033 new_temp = make_ssa_name (vec_dest, new_stmt); 3034 gimple_assign_set_lhs (new_stmt, new_temp); 3035 mark_symbols_for_renaming (new_stmt); 3036 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); 3037 gcc_assert (!new_bb); 3038 msq_init = gimple_assign_lhs (new_stmt); 3039 } 3040 3041 /* 4. Create realignment token using a target builtin, if available. 3042 It is done either inside the containing loop, or before LOOP (as 3043 determined above). */ 3044 3045 if (targetm.vectorize.builtin_mask_for_load) 3046 { 3047 tree builtin_decl; 3048 3049 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */ 3050 if (compute_in_loop) 3051 gcc_assert (init_addr); /* already computed by the caller. */ 3052 else 3053 { 3054 /* Generate the INIT_ADDR computation outside LOOP. */ 3055 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts, 3056 NULL_TREE, loop); 3057 pe = loop_preheader_edge (loop); 3058 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 3059 gcc_assert (!new_bb); 3060 } 3061 3062 builtin_decl = targetm.vectorize.builtin_mask_for_load (); 3063 new_stmt = gimple_build_call (builtin_decl, 1, init_addr); 3064 vec_dest = 3065 vect_create_destination_var (scalar_dest, 3066 gimple_call_return_type (new_stmt)); 3067 new_temp = make_ssa_name (vec_dest, new_stmt); 3068 gimple_call_set_lhs (new_stmt, new_temp); 3069 3070 if (compute_in_loop) 3071 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); 3072 else 3073 { 3074 /* Generate the misalignment computation outside LOOP. */ 3075 pe = loop_preheader_edge (loop); 3076 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt); 3077 gcc_assert (!new_bb); 3078 } 3079 3080 *realignment_token = gimple_call_lhs (new_stmt); 3081 3082 /* The result of the CALL_EXPR to this builtin is determined from 3083 the value of the parameter and no global variables are touched 3084 which makes the builtin a "const" function. 
Requiring the 3085 builtin to have the "const" attribute makes it unnecessary 3086 to call mark_call_clobbered. */ 3087 gcc_assert (TREE_READONLY (builtin_decl)); 3088 } 3089 3090 if (alignment_support_scheme == dr_explicit_realign) 3091 return msq; 3092 3093 gcc_assert (!compute_in_loop); 3094 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized); 3095 3096 3097 /* 5. Create msq = phi <msq_init, lsq> in loop */ 3098 3099 pe = loop_preheader_edge (containing_loop); 3100 vec_dest = vect_create_destination_var (scalar_dest, vectype); 3101 msq = make_ssa_name (vec_dest, NULL); 3102 phi_stmt = create_phi_node (msq, containing_loop->header); 3103 SSA_NAME_DEF_STMT (msq) = phi_stmt; 3104 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION); 3105 3106 return msq; 3107 } 3108 3109 3110 /* Function vect_strided_load_supported. 3111 3112 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported, 3113 and FALSE otherwise. */ 3114 3115 bool 3116 vect_strided_load_supported (tree vectype) 3117 { 3118 optab perm_even_optab, perm_odd_optab; 3119 int mode; 3120 3121 mode = (int) TYPE_MODE (vectype); 3122 3123 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype, 3124 optab_default); 3125 if (!perm_even_optab) 3126 { 3127 if (vect_print_dump_info (REPORT_DETAILS)) 3128 fprintf (vect_dump, "no optab for perm_even."); 3129 return false; 3130 } 3131 3132 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing) 3133 { 3134 if (vect_print_dump_info (REPORT_DETAILS)) 3135 fprintf (vect_dump, "perm_even op not supported by target."); 3136 return false; 3137 } 3138 3139 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype, 3140 optab_default); 3141 if (!perm_odd_optab) 3142 { 3143 if (vect_print_dump_info (REPORT_DETAILS)) 3144 fprintf (vect_dump, "no optab for perm_odd."); 3145 return false; 3146 } 3147 3148 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing) 3149 { 3150 if (vect_print_dump_info (REPORT_DETAILS)) 3151 fprintf (vect_dump, "perm_odd op not supported by target."); 3152 return false; 3153 } 3154 return true; 3155 } 3156 3157 3158 /* Function vect_permute_load_chain. 3159 3160 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be 3161 a power of 2, generate extract_even/odd stmts to reorder the input data 3162 correctly. Return the final references for loads in RESULT_CHAIN. 3163 3164 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8. 3165 The input is 4 vectors each containing 8 elements. We assign a number to each 3166 element, the input sequence is: 3167 3168 1st vec: 0 1 2 3 4 5 6 7 3169 2nd vec: 8 9 10 11 12 13 14 15 3170 3rd vec: 16 17 18 19 20 21 22 23 3171 4th vec: 24 25 26 27 28 29 30 31 3172 3173 The output sequence should be: 3174 3175 1st vec: 0 4 8 12 16 20 24 28 3176 2nd vec: 1 5 9 13 17 21 25 29 3177 3rd vec: 2 6 10 14 18 22 26 30 3178 4th vec: 3 7 11 15 19 23 27 31 3179 3180 i.e., the first output vector should contain the first elements of each 3181 interleaving group, etc. 3182 3183 We use extract_even/odd instructions to create such output. The input of each 3184 extract_even/odd operation is two vectors 3185 1st vec 2nd vec 3186 0 1 2 3 4 5 6 7 3187 3188 and the output is the vector of extracted even/odd elements. The output of 3189 extract_even will be: 0 2 4 6 3190 and of extract_odd: 1 3 5 7 3191 3192 3193 The permutation is done in log LENGTH stages. 
In each stage extract_even and 3194 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their 3195 order. In our example, 3196 3197 E1: extract_even (1st vec, 2nd vec) 3198 E2: extract_odd (1st vec, 2nd vec) 3199 E3: extract_even (3rd vec, 4th vec) 3200 E4: extract_odd (3rd vec, 4th vec) 3201 3202 The output for the first stage will be: 3203 3204 E1: 0 2 4 6 8 10 12 14 3205 E2: 1 3 5 7 9 11 13 15 3206 E3: 16 18 20 22 24 26 28 30 3207 E4: 17 19 21 23 25 27 29 31 3208 3209 In order to proceed and create the correct sequence for the next stage (or 3210 for the correct output, if the second stage is the last one, as in our 3211 example), we first put the output of extract_even operation and then the 3212 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN). 3213 The input for the second stage is: 3214 3215 1st vec (E1): 0 2 4 6 8 10 12 14 3216 2nd vec (E3): 16 18 20 22 24 26 28 30 3217 3rd vec (E2): 1 3 5 7 9 11 13 15 3218 4th vec (E4): 17 19 21 23 25 27 29 31 3219 3220 The output of the second stage: 3221 3222 E1: 0 4 8 12 16 20 24 28 3223 E2: 2 6 10 14 18 22 26 30 3224 E3: 1 5 9 13 17 21 25 29 3225 E4: 3 7 11 15 19 23 27 31 3226 3227 And RESULT_CHAIN after reordering: 3228 3229 1st vec (E1): 0 4 8 12 16 20 24 28 3230 2nd vec (E3): 1 5 9 13 17 21 25 29 3231 3rd vec (E2): 2 6 10 14 18 22 26 30 3232 4th vec (E4): 3 7 11 15 19 23 27 31. */ 3233 3234 bool 3235 vect_permute_load_chain (VEC(tree,heap) *dr_chain, 3236 unsigned int length, 3237 gimple stmt, 3238 gimple_stmt_iterator *gsi, 3239 VEC(tree,heap) **result_chain) 3240 { 3241 tree perm_dest, data_ref, first_vect, second_vect; 3242 gimple perm_stmt; 3243 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 3244 int i; 3245 unsigned int j; 3246 3247 /* Check that the operation is supported. */ 3248 if (!vect_strided_load_supported (vectype)) 3249 return false; 3250 3251 *result_chain = VEC_copy (tree, heap, dr_chain); 3252 for (i = 0; i < exact_log2 (length); i++) 3253 { 3254 for (j = 0; j < length; j +=2) 3255 { 3256 first_vect = VEC_index (tree, dr_chain, j); 3257 second_vect = VEC_index (tree, dr_chain, j+1); 3258 3259 /* data_ref = permute_even (first_data_ref, second_data_ref); */ 3260 perm_dest = create_tmp_var (vectype, "vect_perm_even"); 3261 DECL_GIMPLE_REG_P (perm_dest) = 1; 3262 add_referenced_var (perm_dest); 3263 3264 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR, 3265 perm_dest, first_vect, 3266 second_vect); 3267 3268 data_ref = make_ssa_name (perm_dest, perm_stmt); 3269 gimple_assign_set_lhs (perm_stmt, data_ref); 3270 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 3271 mark_symbols_for_renaming (perm_stmt); 3272 3273 VEC_replace (tree, *result_chain, j/2, data_ref); 3274 3275 /* data_ref = permute_odd (first_data_ref, second_data_ref); */ 3276 perm_dest = create_tmp_var (vectype, "vect_perm_odd"); 3277 DECL_GIMPLE_REG_P (perm_dest) = 1; 3278 add_referenced_var (perm_dest); 3279 3280 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR, 3281 perm_dest, first_vect, 3282 second_vect); 3283 data_ref = make_ssa_name (perm_dest, perm_stmt); 3284 gimple_assign_set_lhs (perm_stmt, data_ref); 3285 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 3286 mark_symbols_for_renaming (perm_stmt); 3287 3288 VEC_replace (tree, *result_chain, j/2+length/2, data_ref); 3289 } 3290 dr_chain = VEC_copy (tree, heap, *result_chain); 3291 } 3292 return true; 3293 } 3294 3295 3296 /* Function vect_transform_strided_load. 
3297
3298 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3299 to perform their permutation and attach the resulting vectorized statements to
3300 the scalar statements.
3301 */
3302
3303 bool
3304 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3305 gimple_stmt_iterator *gsi)
3306 {
3307 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3308 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3309 gimple next_stmt, new_stmt;
3310 VEC(tree,heap) *result_chain = NULL;
3311 unsigned int i, gap_count;
3312 tree tmp_data_ref;
3313
3314 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3315 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3316 vectors, that are ready for vector computation. */
3317 result_chain = VEC_alloc (tree, heap, size);
3318 /* Permute. */
3319 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3320 return false;
3321
3322 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3323 Since we scan the chain starting from its first node, their order
3324 corresponds to the order of data-refs in RESULT_CHAIN. */
3325 next_stmt = first_stmt;
3326 gap_count = 1;
3327 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3328 {
3329 if (!next_stmt)
3330 break;
3331
3332 /* Skip the gaps. Loads created for the gaps will be removed by dead
3333 code elimination pass later. No need to check for the first stmt in
3334 the group, since it always exists.
3335 DR_GROUP_GAP is the number of steps in elements from the previous
3336 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3337 correspond to the gaps.
3338 */
3339 if (next_stmt != first_stmt
3340 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3341 {
3342 gap_count++;
3343 continue;
3344 }
3345
3346 while (next_stmt)
3347 {
3348 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3349 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3350 copies, and we put the new vector statement in the first available
3351 RELATED_STMT. */
3352 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3353 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3354 else
3355 {
3356 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3357 {
3358 gimple prev_stmt =
3359 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3360 gimple rel_stmt =
3361 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3362 while (rel_stmt)
3363 {
3364 prev_stmt = rel_stmt;
3365 rel_stmt =
3366 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3367 }
3368
3369 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3370 new_stmt;
3371 }
3372 }
3373
3374 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3375 gap_count = 1;
3376 /* If NEXT_STMT accesses the same DR as the previous statement,
3377 put the same TMP_DATA_REF as its vectorized statement; otherwise
3378 get the next data-ref from RESULT_CHAIN. */
3379 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3380 break;
3381 }
3382 }
3383
3384 VEC_free (tree, heap, result_chain);
3385 return true;
3386 }
3387
3388 /* Function vect_can_force_dr_alignment_p.
3389
3390 Returns whether the alignment of a DECL can be forced to be aligned
3391 on an ALIGNMENT bit boundary.
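   For example (illustrative only): a file-scope "static float a[256]" that
   has not been emitted yet (!TREE_ASM_WRITTEN) and is not DECL_EXTERNAL may
   be over-aligned up to MAX_OFILE_ALIGNMENT, whereas an automatic variable
   is limited by MAX_STACK_ALIGNMENT.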
*/ 3392 3393 bool 3394 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment) 3395 { 3396 if (TREE_CODE (decl) != VAR_DECL) 3397 return false; 3398 3399 if (DECL_EXTERNAL (decl)) 3400 return false; 3401 3402 if (TREE_ASM_WRITTEN (decl)) 3403 return false; 3404 3405 if (TREE_STATIC (decl)) 3406 return (alignment <= MAX_OFILE_ALIGNMENT); 3407 else 3408 return (alignment <= MAX_STACK_ALIGNMENT); 3409 } 3410 3411 /* Function vect_supportable_dr_alignment 3412 3413 Return whether the data reference DR is supported with respect to its 3414 alignment. */ 3415 3416 enum dr_alignment_support 3417 vect_supportable_dr_alignment (struct data_reference *dr) 3418 { 3419 gimple stmt = DR_STMT (dr); 3420 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3421 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3422 enum machine_mode mode = TYPE_MODE (vectype); 3423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3424 struct loop *vect_loop = NULL; 3425 bool nested_in_vect_loop = false; 3426 3427 if (aligned_access_p (dr)) 3428 return dr_aligned; 3429 3430 if (!loop_vinfo) 3431 /* FORNOW: Misaligned accesses are supported only in loops. */ 3432 return dr_unaligned_unsupported; 3433 3434 vect_loop = LOOP_VINFO_LOOP (loop_vinfo); 3435 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt); 3436 3437 /* Possibly unaligned access. */ 3438 3439 /* We can choose between using the implicit realignment scheme (generating 3440 a misaligned_move stmt) and the explicit realignment scheme (generating 3441 aligned loads with a REALIGN_LOAD). There are two variants to the explicit 3442 realignment scheme: optimized, and unoptimized. 3443 We can optimize the realignment only if the step between consecutive 3444 vector loads is equal to the vector size. Since the vector memory 3445 accesses advance in steps of VS (Vector Size) in the vectorized loop, it 3446 is guaranteed that the misalignment amount remains the same throughout the 3447 execution of the vectorized loop. Therefore, we can create the 3448 "realignment token" (the permutation mask that is passed to REALIGN_LOAD) 3449 at the loop preheader. 3450 3451 However, in the case of outer-loop vectorization, when vectorizing a 3452 memory access in the inner-loop nested within the LOOP that is now being 3453 vectorized, while it is guaranteed that the misalignment of the 3454 vectorized memory access will remain the same in different outer-loop 3455 iterations, it is *not* guaranteed that is will remain the same throughout 3456 the execution of the inner-loop. This is because the inner-loop advances 3457 with the original scalar step (and not in steps of VS). If the inner-loop 3458 step happens to be a multiple of VS, then the misalignment remains fixed 3459 and we can use the optimized realignment scheme. For example: 3460 3461 for (i=0; i<N; i++) 3462 for (j=0; j<M; j++) 3463 s += a[i+j]; 3464 3465 When vectorizing the i-loop in the above example, the step between 3466 consecutive vector loads is 1, and so the misalignment does not remain 3467 fixed across the execution of the inner-loop, and the realignment cannot 3468 be optimized (as illustrated in the following pseudo vectorized loop): 3469 3470 for (i=0; i<N; i+=4) 3471 for (j=0; j<M; j++){ 3472 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...} 3473 // when j is {0,1,2,3,4,5,6,7,...} respectively. 3474 // (assuming that we start from an aligned address). 
3475 } 3476 3477 We therefore have to use the unoptimized realignment scheme: 3478 3479 for (i=0; i<N; i+=4) 3480 for (j=k; j<M; j+=4) 3481 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming 3482 // that the misalignment of the initial address is 3483 // 0). 3484 3485 The loop can then be vectorized as follows: 3486 3487 for (k=0; k<4; k++){ 3488 rt = get_realignment_token (&vp[k]); 3489 for (i=0; i<N; i+=4){ 3490 v1 = vp[i+k]; 3491 for (j=k; j<M; j+=4){ 3492 v2 = vp[i+j+VS-1]; 3493 va = REALIGN_LOAD <v1,v2,rt>; 3494 vs += va; 3495 v1 = v2; 3496 } 3497 } 3498 } */ 3499 3500 if (DR_IS_READ (dr)) 3501 { 3502 bool is_packed = false; 3503 tree type = (TREE_TYPE (DR_REF (dr))); 3504 3505 if (optab_handler (vec_realign_load_optab, mode)->insn_code != 3506 CODE_FOR_nothing 3507 && (!targetm.vectorize.builtin_mask_for_load 3508 || targetm.vectorize.builtin_mask_for_load ())) 3509 { 3510 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3511 if (nested_in_vect_loop 3512 && (TREE_INT_CST_LOW (DR_STEP (dr)) 3513 != GET_MODE_SIZE (TYPE_MODE (vectype)))) 3514 return dr_explicit_realign; 3515 else 3516 return dr_explicit_realign_optimized; 3517 } 3518 if (!known_alignment_for_access_p (dr)) 3519 { 3520 tree ba = DR_BASE_OBJECT (dr); 3521 3522 if (ba) 3523 is_packed = contains_packed_reference (ba); 3524 } 3525 3526 if (targetm.vectorize. 3527 builtin_support_vector_misalignment (mode, type, 3528 DR_MISALIGNMENT (dr), is_packed)) 3529 /* Can't software pipeline the loads, but can at least do them. */ 3530 return dr_unaligned_supported; 3531 } 3532 else 3533 { 3534 bool is_packed = false; 3535 tree type = (TREE_TYPE (DR_REF (dr))); 3536 3537 if (!known_alignment_for_access_p (dr)) 3538 { 3539 tree ba = DR_BASE_OBJECT (dr); 3540 3541 if (ba) 3542 is_packed = contains_packed_reference (ba); 3543 } 3544 3545 if (targetm.vectorize. 3546 builtin_support_vector_misalignment (mode, type, 3547 DR_MISALIGNMENT (dr), is_packed)) 3548 return dr_unaligned_supported; 3549 } 3550 3551 /* Unsupported. */ 3552 return dr_unaligned_unsupported; 3553 } 3554