;; VSX patterns.
;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])
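
;; For example, a pattern written once over VSX_D expands into one variant
;; per mode, with <MODE> replaced by V2DF or V2DI; conditional entries such
;; as (KF "FLOAT128_VECTOR_P (KFmode)") are only included when their
;; condition holds for the target.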
;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI "h")
                            (V4SI "w")
                            (V4SF "w")
                            (V2DF "d")
                            (V2DI "d")
                            (V1TI "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
                       (V8HI  "vw4")
                       (V4SI  "vw4")
                       (V4SF  "vw4")
                       (V2DF  "vd2")
                       (V2DI  "vd2")
                       (DF    "d")
                       (TF    "vd2")
                       (KF    "vd2")
                       (V1TI  "vd2")
                       (TI    "vd2")])

;; Map into the appropriate suffix based on the type
(define_mode_attr VSs [(V16QI "sp")
                       (V8HI  "sp")
                       (V4SI  "sp")
                       (V4SF  "sp")
                       (V2DF  "dp")
                       (V2DI  "dp")
                       (DF    "dp")
                       (SF    "sp")
                       (TF    "dp")
                       (KF    "dp")
                       (V1TI  "dp")
                       (TI    "dp")])

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "wf")
                       (V2DI  "wd")
                       (V2DF  "wd")
                       (DI    "wi")
                       (DF    "ws")
                       (SF    "ww")
                       (TF    "wp")
                       (KF    "wq")
                       (V1TI  "v")
                       (TI    "wt")])

;; Map the register class used for float<->int conversions (floating point side)
;; VSr2 is the preferred register class, VSr3 is any register class that will
;; hold the data
(define_mode_attr VSr2 [(V2DF "wd")
                        (V4SF "wf")
                        (DF   "ws")
                        (SF   "ww")
                        (DI   "wi")
                        (KF   "wq")
                        (TF   "wp")])

(define_mode_attr VSr3 [(V2DF "wa")
                        (V4SF "wa")
                        (DF   "ws")
                        (SF   "ww")
                        (DI   "wi")
                        (KF   "wq")
                        (TF   "wp")])

;; Map the register class for sp<->dp float conversions, destination
(define_mode_attr VSr4 [(SF   "ws")
                        (DF   "f")
                        (V2DF "wd")
                        (V4SF "v")])

;; Map the register class for sp<->dp float conversions, source
(define_mode_attr VSr5 [(SF   "ws")
                        (DF   "f")
                        (V2DF "v")
                        (V4SF "wd")])

;; The VSX register class that a type can occupy, even if it is not the
;; preferred register class (VSr is the preferred register class that will get
;; allocated first).
(define_mode_attr VSa [(V16QI "wa")
                       (V8HI  "wa")
                       (V4SI  "wa")
                       (V4SF  "wa")
                       (V2DI  "wa")
                       (V2DF  "wa")
                       (DI    "wi")
                       (DF    "ws")
                       (SF    "ww")
                       (V1TI  "wa")
                       (TI    "wt")
                       (TF    "wp")
                       (KF    "wq")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
                       (V8HI  "??r")
                       (V4SI  "??r")
                       (V4SF  "??r")
                       (V2DI  "??r")
                       (V2DF  "??r")
                       (V1TI  "??r")
                       (KF    "??r")
                       (TF    "??r")
                       (TI    "r")])
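
;; For example, "xvadd<VSs>" in a pattern iterated over VSX_F assembles as
;; xvaddsp for V4SF and xvadddp for V2DF, while a constraint written as
;; "=<VSr>" selects the preferred register class for the current mode and
;; "?<VSa>" allows any VSX register at a small extra cost.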
;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")
                      (V8HI  "W")
                      (V4SI  "W")
                      (V4SF  "W")
                      (V2DI  "W")
                      (V2DF  "W")
                      (V1TI  "W")
                      (KF    "W")
                      (TF    "W")
                      (TI    "n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF   "di")])

(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF   "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "v")
                       (V2DI  "v")
                       (V2DF  "v")
                       (V1TI  "v")
                       (DF    "s")
                       (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF   "ddiv")])

;; Appropriate type for sqrt ops.  For now, just lump the vector sqrt with
;; the scalar sqrt
(define_mode_attr VStype_sqrt [(V2DF "dsqrt")
                               (V4SF "ssqrt")
                               (DF   "dsqrt")])

;; Iterator and modes for sp<->dp conversions
;; Because scalar SF values are represented internally as double, use the
;; V4SF type to represent this rather than SF.
(define_mode_iterator VSX_SPDP [DF V4SF V2DF])

(define_mode_attr VS_spdp_res [(DF   "V4SF")
                               (V4SF "V2DF")
                               (V2DF "V4SF")])

(define_mode_attr VS_spdp_insn [(DF   "xscvdpsp")
                                (V4SF "xvcvspdp")
                                (V2DF "xvcvdpsp")])

(define_mode_attr VS_spdp_type [(DF   "fp")
                                (V4SF "vecdouble")
                                (V2DF "vecdouble")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Map register class for 64-bit element in 128-bit vector for direct moves
;; to/from gprs
(define_mode_attr VS_64dm [(V2DF "wk")
                           (V2DI "wj")])

;; Map register class for 64-bit element in 128-bit vector for normal register
;; to register moves
(define_mode_attr VS_64reg [(V2DF "ws")
                            (V2DI "wi")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI  "h")
                                     (V4SI  "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double to
;; optimize conversions to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_CVSPSXDS
   UNSPEC_VSX_CVSPUXDS
   UNSPEC_VSX_CVSXWSP
   UNSPEC_VSX_CVUXWSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_XVCVSXDDP
   UNSPEC_VSX_XVCVUXDDP
   UNSPEC_VSX_XVCVDPSXDS
   UNSPEC_VSX_XVCDPSP
   UNSPEC_VSX_XVCVDPUXDS
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVSPSXWS
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

;; VSX moves

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
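;; For example, on a little-endian Power8 target a V2DF load is emitted as
;;     lxvd2x 0,0,9          # doublewords arrive in big-endian element order
;;     xxpermdi 0,0,0,2      # swap them into little-endian element order
;; and the later swap-optimization pass removes the permute when it turns
;; out to be redundant.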
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
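;; For example, because no scratch register can be created after reload,
;; the split below swaps the source register in place, stores it, and swaps
;; it back in case a later instruction still reads it:
;;     xxpermdi 0,0,0,2
;;     stxvd2x 0,0,9
;;     xxpermdi 0,0,0,2
;; which is why the store insn above has a length of 12 (three instructions).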
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+<VSa>"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or the
;; special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "4,4,4,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+<VSa>,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
   [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed in
;; VSX registers on a little endian system.  The vector types and IEEE 128-bit
;; floating point are handled by the more generic swap elimination pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
   [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
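;; For example, the V16QI constant with all elements equal to 5 is loaded
;; with a single "xxspltib 32,5", while a V4SI splat of a small constant
;; splits into an xxspltib followed by a vextsb2w sign extension (see
;; *xxspltib_<mode>_split below).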
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
;; all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a slow
;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.
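;; For example, clearing a vector register prefers "xxspltib 32,0" (ISA
;; 3.0) or "vspltisw 0,0" (Altivec) over "xxlxor 32,32,32", since the xor
;; form reads the very register it is about to set and therefore has to
;; wait for any slow instruction that is still producing it.
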
;;              VSX store  VSX load   VSX move   VSX->GPR  GPR->VSX   LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store  GPR move  XXSPLTIB   VSPLTISW
;;              VSX 0/-1   VMX const  GPR const  LVX (VMX) STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     r,        we,        ?wQ,
                ?&r,       ??r,       ??Y,       <??r>,    wo,        v,
                ?<VSa>,    v,         <??r>,     wZ,       v")
        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     we,       r,         r,
                wQ,        Y,         r,         r,        wE,        jwM,
                ?jwM,      W,         <nW>,      v,        wZ"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,   mftgpr,    load,
                store,     load,      store,     *,        vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore, vecload")
   (set_attr "length"
               "4,         4,         4,         8,        4,         8,
                8,         8,         8,         8,        4,         4,
                4,         20,        8,         4,        4")])

;;              VSX store  VSX load   VSX move   GPR load  GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      <VSa>,     <VSa>,     ??r,      ??Y,       <??r>,
                wo,        v,         ?<VSa>,    v,        <??r>,
                wZ,        v")
        (match_operand:VSX_M 1 "input_operand"
               "<VSa>,     ZwO,       <VSa>,     Y,        r,         r,
                wE,        jwM,       ?jwM,      W,        <nW>,
                v,         wZ"))]
  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,     store,     *,
                vecsimple, vecsimple, vecsimple, *,        *,
                vecstore,  vecload")
   (set_attr "length"
               "4,         4,         4,         16,       16,        16,
                4,         4,         4,         20,       16,
                4,         4")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR)
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
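;; For example, builtins such as __builtin_vsx_lxvd2x_v2di map to these
;; patterns on little endian: the data is used in the raw element order
;; that lxvd2x produces, instead of being swapped back the way the normal
;; move patterns above do.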
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_select:V16QI
          (match_operand:V16QI 1 "memory_operand" "Z")
"memory_operand" "Z") 1394 (parallel [(const_int 15) (const_int 14) 1395 (const_int 13) (const_int 12) 1396 (const_int 11) (const_int 10) 1397 (const_int 9) (const_int 8) 1398 (const_int 7) (const_int 6) 1399 (const_int 5) (const_int 4) 1400 (const_int 3) (const_int 2) 1401 (const_int 1) (const_int 0)])))] 1402 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" 1403{ 1404 if (!TARGET_P9_VECTOR) 1405 { 1406 rtx tmp = gen_reg_rtx (V4SImode); 1407 rtx subreg, subreg2, perm[16], pcv; 1408 /* 3 is leftmost element in register */ 1409 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; 1410 int i; 1411 1412 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0); 1413 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); 1414 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0); 1415 1416 for (i = 0; i < 16; ++i) 1417 perm[i] = GEN_INT (reorder[i]); 1418 1419 pcv = force_reg (V16QImode, 1420 gen_rtx_CONST_VECTOR (V16QImode, 1421 gen_rtvec_v (16, perm))); 1422 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2, 1423 subreg2, pcv)); 1424 DONE; 1425 } 1426}) 1427 1428(define_insn "vsx_ld_elemrev_v16qi_internal" 1429 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 1430 (vec_select:V16QI 1431 (match_operand:V16QI 1 "memory_operand" "Z") 1432 (parallel [(const_int 15) (const_int 14) 1433 (const_int 13) (const_int 12) 1434 (const_int 11) (const_int 10) 1435 (const_int 9) (const_int 8) 1436 (const_int 7) (const_int 6) 1437 (const_int 5) (const_int 4) 1438 (const_int 3) (const_int 2) 1439 (const_int 1) (const_int 0)])))] 1440 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1441 "lxvb16x %x0,%y1" 1442 [(set_attr "type" "vecload")]) 1443 1444(define_insn "vsx_st_elemrev_v1ti" 1445 [(set (match_operand:V1TI 0 "memory_operand" "=Z") 1446 (vec_select:V1TI 1447 (match_operand:V1TI 1 "vsx_register_operand" "+wa") 1448 (parallel [(const_int 0)]))) 1449 (clobber (match_dup 1))] 1450 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" 1451{ 1452 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0"; 1453} 1454 [(set_attr "type" "vecstore")]) 1455 1456(define_insn "vsx_st_elemrev_v2df" 1457 [(set (match_operand:V2DF 0 "memory_operand" "=Z") 1458 (vec_select:V2DF 1459 (match_operand:V2DF 1 "vsx_register_operand" "wa") 1460 (parallel [(const_int 1) (const_int 0)])))] 1461 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" 1462 "stxvd2x %x1,%y0" 1463 [(set_attr "type" "vecstore")]) 1464 1465(define_insn "vsx_st_elemrev_v2di" 1466 [(set (match_operand:V2DI 0 "memory_operand" "=Z") 1467 (vec_select:V2DI 1468 (match_operand:V2DI 1 "vsx_register_operand" "wa") 1469 (parallel [(const_int 1) (const_int 0)])))] 1470 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" 1471 "stxvd2x %x1,%y0" 1472 [(set_attr "type" "vecstore")]) 1473 1474(define_insn "vsx_st_elemrev_v4sf" 1475 [(set (match_operand:V4SF 0 "memory_operand" "=Z") 1476 (vec_select:V4SF 1477 (match_operand:V4SF 1 "vsx_register_operand" "wa") 1478 (parallel [(const_int 3) (const_int 2) 1479 (const_int 1) (const_int 0)])))] 1480 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" 1481 "stxvw4x %x1,%y0" 1482 [(set_attr "type" "vecstore")]) 1483 1484(define_insn "vsx_st_elemrev_v4si" 1485 [(set (match_operand:V4SI 0 "memory_operand" "=Z") 1486 (vec_select:V4SI 1487 (match_operand:V4SI 1 "vsx_register_operand" "wa") 1488 (parallel [(const_int 3) (const_int 2) 1489 (const_int 1) (const_int 0)])))] 1490 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" 1491 "stxvw4x %x1,%y0" 1492 [(set_attr "type" 
"vecstore")]) 1493 1494(define_expand "vsx_st_elemrev_v8hi" 1495 [(set (match_operand:V8HI 0 "memory_operand" "=Z") 1496 (vec_select:V8HI 1497 (match_operand:V8HI 1 "vsx_register_operand" "wa") 1498 (parallel [(const_int 7) (const_int 6) 1499 (const_int 5) (const_int 4) 1500 (const_int 3) (const_int 2) 1501 (const_int 1) (const_int 0)])))] 1502 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" 1503{ 1504 if (!TARGET_P9_VECTOR) 1505 { 1506 rtx mem_subreg, subreg, perm[16], pcv; 1507 rtx tmp = gen_reg_rtx (V8HImode); 1508 /* 2 is leftmost element in register */ 1509 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; 1510 int i; 1511 1512 for (i = 0; i < 16; ++i) 1513 perm[i] = GEN_INT (reorder[i]); 1514 1515 pcv = force_reg (V16QImode, 1516 gen_rtx_CONST_VECTOR (V16QImode, 1517 gen_rtvec_v (16, perm))); 1518 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], 1519 operands[1], pcv)); 1520 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); 1521 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); 1522 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); 1523 DONE; 1524 } 1525}) 1526 1527(define_insn "*vsx_st_elemrev_v2di_internal" 1528 [(set (match_operand:V2DI 0 "memory_operand" "=Z") 1529 (vec_select:V2DI 1530 (match_operand:V2DI 1 "vsx_register_operand" "wa") 1531 (parallel [(const_int 1) (const_int 0)])))] 1532 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1533 "stxvd2x %x1,%y0" 1534 [(set_attr "type" "vecstore")]) 1535 1536(define_insn "*vsx_st_elemrev_v8hi_internal" 1537 [(set (match_operand:V8HI 0 "memory_operand" "=Z") 1538 (vec_select:V8HI 1539 (match_operand:V8HI 1 "vsx_register_operand" "wa") 1540 (parallel [(const_int 7) (const_int 6) 1541 (const_int 5) (const_int 4) 1542 (const_int 3) (const_int 2) 1543 (const_int 1) (const_int 0)])))] 1544 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1545 "stxvh8x %x1,%y0" 1546 [(set_attr "type" "vecstore")]) 1547 1548(define_expand "vsx_st_elemrev_v16qi" 1549 [(set (match_operand:V16QI 0 "memory_operand" "=Z") 1550 (vec_select:V16QI 1551 (match_operand:V16QI 1 "vsx_register_operand" "wa") 1552 (parallel [(const_int 15) (const_int 14) 1553 (const_int 13) (const_int 12) 1554 (const_int 11) (const_int 10) 1555 (const_int 9) (const_int 8) 1556 (const_int 7) (const_int 6) 1557 (const_int 5) (const_int 4) 1558 (const_int 3) (const_int 2) 1559 (const_int 1) (const_int 0)])))] 1560 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" 1561{ 1562 if (!TARGET_P9_VECTOR) 1563 { 1564 rtx mem_subreg, subreg, perm[16], pcv; 1565 rtx tmp = gen_reg_rtx (V16QImode); 1566 /* 3 is leftmost element in register */ 1567 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; 1568 int i; 1569 1570 for (i = 0; i < 16; ++i) 1571 perm[i] = GEN_INT (reorder[i]); 1572 1573 pcv = force_reg (V16QImode, 1574 gen_rtx_CONST_VECTOR (V16QImode, 1575 gen_rtvec_v (16, perm))); 1576 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1], 1577 operands[1], pcv)); 1578 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0); 1579 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0); 1580 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); 1581 DONE; 1582 } 1583}) 1584 1585(define_insn "*vsx_st_elemrev_v16qi_internal" 1586 [(set (match_operand:V16QI 0 "memory_operand" "=Z") 1587 (vec_select:V16QI 1588 (match_operand:V16QI 1 "vsx_register_operand" "wa") 1589 (parallel [(const_int 15) (const_int 14) 1590 (const_int 13) (const_int 12) 1591 
                     (const_int 11) (const_int 10)
                     (const_int 9) (const_int 8)
                     (const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "stxvb16x %x1,%y0"
  [(set_attr "type" "vecstore")])


;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                     (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
                   (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])

;; Emulate vector with scalar for vec_div in V2DImode
(define_insn_and_split "vsx_div_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_divdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (sdiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

(define_insn_and_split "vsx_udiv_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_DIVUD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op5, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op5, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op5, target);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_udivdi3 (op3, op3, op4));
  else
    {
      rtx libfunc = optab_libfunc (udiv_optab, DImode);
      rtx target = emit_library_call_value (libfunc,
                                            op3, LCT_NORMAL, DImode,
                                            op3, DImode,
                                            op4, DImode);
      emit_move_insn (op3, target);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "div")])

;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
        (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
                      (match_operand:VSX_B 2 "vsx_register_operand")]
                     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
        (gt:SI (match_dup 3)
               (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
;; *tdiv* instruction returning the FG flag
(define_expand "vsx_tdiv<mode>3_fg"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
		      (match_operand:VSX_B 2 "vsx_register_operand")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(gt:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

;; *tdiv* instruction returning the FE flag
(define_expand "vsx_tdiv<mode>3_fe"
  [(set (match_dup 3)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")
		      (match_operand:VSX_B 2 "vsx_register_operand")]
		     UNSPEC_VSX_TDIV))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(eq:SI (match_dup 3)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[3] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tdiv<mode>3_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")
		      (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,<VSa>")]
		     UNSPEC_VSX_TDIV))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tdiv<VSs> %0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fre<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_FRES))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvre<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_neg<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvneg<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_abs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_nabs<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(neg:VSX_F
	 (abs:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvnabs<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_smax<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmax<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_smin<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		    (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmin<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sqrt<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsqrt<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_sqrt>")])

(define_insn "*vsx_rsqrte<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")]
		      UNSPEC_RSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvrsqrte<VSs> %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; *tsqrt* returning the fg flag
(define_expand "vsx_tsqrt<mode>2_fg"
  [(set (match_dup 2)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(gt:SI (match_dup 2)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

;; *tsqrt* returning the fe flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(eq:SI (match_dup 2)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x,x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")]
		     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<VSs> %0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Fused vector multiply/add instructions.  Support the classical Altivec
;; versions of fma, which allow the target to be a separate register from the
;; 3 inputs.  Under VSX, the target must be either the addend or the first
;; multiply operand.
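;; The two VSX encodings differ only in which input the target must overlap
;; (a minimal sketch of the instruction semantics, using V2DF as the example):
;;
;;   xvmaddadp vt,va,vb:  vt = va * vb + vt   (target is the addend)
;;   xvmaddmdp vt,va,vb:  vt = va * vt + vb   (target is a multiplicand)
;;
;; which is why each VSX alternative below ties either operand 3 or operand 2
;; to operand 0 with a "0" constraint.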
(define_insn "*vsx_fmav4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
	(fma:V4SF
	  (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
	  (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
	  (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   xvmaddasp %x0,%x1,%x2
   xvmaddmsp %x0,%x1,%x3
   vmaddfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_fmav2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
	(fma:V2DF
	  (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
	  (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
	  (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3
   xvmaddadp %x0,%x1,%x2
   xvmaddmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

(define_insn "*vsx_fms<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
	(fma:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "%<VSr>,<VSr>,<VSa>,<VSa>")
	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
	  (neg:VSX_F
	    (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3
   xvmsuba<VSs> %x0,%x1,%x2
   xvmsubm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfma<mode>4"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,<VSr>,?<VSa>,?<VSa>")
	(neg:VSX_F
	 (fma:VSX_F
	  (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSr>,<VSa>,<VSa>")
	  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,0,<VSa>,0")
	  (match_operand:VSX_F 3 "vsx_register_operand" "0,<VSr>,0,<VSa>"))))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "@
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3
   xvnmadda<VSs> %x0,%x1,%x2
   xvnmaddm<VSs> %x0,%x1,%x3"
  [(set_attr "type" "<VStype_mul>")])

(define_insn "*vsx_nfmsv4sf4"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wf,wf,?wa,?wa,v")
	(neg:V4SF
	 (fma:V4SF
	   (match_operand:V4SF 1 "vsx_register_operand" "%wf,wf,wa,wa,v")
	   (match_operand:V4SF 2 "vsx_register_operand" "wf,0,wa,0,v")
	   (neg:V4SF
	     (match_operand:V4SF 3 "vsx_register_operand" "0,wf,0,wa,v")))))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "@
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   xvnmsubasp %x0,%x1,%x2
   xvnmsubmsp %x0,%x1,%x3
   vnmsubfp %0,%1,%2,%3"
  [(set_attr "type" "vecfloat")])

(define_insn "*vsx_nfmsv2df4"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,wd,?wa,?wa")
	(neg:V2DF
	 (fma:V2DF
	   (match_operand:V2DF 1 "vsx_register_operand" "%wd,wd,wa,wa")
	   (match_operand:V2DF 2 "vsx_register_operand" "wd,0,wa,0")
	   (neg:V2DF
	     (match_operand:V2DF 3 "vsx_register_operand" "0,wd,0,wa")))))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "@
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3
   xvnmsubadp %x0,%x1,%x2
   xvnmsubmdp %x0,%x1,%x3"
  [(set_attr "type" "vecdouble")])

;; Vector conditional expressions (no scalar version for these instructions)
(define_insn "vsx_eq<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_gt<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_ge<mode>"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")
		  (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpge<VSs> %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*vsx_eq_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(eq:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpeq<VSs>. %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_gt_<mode>_p"
  [(set (reg:CC CR6_REGNO)
	(unspec:CC
	 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>")
		 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))]
	 UNSPEC_PREDICATE))
   (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>")
	(gt:VSX_F (match_dup 1)
		  (match_dup 2)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcmpgt<VSs>. %x0,%x1,%x2"
%x0,%x1,%x2" 2053 [(set_attr "type" "<VStype_simple>")]) 2054 2055(define_insn "*vsx_ge_<mode>_p" 2056 [(set (reg:CC CR6_REGNO) 2057 (unspec:CC 2058 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,?<VSa>") 2059 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,?<VSa>"))] 2060 UNSPEC_PREDICATE)) 2061 (set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2062 (ge:VSX_F (match_dup 1) 2063 (match_dup 2)))] 2064 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2065 "xvcmpge<VSs>. %x0,%x1,%x2" 2066 [(set_attr "type" "<VStype_simple>")]) 2067 2068;; Vector select 2069(define_insn "*vsx_xxsel<mode>" 2070 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2071 (if_then_else:VSX_L 2072 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>") 2073 (match_operand:VSX_L 4 "zero_constant" "")) 2074 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>") 2075 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))] 2076 "VECTOR_MEM_VSX_P (<MODE>mode)" 2077 "xxsel %x0,%x3,%x2,%x1" 2078 [(set_attr "type" "vecmove")]) 2079 2080(define_insn "*vsx_xxsel<mode>_uns" 2081 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2082 (if_then_else:VSX_L 2083 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,<VSa>") 2084 (match_operand:VSX_L 4 "zero_constant" "")) 2085 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,<VSa>") 2086 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,<VSa>")))] 2087 "VECTOR_MEM_VSX_P (<MODE>mode)" 2088 "xxsel %x0,%x3,%x2,%x1" 2089 [(set_attr "type" "vecmove")]) 2090 2091;; Copy sign 2092(define_insn "vsx_copysign<mode>3" 2093 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2094 (unspec:VSX_F 2095 [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>") 2096 (match_operand:VSX_F 2 "vsx_register_operand" "<VSr>,<VSa>")] 2097 UNSPEC_COPYSIGN))] 2098 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2099 "xvcpsgn<VSs> %x0,%x2,%x1" 2100 [(set_attr "type" "<VStype_simple>")]) 2101 2102;; For the conversions, limit the register class for the integer value to be 2103;; the fprs because we don't want to add the altivec registers to movdi/movsi. 2104;; For the unsigned tests, there isn't a generic double -> unsigned conversion 2105;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. 2106;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md 2107;; in allowing virtual registers. 
2108(define_insn "vsx_float<VSi><mode>2" 2109 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") 2110 (float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] 2111 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2112 "xvcvsx<VSc><VSs> %x0,%x1" 2113 [(set_attr "type" "<VStype_simple>")]) 2114 2115(define_insn "vsx_floatuns<VSi><mode>2" 2116 [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=<VSr>,?<VSa>") 2117 (unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "<VSr2>,<VSr3>")))] 2118 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2119 "xvcvux<VSc><VSs> %x0,%x1" 2120 [(set_attr "type" "<VStype_simple>")]) 2121 2122(define_insn "vsx_fix_trunc<mode><VSi>2" 2123 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") 2124 (fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] 2125 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2126 "x<VSv>cv<VSs>sx<VSc>s %x0,%x1" 2127 [(set_attr "type" "<VStype_simple>")]) 2128 2129(define_insn "vsx_fixuns_trunc<mode><VSi>2" 2130 [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=<VSr2>,?<VSr3>") 2131 (unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "<VSr>,<VSa>")))] 2132 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2133 "x<VSv>cv<VSs>ux<VSc>s %x0,%x1" 2134 [(set_attr "type" "<VStype_simple>")]) 2135 2136;; Math rounding functions 2137(define_insn "vsx_x<VSv>r<VSs>i" 2138 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2139 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] 2140 UNSPEC_VSX_ROUND_I))] 2141 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2142 "x<VSv>r<VSs>i %x0,%x1" 2143 [(set_attr "type" "<VStype_simple>")]) 2144 2145(define_insn "vsx_x<VSv>r<VSs>ic" 2146 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2147 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] 2148 UNSPEC_VSX_ROUND_IC))] 2149 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2150 "x<VSv>r<VSs>ic %x0,%x1" 2151 [(set_attr "type" "<VStype_simple>")]) 2152 2153(define_insn "vsx_btrunc<mode>2" 2154 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2155 (fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")))] 2156 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2157 "xvr<VSs>iz %x0,%x1" 2158 [(set_attr "type" "<VStype_simple>")]) 2159 2160(define_insn "*vsx_b2trunc<mode>2" 2161 [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2162 (unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSa>")] 2163 UNSPEC_FRIZ))] 2164 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2165 "x<VSv>r<VSs>iz %x0,%x1" 2166 [(set_attr "type" "<VStype_simple>")]) 2167 2168(define_insn "vsx_floor<mode>2" 2169 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2170 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] 2171 UNSPEC_FRIM))] 2172 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2173 "xvr<VSs>im %x0,%x1" 2174 [(set_attr "type" "<VStype_simple>")]) 2175 2176(define_insn "vsx_ceil<mode>2" 2177 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?<VSa>") 2178 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "<VSr>,<VSa>")] 2179 UNSPEC_FRIP))] 2180 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2181 "xvr<VSs>ip %x0,%x1" 2182 [(set_attr "type" "<VStype_simple>")]) 2183 2184 2185;; VSX convert to/from double vector 2186 2187;; Convert between single and double precision 2188;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal 2189;; scalar single precision instructions internally use the double format. 
;; Prefer the altivec registers, since we likely will need to do a vperm
(define_insn "vsx_<VS_spdp_insn>"
  [(set (match_operand:<VS_spdp_res> 0 "vsx_register_operand" "=<VSr4>,?<VSa>")
	(unspec:<VS_spdp_res> [(match_operand:VSX_SPDP 1 "vsx_register_operand" "<VSr5>,<VSa>")]
			      UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "<VS_spdp_insn> %x0,%x1"
  [(set_attr "type" "<VS_spdp_type>")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=ww")
	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
		     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=ww")
	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "ws")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "ww")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvsxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVSXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_insn "vsx_xvcvuxddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(unspec:V2DF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVUXDDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit signed integer
(define_insn "vsx_xvcvdpsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVDPSXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit signed integer
(define_insn "vsx_xvcvspsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspsxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integer
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})

;; convert vector of 32-bit floating point numbers to vector of
;; 32-bit unsigned integer
(define_insn "vsx_xvcvspuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVSPSXWS))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvspuxws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdpuxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V2DF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVDPUXDS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxds %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_XVCDPSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert from 32-bit to 64-bit types
;; Provide both vector and scalar targets
(define_insn "vsx_xvcvsxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVSXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(unspec:V2DF [(match_operand:V4SI 1 "vsx_register_operand" "wf,wa")]
		     UNSPEC_VSX_CVUXWDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=ws")
	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspsxds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")]
		     UNSPEC_VSX_CVSPSXDS))]
"VECTOR_UNIT_VSX_P (V2DFmode)" 2483 "xvcvspsxds %x0,%x1" 2484 [(set_attr "type" "vecdouble")]) 2485 2486(define_insn "vsx_xvcvspuxds" 2487 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa") 2488 (unspec:V2DI [(match_operand:V4SF 1 "vsx_register_operand" "wd,wa")] 2489 UNSPEC_VSX_CVSPUXDS))] 2490 "VECTOR_UNIT_VSX_P (V2DFmode)" 2491 "xvcvspuxds %x0,%x1" 2492 [(set_attr "type" "vecdouble")]) 2493 2494(define_insn "vsx_xvcvsxwsp" 2495 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") 2496 (unspec:V4SF [(match_operand:V4SI 1 "vsx_register_operand" "wa")] 2497 UNSPEC_VSX_CVSXWSP))] 2498 "VECTOR_UNIT_VSX_P (V4SFmode)" 2499 "xvcvsxwsp %x0,%x1" 2500 [(set_attr "type" "vecfloat")]) 2501 2502(define_insn "vsx_xvcvuxwsp" 2503 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") 2504 (unspec:V4SF[(match_operand:V4SI 1 "vsx_register_operand" "wa")] 2505 UNSPEC_VSX_CVUXWSP))] 2506 "VECTOR_UNIT_VSX_P (V4SFmode)" 2507 "xvcvuxwsp %x0,%x1" 2508 [(set_attr "type" "vecfloat")]) 2509 2510;; Generate float2 double 2511;; convert two double to float 2512(define_expand "float2_v2df" 2513 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2514 (use (match_operand:V2DF 1 "register_operand" "wa")) 2515 (use (match_operand:V2DF 2 "register_operand" "wa"))] 2516 "VECTOR_UNIT_VSX_P (V4SFmode)" 2517{ 2518 rtx rtx_src1, rtx_src2, rtx_dst; 2519 2520 rtx_dst = operands[0]; 2521 rtx_src1 = operands[1]; 2522 rtx_src2 = operands[2]; 2523 2524 rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2); 2525 DONE; 2526}) 2527 2528;; Generate float2 2529;; convert two long long signed ints to float 2530(define_expand "float2_v2di" 2531 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2532 (use (match_operand:V2DI 1 "register_operand" "wa")) 2533 (use (match_operand:V2DI 2 "register_operand" "wa"))] 2534 "VECTOR_UNIT_VSX_P (V4SFmode)" 2535{ 2536 rtx rtx_src1, rtx_src2, rtx_dst; 2537 2538 rtx_dst = operands[0]; 2539 rtx_src1 = operands[1]; 2540 rtx_src2 = operands[2]; 2541 2542 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2); 2543 DONE; 2544}) 2545 2546;; Generate uns_float2 2547;; convert two long long unsigned ints to float 2548(define_expand "uns_float2_v2di" 2549 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2550 (use (match_operand:V2DI 1 "register_operand" "wa")) 2551 (use (match_operand:V2DI 2 "register_operand" "wa"))] 2552 "VECTOR_UNIT_VSX_P (V4SFmode)" 2553{ 2554 rtx rtx_src1, rtx_src2, rtx_dst; 2555 2556 rtx_dst = operands[0]; 2557 rtx_src1 = operands[1]; 2558 rtx_src2 = operands[2]; 2559 2560 rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2); 2561 DONE; 2562}) 2563 2564;; Generate floate 2565;; convert double or long long signed to float 2566;; (Only even words are valid, BE numbering) 2567(define_expand "floate<mode>" 2568 [(use (match_operand:V4SF 0 "register_operand" "=wa")) 2569 (use (match_operand:VSX_D 1 "register_operand" "wa"))] 2570 "VECTOR_UNIT_VSX_P (V4SFmode)" 2571{ 2572 if (BYTES_BIG_ENDIAN) 2573 { 2574 /* Shift left one word to put even word correct location */ 2575 rtx rtx_tmp; 2576 rtx rtx_val = GEN_INT (4); 2577 2578 rtx_tmp = gen_reg_rtx (V4SFmode); 2579 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1])); 2580 emit_insn (gen_altivec_vsldoi_v4sf (operands[0], 2581 rtx_tmp, rtx_tmp, rtx_val)); 2582 } 2583 else 2584 emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1])); 2585 2586 DONE; 2587}) 2588 2589;; Generate uns_floate 2590;; convert long long unsigned to float 2591;; (Only even words are valid, BE 
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct
	 location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
					  rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));

  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
	 location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
					  rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct
	 location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
					  rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
		 (match_operand:V2DF 2 "register_operand" "wa")]
		UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
	(match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word
	 0 1 2 3    0 1 2 3  =>  1 2 3 0.
	 Words 1 and 3 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  DONE;
})

;; Generate vsignede_v2df
;; signed double float to int even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words
	 0 1 2 3    0 1 2 3  =>  3 0 1 2.
	 Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
		 (match_operand:V2DF 2 "register_operand" "v")]
		UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word
	 0 1 2 3    0 1 2 3  =>  1 2 3 0.
	 Words 1 and 3 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  DONE;
})

;; Generate vunsignede_v2df
;; unsigned double float to int even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words
	 0 1 2 3    0 1 2 3  =>  3 0 1 2.
	 Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  DONE;
})

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wd,?wa")
	(float:V2DF
	 (fix:V2DI
	  (match_operand:V2DF 1 "vsx_register_operand" "wd,?wa"))))]
  "TARGET_HARD_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")])


;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
	    ? "xxpermdi %x0,%x1,%x2,0"
	    : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
	    ? "mtvsrdd %x0,%1,%2"
	    : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
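;; As a reminder of the encoding these patterns compute (a sketch of the
;; xxpermdi immediate in BE doubleword numbering, not additional patterns):
;;
;;   xxpermdi xt,xa,xb,DM:  xt.dw[0] = xa.dw[DM >> 1]
;;                          xt.dw[1] = xb.dw[DM & 1]
;;
;; so DM = 0 glues the two high doublewords together (the concat above),
;; and DM = 2 with xa == xb swaps the two halves of a register.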
2871(define_insn "*vsx_concat_<mode>_1" 2872 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 2873 (vec_concat:VSX_D 2874 (vec_select:<VS_scalar> 2875 (match_operand:VSX_D 1 "gpc_reg_operand" "wa") 2876 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) 2877 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))] 2878 "VECTOR_MEM_VSX_P (<MODE>mode)" 2879{ 2880 HOST_WIDE_INT dword = INTVAL (operands[2]); 2881 if (BYTES_BIG_ENDIAN) 2882 { 2883 operands[4] = GEN_INT (2*dword); 2884 return "xxpermdi %x0,%x1,%x3,%4"; 2885 } 2886 else 2887 { 2888 operands[4] = GEN_INT (!dword); 2889 return "xxpermdi %x0,%x3,%x1,%4"; 2890 } 2891} 2892 [(set_attr "type" "vecperm")]) 2893 2894(define_insn "*vsx_concat_<mode>_2" 2895 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 2896 (vec_concat:VSX_D 2897 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa") 2898 (vec_select:<VS_scalar> 2899 (match_operand:VSX_D 2 "gpc_reg_operand" "wa") 2900 (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))] 2901 "VECTOR_MEM_VSX_P (<MODE>mode)" 2902{ 2903 HOST_WIDE_INT dword = INTVAL (operands[3]); 2904 if (BYTES_BIG_ENDIAN) 2905 { 2906 operands[4] = GEN_INT (dword); 2907 return "xxpermdi %x0,%x1,%x2,%4"; 2908 } 2909 else 2910 { 2911 operands[4] = GEN_INT (2 * !dword); 2912 return "xxpermdi %x0,%x2,%x1,%4"; 2913 } 2914} 2915 [(set_attr "type" "vecperm")]) 2916 2917(define_insn "*vsx_concat_<mode>_3" 2918 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 2919 (vec_concat:VSX_D 2920 (vec_select:<VS_scalar> 2921 (match_operand:VSX_D 1 "gpc_reg_operand" "wa") 2922 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")])) 2923 (vec_select:<VS_scalar> 2924 (match_operand:VSX_D 3 "gpc_reg_operand" "wa") 2925 (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))] 2926 "VECTOR_MEM_VSX_P (<MODE>mode)" 2927{ 2928 HOST_WIDE_INT dword1 = INTVAL (operands[2]); 2929 HOST_WIDE_INT dword2 = INTVAL (operands[4]); 2930 if (BYTES_BIG_ENDIAN) 2931 { 2932 operands[5] = GEN_INT ((2 * dword1) + dword2); 2933 return "xxpermdi %x0,%x1,%x3,%5"; 2934 } 2935 else 2936 { 2937 operands[5] = GEN_INT ((2 * !dword2) + !dword1); 2938 return "xxpermdi %x0,%x3,%x1,%5"; 2939 } 2940} 2941 [(set_attr "type" "vecperm")]) 2942 2943;; Special purpose concat using xxpermdi to glue two single precision values 2944;; together, relying on the fact that internally scalar floats are represented 2945;; as doubles. This is used to initialize a V4SF vector with 4 floats 2946(define_insn "vsx_concat_v2sf" 2947 [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa") 2948 (unspec:V2DF 2949 [(match_operand:SF 1 "vsx_register_operand" "ww") 2950 (match_operand:SF 2 "vsx_register_operand" "ww")] 2951 UNSPEC_VSX_CONCAT))] 2952 "VECTOR_MEM_VSX_P (V2DFmode)" 2953{ 2954 if (BYTES_BIG_ENDIAN) 2955 return "xxpermdi %x0,%x1,%x2,0"; 2956 else 2957 return "xxpermdi %x0,%x2,%x1,0"; 2958} 2959 [(set_attr "type" "vecperm")]) 2960 2961;; Concatenate 4 SImode elements into a V4SImode reg. 
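;; The expander below has no single machine operation; it builds two 64-bit
;; halves in GPRs and glues them with vsx_concat_v2di.  Roughly, under big
;; endian (a C-level sketch with illustrative names, not generated code):
;;
;;   unsigned long long ab = ((unsigned long long) a << 32) | b;
;;   unsigned long long cd = ((unsigned long long) c << 32) | d;
;;   v4si = (vector int) (vector long long) { ab, cd };
;;
;; Little endian swaps each pair first, so the same shift/or sequence still
;; produces the correct element order.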
2962(define_expand "vsx_init_v4si" 2963 [(use (match_operand:V4SI 0 "gpc_reg_operand")) 2964 (use (match_operand:SI 1 "gpc_reg_operand")) 2965 (use (match_operand:SI 2 "gpc_reg_operand")) 2966 (use (match_operand:SI 3 "gpc_reg_operand")) 2967 (use (match_operand:SI 4 "gpc_reg_operand"))] 2968 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" 2969{ 2970 rtx a = gen_reg_rtx (DImode); 2971 rtx b = gen_reg_rtx (DImode); 2972 rtx c = gen_reg_rtx (DImode); 2973 rtx d = gen_reg_rtx (DImode); 2974 emit_insn (gen_zero_extendsidi2 (a, operands[1])); 2975 emit_insn (gen_zero_extendsidi2 (b, operands[2])); 2976 emit_insn (gen_zero_extendsidi2 (c, operands[3])); 2977 emit_insn (gen_zero_extendsidi2 (d, operands[4])); 2978 if (!BYTES_BIG_ENDIAN) 2979 { 2980 std::swap (a, b); 2981 std::swap (c, d); 2982 } 2983 2984 rtx aa = gen_reg_rtx (DImode); 2985 rtx ab = gen_reg_rtx (DImode); 2986 rtx cc = gen_reg_rtx (DImode); 2987 rtx cd = gen_reg_rtx (DImode); 2988 emit_insn (gen_ashldi3 (aa, a, GEN_INT (32))); 2989 emit_insn (gen_ashldi3 (cc, c, GEN_INT (32))); 2990 emit_insn (gen_iordi3 (ab, aa, b)); 2991 emit_insn (gen_iordi3 (cd, cc, d)); 2992 2993 rtx abcd = gen_reg_rtx (V2DImode); 2994 emit_insn (gen_vsx_concat_v2di (abcd, ab, cd)); 2995 emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd)); 2996 DONE; 2997}) 2998 2999;; xxpermdi for little endian loads and stores. We need several of 3000;; these since the form of the PARALLEL differs by mode. 3001(define_insn "*vsx_xxpermdi2_le_<mode>" 3002 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>") 3003 (vec_select:VSX_D 3004 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>") 3005 (parallel [(const_int 1) (const_int 0)])))] 3006 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" 3007 "xxpermdi %x0,%x1,%x1,2" 3008 [(set_attr "type" "vecperm")]) 3009 3010(define_insn "*vsx_xxpermdi4_le_<mode>" 3011 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") 3012 (vec_select:VSX_W 3013 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") 3014 (parallel [(const_int 2) (const_int 3) 3015 (const_int 0) (const_int 1)])))] 3016 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)" 3017 "xxpermdi %x0,%x1,%x1,2" 3018 [(set_attr "type" "vecperm")]) 3019 3020(define_insn "*vsx_xxpermdi8_le_V8HI" 3021 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") 3022 (vec_select:V8HI 3023 (match_operand:V8HI 1 "vsx_register_operand" "wa") 3024 (parallel [(const_int 4) (const_int 5) 3025 (const_int 6) (const_int 7) 3026 (const_int 0) (const_int 1) 3027 (const_int 2) (const_int 3)])))] 3028 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode)" 3029 "xxpermdi %x0,%x1,%x1,2" 3030 [(set_attr "type" "vecperm")]) 3031 3032(define_insn "*vsx_xxpermdi16_le_V16QI" 3033 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 3034 (vec_select:V16QI 3035 (match_operand:V16QI 1 "vsx_register_operand" "wa") 3036 (parallel [(const_int 8) (const_int 9) 3037 (const_int 10) (const_int 11) 3038 (const_int 12) (const_int 13) 3039 (const_int 14) (const_int 15) 3040 (const_int 0) (const_int 1) 3041 (const_int 2) (const_int 3) 3042 (const_int 4) (const_int 5) 3043 (const_int 6) (const_int 7)])))] 3044 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode)" 3045 "xxpermdi %x0,%x1,%x1,2" 3046 [(set_attr "type" "vecperm")]) 3047 3048;; lxvd2x for little endian loads. We need several of 3049;; these since the form of the PARALLEL differs by mode. 
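;; On a little endian target, lxvd2x loads the two doublewords in BE
;; (element 0 first) order, so the doubleword swap modeled by the
;; vec_selects below restores the expected LE element order.  A full LE
;; vector load therefore expands to roughly (a sketch, not a pattern):
;;
;;   lxvd2x   vx,0,rb       ; doublewords arrive swapped
;;   xxpermdi vx,vx,vx,2    ; swap them back into LE order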
3050(define_insn "*vsx_lxvd2x2_le_<mode>" 3051 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSa>") 3052 (vec_select:VSX_D 3053 (match_operand:VSX_D 1 "memory_operand" "Z") 3054 (parallel [(const_int 1) (const_int 0)])))] 3055 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3056 "lxvd2x %x0,%y1" 3057 [(set_attr "type" "vecload")]) 3058 3059(define_insn "*vsx_lxvd2x4_le_<mode>" 3060 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") 3061 (vec_select:VSX_W 3062 (match_operand:VSX_W 1 "memory_operand" "Z") 3063 (parallel [(const_int 2) (const_int 3) 3064 (const_int 0) (const_int 1)])))] 3065 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3066 "lxvd2x %x0,%y1" 3067 [(set_attr "type" "vecload")]) 3068 3069(define_insn "*vsx_lxvd2x8_le_V8HI" 3070 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") 3071 (vec_select:V8HI 3072 (match_operand:V8HI 1 "memory_operand" "Z") 3073 (parallel [(const_int 4) (const_int 5) 3074 (const_int 6) (const_int 7) 3075 (const_int 0) (const_int 1) 3076 (const_int 2) (const_int 3)])))] 3077 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" 3078 "lxvd2x %x0,%y1" 3079 [(set_attr "type" "vecload")]) 3080 3081(define_insn "*vsx_lxvd2x16_le_V16QI" 3082 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 3083 (vec_select:V16QI 3084 (match_operand:V16QI 1 "memory_operand" "Z") 3085 (parallel [(const_int 8) (const_int 9) 3086 (const_int 10) (const_int 11) 3087 (const_int 12) (const_int 13) 3088 (const_int 14) (const_int 15) 3089 (const_int 0) (const_int 1) 3090 (const_int 2) (const_int 3) 3091 (const_int 4) (const_int 5) 3092 (const_int 6) (const_int 7)])))] 3093 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" 3094 "lxvd2x %x0,%y1" 3095 [(set_attr "type" "vecload")]) 3096 3097;; stxvd2x for little endian stores. We need several of 3098;; these since the form of the PARALLEL differs by mode. 
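;; The store side mirrors the loads: the doubleword swap happens in a
;; register before stxvd2x writes the vector out, e.g. (sketch only):
;;
;;   xxpermdi vt,vs,vs,2    ; swap into BE doubleword order
;;   stxvd2x  vt,0,rb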
3099(define_insn "*vsx_stxvd2x2_le_<mode>" 3100 [(set (match_operand:VSX_D 0 "memory_operand" "=Z") 3101 (vec_select:VSX_D 3102 (match_operand:VSX_D 1 "vsx_register_operand" "<VSa>") 3103 (parallel [(const_int 1) (const_int 0)])))] 3104 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3105 "stxvd2x %x1,%y0" 3106 [(set_attr "type" "vecstore")]) 3107 3108(define_insn "*vsx_stxvd2x4_le_<mode>" 3109 [(set (match_operand:VSX_W 0 "memory_operand" "=Z") 3110 (vec_select:VSX_W 3111 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") 3112 (parallel [(const_int 2) (const_int 3) 3113 (const_int 0) (const_int 1)])))] 3114 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3115 "stxvd2x %x1,%y0" 3116 [(set_attr "type" "vecstore")]) 3117 3118(define_insn "*vsx_stxvd2x8_le_V8HI" 3119 [(set (match_operand:V8HI 0 "memory_operand" "=Z") 3120 (vec_select:V8HI 3121 (match_operand:V8HI 1 "vsx_register_operand" "wa") 3122 (parallel [(const_int 4) (const_int 5) 3123 (const_int 6) (const_int 7) 3124 (const_int 0) (const_int 1) 3125 (const_int 2) (const_int 3)])))] 3126 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" 3127 "stxvd2x %x1,%y0" 3128 [(set_attr "type" "vecstore")]) 3129 3130(define_insn "*vsx_stxvd2x16_le_V16QI" 3131 [(set (match_operand:V16QI 0 "memory_operand" "=Z") 3132 (vec_select:V16QI 3133 (match_operand:V16QI 1 "vsx_register_operand" "wa") 3134 (parallel [(const_int 8) (const_int 9) 3135 (const_int 10) (const_int 11) 3136 (const_int 12) (const_int 13) 3137 (const_int 14) (const_int 15) 3138 (const_int 0) (const_int 1) 3139 (const_int 2) (const_int 3) 3140 (const_int 4) (const_int 5) 3141 (const_int 6) (const_int 7)])))] 3142 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" 3143 "stxvd2x %x1,%y0" 3144 [(set_attr "type" "vecstore")]) 3145 3146;; Convert a TImode value into V1TImode 3147(define_expand "vsx_set_v1ti" 3148 [(match_operand:V1TI 0 "nonimmediate_operand") 3149 (match_operand:V1TI 1 "nonimmediate_operand") 3150 (match_operand:TI 2 "input_operand") 3151 (match_operand:QI 3 "u5bit_cint_operand")] 3152 "VECTOR_MEM_VSX_P (V1TImode)" 3153{ 3154 if (operands[3] != const0_rtx) 3155 gcc_unreachable (); 3156 3157 emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1])); 3158 DONE; 3159}) 3160 3161;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT 3162(define_expand "vsx_set_<mode>" 3163 [(use (match_operand:VSX_D 0 "vsx_register_operand")) 3164 (use (match_operand:VSX_D 1 "vsx_register_operand")) 3165 (use (match_operand:<VS_scalar> 2 "gpc_reg_operand")) 3166 (use (match_operand:QI 3 "const_0_to_1_operand"))] 3167 "VECTOR_MEM_VSX_P (<MODE>mode)" 3168{ 3169 rtx dest = operands[0]; 3170 rtx vec_reg = operands[1]; 3171 rtx value = operands[2]; 3172 rtx ele = operands[3]; 3173 rtx tmp = gen_reg_rtx (<VS_scalar>mode); 3174 3175 if (ele == const0_rtx) 3176 { 3177 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx)); 3178 emit_insn (gen_vsx_concat_<mode> (dest, value, tmp)); 3179 DONE; 3180 } 3181 else if (ele == const1_rtx) 3182 { 3183 emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx)); 3184 emit_insn (gen_vsx_concat_<mode> (dest, tmp, value)); 3185 DONE; 3186 } 3187 else 3188 gcc_unreachable (); 3189}) 3190 3191;; Extract a DF/DI element from V2DF/V2DI 3192;; Optimize cases were we can do a simple or direct move. 3193;; Or see if we can avoid doing the move at all 3194 3195;; There are some unresolved problems with reload that show up if an Altivec 3196;; register was picked. 
;; Limit the scalar value to FPRs for now.

(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand"
	    "=d, d, wr, wr")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "gpc_reg_operand"
	    "<VSa>, <VSa>, wm, wo")
	 (parallel
	  [(match_operand:QI 2 "const_0_to_1_operand"
	    "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
	return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
	       && TARGET_POWERPC64)
	return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
	return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
	return "xxlor %x0,%x1,%x1";

      else
	gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
	fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")])

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=<VSX_D:VS_64reg>,wr")
	(vec_select:<VSX_D:VS_scalar>
	 (match_operand:VSX_D 1 "memory_operand" "m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
					   operands[2], operands[3],
					   <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is already in the right
;; location to memory.
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "register_operand" "d,wv,wb")
	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
			    UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])

;; Variable V2DI/V2DF extract
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v,<VSa>,r")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "input_operand" "v,m,m")
"v,m,m") 3299 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] 3300 UNSPEC_VSX_EXTRACT)) 3301 (clobber (match_scratch:DI 3 "=r,&b,&b")) 3302 (clobber (match_scratch:V2DI 4 "=&v,X,X"))] 3303 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3304 "#" 3305 "&& reload_completed" 3306 [(const_int 0)] 3307{ 3308 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], 3309 operands[3], operands[4]); 3310 DONE; 3311}) 3312 3313;; Extract a SF element from V4SF 3314(define_insn_and_split "vsx_extract_v4sf" 3315 [(set (match_operand:SF 0 "vsx_register_operand" "=ww") 3316 (vec_select:SF 3317 (match_operand:V4SF 1 "vsx_register_operand" "wa") 3318 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")]))) 3319 (clobber (match_scratch:V4SF 3 "=0"))] 3320 "VECTOR_UNIT_VSX_P (V4SFmode)" 3321 "#" 3322 "&& 1" 3323 [(const_int 0)] 3324{ 3325 rtx op0 = operands[0]; 3326 rtx op1 = operands[1]; 3327 rtx op2 = operands[2]; 3328 rtx op3 = operands[3]; 3329 rtx tmp; 3330 HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2); 3331 3332 if (ele == 0) 3333 tmp = op1; 3334 else 3335 { 3336 if (GET_CODE (op3) == SCRATCH) 3337 op3 = gen_reg_rtx (V4SFmode); 3338 emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele))); 3339 tmp = op3; 3340 } 3341 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp)); 3342 DONE; 3343} 3344 [(set_attr "length" "8") 3345 (set_attr "type" "fp")]) 3346 3347(define_insn_and_split "*vsx_extract_v4sf_<mode>_load" 3348 [(set (match_operand:SF 0 "register_operand" "=f,wv,wb,?r") 3349 (vec_select:SF 3350 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m") 3351 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")]))) 3352 (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))] 3353 "VECTOR_MEM_VSX_P (V4SFmode)" 3354 "#" 3355 "&& reload_completed" 3356 [(set (match_dup 0) (match_dup 4))] 3357{ 3358 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], 3359 operands[3], SFmode); 3360} 3361 [(set_attr "type" "fpload,fpload,fpload,load") 3362 (set_attr "length" "8")]) 3363 3364;; Variable V4SF extract 3365(define_insn_and_split "vsx_extract_v4sf_var" 3366 [(set (match_operand:SF 0 "gpc_reg_operand" "=ww,ww,?r") 3367 (unspec:SF [(match_operand:V4SF 1 "input_operand" "v,m,m") 3368 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] 3369 UNSPEC_VSX_EXTRACT)) 3370 (clobber (match_scratch:DI 3 "=r,&b,&b")) 3371 (clobber (match_scratch:V2DI 4 "=&v,X,X"))] 3372 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" 3373 "#" 3374 "&& reload_completed" 3375 [(const_int 0)] 3376{ 3377 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], 3378 operands[3], operands[4]); 3379 DONE; 3380}) 3381 3382;; Expand the builtin form of xxpermdi to canonical rtl. 
3383(define_expand "vsx_xxpermdi_<mode>" 3384 [(match_operand:VSX_L 0 "vsx_register_operand") 3385 (match_operand:VSX_L 1 "vsx_register_operand") 3386 (match_operand:VSX_L 2 "vsx_register_operand") 3387 (match_operand:QI 3 "u5bit_cint_operand")] 3388 "VECTOR_MEM_VSX_P (<MODE>mode)" 3389{ 3390 rtx target = operands[0]; 3391 rtx op0 = operands[1]; 3392 rtx op1 = operands[2]; 3393 int mask = INTVAL (operands[3]); 3394 rtx perm0 = GEN_INT ((mask >> 1) & 1); 3395 rtx perm1 = GEN_INT ((mask & 1) + 2); 3396 rtx (*gen) (rtx, rtx, rtx, rtx, rtx); 3397 3398 if (<MODE>mode == V2DFmode) 3399 gen = gen_vsx_xxpermdi2_v2df_1; 3400 else 3401 { 3402 gen = gen_vsx_xxpermdi2_v2di_1; 3403 if (<MODE>mode != V2DImode) 3404 { 3405 target = gen_lowpart (V2DImode, target); 3406 op0 = gen_lowpart (V2DImode, op0); 3407 op1 = gen_lowpart (V2DImode, op1); 3408 } 3409 } 3410 emit_insn (gen (target, op0, op1, perm0, perm1)); 3411 DONE; 3412}) 3413 3414;; Special version of xxpermdi that retains big-endian semantics. 3415(define_expand "vsx_xxpermdi_<mode>_be" 3416 [(match_operand:VSX_L 0 "vsx_register_operand") 3417 (match_operand:VSX_L 1 "vsx_register_operand") 3418 (match_operand:VSX_L 2 "vsx_register_operand") 3419 (match_operand:QI 3 "u5bit_cint_operand")] 3420 "VECTOR_MEM_VSX_P (<MODE>mode)" 3421{ 3422 rtx target = operands[0]; 3423 rtx op0 = operands[1]; 3424 rtx op1 = operands[2]; 3425 int mask = INTVAL (operands[3]); 3426 rtx perm0 = GEN_INT ((mask >> 1) & 1); 3427 rtx perm1 = GEN_INT ((mask & 1) + 2); 3428 rtx (*gen) (rtx, rtx, rtx, rtx, rtx); 3429 3430 if (<MODE>mode == V2DFmode) 3431 gen = gen_vsx_xxpermdi2_v2df_1; 3432 else 3433 { 3434 gen = gen_vsx_xxpermdi2_v2di_1; 3435 if (<MODE>mode != V2DImode) 3436 { 3437 target = gen_lowpart (V2DImode, target); 3438 op0 = gen_lowpart (V2DImode, op0); 3439 op1 = gen_lowpart (V2DImode, op1); 3440 } 3441 } 3442 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a 3443 transformation we don't want; it is necessary for 3444 rs6000_expand_vec_perm_const_1 but not for this use. So we 3445 prepare for that by reversing the transformation here. */ 3446 if (BYTES_BIG_ENDIAN) 3447 emit_insn (gen (target, op0, op1, perm0, perm1)); 3448 else 3449 { 3450 rtx p0 = GEN_INT (3 - INTVAL (perm1)); 3451 rtx p1 = GEN_INT (3 - INTVAL (perm0)); 3452 emit_insn (gen (target, op1, op0, p0, p1)); 3453 } 3454 DONE; 3455}) 3456 3457(define_insn "vsx_xxpermdi2_<mode>_1" 3458 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd") 3459 (vec_select:VSX_D 3460 (vec_concat:<VS_double> 3461 (match_operand:VSX_D 1 "vsx_register_operand" "wd") 3462 (match_operand:VSX_D 2 "vsx_register_operand" "wd")) 3463 (parallel [(match_operand 3 "const_0_to_1_operand" "") 3464 (match_operand 4 "const_2_to_3_operand" "")])))] 3465 "VECTOR_MEM_VSX_P (<MODE>mode)" 3466{ 3467 int op3, op4, mask; 3468 3469 /* For little endian, swap operands and invert/swap selectors 3470 to get the correct xxpermdi. The operand swap sets up the 3471 inputs as a little endian array. The selectors are swapped 3472 because they are defined to use big endian ordering. The 3473 selectors are inverted to get the correct doublewords for 3474 little endian ordering. 
*/ 3475 if (BYTES_BIG_ENDIAN) 3476 { 3477 op3 = INTVAL (operands[3]); 3478 op4 = INTVAL (operands[4]); 3479 } 3480 else 3481 { 3482 op3 = 3 - INTVAL (operands[4]); 3483 op4 = 3 - INTVAL (operands[3]); 3484 } 3485 3486 mask = (op3 << 1) | (op4 - 2); 3487 operands[3] = GEN_INT (mask); 3488 3489 if (BYTES_BIG_ENDIAN) 3490 return "xxpermdi %x0,%x1,%x2,%3"; 3491 else 3492 return "xxpermdi %x0,%x2,%x1,%3"; 3493} 3494 [(set_attr "type" "vecperm")]) 3495 3496;; Extraction of a single element in a small integer vector. Until ISA 3.0, 3497;; none of the small types were allowed in a vector register, so we had to 3498;; extract to a DImode and either do a direct move or store. 3499(define_expand "vsx_extract_<mode>" 3500 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand") 3501 (vec_select:<VS_scalar> 3502 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") 3503 (parallel [(match_operand:QI 2 "const_int_operand")]))) 3504 (clobber (match_scratch:VSX_EXTRACT_I 3))])] 3505 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3506{ 3507 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ 3508 if (TARGET_P9_VECTOR) 3509 { 3510 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1], 3511 operands[2])); 3512 DONE; 3513 } 3514}) 3515 3516(define_insn "vsx_extract_<mode>_p9" 3517 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>") 3518 (vec_select:<VS_scalar> 3519 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") 3520 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) 3521 (clobber (match_scratch:SI 3 "=r,X"))] 3522 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" 3523{ 3524 if (which_alternative == 0) 3525 return "#"; 3526 3527 else 3528 { 3529 HOST_WIDE_INT elt = INTVAL (operands[2]); 3530 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN 3531 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt 3532 : elt); 3533 3534 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); 3535 HOST_WIDE_INT offset = unit_size * elt_adj; 3536 3537 operands[2] = GEN_INT (offset); 3538 if (unit_size == 4) 3539 return "xxextractuw %x0,%x1,%2"; 3540 else 3541 return "vextractu<wd> %0,%1,%2"; 3542 } 3543} 3544 [(set_attr "type" "vecsimple")]) 3545 3546(define_split 3547 [(set (match_operand:<VS_scalar> 0 "int_reg_operand") 3548 (vec_select:<VS_scalar> 3549 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") 3550 (parallel [(match_operand:QI 2 "const_int_operand")]))) 3551 (clobber (match_operand:SI 3 "int_reg_operand"))] 3552 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed" 3553 [(const_int 0)] 3554{ 3555 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); 3556 rtx op1 = operands[1]; 3557 rtx op2 = operands[2]; 3558 rtx op3 = operands[3]; 3559 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode); 3560 3561 emit_move_insn (op3, GEN_INT (offset)); 3562 if (BYTES_BIG_ENDIAN) 3563 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1)); 3564 else 3565 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1)); 3566 DONE; 3567}) 3568 3569;; Optimize zero extracts to eliminate the AND after the extract. 
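;; That is, fold the zero_extend of the element into the extract itself, as
;; in (illustrative C)
;;   unsigned long long f (vector unsigned char v) { return vec_extract (v, 5); }
;; where vextractub/vextractuh/xxextractuw already leave the element zero
;; extended, so no separate masking (AND) instruction should be needed
;; after the extract.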
3570(define_insn_and_split "*vsx_extract_<mode>_di_p9" 3571 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>") 3572 (zero_extend:DI 3573 (vec_select:<VS_scalar> 3574 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "wK,<VSX_EX>") 3575 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) 3576 (clobber (match_scratch:SI 3 "=r,X"))] 3577 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" 3578 "#" 3579 "&& reload_completed" 3580 [(parallel [(set (match_dup 4) 3581 (vec_select:<VS_scalar> 3582 (match_dup 1) 3583 (parallel [(match_dup 2)]))) 3584 (clobber (match_dup 3))])] 3585{ 3586 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0])); 3587}) 3588 3589;; Optimize stores to use the ISA 3.0 scalar store instructions 3590(define_insn_and_split "*vsx_extract_<mode>_store_p9" 3591 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m") 3592 (vec_select:<VS_scalar> 3593 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v") 3594 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) 3595 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r")) 3596 (clobber (match_scratch:SI 4 "=X,&r"))] 3597 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" 3598 "#" 3599 "&& reload_completed" 3600 [(parallel [(set (match_dup 3) 3601 (vec_select:<VS_scalar> 3602 (match_dup 1) 3603 (parallel [(match_dup 2)]))) 3604 (clobber (match_dup 4))]) 3605 (set (match_dup 0) 3606 (match_dup 3))]) 3607 3608(define_insn_and_split "*vsx_extract_si" 3609 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wHwI,Z") 3610 (vec_select:SI 3611 (match_operand:V4SI 1 "gpc_reg_operand" "wJv,wJv,wJv") 3612 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) 3613 (clobber (match_scratch:V4SI 3 "=wJv,wJv,wJv"))] 3614 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR" 3615 "#" 3616 "&& reload_completed" 3617 [(const_int 0)] 3618{ 3619 rtx dest = operands[0]; 3620 rtx src = operands[1]; 3621 rtx element = operands[2]; 3622 rtx vec_tmp = operands[3]; 3623 int value; 3624 3625 if (!BYTES_BIG_ENDIAN) 3626 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); 3627 3628 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3629 instruction. 
*/ 3630 value = INTVAL (element); 3631 if (value != 1) 3632 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); 3633 else 3634 vec_tmp = src; 3635 3636 if (MEM_P (operands[0])) 3637 { 3638 if (can_create_pseudo_p ()) 3639 dest = rs6000_force_indexed_or_indirect_mem (dest); 3640 3641 if (TARGET_P8_VECTOR) 3642 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); 3643 else 3644 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp)))); 3645 } 3646 3647 else if (TARGET_P8_VECTOR) 3648 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); 3649 else 3650 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), 3651 gen_rtx_REG (DImode, REGNO (vec_tmp))); 3652 3653 DONE; 3654} 3655 [(set_attr "type" "mftgpr,vecperm,fpstore") 3656 (set_attr "length" "8")]) 3657 3658(define_insn_and_split "*vsx_extract_<mode>_p8" 3659 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r") 3660 (vec_select:<VS_scalar> 3661 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v") 3662 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) 3663 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))] 3664 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT 3665 && !TARGET_P9_VECTOR" 3666 "#" 3667 "&& reload_completed" 3668 [(const_int 0)] 3669{ 3670 rtx dest = operands[0]; 3671 rtx src = operands[1]; 3672 rtx element = operands[2]; 3673 rtx vec_tmp = operands[3]; 3674 int value; 3675 3676 if (!BYTES_BIG_ENDIAN) 3677 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element)); 3678 3679 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3680 instruction. */ 3681 value = INTVAL (element); 3682 if (<MODE>mode == V16QImode) 3683 { 3684 if (value != 7) 3685 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element)); 3686 else 3687 vec_tmp = src; 3688 } 3689 else if (<MODE>mode == V8HImode) 3690 { 3691 if (value != 3) 3692 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element)); 3693 else 3694 vec_tmp = src; 3695 } 3696 else 3697 gcc_unreachable (); 3698 3699 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), 3700 gen_rtx_REG (DImode, REGNO (vec_tmp))); 3701 DONE; 3702} 3703 [(set_attr "type" "mftgpr")]) 3704 3705;; Optimize extracting a single scalar element from memory. 
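;; Rather than loading the whole vector and extracting in registers, use
;; rs6000_adjust_vec_address to point at the element itself (accounting for
;; the endian element numbering), so that something like
;;   short f (vector short *p) { return vec_extract (*p, 2); }
;; can become a single scalar load from the element's address.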
3706(define_insn_and_split "*vsx_extract_<mode>_load" 3707 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r") 3708 (vec_select:<VS_scalar> 3709 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m") 3710 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) 3711 (clobber (match_scratch:DI 3 "=&b"))] 3712 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3713 "#" 3714 "&& reload_completed" 3715 [(set (match_dup 0) (match_dup 4))] 3716{ 3717 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], 3718 operands[3], <VS_scalar>mode); 3719} 3720 [(set_attr "type" "load") 3721 (set_attr "length" "8")]) 3722 3723;; Variable V16QI/V8HI/V4SI extract 3724(define_insn_and_split "vsx_extract_<mode>_var" 3725 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r,r") 3726 (unspec:<VS_scalar> 3727 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") 3728 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] 3729 UNSPEC_VSX_EXTRACT)) 3730 (clobber (match_scratch:DI 3 "=r,r,&b")) 3731 (clobber (match_scratch:V2DI 4 "=X,&v,X"))] 3732 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3733 "#" 3734 "&& reload_completed" 3735 [(const_int 0)] 3736{ 3737 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], 3738 operands[3], operands[4]); 3739 DONE; 3740}) 3741 3742(define_insn_and_split "*vsx_extract_<mode>_di_var" 3743 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,r") 3744 (zero_extend:DI 3745 (unspec:<VSX_EXTRACT_I:VS_scalar> 3746 [(match_operand:VSX_EXTRACT_I 1 "input_operand" "wK,v,m") 3747 (match_operand:DI 2 "gpc_reg_operand" "r,r,r")] 3748 UNSPEC_VSX_EXTRACT))) 3749 (clobber (match_scratch:DI 3 "=r,r,&b")) 3750 (clobber (match_scratch:V2DI 4 "=X,&v,X"))] 3751 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3752 "#" 3753 "&& reload_completed" 3754 [(const_int 0)] 3755{ 3756 machine_mode smode = <VS_scalar>mode; 3757 rs6000_split_vec_extract_var (gen_rtx_REG (smode, REGNO (operands[0])), 3758 operands[1], operands[2], 3759 operands[3], operands[4]); 3760 DONE; 3761}) 3762 3763;; VSX_EXTRACT optimizations 3764;; Optimize double d = (double) vec_extract (vi, <n>) 3765;; Get the element into the top position and use XVCVSWDP/XVCVUWDP 3766(define_insn_and_split "*vsx_extract_si_<uns>float_df" 3767 [(set (match_operand:DF 0 "gpc_reg_operand" "=ws") 3768 (any_float:DF 3769 (vec_select:SI 3770 (match_operand:V4SI 1 "gpc_reg_operand" "v") 3771 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) 3772 (clobber (match_scratch:V4SI 3 "=v"))] 3773 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" 3774 "#" 3775 "&& 1" 3776 [(const_int 0)] 3777{ 3778 rtx dest = operands[0]; 3779 rtx src = operands[1]; 3780 rtx element = operands[2]; 3781 rtx v4si_tmp = operands[3]; 3782 int value; 3783 3784 if (!BYTES_BIG_ENDIAN) 3785 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); 3786 3787 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3788 instruction.
*/ 3789 value = INTVAL (element); 3790 if (value != 0) 3791 { 3792 if (GET_CODE (v4si_tmp) == SCRATCH) 3793 v4si_tmp = gen_reg_rtx (V4SImode); 3794 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); 3795 } 3796 else 3797 v4si_tmp = src; 3798 3799 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp)); 3800 DONE; 3801}) 3802 3803;; Optimize <type> f = (<type>) vec_extract (vi, <n>) 3804;; where <type> is a floating point type supported by the hardware that is 3805;; not double. First convert the value to double, and then to the desired 3806;; type. 3807(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>" 3808 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=ww") 3809 (any_float:VSX_EXTRACT_FL 3810 (vec_select:SI 3811 (match_operand:V4SI 1 "gpc_reg_operand" "v") 3812 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) 3813 (clobber (match_scratch:V4SI 3 "=v")) 3814 (clobber (match_scratch:DF 4 "=ws"))] 3815 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" 3816 "#" 3817 "&& 1" 3818 [(const_int 0)] 3819{ 3820 rtx dest = operands[0]; 3821 rtx src = operands[1]; 3822 rtx element = operands[2]; 3823 rtx v4si_tmp = operands[3]; 3824 rtx df_tmp = operands[4]; 3825 int value; 3826 3827 if (!BYTES_BIG_ENDIAN) 3828 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); 3829 3830 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3831 instruction. */ 3832 value = INTVAL (element); 3833 if (value != 0) 3834 { 3835 if (GET_CODE (v4si_tmp) == SCRATCH) 3836 v4si_tmp = gen_reg_rtx (V4SImode); 3837 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); 3838 } 3839 else 3840 v4si_tmp = src; 3841 3842 if (GET_CODE (df_tmp) == SCRATCH) 3843 df_tmp = gen_reg_rtx (DFmode); 3844 3845 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp)); 3846 3847 if (<MODE>mode == SFmode) 3848 emit_insn (gen_truncdfsf2 (dest, df_tmp)); 3849 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode)) 3850 emit_insn (gen_extenddftf2_vsx (dest, df_tmp)); 3851 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode) 3852 && TARGET_FLOAT128_HW) 3853 emit_insn (gen_extenddftf2_hw (dest, df_tmp)); 3854 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode)) 3855 emit_insn (gen_extenddfif2 (dest, df_tmp)); 3856 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW) 3857 emit_insn (gen_extenddfkf2_hw (dest, df_tmp)); 3858 else 3859 gcc_unreachable (); 3860 3861 DONE; 3862}) 3863 3864;; Optimize <ftype> f = (<ftype>) vec_extract (<vtype>, <n>) 3865;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE 3866;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char, 3867;; vector short or vector unsigned short.
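;; For example, on ISA 3.0,
;;   double f (vector short v) { return vec_extract (v, 1); }
;; can be done entirely in the vector registers as extract, sign extend,
;; convert (e.g. vextractuh, vextsh2d, xscvsxddp); the splits below emit
;; that vec_select/sign_extend/float sequence without a GPR round trip.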
3868(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>" 3869 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>") 3870 (float:FL_CONV 3871 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3872 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") 3873 (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) 3874 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] 3875 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT 3876 && TARGET_P9_VECTOR" 3877 "#" 3878 "&& reload_completed" 3879 [(parallel [(set (match_dup 3) 3880 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3881 (match_dup 1) 3882 (parallel [(match_dup 2)]))) 3883 (clobber (scratch:SI))]) 3884 (set (match_dup 4) 3885 (sign_extend:DI (match_dup 3))) 3886 (set (match_dup 0) 3887 (float:<FL_CONV:MODE> (match_dup 4)))] 3888{ 3889 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); 3890}) 3891 3892(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>" 3893 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=<FL_CONV:VSr3>") 3894 (unsigned_float:FL_CONV 3895 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3896 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") 3897 (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) 3898 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] 3899 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT 3900 && TARGET_P9_VECTOR" 3901 "#" 3902 "&& reload_completed" 3903 [(parallel [(set (match_dup 3) 3904 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3905 (match_dup 1) 3906 (parallel [(match_dup 2)]))) 3907 (clobber (scratch:SI))]) 3908 (set (match_dup 0) 3909 (float:<FL_CONV:MODE> (match_dup 4)))] 3910{ 3911 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); 3912}) 3913 3914;; V4SI/V8HI/V16QI set operation on ISA 3.0 3915(define_insn "vsx_set_<mode>_p9" 3916 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>") 3917 (unspec:VSX_EXTRACT_I 3918 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0") 3919 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>") 3920 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")] 3921 UNSPEC_VSX_SET))] 3922 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64" 3923{ 3924 int ele = INTVAL (operands[3]); 3925 int nunits = GET_MODE_NUNITS (<MODE>mode); 3926 3927 if (!BYTES_BIG_ENDIAN) 3928 ele = nunits - 1 - ele; 3929 3930 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele); 3931 if (<MODE>mode == V4SImode) 3932 return "xxinsertw %x0,%x2,%3"; 3933 else 3934 return "vinsert<wd> %0,%2,%3"; 3935} 3936 [(set_attr "type" "vecperm")]) 3937 3938(define_insn_and_split "vsx_set_v4sf_p9" 3939 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 3940 (unspec:V4SF 3941 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 3942 (match_operand:SF 2 "gpc_reg_operand" "ww") 3943 (match_operand:QI 3 "const_0_to_3_operand" "n")] 3944 UNSPEC_VSX_SET)) 3945 (clobber (match_scratch:SI 4 "=&wJwK"))] 3946 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" 3947 "#" 3948 "&& reload_completed" 3949 [(set (match_dup 5) 3950 (unspec:V4SF [(match_dup 2)] 3951 UNSPEC_VSX_CVDPSPN)) 3952 (parallel [(set (match_dup 4) 3953 (vec_select:SI (match_dup 6) 3954 (parallel [(match_dup 7)]))) 3955 (clobber (scratch:SI))]) 3956 (set (match_dup 8) 3957 (unspec:V4SI [(match_dup 8) 3958 (match_dup 4) 3959 (match_dup 3)] 3960 UNSPEC_VSX_SET))] 3961{ 3962 unsigned int tmp_regno = reg_or_subregno (operands[4]); 3963 3964 operands[5] = gen_rtx_REG (V4SFmode, 
tmp_regno); 3965 operands[6] = gen_rtx_REG (V4SImode, tmp_regno); 3966 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3); 3967 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); 3968} 3969 [(set_attr "type" "vecperm") 3970 (set_attr "length" "12")]) 3971 3972;; Special case setting 0.0f to a V4SF element 3973(define_insn_and_split "*vsx_set_v4sf_p9_zero" 3974 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 3975 (unspec:V4SF 3976 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 3977 (match_operand:SF 2 "zero_fp_constant" "j") 3978 (match_operand:QI 3 "const_0_to_3_operand" "n")] 3979 UNSPEC_VSX_SET)) 3980 (clobber (match_scratch:SI 4 "=&wJwK"))] 3981 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" 3982 "#" 3983 "&& reload_completed" 3984 [(set (match_dup 4) 3985 (const_int 0)) 3986 (set (match_dup 5) 3987 (unspec:V4SI [(match_dup 5) 3988 (match_dup 4) 3989 (match_dup 3)] 3990 UNSPEC_VSX_SET))] 3991{ 3992 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); 3993} 3994 [(set_attr "type" "vecperm") 3995 (set_attr "length" "8")]) 3996 3997;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element 3998;; that is in the default scalar position (1 for big endian, 2 for little 3999;; endian). We just need to do an xxinsertw since the element is in the 4000;; correct location. 4001 4002(define_insn "*vsx_insert_extract_v4sf_p9" 4003 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 4004 (unspec:V4SF 4005 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 4006 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") 4007 (parallel 4008 [(match_operand:QI 3 "const_0_to_3_operand" "n")])) 4009 (match_operand:QI 4 "const_0_to_3_operand" "n")] 4010 UNSPEC_VSX_SET))] 4011 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64 4012 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))" 4013{ 4014 int ele = INTVAL (operands[4]); 4015 4016 if (!BYTES_BIG_ENDIAN) 4017 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele; 4018 4019 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele); 4020 return "xxinsertw %x0,%x2,%4"; 4021} 4022 [(set_attr "type" "vecperm")]) 4023 4024;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element 4025;; that is in the default scalar position (1 for big endian, 2 for little 4026;; endian). Convert the insert/extract to int and avoid doing the conversion. 4027 4028(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2" 4029 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 4030 (unspec:V4SF 4031 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 4032 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") 4033 (parallel 4034 [(match_operand:QI 3 "const_0_to_3_operand" "n")])) 4035 (match_operand:QI 4 "const_0_to_3_operand" "n")] 4036 UNSPEC_VSX_SET)) 4037 (clobber (match_scratch:SI 5 "=&wJwK"))] 4038 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode) 4039 && TARGET_P9_VECTOR && TARGET_POWERPC64 4040 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 
1 : 2))" 4041 "#" 4042 "&& 1" 4043 [(parallel [(set (match_dup 5) 4044 (vec_select:SI (match_dup 6) 4045 (parallel [(match_dup 3)]))) 4046 (clobber (scratch:SI))]) 4047 (set (match_dup 7) 4048 (unspec:V4SI [(match_dup 8) 4049 (match_dup 5) 4050 (match_dup 4)] 4051 UNSPEC_VSX_SET))] 4052{ 4053 if (GET_CODE (operands[5]) == SCRATCH) 4054 operands[5] = gen_reg_rtx (SImode); 4055 4056 operands[6] = gen_lowpart (V4SImode, operands[2]); 4057 operands[7] = gen_lowpart (V4SImode, operands[0]); 4058 operands[8] = gen_lowpart (V4SImode, operands[1]); 4059} 4060 [(set_attr "type" "vecperm")]) 4061 4062;; Expanders for builtins 4063(define_expand "vsx_mergel_<mode>" 4064 [(use (match_operand:VSX_D 0 "vsx_register_operand")) 4065 (use (match_operand:VSX_D 1 "vsx_register_operand")) 4066 (use (match_operand:VSX_D 2 "vsx_register_operand"))] 4067 "VECTOR_MEM_VSX_P (<MODE>mode)" 4068{ 4069 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); 4070 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); 4071 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); 4072 emit_insn (gen_rtx_SET (operands[0], x)); 4073 DONE; 4074}) 4075 4076(define_expand "vsx_mergeh_<mode>" 4077 [(use (match_operand:VSX_D 0 "vsx_register_operand")) 4078 (use (match_operand:VSX_D 1 "vsx_register_operand")) 4079 (use (match_operand:VSX_D 2 "vsx_register_operand"))] 4080 "VECTOR_MEM_VSX_P (<MODE>mode)" 4081{ 4082 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); 4083 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); 4084 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); 4085 emit_insn (gen_rtx_SET (operands[0], x)); 4086 DONE; 4087}) 4088 4089;; V2DF/V2DI splat 4090;; We separate the register splat insn from the memory splat insn to force the 4091;; register allocator to generate the indexed form of the SPLAT when it is 4092;; given an offsettable memory reference. Otherwise, if the register and 4093;; memory insns were combined into a single insn, the register allocator will 4094;; load the value into a register, and then do a double word permute. 
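;; For example, given
;;   vector double f (double *p) { return (vector double) { *p, *p }; }
;; we want the address to stay in a GPR so the memory pattern below emits
;; the indexed-form LXVDSX, instead of loading the scalar into an FPR and
;; then doing an XXPERMDI.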
4095(define_expand "vsx_splat_<mode>" 4096 [(set (match_operand:VSX_D 0 "vsx_register_operand") 4097 (vec_duplicate:VSX_D 4098 (match_operand:<VS_scalar> 1 "input_operand")))] 4099 "VECTOR_MEM_VSX_P (<MODE>mode)" 4100{ 4101 rtx op1 = operands[1]; 4102 if (MEM_P (op1)) 4103 operands[1] = rs6000_force_indexed_or_indirect_mem (op1); 4104 else if (!REG_P (op1)) 4105 operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1); 4106}) 4107 4108(define_insn "vsx_splat_<mode>_reg" 4109 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>,?we") 4110 (vec_duplicate:VSX_D 4111 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VSX_D:VS_64reg>,b")))] 4112 "VECTOR_MEM_VSX_P (<MODE>mode)" 4113 "@ 4114 xxpermdi %x0,%x1,%x1,0 4115 mtvsrdd %x0,%1,%1" 4116 [(set_attr "type" "vecperm")]) 4117 4118(define_insn "vsx_splat_<VSX_D:mode>_mem" 4119 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=<VSX_D:VSa>") 4120 (vec_duplicate:VSX_D 4121 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))] 4122 "VECTOR_MEM_VSX_P (<MODE>mode)" 4123 "lxvdsx %x0,%y1" 4124 [(set_attr "type" "vecload")]) 4125 4126;; V4SI splat support 4127(define_insn "vsx_splat_v4si" 4128 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we") 4129 (vec_duplicate:V4SI 4130 (match_operand:SI 1 "splat_input_operand" "r,Z")))] 4131 "TARGET_P9_VECTOR" 4132 "@ 4133 mtvsrws %x0,%1 4134 lxvwsx %x0,%y1" 4135 [(set_attr "type" "vecperm,vecload")]) 4136 4137;; SImode is not currently allowed in vector registers. This pattern 4138;; allows us to use direct move to get the value in a vector register 4139;; so that we can use XXSPLTW 4140(define_insn "vsx_splat_v4si_di" 4141 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we") 4142 (vec_duplicate:V4SI 4143 (truncate:SI 4144 (match_operand:DI 1 "gpc_reg_operand" "wj,r"))))] 4145 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" 4146 "@ 4147 xxspltw %x0,%x1,1 4148 mtvsrws %x0,%1" 4149 [(set_attr "type" "vecperm")]) 4150 4151;; V4SF splat (ISA 3.0) 4152(define_insn_and_split "vsx_splat_v4sf" 4153 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa") 4154 (vec_duplicate:V4SF 4155 (match_operand:SF 1 "splat_input_operand" "Z,wy,r")))] 4156 "TARGET_P9_VECTOR" 4157 "@ 4158 lxvwsx %x0,%y1 4159 # 4160 mtvsrws %x0,%1" 4161 "&& reload_completed && vsx_register_operand (operands[1], SFmode)" 4162 [(set (match_dup 0) 4163 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN)) 4164 (set (match_dup 0) 4165 (unspec:V4SF [(match_dup 0) 4166 (const_int 0)] UNSPEC_VSX_XXSPLTW))] 4167 "" 4168 [(set_attr "type" "vecload,vecperm,mftgpr") 4169 (set_attr "length" "4,8,4")]) 4170 4171;; V4SF/V4SI splat from a vector element 4172(define_insn "vsx_xxspltw_<mode>" 4173 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") 4174 (vec_duplicate:VSX_W 4175 (vec_select:<VS_scalar> 4176 (match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") 4177 (parallel 4178 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))] 4179 "VECTOR_MEM_VSX_P (<MODE>mode)" 4180{ 4181 if (!BYTES_BIG_ENDIAN) 4182 operands[2] = GEN_INT (3 - INTVAL (operands[2])); 4183 4184 return "xxspltw %x0,%x1,%2"; 4185} 4186 [(set_attr "type" "vecperm")]) 4187 4188(define_insn "vsx_xxspltw_<mode>_direct" 4189 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=<VSa>") 4190 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "<VSa>") 4191 (match_operand:QI 2 "u5bit_cint_operand" "i")] 4192 UNSPEC_VSX_XXSPLTW))] 4193 "VECTOR_MEM_VSX_P (<MODE>mode)" 4194 "xxspltw %x0,%x1,%2" 4195 [(set_attr "type" "vecperm")]) 4196 4197;; V16QI/V8HI
splat support on ISA 2.07 4198(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di" 4199 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v") 4200 (vec_duplicate:VSX_SPLAT_I 4201 (truncate:<VS_scalar> 4202 (match_operand:DI 1 "altivec_register_operand" "v"))))] 4203 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 4204 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>" 4205 [(set_attr "type" "vecperm")]) 4206 4207;; V2DF/V2DI splat for use by vec_splat builtin 4208(define_insn "vsx_xxspltd_<mode>" 4209 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 4210 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa") 4211 (match_operand:QI 2 "u5bit_cint_operand" "i")] 4212 UNSPEC_VSX_XXSPLTD))] 4213 "VECTOR_MEM_VSX_P (<MODE>mode)" 4214{ 4215 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0) 4216 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1)) 4217 return "xxpermdi %x0,%x1,%x1,0"; 4218 else 4219 return "xxpermdi %x0,%x1,%x1,3"; 4220} 4221 [(set_attr "type" "vecperm")]) 4222 4223;; V4SF/V4SI interleave 4224(define_insn "vsx_xxmrghw_<mode>" 4225 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>") 4226 (vec_select:VSX_W 4227 (vec_concat:<VS_double> 4228 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>") 4229 (match_operand:VSX_W 2 "vsx_register_operand" "wf,<VSa>")) 4230 (parallel [(const_int 0) (const_int 4) 4231 (const_int 1) (const_int 5)])))] 4232 "VECTOR_MEM_VSX_P (<MODE>mode)" 4233{ 4234 if (BYTES_BIG_ENDIAN) 4235 return "xxmrghw %x0,%x1,%x2"; 4236 else 4237 return "xxmrglw %x0,%x2,%x1"; 4238} 4239 [(set_attr "type" "vecperm")]) 4240 4241(define_insn "vsx_xxmrglw_<mode>" 4242 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>") 4243 (vec_select:VSX_W 4244 (vec_concat:<VS_double> 4245 (match_operand:VSX_W 1 "vsx_register_operand" "wf,<VSa>") 4246 (match_operand:VSX_W 2 "vsx_register_operand" "wf,?<VSa>")) 4247 (parallel [(const_int 2) (const_int 6) 4248 (const_int 3) (const_int 7)])))] 4249 "VECTOR_MEM_VSX_P (<MODE>mode)" 4250{ 4251 if (BYTES_BIG_ENDIAN) 4252 return "xxmrglw %x0,%x1,%x2"; 4253 else 4254 return "xxmrghw %x0,%x2,%x1"; 4255} 4256 [(set_attr "type" "vecperm")]) 4257 4258;; Shift left double by word immediate 4259(define_insn "vsx_xxsldwi_<mode>" 4260 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSa>") 4261 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "<VSa>") 4262 (match_operand:VSX_L 2 "vsx_register_operand" "<VSa>") 4263 (match_operand:QI 3 "u5bit_cint_operand" "i")] 4264 UNSPEC_VSX_SLDWI))] 4265 "VECTOR_MEM_VSX_P (<MODE>mode)" 4266 "xxsldwi %x0,%x1,%x2,%3" 4267 [(set_attr "type" "vecperm")]) 4268 4269 4270;; Vector reduction insns and splitters 4271 4272(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df" 4273 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wd,&?wa,wd,?wa") 4274 (VEC_reduc:V2DF 4275 (vec_concat:V2DF 4276 (vec_select:DF 4277 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa") 4278 (parallel [(const_int 1)])) 4279 (vec_select:DF 4280 (match_dup 1) 4281 (parallel [(const_int 0)]))) 4282 (match_dup 1))) 4283 (clobber (match_scratch:V2DF 2 "=0,0,&wd,&wa"))] 4284 "VECTOR_UNIT_VSX_P (V2DFmode)" 4285 "#" 4286 "" 4287 [(const_int 0)] 4288{ 4289 rtx tmp = (GET_CODE (operands[2]) == SCRATCH) 4290 ? 
gen_reg_rtx (V2DFmode) 4291 : operands[2]; 4292 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx)); 4293 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1])); 4294 DONE; 4295} 4296 [(set_attr "length" "8") 4297 (set_attr "type" "veccomplex")]) 4298 4299(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf" 4300 [(set (match_operand:V4SF 0 "vfloat_operand" "=wf,?wa") 4301 (VEC_reduc:V4SF 4302 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) 4303 (match_operand:V4SF 1 "vfloat_operand" "wf,wa"))) 4304 (clobber (match_scratch:V4SF 2 "=&wf,&wa")) 4305 (clobber (match_scratch:V4SF 3 "=&wf,&wa"))] 4306 "VECTOR_UNIT_VSX_P (V4SFmode)" 4307 "#" 4308 "" 4309 [(const_int 0)] 4310{ 4311 rtx op0 = operands[0]; 4312 rtx op1 = operands[1]; 4313 rtx tmp2, tmp3, tmp4; 4314 4315 if (can_create_pseudo_p ()) 4316 { 4317 tmp2 = gen_reg_rtx (V4SFmode); 4318 tmp3 = gen_reg_rtx (V4SFmode); 4319 tmp4 = gen_reg_rtx (V4SFmode); 4320 } 4321 else 4322 { 4323 tmp2 = operands[2]; 4324 tmp3 = operands[3]; 4325 tmp4 = tmp2; 4326 } 4327 4328 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); 4329 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); 4330 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); 4331 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3)); 4332 DONE; 4333} 4334 [(set_attr "length" "16") 4335 (set_attr "type" "veccomplex")]) 4336 4337;; Combiner patterns with the vector reduction patterns that knows we can get 4338;; to the top element of the V2DF array without doing an extract. 4339 4340(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar" 4341 [(set (match_operand:DF 0 "vfloat_operand" "=&ws,&?ws,ws,?ws") 4342 (vec_select:DF 4343 (VEC_reduc:V2DF 4344 (vec_concat:V2DF 4345 (vec_select:DF 4346 (match_operand:V2DF 1 "vfloat_operand" "wd,wa,wd,wa") 4347 (parallel [(const_int 1)])) 4348 (vec_select:DF 4349 (match_dup 1) 4350 (parallel [(const_int 0)]))) 4351 (match_dup 1)) 4352 (parallel [(const_int 1)]))) 4353 (clobber (match_scratch:DF 2 "=0,0,&wd,&wa"))] 4354 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)" 4355 "#" 4356 "" 4357 [(const_int 0)] 4358{ 4359 rtx hi = gen_highpart (DFmode, operands[1]); 4360 rtx lo = (GET_CODE (operands[2]) == SCRATCH) 4361 ? 
gen_reg_rtx (DFmode) 4362 : operands[2]; 4363 4364 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx)); 4365 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo)); 4366 DONE; 4367} 4368 [(set_attr "length" "8") 4369 (set_attr "type" "veccomplex")]) 4370 4371(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar" 4372 [(set (match_operand:SF 0 "vfloat_operand" "=f,?f") 4373 (vec_select:SF 4374 (VEC_reduc:V4SF 4375 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) 4376 (match_operand:V4SF 1 "vfloat_operand" "wf,wa")) 4377 (parallel [(const_int 3)]))) 4378 (clobber (match_scratch:V4SF 2 "=&wf,&wa")) 4379 (clobber (match_scratch:V4SF 3 "=&wf,&wa")) 4380 (clobber (match_scratch:V4SF 4 "=0,0"))] 4381 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)" 4382 "#" 4383 "" 4384 [(const_int 0)] 4385{ 4386 rtx op0 = operands[0]; 4387 rtx op1 = operands[1]; 4388 rtx tmp2, tmp3, tmp4, tmp5; 4389 4390 if (can_create_pseudo_p ()) 4391 { 4392 tmp2 = gen_reg_rtx (V4SFmode); 4393 tmp3 = gen_reg_rtx (V4SFmode); 4394 tmp4 = gen_reg_rtx (V4SFmode); 4395 tmp5 = gen_reg_rtx (V4SFmode); 4396 } 4397 else 4398 { 4399 tmp2 = operands[2]; 4400 tmp3 = operands[3]; 4401 tmp4 = tmp2; 4402 tmp5 = operands[4]; 4403 } 4404 4405 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); 4406 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); 4407 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); 4408 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3)); 4409 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5)); 4410 DONE; 4411} 4412 [(set_attr "length" "20") 4413 (set_attr "type" "veccomplex")]) 4414 4415 4416;; Power8 Vector fusion. The fused ops must be physically adjacent. 4417(define_peephole 4418 [(set (match_operand:P 0 "base_reg_operand") 4419 (match_operand:P 1 "short_cint_operand")) 4420 (set (match_operand:VSX_M 2 "vsx_register_operand") 4421 (mem:VSX_M (plus:P (match_dup 0) 4422 (match_operand:P 3 "int_reg_operand"))))] 4423 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" 4424 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" 4425 [(set_attr "length" "8") 4426 (set_attr "type" "vecload")]) 4427 4428(define_peephole 4429 [(set (match_operand:P 0 "base_reg_operand") 4430 (match_operand:P 1 "short_cint_operand")) 4431 (set (match_operand:VSX_M 2 "vsx_register_operand") 4432 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand") 4433 (match_dup 0))))] 4434 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" 4435 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" 4436 [(set_attr "length" "8") 4437 (set_attr "type" "vecload")]) 4438 4439 4440;; ISA 3.0 vector extend sign support 4441 4442(define_insn "vsx_sign_extend_qi_<mode>" 4443 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") 4444 (unspec:VSINT_84 4445 [(match_operand:V16QI 1 "vsx_register_operand" "v")] 4446 UNSPEC_VSX_SIGN_EXTEND))] 4447 "TARGET_P9_VECTOR" 4448 "vextsb2<wd> %0,%1" 4449 [(set_attr "type" "vecexts")]) 4450 4451(define_insn "vsx_sign_extend_hi_<mode>" 4452 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") 4453 (unspec:VSINT_84 4454 [(match_operand:V8HI 1 "vsx_register_operand" "v")] 4455 UNSPEC_VSX_SIGN_EXTEND))] 4456 "TARGET_P9_VECTOR" 4457 "vextsh2<wd> %0,%1" 4458 [(set_attr "type" "vecexts")]) 4459 4460(define_insn "*vsx_sign_extend_si_v2di" 4461 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") 4462 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] 4463 UNSPEC_VSX_SIGN_EXTEND))] 4464 "TARGET_P9_VECTOR" 4465 "vextsw2d %0,%1" 4466 
[(set_attr "type" "vecexts")]) 4467 4468 4469;; ISA 3.0 Binary Floating-Point Support 4470 4471;; VSX Scalar Extract Exponent Quad-Precision 4472(define_insn "xsxexpqp_<mode>" 4473 [(set (match_operand:DI 0 "altivec_register_operand" "=v") 4474 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] 4475 UNSPEC_VSX_SXEXPDP))] 4476 "TARGET_P9_VECTOR" 4477 "xsxexpqp %0,%1" 4478 [(set_attr "type" "vecmove")]) 4479 4480;; VSX Scalar Extract Exponent Double-Precision 4481(define_insn "xsxexpdp" 4482 [(set (match_operand:DI 0 "register_operand" "=r") 4483 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] 4484 UNSPEC_VSX_SXEXPDP))] 4485 "TARGET_P9_VECTOR && TARGET_64BIT" 4486 "xsxexpdp %0,%x1" 4487 [(set_attr "type" "integer")]) 4488 4489;; VSX Scalar Extract Significand Quad-Precision 4490(define_insn "xsxsigqp_<mode>" 4491 [(set (match_operand:TI 0 "altivec_register_operand" "=v") 4492 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] 4493 UNSPEC_VSX_SXSIG))] 4494 "TARGET_P9_VECTOR" 4495 "xsxsigqp %0,%1" 4496 [(set_attr "type" "vecmove")]) 4497 4498;; VSX Scalar Extract Significand Double-Precision 4499(define_insn "xsxsigdp" 4500 [(set (match_operand:DI 0 "register_operand" "=r") 4501 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] 4502 UNSPEC_VSX_SXSIG))] 4503 "TARGET_P9_VECTOR && TARGET_64BIT" 4504 "xsxsigdp %0,%x1" 4505 [(set_attr "type" "integer")]) 4506 4507;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument 4508(define_insn "xsiexpqpf_<mode>" 4509 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") 4510 (unspec:IEEE128 4511 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4512 (match_operand:DI 2 "altivec_register_operand" "v")] 4513 UNSPEC_VSX_SIEXPQP))] 4514 "TARGET_P9_VECTOR" 4515 "xsiexpqp %0,%1,%2" 4516 [(set_attr "type" "vecmove")]) 4517 4518;; VSX Scalar Insert Exponent Quad-Precision 4519(define_insn "xsiexpqp_<mode>" 4520 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") 4521 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v") 4522 (match_operand:DI 2 "altivec_register_operand" "v")] 4523 UNSPEC_VSX_SIEXPQP))] 4524 "TARGET_P9_VECTOR" 4525 "xsiexpqp %0,%1,%2" 4526 [(set_attr "type" "vecmove")]) 4527 4528;; VSX Scalar Insert Exponent Double-Precision 4529(define_insn "xsiexpdp" 4530 [(set (match_operand:DF 0 "vsx_register_operand" "=wa") 4531 (unspec:DF [(match_operand:DI 1 "register_operand" "r") 4532 (match_operand:DI 2 "register_operand" "r")] 4533 UNSPEC_VSX_SIEXPDP))] 4534 "TARGET_P9_VECTOR && TARGET_64BIT" 4535 "xsiexpdp %x0,%1,%2" 4536 [(set_attr "type" "fpsimple")]) 4537 4538;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument 4539(define_insn "xsiexpdpf" 4540 [(set (match_operand:DF 0 "vsx_register_operand" "=wa") 4541 (unspec:DF [(match_operand:DF 1 "register_operand" "r") 4542 (match_operand:DI 2 "register_operand" "r")] 4543 UNSPEC_VSX_SIEXPDP))] 4544 "TARGET_P9_VECTOR && TARGET_64BIT" 4545 "xsiexpdp %x0,%1,%2" 4546 [(set_attr "type" "fpsimple")]) 4547 4548;; VSX Scalar Compare Exponents Double-Precision 4549(define_expand "xscmpexpdp_<code>" 4550 [(set (match_dup 3) 4551 (compare:CCFP 4552 (unspec:DF 4553 [(match_operand:DF 1 "vsx_register_operand" "wa") 4554 (match_operand:DF 2 "vsx_register_operand" "wa")] 4555 UNSPEC_VSX_SCMPEXPDP) 4556 (const_int 0))) 4557 (set (match_operand:SI 0 "register_operand" "=r") 4558 (CMP_TEST:SI (match_dup 3) 4559 (const_int 0)))] 4560 "TARGET_P9_VECTOR" 4561{ 4562 operands[3] = gen_reg_rtx 
(CCFPmode); 4563}) 4564 4565(define_insn "*xscmpexpdp" 4566 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") 4567 (compare:CCFP 4568 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa") 4569 (match_operand:DF 2 "vsx_register_operand" "wa")] 4570 UNSPEC_VSX_SCMPEXPDP) 4571 (match_operand:SI 3 "zero_constant" "j")))] 4572 "TARGET_P9_VECTOR" 4573 "xscmpexpdp %0,%x1,%x2" 4574 [(set_attr "type" "fpcompare")]) 4575 4576;; VSX Scalar Compare Exponents Quad-Precision 4577(define_expand "xscmpexpqp_<code>_<mode>" 4578 [(set (match_dup 3) 4579 (compare:CCFP 4580 (unspec:IEEE128 4581 [(match_operand:IEEE128 1 "vsx_register_operand" "v") 4582 (match_operand:IEEE128 2 "vsx_register_operand" "v")] 4583 UNSPEC_VSX_SCMPEXPQP) 4584 (const_int 0))) 4585 (set (match_operand:SI 0 "register_operand" "=r") 4586 (CMP_TEST:SI (match_dup 3) 4587 (const_int 0)))] 4588 "TARGET_P9_VECTOR" 4589{ 4590 operands[3] = gen_reg_rtx (CCFPmode); 4591}) 4592 4593(define_insn "*xscmpexpqp" 4594 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") 4595 (compare:CCFP 4596 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4597 (match_operand:IEEE128 2 "altivec_register_operand" "v")] 4598 UNSPEC_VSX_SCMPEXPQP) 4599 (match_operand:SI 3 "zero_constant" "j")))] 4600 "TARGET_P9_VECTOR" 4601 "xscmpexpqp %0,%1,%2" 4602 [(set_attr "type" "fpcompare")]) 4603 4604;; VSX Scalar Test Data Class Quad-Precision 4605;; (Expansion for scalar_test_data_class (__ieee128, int)) 4606;; (Has side effect of setting the lt bit if operand 1 is negative, 4607;; setting the eq bit if any of the conditions tested by operand 2 4608;; are satisfied, and clearing the gt and unordered bits to zero.) 4609(define_expand "xststdcqp_<mode>" 4610 [(set (match_dup 3) 4611 (compare:CCFP 4612 (unspec:IEEE128 4613 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4614 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4615 UNSPEC_VSX_STSTDC) 4616 (const_int 0))) 4617 (set (match_operand:SI 0 "register_operand" "=r") 4618 (eq:SI (match_dup 3) 4619 (const_int 0)))] 4620 "TARGET_P9_VECTOR" 4621{ 4622 operands[3] = gen_reg_rtx (CCFPmode); 4623}) 4624 4625;; VSX Scalar Test Data Class Double- and Single-Precision 4626;; (The lt bit is set if operand 1 is negative. The eq bit is set 4627;; if any of the conditions tested by operand 2 are satisfied. 4628;; The gt and unordered bits are cleared to zero.)
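;; For example, assuming the ISA 3.0 DCMX encoding (the seven mask bits
;; select NaN, +/-infinity, +/-zero and +/-denormal, from most to least
;; significant bit), scalar_test_data_class (x, 0x70) would set the eq
;; bit if X is a NaN or an infinity.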
4629(define_expand "xststdc<Fvsx>" 4630 [(set (match_dup 3) 4631 (compare:CCFP 4632 (unspec:SFDF 4633 [(match_operand:SFDF 1 "vsx_register_operand" "wa") 4634 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4635 UNSPEC_VSX_STSTDC) 4636 (match_dup 4))) 4637 (set (match_operand:SI 0 "register_operand" "=r") 4638 (eq:SI (match_dup 3) 4639 (const_int 0)))] 4640 "TARGET_P9_VECTOR" 4641{ 4642 operands[3] = gen_reg_rtx (CCFPmode); 4643 operands[4] = CONST0_RTX (SImode); 4644}) 4645 4646;; VSX Scalar Test Negative Quad-Precision 4647(define_expand "xststdcnegqp_<mode>" 4648 [(set (match_dup 2) 4649 (compare:CCFP 4650 (unspec:IEEE128 4651 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4652 (const_int 0)] 4653 UNSPEC_VSX_STSTDC) 4654 (const_int 0))) 4655 (set (match_operand:SI 0 "register_operand" "=r") 4656 (lt:SI (match_dup 2) 4657 (const_int 0)))] 4658 "TARGET_P9_VECTOR" 4659{ 4660 operands[2] = gen_reg_rtx (CCFPmode); 4661}) 4662 4663;; VSX Scalar Test Negative Double- and Single-Precision 4664(define_expand "xststdcneg<Fvsx>" 4665 [(set (match_dup 2) 4666 (compare:CCFP 4667 (unspec:SFDF 4668 [(match_operand:SFDF 1 "vsx_register_operand" "wa") 4669 (const_int 0)] 4670 UNSPEC_VSX_STSTDC) 4671 (match_dup 3))) 4672 (set (match_operand:SI 0 "register_operand" "=r") 4673 (lt:SI (match_dup 2) 4674 (const_int 0)))] 4675 "TARGET_P9_VECTOR" 4676{ 4677 operands[2] = gen_reg_rtx (CCFPmode); 4678 operands[3] = CONST0_RTX (SImode); 4679}) 4680 4681(define_insn "*xststdcqp_<mode>" 4682 [(set (match_operand:CCFP 0 "" "=y") 4683 (compare:CCFP 4684 (unspec:IEEE128 4685 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4686 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4687 UNSPEC_VSX_STSTDC) 4688 (const_int 0)))] 4689 "TARGET_P9_VECTOR" 4690 "xststdcqp %0,%1,%2" 4691 [(set_attr "type" "fpcompare")]) 4692 4693(define_insn "*xststdc<Fvsx>" 4694 [(set (match_operand:CCFP 0 "" "=y") 4695 (compare:CCFP 4696 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") 4697 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4698 UNSPEC_VSX_STSTDC) 4699 (match_operand:SI 3 "zero_constant" "j")))] 4700 "TARGET_P9_VECTOR" 4701 "xststdc<Fvsx> %0,%x1,%2" 4702 [(set_attr "type" "fpcompare")]) 4703 4704;; VSX Vector Extract Exponent Double and Single Precision 4705(define_insn "xvxexp<VSs>" 4706 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 4707 (unspec:VSX_F 4708 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 4709 UNSPEC_VSX_VXEXP))] 4710 "TARGET_P9_VECTOR" 4711 "xvxexp<VSs> %x0,%x1" 4712 [(set_attr "type" "vecsimple")]) 4713 4714;; VSX Vector Extract Significand Double and Single Precision 4715(define_insn "xvxsig<VSs>" 4716 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 4717 (unspec:VSX_F 4718 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 4719 UNSPEC_VSX_VXSIG))] 4720 "TARGET_P9_VECTOR" 4721 "xvxsig<VSs> %x0,%x1" 4722 [(set_attr "type" "vecsimple")]) 4723 4724;; VSX Vector Insert Exponent Double and Single Precision 4725(define_insn "xviexp<VSs>" 4726 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 4727 (unspec:VSX_F 4728 [(match_operand:VSX_F 1 "vsx_register_operand" "wa") 4729 (match_operand:VSX_F 2 "vsx_register_operand" "wa")] 4730 UNSPEC_VSX_VIEXP))] 4731 "TARGET_P9_VECTOR" 4732 "xviexp<VSs> %x0,%x1,%x2" 4733 [(set_attr "type" "vecsimple")]) 4734 4735;; VSX Vector Test Data Class Double and Single Precision 4736;; The corresponding elements of the result vector are all ones 4737;; if any of the conditions tested by operand 2 are satisfied.
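;; For example, xvtstdcdp with a DCMX mask selecting NaN and +/-infinity
;; yields an all-ones doubleword for each element that is a NaN or an
;; infinity and all zeros otherwise, a mask that can feed xxsel directly.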
4738(define_insn "xvtstdc<VSs>" 4739 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa") 4740 (unspec:<VSI> 4741 [(match_operand:VSX_F 1 "vsx_register_operand" "wa") 4742 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4743 UNSPEC_VSX_VTSTDC))] 4744 "TARGET_P9_VECTOR" 4745 "xvtstdc<VSs> %x0,%x1,%2" 4746 [(set_attr "type" "vecsimple")]) 4747 4748;; ISA 3.0 String Operations Support 4749 4750;; Compare vectors producing a vector result and a predicate, setting CR6 4751;; to indicate a combined status. This pattern matches v16qi, v8hi, and 4752;; v4si modes. It does not match v2df, v4sf, or v2di modes. There's no 4753;; need to match v4sf, v2df, or v2di modes because those are expanded 4754;; to use Power8 instructions. 4755(define_insn "*vsx_ne_<mode>_p" 4756 [(set (reg:CC CR6_REGNO) 4757 (unspec:CC 4758 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") 4759 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))] 4760 UNSPEC_PREDICATE)) 4761 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v") 4762 (ne:VSX_EXTRACT_I (match_dup 1) 4763 (match_dup 2)))] 4764 "TARGET_P9_VECTOR" 4765 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2" 4766 [(set_attr "type" "vecsimple")]) 4767 4768(define_insn "*vector_nez_<mode>_p" 4769 [(set (reg:CC CR6_REGNO) 4770 (unspec:CC [(unspec:VI 4771 [(match_operand:VI 1 "gpc_reg_operand" "v") 4772 (match_operand:VI 2 "gpc_reg_operand" "v")] 4773 UNSPEC_NEZ_P)] 4774 UNSPEC_PREDICATE)) 4775 (set (match_operand:VI 0 "gpc_reg_operand" "=v") 4776 (unspec:VI [(match_dup 1) 4777 (match_dup 2)] 4778 UNSPEC_NEZ_P))] 4779 "TARGET_P9_VECTOR" 4780 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2" 4781 [(set_attr "type" "vecsimple")]) 4782 4783;; Return first position of match between vectors using natural order 4784;; for both LE and BE execution modes. 4785(define_expand "first_match_index_<mode>" 4786 [(match_operand:SI 0 "register_operand") 4787 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4788 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4789 UNSPEC_VSX_FIRST_MATCH_INDEX)] 4790 "TARGET_P9_VECTOR" 4791{ 4792 int sh; 4793 4794 rtx cmp_result = gen_reg_rtx (<MODE>mode); 4795 rtx not_result = gen_reg_rtx (<MODE>mode); 4796 4797 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1], 4798 operands[2])); 4799 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result)); 4800 4801 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4802 4803 if (<MODE>mode == V16QImode) 4804 { 4805 if (!BYTES_BIG_ENDIAN) 4806 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result)); 4807 else 4808 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result)); 4809 } 4810 else 4811 { 4812 rtx tmp = gen_reg_rtx (SImode); 4813 if (!BYTES_BIG_ENDIAN) 4814 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result)); 4815 else 4816 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result)); 4817 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4818 } 4819 DONE; 4820}) 4821 4822;; Return first position of match between vectors or end of string (EOS) using 4823;; natural element order for both LE and BE execution modes. 
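;; For example, with V16QI operands whose leading bytes are "ab\0" and
;; "xb\0", the first equal elements are at index 1, so 1 is returned;
;; with "ab\0" and "xy\0" nothing matches before the terminators, so the
;; index of the first zero byte (2) is returned instead.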
4824(define_expand "first_match_or_eos_index_<mode>" 4825 [(match_operand:SI 0 "register_operand") 4826 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4827 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4828 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)] 4829 "TARGET_P9_VECTOR" 4830{ 4831 int sh; 4832 rtx cmpz1_result = gen_reg_rtx (<MODE>mode); 4833 rtx cmpz2_result = gen_reg_rtx (<MODE>mode); 4834 rtx cmpz_result = gen_reg_rtx (<MODE>mode); 4835 rtx and_result = gen_reg_rtx (<MODE>mode); 4836 rtx result = gen_reg_rtx (<MODE>mode); 4837 rtx vzero = gen_reg_rtx (<MODE>mode); 4838 4839 /* Vector with zeros in elements that correspond to zeros in operands. */ 4840 emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); 4841 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero)); 4842 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero)); 4843 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result)); 4844 4845 /* Vector with ones in elements that do not match. */ 4846 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1], 4847 operands[2])); 4848 4849 /* Create vector with ones in elements where there was a zero in one of 4850 the source elements or the elements match. */ 4851 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result)); 4852 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4853 4854 if (<MODE>mode == V16QImode) 4855 { 4856 if (!BYTES_BIG_ENDIAN) 4857 emit_insn (gen_vctzlsbb_<mode> (operands[0], result)); 4858 else 4859 emit_insn (gen_vclzlsbb_<mode> (operands[0], result)); 4860 } 4861 else 4862 { 4863 rtx tmp = gen_reg_rtx (SImode); 4864 if (!BYTES_BIG_ENDIAN) 4865 emit_insn (gen_vctzlsbb_<mode> (tmp, result)); 4866 else 4867 emit_insn (gen_vclzlsbb_<mode> (tmp, result)); 4868 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4869 } 4870 DONE; 4871}) 4872 4873;; Return first position of mismatch between vectors using natural 4874;; element order for both LE and BE execution modes. 4875(define_expand "first_mismatch_index_<mode>" 4876 [(match_operand:SI 0 "register_operand") 4877 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4878 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4879 UNSPEC_VSX_FIRST_MISMATCH_INDEX)] 4880 "TARGET_P9_VECTOR" 4881{ 4882 int sh; 4883 rtx cmp_result = gen_reg_rtx (<MODE>mode); 4884 4885 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1], 4886 operands[2])); 4887 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4888 4889 if (<MODE>mode == V16QImode) 4890 { 4891 if (!BYTES_BIG_ENDIAN) 4892 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result)); 4893 else 4894 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result)); 4895 } 4896 else 4897 { 4898 rtx tmp = gen_reg_rtx (SImode); 4899 if (!BYTES_BIG_ENDIAN) 4900 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result)); 4901 else 4902 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result)); 4903 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4904 } 4905 DONE; 4906}) 4907 4908;; Return first position of mismatch between vectors or end of string (EOS) 4909;; using natural element order for both LE and BE execution modes.
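;; For example, with V16QI operands beginning "ab\0" and "ax\0", the first
;; mismatch is at index 1, so 1 is returned; for two identical strings
;; "ab\0" the index of the terminating zero byte (2) is returned instead.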
4910(define_expand "first_mismatch_or_eos_index_<mode>" 4911 [(match_operand:SI 0 "register_operand") 4912 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4913 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4914 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)] 4915 "TARGET_P9_VECTOR" 4916{ 4917 int sh; 4918 rtx cmpz1_result = gen_reg_rtx (<MODE>mode); 4919 rtx cmpz2_result = gen_reg_rtx (<MODE>mode); 4920 rtx cmpz_result = gen_reg_rtx (<MODE>mode); 4921 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode); 4922 rtx and_result = gen_reg_rtx (<MODE>mode); 4923 rtx result = gen_reg_rtx (<MODE>mode); 4924 rtx vzero = gen_reg_rtx (<MODE>mode); 4925 4926 /* Vector with zeros in elements that correspond to zeros in operands. */ 4927 emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); 4928 4929 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero)); 4930 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero)); 4931 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result)); 4932 4933 /* Vector with ones in elements that match. */ 4934 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1], 4935 operands[2])); 4936 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result)); 4937 4938 /* Create vector with ones in elements where there was a zero in one of 4939 the source elements or the elements did not match. */ 4940 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result)); 4941 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4942 4943 if (<MODE>mode == V16QImode) 4944 { 4945 if (!BYTES_BIG_ENDIAN) 4946 emit_insn (gen_vctzlsbb_<mode> (operands[0], result)); 4947 else 4948 emit_insn (gen_vclzlsbb_<mode> (operands[0], result)); 4949 } 4950 else 4951 { 4952 rtx tmp = gen_reg_rtx (SImode); 4953 if (!BYTES_BIG_ENDIAN) 4954 emit_insn (gen_vctzlsbb_<mode> (tmp, result)); 4955 else 4956 emit_insn (gen_vclzlsbb_<mode> (tmp, result)); 4957 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4958 } 4959 DONE; 4960}) 4961 4962;; Load VSX Vector with Length 4963(define_expand "lxvl" 4964 [(set (match_dup 3) 4965 (ashift:DI (match_operand:DI 2 "register_operand") 4966 (const_int 56))) 4967 (set (match_operand:V16QI 0 "vsx_register_operand") 4968 (unspec:V16QI 4969 [(match_operand:DI 1 "gpc_reg_operand") 4970 (mem:V16QI (match_dup 1)) 4971 (match_dup 3)] 4972 UNSPEC_LXVL))] 4973 "TARGET_P9_VECTOR && TARGET_64BIT" 4974{ 4975 operands[3] = gen_reg_rtx (DImode); 4976}) 4977 4978(define_insn "*lxvl" 4979 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 4980 (unspec:V16QI 4981 [(match_operand:DI 1 "gpc_reg_operand" "b") 4982 (mem:V16QI (match_dup 1)) 4983 (match_operand:DI 2 "register_operand" "r")] 4984 UNSPEC_LXVL))] 4985 "TARGET_P9_VECTOR && TARGET_64BIT" 4986 "lxvl %x0,%1,%2" 4987 [(set_attr "type" "vecload")]) 4988 4989(define_insn "lxvll" 4990 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 4991 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") 4992 (mem:V16QI (match_dup 1)) 4993 (match_operand:DI 2 "register_operand" "r")] 4994 UNSPEC_LXVLL))] 4995 "TARGET_P9_VECTOR" 4996 "lxvll %x0,%1,%2" 4997 [(set_attr "type" "vecload")]) 4998 4999;; Expand for builtin xl_len_r 5000(define_expand "xl_len_r" 5001 [(match_operand:V16QI 0 "vsx_register_operand") 5002 (match_operand:DI 1 "register_operand") 5003 (match_operand:DI 2 "register_operand")] 5004 "" 5005{ 5006 rtx shift_mask = gen_reg_rtx (V16QImode); 5007 rtx rtx_vtmp = gen_reg_rtx (V16QImode); 5008 rtx tmp = gen_reg_rtx (DImode); 5009 5010 emit_insn
;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI
	 [(match_operand:DI 1 "gpc_reg_operand" "b")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
					  shift_mask));
  DONE;
})

(define_insn "stxvll"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI [(match_operand:V16QI 0 "vsx_register_operand" "wa")
		       (mem:V16QI (match_dup 1))
		       (match_operand:DI 2 "register_operand" "r")]
		      UNSPEC_STXVLL))]
  "TARGET_P9_VECTOR"
  "stxvll %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Store VSX Vector with Length
(define_expand "stxvl"
  [(set (match_dup 3)
	(ashift:DI (match_operand:DI 2 "register_operand")
		   (const_int 56)))
   (set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand")
	  (mem:V16QI (match_dup 1))
	  (match_dup 3)]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*stxvl"
  [(set (mem:V16QI (match_operand:DI 1 "gpc_reg_operand" "b"))
	(unspec:V16QI
	 [(match_operand:V16QI 0 "vsx_register_operand" "wa")
	  (mem:V16QI (match_dup 1))
	  (match_operand:DI 2 "register_operand" "r")]
	 UNSPEC_STXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "stxvl %x0,%1,%2"
  [(set_attr "type" "vecstore")])

;; Expand for builtin xst_len_r
(define_expand "xst_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsr_reg (shift_mask, operands[2]));
  emit_insn (gen_altivec_vperm_v8hiv16qi (rtx_vtmp, operands[0], operands[0],
					  shift_mask));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_stxvll (rtx_vtmp, operands[1], tmp));
  DONE;
})
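;; Hedged usage sketch (assumes the vec_xl_len and vec_xst_len built-ins
;; from altivec.h, which go through the lxvl/stxvl patterns above; the
;; ashift by 56 in the expanders places the byte count in the high-order
;; byte of the length register, as the instructions require):
;;
;;	#include <altivec.h>
;;
;;	/* Copy exactly n bytes, 0 <= n <= 16, without touching memory
;;	   past the end of either buffer.  */
;;	void
;;	copy_partial (unsigned char *dst, unsigned char *src, size_t n)
;;	{
;;	  vector unsigned char v = vec_xl_len (src, n);
;;	  vec_xst_len (v, dst, n);
;;	}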
%0,%1,%2" 5109 [(set_attr "type" "vecsimple")]) 5110 5111;; Vector Compare Not Equal Half Word (specified/not+eq:) 5112(define_insn "vcmpneh" 5113 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") 5114 (not:V8HI 5115 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v") 5116 (match_operand:V8HI 2 "altivec_register_operand" "v"))))] 5117 "TARGET_P9_VECTOR" 5118 "vcmpneh %0,%1,%2" 5119 [(set_attr "type" "vecsimple")]) 5120 5121;; Vector Compare Not Equal or Zero Half Word 5122(define_insn "vcmpnezh" 5123 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") 5124 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v") 5125 (match_operand:V8HI 2 "altivec_register_operand" "v")] 5126 UNSPEC_VCMPNEZH))] 5127 "TARGET_P9_VECTOR" 5128 "vcmpnezh %0,%1,%2" 5129 [(set_attr "type" "vecsimple")]) 5130 5131;; Vector Compare Not Equal Word (specified/not+eq:) 5132(define_insn "vcmpnew" 5133 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") 5134 (not:V4SI 5135 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v") 5136 (match_operand:V4SI 2 "altivec_register_operand" "v"))))] 5137 "TARGET_P9_VECTOR" 5138 "vcmpnew %0,%1,%2" 5139 [(set_attr "type" "vecsimple")]) 5140 5141;; Vector Compare Not Equal or Zero Word 5142(define_insn "vcmpnezw" 5143 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") 5144 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v") 5145 (match_operand:V4SI 2 "altivec_register_operand" "v")] 5146 UNSPEC_VCMPNEZW))] 5147 "TARGET_P9_VECTOR" 5148 "vcmpnezw %0,%1,%2" 5149 [(set_attr "type" "vecsimple")]) 5150 5151;; Vector Count Leading Zero Least-Significant Bits Byte 5152(define_insn "vclzlsbb_<mode>" 5153 [(set (match_operand:SI 0 "register_operand" "=r") 5154 (unspec:SI 5155 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] 5156 UNSPEC_VCLZLSBB))] 5157 "TARGET_P9_VECTOR" 5158 "vclzlsbb %0,%1" 5159 [(set_attr "type" "vecsimple")]) 5160 5161;; Vector Count Trailing Zero Least-Significant Bits Byte 5162(define_insn "vctzlsbb_<mode>" 5163 [(set (match_operand:SI 0 "register_operand" "=r") 5164 (unspec:SI 5165 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] 5166 UNSPEC_VCTZLSBB))] 5167 "TARGET_P9_VECTOR" 5168 "vctzlsbb %0,%1" 5169 [(set_attr "type" "vecsimple")]) 5170 5171;; Vector Extract Unsigned Byte Left-Indexed 5172(define_insn "vextublx" 5173 [(set (match_operand:SI 0 "register_operand" "=r") 5174 (unspec:SI 5175 [(match_operand:SI 1 "register_operand" "r") 5176 (match_operand:V16QI 2 "altivec_register_operand" "v")] 5177 UNSPEC_VEXTUBLX))] 5178 "TARGET_P9_VECTOR" 5179 "vextublx %0,%1,%2" 5180 [(set_attr "type" "vecsimple")]) 5181 5182;; Vector Extract Unsigned Byte Right-Indexed 5183(define_insn "vextubrx" 5184 [(set (match_operand:SI 0 "register_operand" "=r") 5185 (unspec:SI 5186 [(match_operand:SI 1 "register_operand" "r") 5187 (match_operand:V16QI 2 "altivec_register_operand" "v")] 5188 UNSPEC_VEXTUBRX))] 5189 "TARGET_P9_VECTOR" 5190 "vextubrx %0,%1,%2" 5191 [(set_attr "type" "vecsimple")]) 5192 5193;; Vector Extract Unsigned Half Word Left-Indexed 5194(define_insn "vextuhlx" 5195 [(set (match_operand:SI 0 "register_operand" "=r") 5196 (unspec:SI 5197 [(match_operand:SI 1 "register_operand" "r") 5198 (match_operand:V8HI 2 "altivec_register_operand" "v")] 5199 UNSPEC_VEXTUHLX))] 5200 "TARGET_P9_VECTOR" 5201 "vextuhlx %0,%1,%2" 5202 [(set_attr "type" "vecsimple")]) 5203 5204;; Vector Extract Unsigned Half Word Right-Indexed 5205(define_insn "vextuhrx" 5206 [(set 
;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	 [(match_operand:SI 1 "register_operand" "r")
	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
	 UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number and the V4SI element in
;; insert4b.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
		      (match_operand:QI 2 "const_0_to_12_operand" "n")]
		     UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
		       (match_operand:V16QI 2 "vsx_register_operand")
		       (match_operand:QI 3 "const_0_to_12_operand")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
		       (match_operand:V16QI 2 "vsx_register_operand" "0")
		       (match_operand:QI 3 "const_0_to_12_operand" "n")]
		      UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])
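;; The "12 - n" remapping above follows from xxextractuw/xxinsertw numbering
;; bytes from the big-endian (left) end of the 16-byte register: a 4-byte
;; field starting at little-endian byte offset n starts at BE offset
;;
;;	16 - 4 - n = 12 - n
;;
;; so, for example, an LE offset of 0 (the right-most word) becomes a BE
;; offset of 12.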
;; Generate a vector of four 32-bit float values extracted from the left
;; four elements of a vector of eight 16-bit float values.
(define_expand "vextract_fp_from_shorth"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH))]
  "TARGET_P9_VECTOR"
{
  int i;
  int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
  int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};

  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use vperm to move
     src half words 0,1,2,3 (LE), src half words 4,5,6,7 (BE) into place
     for the conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})

;; Generate a vector of four 32-bit float values extracted from the right
;; four elements of a vector of eight 16-bit float values.
(define_expand "vextract_fp_from_shortl"
  [(set (match_operand:V4SF 0 "register_operand" "=wa")
	(unspec:V4SF [(match_operand:V8HI 1 "register_operand" "wa")]
		     UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL))]
  "TARGET_P9_VECTOR"
{
  int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
  int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};

  int i;
  rtx rvals[16];
  rtx mask = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (V16QImode);
  rtvec v;

  for (i = 0; i < 16; i++)
    if (!BYTES_BIG_ENDIAN)
      rvals[i] = GEN_INT (vals_le[i]);
    else
      rvals[i] = GEN_INT (vals_be[i]);

  /* xvcvhpsp - vector convert F16 to vector F32 requires the four F16
     inputs in half words 1,3,5,7 (IBM numbering).  Use vperm to move
     src half words 4,5,6,7 (LE), src half words 0,1,2,3 (BE) into place
     for the conversion instruction.  */
  v = gen_rtvec_v (16, rvals);
  emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode, v)));
  emit_insn (gen_altivec_vperm_v8hiv16qi (tmp, operands[1],
					  operands[1], mask));
  emit_insn (gen_vsx_xvcvhpsp (operands[0], tmp));
  DONE;
})
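;; Possible use, sketched with the vec_extract_fp32_from_shorth and
;; vec_extract_fp32_from_shortl built-ins from altivec.h (assumed here to
;; expand to the two patterns above):
;;
;;	#include <altivec.h>
;;
;;	/* Widen eight packed IEEE binary16 values to two vectors of
;;	   binary32 values.  */
;;	void
;;	widen_f16x8 (vector unsigned short in,
;;		     vector float *hi, vector float *lo)
;;	{
;;	  *hi = vec_extract_fp32_from_shorth (in);
;;	  *lo = vec_extract_fp32_from_shortl (in);
;;	}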
;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
	(bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* We want the elements in reverse order relative to the endian mode
	 in use, i.e. in LE mode, put the elements in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
					   operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
	(bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
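;; Rough C equivalent of revb for 32-bit elements (illustration only;
;; __builtin_bswap32 is a standard GCC built-in, but the expander above
;; actually emits a single xxbrw on ISA 3.0, or one vperm on older VSX):
;;
;;	void
;;	revb_v4si (unsigned int dst[4], const unsigned int src[4])
;;	{
;;	  for (int i = 0; i < 4; i++)
;;	    dst[i] = __builtin_bswap32 (src[i]);
;;	}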
;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR	 0)	;; GPR temporary
   (SFBOOL_TMP_VSX	 1)	;; vector temporary
   (SFBOOL_MFVSR_D	 2)	;; move to gpr dest
   (SFBOOL_MFVSR_A	 3)	;; move to gpr src
   (SFBOOL_BOOL_D	 4)	;; and/ior/xor dest
   (SFBOOL_BOOL_A1	 5)	;; and/ior/xor arg1
   (SFBOOL_BOOL_A2	 6)	;; and/ior/xor arg2
   (SFBOOL_SHL_D	 7)	;; shift left dest
   (SFBOOL_SHL_A	 8)	;; shift left arg
   (SFBOOL_MTVSR_D	 9)	;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF	10)	;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI	11)	;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI	12)	;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF	13)])	;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations that use logical
;; operations to pick apart SFmode values.  For example, e_powf.c contains
;; code that after macro expansion looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would normally result in two direct move operations (convert to
;; memory format, direct move to GPR, do the AND operation, direct move back
;; to VSX, convert to scalar format).  With this peephole, we eliminate the
;; direct move to the GPR, and instead move the integer mask value to the
;; vector register after a shift and do the logical operation in the VSX
;; unit.
;; The insns for dealing with SFmode in GPR registers look like:
;;	(set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;;	(set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;;	(set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;;	(set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;;	(set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;;	(set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
	(zero_extend:DI
	 (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
	(and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
			(match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
	(ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
		   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
	(unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers even when their modes differ.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
	   && REGNO (operands[SFBOOL_MFVSR_D])
		== REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
	(ashift:DI (match_dup SFBOOL_BOOL_A_DI)
		   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
	(match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
	(and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
			  (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
			  ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})
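;; Once the peephole fires, the matched five-insn sequence is replaced by
;; the three insns of the template above; for the AND case the resulting
;; code is roughly (a sketch, not actual compiler output):
;;
;;	sldi   tmp_gpr,bool_a,32	;; shift the mask into the high word
;;	mtvsrd tmp_vsx,tmp_gpr		;; one direct move instead of two
;;	xxland mtvsr_d,mfvsr_a,tmp_vsx	;; do the logical op in the VSX unit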