;; VSX patterns.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.

;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Iterator for comparison types
(define_code_iterator CMP_TEST [eq lt gt unordered])

;; Mode attribute for vector floate and floato conversions
(define_mode_attr VF_sxddp [(V2DI "sxd") (V2DF "dp")])

;; Iterator for both scalar and vector floating point types supported by VSX
(define_mode_iterator VSX_B [DF V4SF V2DF])

;; Iterator for the 2 64-bit vector types
(define_mode_iterator VSX_D [V2DF V2DI])

;; Mode iterator to handle swapping words on little endian for the 128-bit
;; types that go in a single vector register.
(define_mode_iterator VSX_LE_128 [(KF "FLOAT128_VECTOR_P (KFmode)")
                                  (TF "FLOAT128_VECTOR_P (TFmode)")
                                  TI
                                  V1TI])

;; Iterator for 128-bit integer types that go in a single vector register.
(define_mode_iterator VSX_TI [TI V1TI])

;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])

;; Iterator for the DF types
(define_mode_iterator VSX_DF [V2DF DF])

;; Iterator for vector floating point types supported by VSX
(define_mode_iterator VSX_F [V4SF V2DF])

;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")])

;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
                             V8HI
                             V4SI
                             V2DI
                             V4SF
                             V2DF
                             V1TI
                             (KF "FLOAT128_VECTOR_P (KFmode)")
                             (TF "FLOAT128_VECTOR_P (TFmode)")
                             TI])

(define_mode_attr VSX_XXBR [(V8HI "h")
                            (V4SI "w")
                            (V4SF "w")
                            (V2DF "d")
                            (V2DI "d")
                            (V1TI "q")])

;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
                       (V8HI  "vw4")
                       (V4SI  "vw4")
                       (V4SF  "vw4")
                       (V2DF  "vd2")
                       (V2DI  "vd2")
                       (DF    "d")
                       (TF    "vd2")
                       (KF    "vd2")
                       (V1TI  "vd2")
                       (TI    "vd2")])

;; Map the register class used
(define_mode_attr VSr [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "wa")
                       (V2DI  "wa")
                       (V2DF  "wa")
                       (DI    "wa")
                       (DF    "wa")
                       (SF    "wa")
                       (TF    "wa")
                       (KF    "wa")
                       (V1TI  "v")
                       (TI    "wa")])

;; What value we need in the "isa" field, to make the IEEE QP float work.
(define_mode_attr VSisa [(V16QI "*")
                         (V8HI  "*")
                         (V4SI  "*")
                         (V4SF  "*")
                         (V2DI  "*")
                         (V2DF  "*")
                         (DI    "*")
                         (DF    "*")
                         (SF    "*")
                         (V1TI  "*")
                         (TI    "*")
                         (TF    "p9tf")
                         (KF    "p9kf")])

;; A mode attribute to disparage use of GPR registers, except for scalar
;; integer modes.
(define_mode_attr ??r [(V16QI "??r")
                       (V8HI  "??r")
                       (V4SI  "??r")
                       (V4SF  "??r")
                       (V2DI  "??r")
                       (V2DF  "??r")
                       (V1TI  "??r")
                       (KF    "??r")
                       (TF    "??r")
                       (TI    "r")])

;; A mode attribute used for 128-bit constant values.
(define_mode_attr nW [(V16QI "W")
                      (V8HI  "W")
                      (V4SI  "W")
                      (V4SF  "W")
                      (V2DI  "W")
                      (V2DF  "W")
                      (V1TI  "W")
                      (KF    "W")
                      (TF    "W")
                      (TI    "n")])

;; Same size integer type for floating point data
(define_mode_attr VSi [(V4SF "v4si")
                       (V2DF "v2di")
                       (DF   "di")])

(define_mode_attr VSI [(V4SF "V4SI")
                       (V2DF "V2DI")
                       (DF   "DI")])

;; Word size for same size conversion
(define_mode_attr VSc [(V4SF "w")
                       (V2DF "d")
                       (DF   "d")])

;; Map into either s or v, depending on whether this is a scalar or vector
;; operation
(define_mode_attr VSv [(V16QI "v")
                       (V8HI  "v")
                       (V4SI  "v")
                       (V4SF  "v")
                       (V2DI  "v")
                       (V2DF  "v")
                       (V1TI  "v")
                       (DF    "s")
                       (KF    "v")])

;; Appropriate type for add ops (and other simple FP ops)
(define_mode_attr VStype_simple [(V2DF "vecdouble")
                                 (V4SF "vecfloat")
                                 (DF   "fp")])

;; Appropriate type for multiply ops
(define_mode_attr VStype_mul [(V2DF "vecdouble")
                              (V4SF "vecfloat")
                              (DF   "dmul")])

;; Appropriate type for divide ops.
(define_mode_attr VStype_div [(V2DF "vecdiv")
                              (V4SF "vecfdiv")
                              (DF   "ddiv")])

;; Map the scalar mode for a vector type
(define_mode_attr VS_scalar [(V1TI  "TI")
                             (V2DF  "DF")
                             (V2DI  "DI")
                             (V4SF  "SF")
                             (V4SI  "SI")
                             (V8HI  "HI")
                             (V16QI "QI")])

;; Map to a double-sized vector mode
(define_mode_attr VS_double [(V4SI "V8SI")
                             (V4SF "V8SF")
                             (V2DI "V4DI")
                             (V2DF "V4DF")
                             (V1TI "V2TI")])

;; Iterators for loading constants with xxspltib
(define_mode_iterator VSINT_84  [V4SI V2DI DI SI])
(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])

;; Vector reverse byte modes
(define_mode_iterator VEC_REVB [V8HI V4SI V2DI V4SF V2DF V1TI])

;; Iterator for ISA 3.0 vector extract/insert of small integer vectors.
;; VSX_EXTRACT_I2 doesn't include V4SImode because SI extracts can be
;; done on ISA 2.07 and not just ISA 3.0.
(define_mode_iterator VSX_EXTRACT_I  [V16QI V8HI V4SI])
(define_mode_iterator VSX_EXTRACT_I2 [V16QI V8HI])

(define_mode_attr VSX_EXTRACT_WIDTH [(V16QI "b")
                                     (V8HI "h")
                                     (V4SI "w")])

;; Mode attribute to give the correct predicate for ISA 3.0 vector extract and
;; insert to validate the operand number.
(define_mode_attr VSX_EXTRACT_PREDICATE [(V16QI "const_0_to_15_operand")
                                         (V8HI  "const_0_to_7_operand")
                                         (V4SI  "const_0_to_3_operand")])
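
;; As an illustrative sketch (example only, not part of this file), a
;; source-level element extract such as the following is the kind of
;; operation these extract iterators cover; vec_extract is the standard
;; AltiVec built-in:
;;
;;   #include <altivec.h>
;;
;;   int
;;   get_element_2 (vector int v)
;;   {
;;     /* With -mcpu=power9 this can use an ISA 3.0 extract; SImode
;;        extracts are also possible on ISA 2.07, which is why V4SI is
;;        excluded from VSX_EXTRACT_I2 above.  */
;;     return vec_extract (v, 2);
;;   }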

;; Mode attribute to give the constraint for vector extract and insert
;; operations.
(define_mode_attr VSX_EX [(V16QI "v")
                          (V8HI  "v")
                          (V4SI  "wa")])

;; Mode iterator for binary floating types other than double to
;; optimize convert to that floating point type from an extract
;; of an integer type
(define_mode_iterator VSX_EXTRACT_FL [SF
                                      (IF "FLOAT128_2REG_P (IFmode)")
                                      (KF "TARGET_FLOAT128_HW")
                                      (TF "FLOAT128_2REG_P (TFmode)
                                           || (FLOAT128_IEEE_P (TFmode)
                                               && TARGET_FLOAT128_HW)")])

;; Mode iterator for binary floating types that have a direct conversion
;; from 64-bit integer to floating point
(define_mode_iterator FL_CONV [SF
                               DF
                               (KF "TARGET_FLOAT128_HW")
                               (TF "TARGET_FLOAT128_HW
                                    && FLOAT128_IEEE_P (TFmode)")])

;; Iterator for the 2 short vector types to do a splat from an integer
(define_mode_iterator VSX_SPLAT_I [V16QI V8HI])

;; Mode attribute to give the count for the splat instruction to splat
;; the value in the 64-bit integer slot
(define_mode_attr VSX_SPLAT_COUNT [(V16QI "7") (V8HI "3")])

;; Mode attribute to give the suffix for the splat instruction
(define_mode_attr VSX_SPLAT_SUFFIX [(V16QI "b") (V8HI "h")])

;; Constants for creating unspecs
(define_c_enum "unspec"
  [UNSPEC_VSX_CONCAT
   UNSPEC_VSX_CVDPSXWS
   UNSPEC_VSX_CVDPUXWS
   UNSPEC_VSX_CVSPDP
   UNSPEC_VSX_CVHPSP
   UNSPEC_VSX_CVSPDPN
   UNSPEC_VSX_CVDPSPN
   UNSPEC_VSX_CVSXWDP
   UNSPEC_VSX_CVUXWDP
   UNSPEC_VSX_CVSXDSP
   UNSPEC_VSX_CVUXDSP
   UNSPEC_VSX_FLOAT2
   UNSPEC_VSX_UNS_FLOAT2
   UNSPEC_VSX_FLOATE
   UNSPEC_VSX_UNS_FLOATE
   UNSPEC_VSX_FLOATO
   UNSPEC_VSX_UNS_FLOATO
   UNSPEC_VSX_TDIV
   UNSPEC_VSX_TSQRT
   UNSPEC_VSX_SET
   UNSPEC_VSX_ROUND_I
   UNSPEC_VSX_ROUND_IC
   UNSPEC_VSX_SLDWI
   UNSPEC_VSX_XXPERM

   UNSPEC_VSX_XXSPLTW
   UNSPEC_VSX_XXSPLTD
   UNSPEC_VSX_DIVSD
   UNSPEC_VSX_DIVUD
   UNSPEC_VSX_MULSD
   UNSPEC_VSX_SIGN_EXTEND
   UNSPEC_VSX_XVCVBF16SPN
   UNSPEC_VSX_XVCVSPBF16
   UNSPEC_VSX_XVCVSPSXDS
   UNSPEC_VSX_XVCVSPHP
   UNSPEC_VSX_VSLO
   UNSPEC_VSX_EXTRACT
   UNSPEC_VSX_SXEXPDP
   UNSPEC_VSX_SXSIG
   UNSPEC_VSX_SIEXPDP
   UNSPEC_VSX_SIEXPQP
   UNSPEC_VSX_SCMPEXPDP
   UNSPEC_VSX_SCMPEXPQP
   UNSPEC_VSX_STSTDC
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTH
   UNSPEC_VSX_VEXTRACT_FP_FROM_SHORTL
   UNSPEC_VSX_VXEXP
   UNSPEC_VSX_VXSIG
   UNSPEC_VSX_VIEXP
   UNSPEC_VSX_VTSTDC
   UNSPEC_VSX_VSIGNED2

   UNSPEC_LXVL
   UNSPEC_LXVLL
   UNSPEC_LVSL_REG
   UNSPEC_LVSR_REG
   UNSPEC_STXVL
   UNSPEC_STXVLL
   UNSPEC_XL_LEN_R
   UNSPEC_XST_LEN_R

   UNSPEC_VCLZLSBB
   UNSPEC_VCTZLSBB
   UNSPEC_VEXTUBLX
   UNSPEC_VEXTUHLX
   UNSPEC_VEXTUWLX
   UNSPEC_VEXTUBRX
   UNSPEC_VEXTUHRX
   UNSPEC_VEXTUWRX
   UNSPEC_VCMPNEB
   UNSPEC_VCMPNEZB
   UNSPEC_VCMPNEH
   UNSPEC_VCMPNEZH
   UNSPEC_VCMPNEW
   UNSPEC_VCMPNEZW
   UNSPEC_XXEXTRACTUW
   UNSPEC_XXINSERTW
   UNSPEC_VSX_FIRST_MATCH_INDEX
   UNSPEC_VSX_FIRST_MATCH_EOS_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_INDEX
   UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX
  ])

(define_int_iterator XVCVBF16 [UNSPEC_VSX_XVCVSPBF16
                               UNSPEC_VSX_XVCVBF16SPN])

(define_int_attr xvcvbf16 [(UNSPEC_VSX_XVCVSPBF16 "xvcvspbf16")
                           (UNSPEC_VSX_XVCVBF16SPN "xvcvbf16spn")])

;; VSX moves
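
;; As a sketch of what the LE-permuting patterns below handle (illustrative
;; example, not from this file): a plain vector load on little endian
;; without ISA 3.0 is a doubleword-swapped lxvd2x plus an xxpermdi, unless
;; the swap optimization pass can remove the permute:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   load_v2df (const vector double *p)
;;   {
;;     return *p;   /* -mcpu=power8 LE: lxvd2x + xxpermdi (often elided) */
;;   }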

;; The patterns for LE permuted loads and stores come before the general
;; VSX moves so they match first.
(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[0]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
        (match_operand:VSX_W 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (match_operand:V8HI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn_and_split "*vsx_le_perm_load_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (match_operand:V16QI 1 "indexed_or_indirect_operand" "Z"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[1];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register destination is not in the altivec
     range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_P (operands[0])
          || ALTIVEC_REGNO_P (REGNO (operands[0]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);

      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          /* Replace the source memory address with masked address.  */
          rtx lvx_set_expr = rs6000_gen_lvx (mode, operands[0], mem);
          emit_insn (lvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches lvx
             instruction, so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping load.  */
    }
  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
                                       : operands[0];
}
  [(set_attr "type" "vecload")
   (set_attr "length" "8")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 1) (const_int 0)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})
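
;; For illustration (hypothetical example, not from this file): a V2DF
;; store on little endian without ISA 3.0 becomes a swapping store when the
;; stvx path in the split above does not apply:
;;
;;   #include <altivec.h>
;;
;;   void
;;   store_v2df (vector double v, vector double *p)
;;   {
;;     *p = v;   /* -mcpu=power8 LE: xxpermdi + stxvd2x (often elided) */
;;   }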

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_D 0 "indexed_or_indirect_operand")
        (match_operand:VSX_D 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 1) (const_int 0)])))]
  "")

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 2)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_W 0 "indexed_or_indirect_operand")
        (match_operand:VSX_W 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 0)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))
   (set (match_dup 1)
        (vec_select:<MODE>
          (match_dup 1)
          (parallel [(const_int 2) (const_int 3)
                     (const_int 0) (const_int 1)])))]
  "")

(define_insn "*vsx_le_perm_store_v8hi"
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V8HI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 2)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V8HI 0 "indexed_or_indirect_operand")
        (match_operand:V8HI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 0)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))
   (set (match_dup 1)
        (vec_select:V8HI
          (match_dup 1)
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)])))]
  "")

(define_insn "*vsx_le_perm_store_v16qi"
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand" "=Z")
        (match_operand:V16QI 1 "vsx_register_operand" "+wa"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "#"
  [(set_attr "type" "vecstore")
   (set_attr "length" "12")])

(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && !reload_completed"
  [(set (match_dup 2)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 2)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
{
  rtx mem = operands[0];

  /* Don't apply the swap optimization if we've already performed register
     allocation and the hard register source is not in the altivec range.  */
  if ((MEM_ALIGN (mem) >= 128)
      && (!HARD_REGISTER_NUM_P (reg_or_subregno (operands[1]))
          || ALTIVEC_REGNO_P (reg_or_subregno (operands[1]))))
    {
      rtx mem_address = XEXP (mem, 0);
      enum machine_mode mode = GET_MODE (mem);
      if (REG_P (mem_address) || rs6000_sum_of_two_registers_p (mem_address))
        {
          rtx stvx_set_expr = rs6000_gen_stvx (mode, mem, operands[1]);
          emit_insn (stvx_set_expr);
          DONE;
        }
      else if (rs6000_quadword_masked_address_p (mem_address))
        {
          /* This rtl is already in the form that matches stvx instruction,
             so leave it alone.  */
          DONE;
        }
      /* Otherwise, fall through to transform into a swapping store.  */
    }

  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[1])
                                       : operands[1];
})

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:V16QI 0 "indexed_or_indirect_operand")
        (match_operand:V16QI 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR && reload_completed"
  [(set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 0)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))
   (set (match_dup 1)
        (vec_select:V16QI
          (match_dup 1)
          (parallel [(const_int 8) (const_int 9)
                     (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)
                     (const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)])))]
  "")

;; Little endian word swapping for 128-bit types that are either scalars or
;; the special V1TI container class, for which it is not appropriate to use
;; vec_select.
(define_insn "*vsx_le_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=wa,wa,Z,&r,&r,Q")
        (rotate:VSX_TI
         (match_operand:VSX_TI 1 "input_operand" "wa,Z,wa,r,Q,r")
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
  "@
   xxpermdi %x0,%x1,%x1,2
   lxvd2x %x0,%y1
   stxvd2x %x1,%y0
   mr %0,%L1\;mr %L0,%1
   ld%U1%X1 %0,%L1\;ld%U1%X1 %L0,%1
   std%U0%X0 %L1,%0\;std%U0%X0 %1,%L0"
  [(set_attr "length" "*,*,*,8,8,8")
   (set_attr "type" "vecperm,vecload,vecstore,*,load,store")])

(define_insn_and_split "*vsx_le_undo_permute_<mode>"
  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=wa,wa")
        (rotate:VSX_TI
         (rotate:VSX_TI
          (match_operand:VSX_TI 1 "vsx_register_operand" "0,wa")
          (const_int 64))
         (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX"
  "@
   #
   xxlor %x0,%x1"
  ""
  [(set (match_dup 0) (match_dup 1))]
{
  if (reload_completed && REGNO (operands[0]) == REGNO (operands[1]))
    {
      emit_note (NOTE_INSN_DELETED);
      DONE;
    }
}
  [(set_attr "length" "0,4")
   (set_attr "type" "veclogical")])
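
;; A small illustration of the rotate trick (hypothetical example, not from
;; this file): an __int128 copied through a VSX register on little endian
;; produces back-to-back 64-bit rotates, which the pattern above folds into
;; a plain move (or deletes outright when the registers coincide):
;;
;;   __int128
;;   copy_int128 (const __int128 *p)
;;   {
;;     return *p;   /* load rotate + store rotate; adjacent rotates cancel */
;;   }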

(define_insn_and_split "*vsx_le_perm_load_<mode>"
  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=wa,r")
        (match_operand:VSX_LE_128 1 "memory_operand" "Z,Q"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  "@
   #
   #"
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
}
  [(set_attr "type" "vecload,load")
   (set_attr "length" "8,8")
   (set_attr "isa" "<VSisa>,*")])

(define_insn "*vsx_le_perm_store_<mode>"
  [(set (match_operand:VSX_LE_128 0 "memory_operand" "=Z,Q")
        (match_operand:VSX_LE_128 1 "vsx_register_operand" "+wa,r"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  "@
   #
   #"
  [(set_attr "type" "vecstore,store")
   (set_attr "length" "12,8")
   (set_attr "isa" "<VSisa>,*")])

(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rtx tmp = (can_create_pseudo_p ()
             ? gen_reg_rtx_and_attrs (operands[0])
             : operands[0]);
  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
  DONE;
})

;; Peepholes to catch loads and stores for TImode if TImode landed in
;; GPR registers on a little endian system.
(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "memory_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "int_reg_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

(define_peephole2
  [(set (match_operand:VSX_TI 0 "int_reg_operand")
        (rotate:VSX_TI (match_operand:VSX_TI 1 "int_reg_operand")
                       (const_int 64)))
   (set (match_operand:VSX_TI 2 "memory_operand")
        (rotate:VSX_TI (match_dup 0)
                       (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && peep2_reg_dead_p (2, operands[0])"
  [(set (match_dup 2) (match_dup 1))])

;; Peephole to catch memory to memory transfers for TImode if TImode landed
;; in VSX registers on a little endian system.  The vector types and IEEE
;; 128-bit floating point are handled by the more generic swap elimination
;; pass.
(define_peephole2
  [(set (match_operand:TI 0 "vsx_register_operand")
        (rotate:TI (match_operand:TI 1 "vsx_register_operand")
                   (const_int 64)))
   (set (match_operand:TI 2 "vsx_register_operand")
        (rotate:TI (match_dup 0)
                   (const_int 64)))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR
   && (rtx_equal_p (operands[0], operands[2])
       || peep2_reg_dead_p (2, operands[0]))"
  [(set (match_dup 2) (match_dup 1))])

;; The post-reload split requires that we re-permute the source
;; register in case it is still live.
(define_split
  [(set (match_operand:VSX_LE_128 0 "memory_operand")
        (match_operand:VSX_LE_128 1 "vsx_register_operand"))]
  "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR
   && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode)"
  [(const_int 0)]
{
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
  DONE;
})

;; Vector constants that can be generated with XXSPLTIB, which was added in
;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are
;; recognized.
(define_insn "xxspltib_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "n")))]
  "TARGET_P9_VECTOR"
{
  operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn "xxspltib_<mode>_nosplit"
  [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa,wa")
        (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "jwM,wE"))]
  "TARGET_P9_VECTOR"
{
  rtx op1 = operands[1];
  int value = 256;
  int num_insns = -1;

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 1)
    gcc_unreachable ();

  operands[2] = GEN_INT (value & 0xff);
  return "xxspltib %x0,%2";
}
  [(set_attr "type" "vecperm")])

(define_insn_and_split "*xxspltib_<mode>_split"
  [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
        (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
  "TARGET_P9_VECTOR"
  "#"
  "&& 1"
  [(const_int 0)]
{
  int value = 256;
  int num_insns = -1;
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp = ((can_create_pseudo_p ())
             ? gen_reg_rtx (V16QImode)
             : gen_lowpart (V16QImode, op0));

  if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
      || num_insns != 2)
    gcc_unreachable ();

  emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));

  if (<MODE>mode == V2DImode)
    emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));

  else if (<MODE>mode == V4SImode)
    emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));

  else if (<MODE>mode == V8HImode)
    emit_insn (gen_altivec_vupkhsb (op0, tmp));

  else
    gcc_unreachable ();

  DONE;
}
  [(set_attr "type" "vecperm")
   (set_attr "length" "8")])
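
;; Illustrative example (not part of this file) of a constant these
;; patterns load; vec_splats is the usual built-in:
;;
;;   #include <altivec.h>
;;
;;   vector signed char
;;   splat_12 (void)
;;   {
;;     return vec_splats ((signed char) 12);  /* power9: a single xxspltib */
;;   }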


;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTISB
;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's
;; or all 1's, since the machine does not have to wait for the previous
;; instruction using the register being set (such as a store waiting on a
;; slow instruction).  But generate XXLXOR/XXLORC if it will avoid a register
;; move.

;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
(define_insn "vsx_mov<mode>_64bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        r,         we,        ?wQ,
                ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
                ?wa,       v,         <??r>,     wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        we,        r,         r,
                wQ,        Y,         r,         r,         wE,        jwM,
                ?jwM,      W,         <nW>,      v,         wZ"))]

  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, mffgpr,    mftgpr,    load,
                store,     load,      store,     *,         vecsimple, vecsimple,
                vecsimple, *,         *,         vecstore,  vecload")
   (set_attr "num_insns"
               "*,         *,         *,         2,         *,         2,
                2,         2,         2,         2,         *,         *,
                *,         5,         2,         *,         *")
   (set_attr "max_prefixed_insns"
               "*,         *,         *,         *,         *,         2,
                2,         2,         2,         2,         *,         *,
                *,         *,         *,         *,         *")
   (set_attr "length"
               "*,         *,         *,         8,         *,         8,
                8,         8,         8,         8,         *,         *,
                *,         20,        8,         *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                *,         *,         *,         *,         p9v,       *,
                <VSisa>,   *,         *,         *,         *")])

;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
;;              LVX (VMX)  STVX (VMX)
(define_insn "*vsx_mov<mode>_32bit"
  [(set (match_operand:VSX_M 0 "nonimmediate_operand"
               "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
                wa,        v,         ?wa,       v,         <??r>,
                wZ,        v")

        (match_operand:VSX_M 1 "input_operand"
               "wa,        ZwO,       wa,        Y,         r,         r,
                wE,        jwM,       ?jwM,      W,         <nW>,
                v,         wZ"))]

  "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
{
  return rs6000_output_move_128bit (operands);
}
  [(set_attr "type"
               "vecstore,  vecload,   vecsimple, load,      store,     *,
                vecsimple, vecsimple, vecsimple, *,         *,
                vecstore,  vecload")
   (set_attr "length"
               "*,         *,         *,         16,        16,        16,
                *,         *,         *,         20,        16,
                *,         *")
   (set_attr "isa"
               "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                p9v,       *,         <VSisa>,   *,         *,
                *,         *")])

;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
  [(set (match_operand:VSX_M 0 "vsx_register_operand")
        (match_operand:VSX_M 1 "memory_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[1], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

(define_expand "vsx_store_<mode>"
  [(set (match_operand:VSX_M 0 "memory_operand")
        (match_operand:VSX_M 1 "vsx_register_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  /* Expand to swaps if needed, prior to swap optimization.  */
  if (!BYTES_BIG_ENDIAN && !TARGET_P9_VECTOR
      && !altivec_indexed_or_indirect_operand (operands[0], <MODE>mode))
    {
      rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
      DONE;
    }
})

;; Explicit load/store expanders for the builtin functions for lxvd2x, etc.,
;; when you really want their element-reversing behavior.
(define_insn "vsx_ld_elemrev_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (vec_select:V2DI
          (match_operand:V2DI 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (vec_select:V1TI
          (match_operand:V1TI 1 "memory_operand" "Z")
          (parallel [(const_int 0)])))]
  "VECTOR_MEM_VSX_P (V1TImode) && !BYTES_BIG_ENDIAN"
{
  return "lxvd2x %x0,%y1\;xxpermdi %x0,%x0,%x0,2";
}
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v2df"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
        (vec_select:V2DF
          (match_operand:V2DF 1 "memory_operand" "Z")
          (parallel [(const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "lxvd2x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (vec_select:V4SI
          (match_operand:V4SI 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_insn "vsx_ld_elemrev_v4sf"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
        (vec_select:V4SF
          (match_operand:V4SF 1 "memory_operand" "Z")
          (parallel [(const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "lxvw4x %x0,%y1"
  [(set_attr "type" "vecload")])

(define_expand "vsx_ld_elemrev_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN"
{
  if (!TARGET_P9_VECTOR)
    {
      rtx tmp = gen_reg_rtx (V4SImode);
      rtx subreg, subreg2, perm[16], pcv;
      /* 2 is leftmost element in register */
      unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2};
      int i;

      subreg = simplify_gen_subreg (V4SImode, operands[1], V8HImode, 0);
      emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg));
      subreg2 = simplify_gen_subreg (V8HImode, tmp, V4SImode, 0);

      for (i = 0; i < 16; ++i)
        perm[i] = GEN_INT (reorder[i]);

      pcv = force_reg (V16QImode,
                       gen_rtx_CONST_VECTOR (V16QImode,
                                             gen_rtvec_v (16, perm)));
      emit_insn (gen_altivec_vperm_v8hi_direct (operands[0], subreg2,
                                                subreg2, pcv));
      DONE;
    }
})

(define_insn "*vsx_ld_elemrev_v8hi_internal"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (vec_select:V8HI
          (match_operand:V8HI 1 "memory_operand" "Z")
          (parallel [(const_int 7) (const_int 6)
                     (const_int 5) (const_int 4)
                     (const_int 3) (const_int 2)
                     (const_int 1) (const_int 0)])))]
  "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR"
  "lxvh8x %x0,%y1"
  [(set_attr "type" "vecload")])
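
;; Sketch of a use of the element-reversing loads (hypothetical example,
;; not from this file); vec_xl_be is the big-endian-element-order load
;; built-in:
;;
;;   #include <altivec.h>
;;
;;   vector short
;;   load_be_v8hi (const short *p)
;;   {
;;     /* power9 LE: lxvh8x; power8 LE: the lxvw4x + vperm expansion.  */
;;     return vec_xl_be (0, p);
;;   }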
"memory_operand" "Z") 1336 (parallel [(const_int 15) (const_int 14) 1337 (const_int 13) (const_int 12) 1338 (const_int 11) (const_int 10) 1339 (const_int 9) (const_int 8) 1340 (const_int 7) (const_int 6) 1341 (const_int 5) (const_int 4) 1342 (const_int 3) (const_int 2) 1343 (const_int 1) (const_int 0)])))] 1344 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" 1345{ 1346 if (!TARGET_P9_VECTOR) 1347 { 1348 rtx tmp = gen_reg_rtx (V4SImode); 1349 rtx subreg, subreg2, perm[16], pcv; 1350 /* 3 is leftmost element in register */ 1351 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; 1352 int i; 1353 1354 subreg = simplify_gen_subreg (V4SImode, operands[1], V16QImode, 0); 1355 emit_insn (gen_vsx_ld_elemrev_v4si (tmp, subreg)); 1356 subreg2 = simplify_gen_subreg (V16QImode, tmp, V4SImode, 0); 1357 1358 for (i = 0; i < 16; ++i) 1359 perm[i] = GEN_INT (reorder[i]); 1360 1361 pcv = force_reg (V16QImode, 1362 gen_rtx_CONST_VECTOR (V16QImode, 1363 gen_rtvec_v (16, perm))); 1364 emit_insn (gen_altivec_vperm_v16qi_direct (operands[0], subreg2, 1365 subreg2, pcv)); 1366 DONE; 1367 } 1368}) 1369 1370(define_insn "vsx_ld_elemrev_v16qi_internal" 1371 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 1372 (vec_select:V16QI 1373 (match_operand:V16QI 1 "memory_operand" "Z") 1374 (parallel [(const_int 15) (const_int 14) 1375 (const_int 13) (const_int 12) 1376 (const_int 11) (const_int 10) 1377 (const_int 9) (const_int 8) 1378 (const_int 7) (const_int 6) 1379 (const_int 5) (const_int 4) 1380 (const_int 3) (const_int 2) 1381 (const_int 1) (const_int 0)])))] 1382 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1383 "lxvb16x %x0,%y1" 1384 [(set_attr "type" "vecload")]) 1385 1386(define_insn "vsx_st_elemrev_v1ti" 1387 [(set (match_operand:V1TI 0 "memory_operand" "=Z") 1388 (vec_select:V1TI 1389 (match_operand:V1TI 1 "vsx_register_operand" "+wa") 1390 (parallel [(const_int 0)]))) 1391 (clobber (match_dup 1))] 1392 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" 1393{ 1394 return "xxpermdi %x1,%x1,%x1,2\;stxvd2x %x1,%y0"; 1395} 1396 [(set_attr "type" "vecstore")]) 1397 1398(define_insn "vsx_st_elemrev_v2df" 1399 [(set (match_operand:V2DF 0 "memory_operand" "=Z") 1400 (vec_select:V2DF 1401 (match_operand:V2DF 1 "vsx_register_operand" "wa") 1402 (parallel [(const_int 1) (const_int 0)])))] 1403 "VECTOR_MEM_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN" 1404 "stxvd2x %x1,%y0" 1405 [(set_attr "type" "vecstore")]) 1406 1407(define_insn "vsx_st_elemrev_v2di" 1408 [(set (match_operand:V2DI 0 "memory_operand" "=Z") 1409 (vec_select:V2DI 1410 (match_operand:V2DI 1 "vsx_register_operand" "wa") 1411 (parallel [(const_int 1) (const_int 0)])))] 1412 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN" 1413 "stxvd2x %x1,%y0" 1414 [(set_attr "type" "vecstore")]) 1415 1416(define_insn "vsx_st_elemrev_v4sf" 1417 [(set (match_operand:V4SF 0 "memory_operand" "=Z") 1418 (vec_select:V4SF 1419 (match_operand:V4SF 1 "vsx_register_operand" "wa") 1420 (parallel [(const_int 3) (const_int 2) 1421 (const_int 1) (const_int 0)])))] 1422 "VECTOR_MEM_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN" 1423 "stxvw4x %x1,%y0" 1424 [(set_attr "type" "vecstore")]) 1425 1426(define_insn "vsx_st_elemrev_v4si" 1427 [(set (match_operand:V4SI 0 "memory_operand" "=Z") 1428 (vec_select:V4SI 1429 (match_operand:V4SI 1 "vsx_register_operand" "wa") 1430 (parallel [(const_int 3) (const_int 2) 1431 (const_int 1) (const_int 0)])))] 1432 "VECTOR_MEM_VSX_P (V4SImode) && !BYTES_BIG_ENDIAN" 1433 "stxvw4x %x1,%y0" 1434 [(set_attr "type" 
"vecstore")]) 1435 1436(define_expand "vsx_st_elemrev_v8hi" 1437 [(set (match_operand:V8HI 0 "memory_operand" "=Z") 1438 (vec_select:V8HI 1439 (match_operand:V8HI 1 "vsx_register_operand" "wa") 1440 (parallel [(const_int 7) (const_int 6) 1441 (const_int 5) (const_int 4) 1442 (const_int 3) (const_int 2) 1443 (const_int 1) (const_int 0)])))] 1444 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN" 1445{ 1446 if (!TARGET_P9_VECTOR) 1447 { 1448 rtx mem_subreg, subreg, perm[16], pcv; 1449 rtx tmp = gen_reg_rtx (V8HImode); 1450 /* 2 is leftmost element in register */ 1451 unsigned int reorder[16] = {13,12,15,14,9,8,11,10,5,4,7,6,1,0,3,2}; 1452 int i; 1453 1454 for (i = 0; i < 16; ++i) 1455 perm[i] = GEN_INT (reorder[i]); 1456 1457 pcv = force_reg (V16QImode, 1458 gen_rtx_CONST_VECTOR (V16QImode, 1459 gen_rtvec_v (16, perm))); 1460 emit_insn (gen_altivec_vperm_v8hi_direct (tmp, operands[1], 1461 operands[1], pcv)); 1462 subreg = simplify_gen_subreg (V4SImode, tmp, V8HImode, 0); 1463 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V8HImode, 0); 1464 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); 1465 DONE; 1466 } 1467}) 1468 1469(define_insn "*vsx_st_elemrev_v2di_internal" 1470 [(set (match_operand:V2DI 0 "memory_operand" "=Z") 1471 (vec_select:V2DI 1472 (match_operand:V2DI 1 "vsx_register_operand" "wa") 1473 (parallel [(const_int 1) (const_int 0)])))] 1474 "VECTOR_MEM_VSX_P (V2DImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1475 "stxvd2x %x1,%y0" 1476 [(set_attr "type" "vecstore")]) 1477 1478(define_insn "*vsx_st_elemrev_v8hi_internal" 1479 [(set (match_operand:V8HI 0 "memory_operand" "=Z") 1480 (vec_select:V8HI 1481 (match_operand:V8HI 1 "vsx_register_operand" "wa") 1482 (parallel [(const_int 7) (const_int 6) 1483 (const_int 5) (const_int 4) 1484 (const_int 3) (const_int 2) 1485 (const_int 1) (const_int 0)])))] 1486 "VECTOR_MEM_VSX_P (V8HImode) && !BYTES_BIG_ENDIAN && TARGET_P9_VECTOR" 1487 "stxvh8x %x1,%y0" 1488 [(set_attr "type" "vecstore")]) 1489 1490(define_expand "vsx_st_elemrev_v16qi" 1491 [(set (match_operand:V16QI 0 "memory_operand" "=Z") 1492 (vec_select:V16QI 1493 (match_operand:V16QI 1 "vsx_register_operand" "wa") 1494 (parallel [(const_int 15) (const_int 14) 1495 (const_int 13) (const_int 12) 1496 (const_int 11) (const_int 10) 1497 (const_int 9) (const_int 8) 1498 (const_int 7) (const_int 6) 1499 (const_int 5) (const_int 4) 1500 (const_int 3) (const_int 2) 1501 (const_int 1) (const_int 0)])))] 1502 "VECTOR_MEM_VSX_P (V16QImode) && !BYTES_BIG_ENDIAN" 1503{ 1504 if (!TARGET_P9_VECTOR) 1505 { 1506 rtx mem_subreg, subreg, perm[16], pcv; 1507 rtx tmp = gen_reg_rtx (V16QImode); 1508 /* 3 is leftmost element in register */ 1509 unsigned int reorder[16] = {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3}; 1510 int i; 1511 1512 for (i = 0; i < 16; ++i) 1513 perm[i] = GEN_INT (reorder[i]); 1514 1515 pcv = force_reg (V16QImode, 1516 gen_rtx_CONST_VECTOR (V16QImode, 1517 gen_rtvec_v (16, perm))); 1518 emit_insn (gen_altivec_vperm_v16qi_direct (tmp, operands[1], 1519 operands[1], pcv)); 1520 subreg = simplify_gen_subreg (V4SImode, tmp, V16QImode, 0); 1521 mem_subreg = simplify_gen_subreg (V4SImode, operands[0], V16QImode, 0); 1522 emit_insn (gen_vsx_st_elemrev_v4si (mem_subreg, subreg)); 1523 DONE; 1524 } 1525}) 1526 1527(define_insn "*vsx_st_elemrev_v16qi_internal" 1528 [(set (match_operand:V16QI 0 "memory_operand" "=Z") 1529 (vec_select:V16QI 1530 (match_operand:V16QI 1 "vsx_register_operand" "wa") 1531 (parallel [(const_int 15) (const_int 14) 1532 (const_int 13) (const_int 12) 1533 


;; VSX vector floating point arithmetic instructions.  The VSX scalar
;; instructions are now combined with the insn for the traditional floating
;; point unit.
(define_insn "*vsx_add<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (plus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvadd<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_sub<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (minus:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                     (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvsub<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_mul<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (mult:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                    (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvmul<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_simple>")])

; Emulate vector with scalar for vec_mul in V2DImode
(define_insn_and_split "vsx_mul_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa")
                      (match_operand:V2DI 2 "vsx_register_operand" "wa")]
                     UNSPEC_VSX_MULSD))]
  "VECTOR_MEM_VSX_P (V2DImode)"
  "#"
  "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = gen_reg_rtx (DImode);
  rtx op4 = gen_reg_rtx (DImode);
  rtx op5 = gen_reg_rtx (DImode);
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op5, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op5, ret);
    }
  emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1)));
  emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1)));
  if (TARGET_POWERPC64)
    emit_insn (gen_muldi3 (op3, op3, op4));
  else
    {
      rtx ret = expand_mult (DImode, op3, op4, NULL, 0, false);
      emit_move_insn (op3, ret);
    }
  emit_insn (gen_vsx_concat_v2di (op0, op5, op3));
  DONE;
}
  [(set_attr "type" "mul")])

(define_insn "*vsx_div<mode>3"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
        (div:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")
                   (match_operand:VSX_F 2 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvdiv<sd>p %x0,%x1,%x2"
  [(set_attr "type" "<VStype_div>")])
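
;; The V2DI multiply above (and the divides below) have no single vector
;; instruction on these ISA levels, so a source-level operation such as
;; this hypothetical example is split into scalar DImode operations:
;;
;;   #include <altivec.h>
;;
;;   vector long long
;;   mul_v2di (vector long long a, vector long long b)
;;   {
;;     return a * b;   /* extract elements, mulld, concat */
;;   }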
"vsx_register_operand" "wa")] 1624 UNSPEC_VSX_DIVSD))] 1625 "VECTOR_MEM_VSX_P (V2DImode)" 1626 "#" 1627 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" 1628 [(const_int 0)] 1629{ 1630 rtx op0 = operands[0]; 1631 rtx op1 = operands[1]; 1632 rtx op2 = operands[2]; 1633 rtx op3 = gen_reg_rtx (DImode); 1634 rtx op4 = gen_reg_rtx (DImode); 1635 rtx op5 = gen_reg_rtx (DImode); 1636 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); 1637 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); 1638 if (TARGET_POWERPC64) 1639 emit_insn (gen_divdi3 (op5, op3, op4)); 1640 else 1641 { 1642 rtx libfunc = optab_libfunc (sdiv_optab, DImode); 1643 rtx target = emit_library_call_value (libfunc, 1644 op5, LCT_NORMAL, DImode, 1645 op3, DImode, 1646 op4, DImode); 1647 emit_move_insn (op5, target); 1648 } 1649 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); 1650 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); 1651 if (TARGET_POWERPC64) 1652 emit_insn (gen_divdi3 (op3, op3, op4)); 1653 else 1654 { 1655 rtx libfunc = optab_libfunc (sdiv_optab, DImode); 1656 rtx target = emit_library_call_value (libfunc, 1657 op3, LCT_NORMAL, DImode, 1658 op3, DImode, 1659 op4, DImode); 1660 emit_move_insn (op3, target); 1661 } 1662 emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); 1663 DONE; 1664} 1665 [(set_attr "type" "div")]) 1666 1667(define_insn_and_split "vsx_udiv_v2di" 1668 [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") 1669 (unspec:V2DI [(match_operand:V2DI 1 "vsx_register_operand" "wa") 1670 (match_operand:V2DI 2 "vsx_register_operand" "wa")] 1671 UNSPEC_VSX_DIVUD))] 1672 "VECTOR_MEM_VSX_P (V2DImode)" 1673 "#" 1674 "VECTOR_MEM_VSX_P (V2DImode) && !reload_completed" 1675 [(const_int 0)] 1676{ 1677 rtx op0 = operands[0]; 1678 rtx op1 = operands[1]; 1679 rtx op2 = operands[2]; 1680 rtx op3 = gen_reg_rtx (DImode); 1681 rtx op4 = gen_reg_rtx (DImode); 1682 rtx op5 = gen_reg_rtx (DImode); 1683 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (0))); 1684 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (0))); 1685 if (TARGET_POWERPC64) 1686 emit_insn (gen_udivdi3 (op5, op3, op4)); 1687 else 1688 { 1689 rtx libfunc = optab_libfunc (udiv_optab, DImode); 1690 rtx target = emit_library_call_value (libfunc, 1691 op5, LCT_NORMAL, DImode, 1692 op3, DImode, 1693 op4, DImode); 1694 emit_move_insn (op5, target); 1695 } 1696 emit_insn (gen_vsx_extract_v2di (op3, op1, GEN_INT (1))); 1697 emit_insn (gen_vsx_extract_v2di (op4, op2, GEN_INT (1))); 1698 if (TARGET_POWERPC64) 1699 emit_insn (gen_udivdi3 (op3, op3, op4)); 1700 else 1701 { 1702 rtx libfunc = optab_libfunc (udiv_optab, DImode); 1703 rtx target = emit_library_call_value (libfunc, 1704 op3, LCT_NORMAL, DImode, 1705 op3, DImode, 1706 op4, DImode); 1707 emit_move_insn (op3, target); 1708 } 1709 emit_insn (gen_vsx_concat_v2di (op0, op5, op3)); 1710 DONE; 1711} 1712 [(set_attr "type" "div")]) 1713 1714;; *tdiv* instruction returning the FG flag 1715(define_expand "vsx_tdiv<mode>3_fg" 1716 [(set (match_dup 3) 1717 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand") 1718 (match_operand:VSX_B 2 "vsx_register_operand")] 1719 UNSPEC_VSX_TDIV)) 1720 (set (match_operand:SI 0 "gpc_reg_operand") 1721 (gt:SI (match_dup 3) 1722 (const_int 0)))] 1723 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1724{ 1725 operands[3] = gen_reg_rtx (CCFPmode); 1726}) 1727 1728;; *tdiv* instruction returning the FE flag 1729(define_expand "vsx_tdiv<mode>3_fe" 1730 [(set (match_dup 3) 1731 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand") 1732 (match_operand:VSX_B 
2 "vsx_register_operand")] 1733 UNSPEC_VSX_TDIV)) 1734 (set (match_operand:SI 0 "gpc_reg_operand") 1735 (eq:SI (match_dup 3) 1736 (const_int 0)))] 1737 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1738{ 1739 operands[3] = gen_reg_rtx (CCFPmode); 1740}) 1741 1742(define_insn "*vsx_tdiv<mode>3_internal" 1743 [(set (match_operand:CCFP 0 "cc_reg_operand" "=x") 1744 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa") 1745 (match_operand:VSX_B 2 "vsx_register_operand" "wa")] 1746 UNSPEC_VSX_TDIV))] 1747 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1748 "x<VSv>tdiv<sd>p %0,%x1,%x2" 1749 [(set_attr "type" "<VStype_simple>")]) 1750 1751(define_insn "vsx_fre<mode>2" 1752 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1753 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 1754 UNSPEC_FRES))] 1755 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1756 "xvre<sd>p %x0,%x1" 1757 [(set_attr "type" "<VStype_simple>")]) 1758 1759(define_insn "*vsx_neg<mode>2" 1760 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1761 (neg:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] 1762 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1763 "xvneg<sd>p %x0,%x1" 1764 [(set_attr "type" "<VStype_simple>")]) 1765 1766(define_insn "*vsx_abs<mode>2" 1767 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1768 (abs:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] 1769 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1770 "xvabs<sd>p %x0,%x1" 1771 [(set_attr "type" "<VStype_simple>")]) 1772 1773(define_insn "vsx_nabs<mode>2" 1774 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1775 (neg:VSX_F 1776 (abs:VSX_F 1777 (match_operand:VSX_F 1 "vsx_register_operand" "wa"))))] 1778 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1779 "xvnabs<sd>p %x0,%x1" 1780 [(set_attr "type" "<VStype_simple>")]) 1781 1782(define_insn "vsx_smax<mode>3" 1783 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1784 (smax:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1785 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1786 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1787 "xvmax<sd>p %x0,%x1,%x2" 1788 [(set_attr "type" "<VStype_simple>")]) 1789 1790(define_insn "*vsx_smin<mode>3" 1791 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1792 (smin:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1793 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1794 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1795 "xvmin<sd>p %x0,%x1,%x2" 1796 [(set_attr "type" "<VStype_simple>")]) 1797 1798(define_insn "*vsx_sqrt<mode>2" 1799 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1800 (sqrt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))] 1801 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1802 "xvsqrt<sd>p %x0,%x1" 1803 [(set_attr "type" "<sd>sqrt")]) 1804 1805(define_insn "*vsx_rsqrte<mode>2" 1806 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1807 (unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 1808 UNSPEC_RSQRT))] 1809 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1810 "xvrsqrte<sd>p %x0,%x1" 1811 [(set_attr "type" "<VStype_simple>")]) 1812 1813;; *tsqrt* returning the fg flag 1814(define_expand "vsx_tsqrt<mode>2_fg" 1815 [(set (match_dup 2) 1816 (unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")] 1817 UNSPEC_VSX_TSQRT)) 1818 (set (match_operand:SI 0 "gpc_reg_operand") 1819 (gt:SI (match_dup 2) 1820 (const_int 0)))] 1821 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1822{ 1823 operands[2] = gen_reg_rtx (CCFPmode); 1824}) 1825 1826;; *tsqrt* returning the fe flag 1827(define_expand "vsx_tsqrt<mode>2_fe" 1828 
;; *tsqrt* returning the FE flag
(define_expand "vsx_tsqrt<mode>2_fe"
  [(set (match_dup 2)
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand")]
		     UNSPEC_VSX_TSQRT))
   (set (match_operand:SI 0 "gpc_reg_operand")
	(eq:SI (match_dup 2)
	       (const_int 0)))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
{
  operands[2] = gen_reg_rtx (CCFPmode);
})

(define_insn "*vsx_tsqrt<mode>2_internal"
  [(set (match_operand:CCFP 0 "cc_reg_operand" "=x")
	(unspec:CCFP [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_TSQRT))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>tsqrt<sd>p %0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Fused vector multiply/add instructions.  Do not generate the Altivec
;; versions of fma (vmaddfp and vnmsubfp).  Those instructions allow the
;; target to be a separate register from the 3 inputs, but they have
;; different rounding behavior than the VSX instructions.
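;; Because the VSX forms overwrite one of their inputs, the patterns below
;; offer two alternatives (xvmadda* reuses the addend register, xvmaddm* the
;; second multiplicand) and let register allocation pick.  A hedged usage
;; sketch (vec_madd is the altivec.h entry point; which alternative is
;; emitted depends on register assignment):
;;
;;   #include <altivec.h>
;;   vector float f (vector float a, vector float b, vector float c)
;;   {
;;     return vec_madd (a, b, c);   /* a*b + c with a single rounding  */
;;   }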
1924 "@ 1925 xvnmsubadp %x0,%x1,%x2 1926 xvnmsubmdp %x0,%x1,%x3" 1927 [(set_attr "type" "vecdouble")]) 1928 1929;; Vector conditional expressions (no scalar version for these instructions) 1930(define_insn "vsx_eq<mode>" 1931 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1932 (eq:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1933 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1934 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1935 "xvcmpeq<sd>p %x0,%x1,%x2" 1936 [(set_attr "type" "<VStype_simple>")]) 1937 1938(define_insn "vsx_gt<mode>" 1939 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1940 (gt:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1941 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1942 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1943 "xvcmpgt<sd>p %x0,%x1,%x2" 1944 [(set_attr "type" "<VStype_simple>")]) 1945 1946(define_insn "*vsx_ge<mode>" 1947 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1948 (ge:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1949 (match_operand:VSX_F 2 "vsx_register_operand" "wa")))] 1950 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1951 "xvcmpge<sd>p %x0,%x1,%x2" 1952 [(set_attr "type" "<VStype_simple>")]) 1953 1954;; Compare vectors producing a vector result and a predicate, setting CR6 to 1955;; indicate a combined status 1956(define_insn "*vsx_eq_<mode>_p" 1957 [(set (reg:CC CR6_REGNO) 1958 (unspec:CC 1959 [(eq:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1960 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] 1961 UNSPEC_PREDICATE)) 1962 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1963 (eq:VSX_F (match_dup 1) 1964 (match_dup 2)))] 1965 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1966 "xvcmpeq<sd>p. %x0,%x1,%x2" 1967 [(set_attr "type" "<VStype_simple>")]) 1968 1969(define_insn "*vsx_gt_<mode>_p" 1970 [(set (reg:CC CR6_REGNO) 1971 (unspec:CC 1972 [(gt:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1973 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] 1974 UNSPEC_PREDICATE)) 1975 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1976 (gt:VSX_F (match_dup 1) 1977 (match_dup 2)))] 1978 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1979 "xvcmpgt<sd>p. %x0,%x1,%x2" 1980 [(set_attr "type" "<VStype_simple>")]) 1981 1982(define_insn "*vsx_ge_<mode>_p" 1983 [(set (reg:CC CR6_REGNO) 1984 (unspec:CC 1985 [(ge:CC (match_operand:VSX_F 1 "vsx_register_operand" "wa") 1986 (match_operand:VSX_F 2 "vsx_register_operand" "wa"))] 1987 UNSPEC_PREDICATE)) 1988 (set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 1989 (ge:VSX_F (match_dup 1) 1990 (match_dup 2)))] 1991 "VECTOR_UNIT_VSX_P (<MODE>mode)" 1992 "xvcmpge<sd>p. 
%x0,%x1,%x2" 1993 [(set_attr "type" "<VStype_simple>")]) 1994 1995;; Vector select 1996(define_insn "*vsx_xxsel<mode>" 1997 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") 1998 (if_then_else:VSX_L 1999 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") 2000 (match_operand:VSX_L 4 "zero_constant" "")) 2001 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa") 2002 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))] 2003 "VECTOR_MEM_VSX_P (<MODE>mode)" 2004 "xxsel %x0,%x3,%x2,%x1" 2005 [(set_attr "type" "vecmove") 2006 (set_attr "isa" "<VSisa>")]) 2007 2008(define_insn "*vsx_xxsel<mode>_uns" 2009 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa") 2010 (if_then_else:VSX_L 2011 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa") 2012 (match_operand:VSX_L 4 "zero_constant" "")) 2013 (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa") 2014 (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))] 2015 "VECTOR_MEM_VSX_P (<MODE>mode)" 2016 "xxsel %x0,%x3,%x2,%x1" 2017 [(set_attr "type" "vecmove") 2018 (set_attr "isa" "<VSisa>")]) 2019 2020;; Copy sign 2021(define_insn "vsx_copysign<mode>3" 2022 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 2023 (unspec:VSX_F 2024 [(match_operand:VSX_F 1 "vsx_register_operand" "wa") 2025 (match_operand:VSX_F 2 "vsx_register_operand" "wa")] 2026 UNSPEC_COPYSIGN))] 2027 "VECTOR_UNIT_VSX_P (<MODE>mode)" 2028 "xvcpsgn<sd>p %x0,%x2,%x1" 2029 [(set_attr "type" "<VStype_simple>")]) 2030 2031;; For the conversions, limit the register class for the integer value to be 2032;; the fprs because we don't want to add the altivec registers to movdi/movsi. 2033;; For the unsigned tests, there isn't a generic double -> unsigned conversion 2034;; in rs6000.md so don't test VECTOR_UNIT_VSX_P, just test against VSX. 2035;; Don't use vsx_register_operand here, use gpc_reg_operand to match rs6000.md 2036;; in allowing virtual registers. 
(define_insn "vsx_float<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
	(float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvsx<VSc><sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floatuns<VSi><mode>2"
  [(set (match_operand:VSX_F 0 "gpc_reg_operand" "=wa")
	(unsigned_float:VSX_F (match_operand:<VSI> 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvcvux<VSc><sd>p %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fix_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
	(fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<sd>psx<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_fixuns_trunc<mode><VSi>2"
  [(set (match_operand:<VSI> 0 "gpc_reg_operand" "=wa")
	(unsigned_fix:<VSI> (match_operand:VSX_F 1 "gpc_reg_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>cv<sd>pux<VSc>s %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

;; Math rounding functions
(define_insn "vsx_x<VSv>r<sd>pi"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
		      UNSPEC_VSX_ROUND_I))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>pi %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_x<VSv>r<sd>pic"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
		      UNSPEC_VSX_ROUND_IC))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>pic %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_btrunc<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(fix:VSX_F (match_operand:VSX_F 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>piz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "*vsx_b2trunc<mode>2"
  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=wa")
	(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "wa")]
		      UNSPEC_FRIZ))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "x<VSv>r<sd>piz %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_floor<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
		      UNSPEC_FRIM))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>pim %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])

(define_insn "vsx_ceil<mode>2"
  [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
	(unspec:VSX_F [(match_operand:VSX_F 1 "vsx_register_operand" "wa")]
		      UNSPEC_FRIP))]
  "VECTOR_UNIT_VSX_P (<MODE>mode)"
  "xvr<sd>pip %x0,%x1"
  [(set_attr "type" "<VStype_simple>")])
;; VSX convert to/from double vector

;; Convert between single and double precision.
;; Don't use xscvspdp and xscvdpsp for scalar conversions, since the normal
;; scalar single precision instructions internally use the double format.
;; Prefer the Altivec registers, since we likely will need to do a vperm.
(define_insn "vsx_xscvdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=f,?wa")
	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "f,wa")]
		     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (DFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xvcvspdp_be"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
	(float_extend:V2DF
	 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
	  (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V4SFmode) && BYTES_BIG_ENDIAN"
  "xvcvspdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvspdp_le"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=v,?wa")
	(float_extend:V2DF
	 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
	  (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V4SFmode) && !BYTES_BIG_ENDIAN"
  "xvcvspdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvspdp"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V4SF 1 "vsx_register_operand")]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvspdp_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcvspdp_le (operands[0], operands[1]));
  DONE;
})

(define_insn "vsx_xvcvdpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,?wa")
	(unspec:V4SF [(match_operand:V2DF 1 "vsx_register_operand" "v,wa")]
		     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; xscvspdp, represent the scalar SF type as V4SF
(define_insn "vsx_xscvspdp"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Same as vsx_xscvspdp, but use SF as the type
(define_insn "vsx_xscvspdp_scalar2"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
	(unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvspdp %x0,%x1"
  [(set_attr "type" "fp")])

;; Generate xvcvhpsp instruction
(define_insn "vsx_xvcvhpsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVHPSP))]
  "TARGET_P9_VECTOR"
  "xvcvhpsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; Generate xvcvsphp
(define_insn "vsx_xvcvsphp"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
	(unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_XVCVSPHP))]
  "TARGET_P9_VECTOR"
  "xvcvsphp %x0,%x1"
  [(set_attr "type" "vecfloat")])

;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
;; format of scalars is actually DF.
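;; For example (an editorial sketch, not taken from the build): because the
;; scalar is already in DF format internally, splatting one float can be done
;; as a single convert plus a word splat, roughly
;;
;;   #include <altivec.h>
;;   vector float splat (float x)
;;   {
;;     return vec_splats (x);   /* roughly xscvdpspn + xxspltw on VSX  */
;;   }
;;
;; rather than bouncing the value through memory.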
(define_insn "vsx_xscvdpsp_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVSPDP))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xscvdpsp %x0,%x1"
  [(set_attr "type" "fp")])

;; ISA 2.07 xscvdpspn/xscvspdpn that do not raise an error on signalling NaNs
(define_insn "vsx_xscvdpspn"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvspdpn"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

(define_insn "vsx_xscvdpspn_scalar"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVDPSPN))]
  "TARGET_XSCVDPSPN"
  "xscvdpspn %x0,%x1"
  [(set_attr "type" "fp")])

;; Used by direct move to move a SFmode value from GPR to VSX register
(define_insn "vsx_xscvspdpn_directmove"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
	(unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSPDPN))]
  "TARGET_XSCVSPDPN"
  "xscvspdpn %x0,%x1"
  [(set_attr "type" "fp")])

;; Convert and scale (used by vec_ctf, vec_cts, vec_ctu for double/long long)

(define_insn "vsx_xvcv<su>xwsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(any_float:V4SF (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcv<su>xwsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcv<su>xddp"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(any_float:V2DF (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcv<su>xddp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsp<su>xws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
	(any_fix:V4SI (match_operand:V4SF 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "xvcvsp<su>xws %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvdp<su>xds"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
	(any_fix:V2DI (match_operand:V2DF 1 "vsx_register_operand" "wa")))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvsxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvsxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})
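;; These *_scale expanders implement the fixed-point flavor of vec_ctf and
;; friends: convert first, then scale the result by a power of two via
;; rs6000_scale_v2df.  A hedged usage sketch:
;;
;;   #include <altivec.h>
;;   vector double fixed_to_double (vector signed long long v)
;;   {
;;     /* Treat v as Q5 fixed point, i.e. result = converted value / 32.  */
;;     return vec_ctf (v, 5);   /* xvcvsxddp, then scale by 2**-5  */
;;   }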
(define_expand "vsx_xvcvuxddp_scale"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V2DI 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  int scale = INTVAL (operands[2]);
  emit_insn (gen_vsx_xvcvuxddp (op0, op1));
  if (scale != 0)
    rs6000_scale_v2df (op0, op0, -scale);
  DONE;
})

(define_expand "vsx_xvcvdpsxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpsxds (op0, tmp));
  DONE;
})

;; Convert vector of 64-bit floating point numbers to vector of
;; 64-bit unsigned integers
(define_expand "vsx_xvcvdpuxds_scale"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V2DF 1 "vsx_register_operand")
   (match_operand:QI 2 "immediate_operand")]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx tmp;
  int scale = INTVAL (operands[2]);
  if (scale == 0)
    tmp = op1;
  else
    {
      tmp = gen_reg_rtx (V2DFmode);
      rs6000_scale_v2df (tmp, op1, scale);
    }
  emit_insn (gen_vsx_xvcvdpuxds (op0, tmp));
  DONE;
})

;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers, since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.

(define_insn "vsx_xvcvdpsxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
		     UNSPEC_VSX_CVDPSXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpsxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvdpuxws"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=v,?wa")
	(unspec:V4SI [(match_operand:V2DF 1 "vsx_register_operand" "wa,wa")]
		     UNSPEC_VSX_CVDPUXWS))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvdpuxws %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVSXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvsxdsp %x0,%x1"
  [(set_attr "type" "vecfloat")])

(define_insn "vsx_xvcvuxdsp"
  [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
	(unspec:V4SF [(match_operand:V2DI 1 "vsx_register_operand" "wa")]
		     UNSPEC_VSX_CVUXDSP))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
  "xvcvuxdsp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert vector of 32-bit signed/unsigned integers to vector of
;; 64-bit floating point numbers.
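;; The BE/LE insn pairs that follow differ only in which input words the
;; vec_select picks: the hardware always converts words 0 and 2 in BE order,
;; which are array elements 0/2 on big endian but 1/3 on little endian.  A
;; hedged C model of the BE variant (editorial, with assumed names):
;;
;;   void model_xvcvsxwdp (double out[2], const int in[4])
;;   {
;;     out[0] = (double) in[0];   /* even words, BE numbering  */
;;     out[1] = (double) in[2];
;;   }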
(define_insn "vsx_xvcv<su>xwdp_be"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(any_float:V2DF
	 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
  "xvcv<su>xwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcv<su>xwdp_le"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(any_float:V2DF
	 (vec_select:V2SI (match_operand:V4SI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "xvcv<su>xwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcv<su>xwdp"
  [(match_operand:V2DF 0 "vsx_register_operand")
   (match_operand:V4SI 1 "vsx_register_operand")
   (any_float (pc))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<su>xwdp_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcv<su>xwdp_le (operands[0], operands[1]));
  DONE;
})

(define_insn "vsx_xvcvsxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVSXWDP))]
  "TARGET_VSX"
  "xvcvsxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvuxwdp_df"
  [(set (match_operand:DF 0 "vsx_register_operand" "=wa")
	(unspec:DF [(match_operand:V4SI 1 "vsx_register_operand" "wa")]
		   UNSPEC_VSX_CVUXWDP))]
  "TARGET_VSX"
  "xvcvuxwdp %x0,%x1"
  [(set_attr "type" "vecdouble")])

;; Convert vector of 32-bit floating point numbers to vector of
;; 64-bit signed/unsigned integers.
(define_insn "vsx_xvcvsp<su>xds_be"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
	(any_fix:V2DI
	 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
	  (parallel [(const_int 0) (const_int 2)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && BYTES_BIG_ENDIAN"
  "xvcvsp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_insn "vsx_xvcvsp<su>xds_le"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=v,?wa")
	(any_fix:V2DI
	 (vec_select:V2SF (match_operand:V4SF 1 "vsx_register_operand" "wa,wa")
	  (parallel [(const_int 1) (const_int 3)]))))]
  "VECTOR_UNIT_VSX_P (V2DFmode) && !BYTES_BIG_ENDIAN"
  "xvcvsp<su>xds %x0,%x1"
  [(set_attr "type" "vecdouble")])

(define_expand "vsx_xvcvsp<su>xds"
  [(match_operand:V2DI 0 "vsx_register_operand")
   (match_operand:V4SF 1 "vsx_register_operand")
   (any_fix (pc))]
  "VECTOR_UNIT_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvsp<su>xds_be (operands[0], operands[1]));
  else
    emit_insn (gen_vsx_xvcvsp<su>xds_le (operands[0], operands[1]));
  DONE;
})

;; Generate float2 double
;; convert two vectors of double to a vector of float
(define_expand "float2_v2df"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DF 1 "register_operand" "wa"))
   (use (match_operand:V2DF 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_double_code (rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate float2
;; convert two vectors of long long signed ints to a vector of float
(define_expand "float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_float2_code (true, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})

;; Generate uns_float2
;; convert two vectors of long long unsigned ints to a vector of float
(define_expand "uns_float2_v2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))
   (use (match_operand:V2DI 2 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  rtx rtx_src1, rtx_src2, rtx_dst;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  /* Pass false to request the unsigned converts; passing true here would
     emit the signed xvcvsxdsp forms and mishandle large unsigned values.  */
  rs6000_generate_float2_code (false, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
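;; A hedged usage sketch of the float2 expanders above (vec_float2 is the
;; documented altivec.h entry point; it narrows two V2DI/V2DF inputs into one
;; V4SF result):
;;
;;   #include <altivec.h>
;;   vector float narrow (vector signed long long a,
;;                        vector signed long long b)
;;   {
;;     return vec_float2 (a, b);   /* xvcvsxdsp converts + word merge  */
;;   }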
;; Generate floate
;; convert double or long long signed to float
;; (Only even words are valid, BE numbering)
(define_expand "floate<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
					  rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));

  DONE;
})

;; Generate uns_floate
;; convert long long unsigned to float
;; (Only even words are valid, BE numbering)
(define_expand "unsfloatev2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    {
      /* Shift left one word to put the even words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
					  rtx_tmp, rtx_tmp, rtx_val));
    }
  else
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));

  DONE;
})

;; Generate floato
;; convert double or long long signed to float
;; (Only odd words are valid, BE numbering)
(define_expand "floato<mode>"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:VSX_D 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcv<VF_sxddp>sp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcv<VF_sxddp>sp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
					  rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate uns_floato
;; convert long long unsigned to float
;; (Only odd words are valid, BE numbering)
(define_expand "unsfloatov2di"
  [(use (match_operand:V4SF 0 "register_operand" "=wa"))
   (use (match_operand:V2DI 1 "register_operand" "wa"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_vsx_xvcvuxdsp (operands[0], operands[1]));
  else
    {
      /* Shift left one word to put the odd words in the correct location.  */
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (4);

      rtx_tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp, operands[1]));
      emit_insn (gen_altivec_vsldoi_v4sf (operands[0],
					  rtx_tmp, rtx_tmp, rtx_val));
    }
  DONE;
})

;; Generate vsigned2
;; convert two double float vectors to a vector of single precision ints
(define_expand "vsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=wa")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "wa")
		 (match_operand:V2DF 2 "register_operand" "wa")]
		UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = true;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
;; Generate vsignedo_v2df
;; signed double float to int convert odd word
(define_expand "vsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=wa")
	(match_operand:V2DF 1 "register_operand" "wa"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word:
	 0 1 2 3 0 1 2 3 => 1 2 3 0.
	 Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are already where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  DONE;
}
 [(set_attr "type" "veccomplex")])

;; Generate vsignede_v2df
;; signed double float to int convert even word
(define_expand "vsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
    emit_insn (gen_vsx_xvcvdpsxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words:
	 0 1 2 3 0 1 2 3 => 3 0 1 2.
	 Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  DONE;
}
 [(set_attr "type" "veccomplex")])

;; Generate vunsigned2
;; convert two double float vectors to a vector of single precision
;; unsigned ints
(define_expand "vunsigned2_v2df"
  [(match_operand:V4SI 0 "register_operand" "=v")
   (unspec:V4SI [(match_operand:V2DF 1 "register_operand" "v")
		 (match_operand:V2DF 2 "register_operand" "v")]
		UNSPEC_VSX_VSIGNED2)]
  "TARGET_VSX"
{
  rtx rtx_src1, rtx_src2, rtx_dst;
  bool signed_convert = false;

  rtx_dst = operands[0];
  rtx_src1 = operands[1];
  rtx_src2 = operands[2];

  rs6000_generate_vsigned2_code (signed_convert, rtx_dst, rtx_src1, rtx_src2);
  DONE;
})
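;; The vsigned{o,e}/vunsigned{o,e} expanders around this point all use the
;; same trick: vsldoi with the converted vector as both inputs rotates the
;; four words so that the wanted ones land in the result slots.  A hedged C
;; model of that rotation (editorial, with assumed names; the insn's shift
;; count is in bytes, so 4 = one word and 12 = three words):
;;
;;   void model_vsldoi_words (unsigned w[4], const unsigned v[4], int words)
;;   {
;;     for (int i = 0; i < 4; i++)
;;       w[i] = v[(i + words) & 3];   /* rotate left by 'words' words  */
;;   }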
;; Generate vunsignedo_v2df
;; unsigned double float to int convert odd word
(define_expand "vunsignedo_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Big endian word numbering for words in operand is 0 1 2 3.
	 Take (operand[1] operand[1]) and shift left one word:
	 0 1 2 3 0 1 2 3 => 1 2 3 0.
	 Words 1 and 3 are now where they need to be for the result.  */

      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  else
    /* Little endian word numbering for operand is 3 2 1 0.
       Result words 3 and 1 are already where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  DONE;
}
 [(set_attr "type" "veccomplex")])

;; Generate vunsignede_v2df
;; unsigned double float to int convert even word
(define_expand "vunsignede_v2df"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(match_operand:V2DF 1 "register_operand" "v"))]
  "TARGET_VSX"
{
  if (BYTES_BIG_ENDIAN)
    /* Big endian word numbering for words in operand is 0 1 2 3.
       Result words 0 and 2 are already where they need to be.  */
    emit_insn (gen_vsx_xvcvdpuxws (operands[0], operands[1]));

  else
    {
      rtx rtx_tmp;
      rtx rtx_val = GEN_INT (12);
      rtx_tmp = gen_reg_rtx (V4SImode);

      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp, operands[1]));

      /* Little endian word numbering for operand is 3 2 1 0.
	 Take (operand[1] operand[1]) and shift left three words:
	 0 1 2 3 0 1 2 3 => 3 0 1 2.
	 Words 0 and 2 are now where they need to be for the result.  */
      emit_insn (gen_altivec_vsldoi_v4si (operands[0], rtx_tmp,
					  rtx_tmp, rtx_val));
    }
  DONE;
}
 [(set_attr "type" "veccomplex")])

;; Only optimize (float (fix x)) -> frz if we are in fast-math mode, since
;; the xvrdpiz instruction does not truncate the value if the floating
;; point value is < LONG_MIN or > LONG_MAX.
(define_insn "*vsx_float_fix_v2df2"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa,?wa")
	(float:V2DF
	 (fix:V2DI
	  (match_operand:V2DF 1 "vsx_register_operand" "wa,?wa"))))]
  "TARGET_HARD_FLOAT
   && VECTOR_UNIT_VSX_P (V2DFmode) && flag_unsafe_math_optimizations
   && !flag_trapping_math && TARGET_FRIZ"
  "xvrdpiz %x0,%x1"
  [(set_attr "type" "vecdouble")])


;; Permute operations

;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
	 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  if (which_alternative == 0)
    return (BYTES_BIG_ENDIAN
	    ? "xxpermdi %x0,%x1,%x2,0"
	    : "xxpermdi %x0,%x2,%x1,0");

  else if (which_alternative == 1)
    return (BYTES_BIG_ENDIAN
	    ? "mtvsrdd %x0,%1,%2"
	    : "mtvsrdd %x0,%2,%1");

  else
    gcc_unreachable ();
}
  [(set_attr "type" "vecperm")])

;; Combiner patterns to allow creating XXPERMDI's to access either double
;; word element in a vector register.
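;; The combiner patterns below compute the xxpermdi immediate from the two
;; selected doublewords.  A hedged C model of the encoding (editorial, with
;; assumed names): bit 1 of the immediate selects the doubleword taken from
;; the first input, bit 0 the doubleword taken from the second, in BE
;; numbering:
;;
;;   void model_xxpermdi (long long vt[2], const long long va[2],
;;                        const long long vb[2], int uim)
;;   {
;;     vt[0] = va[(uim >> 1) & 1];
;;     vt[1] = vb[uim & 1];
;;   }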
(define_insn "*vsx_concat_<mode>_1"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
	 (match_operand:<VS_scalar> 3 "gpc_reg_operand" "wa")))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[2]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (2 * dword);
      return "xxpermdi %x0,%x1,%x3,%4";
    }
  else
    {
      operands[4] = GEN_INT (!dword);
      return "xxpermdi %x0,%x3,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_2"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword = INTVAL (operands[3]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[4] = GEN_INT (dword);
      return "xxpermdi %x0,%x1,%x2,%4";
    }
  else
    {
      operands[4] = GEN_INT (2 * !dword);
      return "xxpermdi %x0,%x2,%x1,%4";
    }
}
  [(set_attr "type" "vecperm")])

(define_insn "*vsx_concat_<mode>_3"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_concat:VSX_D
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n")]))
	 (vec_select:<VS_scalar>
	  (match_operand:VSX_D 3 "gpc_reg_operand" "wa")
	  (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  HOST_WIDE_INT dword1 = INTVAL (operands[2]);
  HOST_WIDE_INT dword2 = INTVAL (operands[4]);
  if (BYTES_BIG_ENDIAN)
    {
      operands[5] = GEN_INT ((2 * dword1) + dword2);
      return "xxpermdi %x0,%x1,%x3,%5";
    }
  else
    {
      operands[5] = GEN_INT ((2 * !dword2) + !dword1);
      return "xxpermdi %x0,%x3,%x1,%5";
    }
}
  [(set_attr "type" "vecperm")])

;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles.  This is used to initialize a V4SF vector with 4 floats.
(define_insn "vsx_concat_v2sf"
  [(set (match_operand:V2DF 0 "vsx_register_operand" "=wa")
	(unspec:V2DF
	 [(match_operand:SF 1 "vsx_register_operand" "wa")
	  (match_operand:SF 2 "vsx_register_operand" "wa")]
	 UNSPEC_VSX_CONCAT))]
  "VECTOR_MEM_VSX_P (V2DFmode)"
{
  if (BYTES_BIG_ENDIAN)
    return "xxpermdi %x0,%x1,%x2,0";
  else
    return "xxpermdi %x0,%x2,%x1,0";
}
  [(set_attr "type" "vecperm")])

;; Concatenate 4 SImode elements into a V4SImode reg.
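;; The expander below assembles the vector in GPRs: zero-extend the four
;; SImode values, OR them together in pairs, then glue the two DImode halves
;; with vsx_concat_v2di.  A hedged C model of one pairing step (editorial,
;; with assumed names):
;;
;;   unsigned long long pack_pair (unsigned int hi, unsigned int lo)
;;   {
;;     return ((unsigned long long) hi << 32) | lo;   /* ashldi3 + iordi3  */
;;   }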
(define_expand "vsx_init_v4si"
  [(use (match_operand:V4SI 0 "gpc_reg_operand"))
   (use (match_operand:SI 1 "gpc_reg_operand"))
   (use (match_operand:SI 2 "gpc_reg_operand"))
   (use (match_operand:SI 3 "gpc_reg_operand"))
   (use (match_operand:SI 4 "gpc_reg_operand"))]
  "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
{
  rtx a = gen_reg_rtx (DImode);
  rtx b = gen_reg_rtx (DImode);
  rtx c = gen_reg_rtx (DImode);
  rtx d = gen_reg_rtx (DImode);
  emit_insn (gen_zero_extendsidi2 (a, operands[1]));
  emit_insn (gen_zero_extendsidi2 (b, operands[2]));
  emit_insn (gen_zero_extendsidi2 (c, operands[3]));
  emit_insn (gen_zero_extendsidi2 (d, operands[4]));
  if (!BYTES_BIG_ENDIAN)
    {
      std::swap (a, b);
      std::swap (c, d);
    }

  rtx aa = gen_reg_rtx (DImode);
  rtx ab = gen_reg_rtx (DImode);
  rtx cc = gen_reg_rtx (DImode);
  rtx cd = gen_reg_rtx (DImode);
  emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
  emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
  emit_insn (gen_iordi3 (ab, aa, b));
  emit_insn (gen_iordi3 (cd, cc, d));

  rtx abcd = gen_reg_rtx (V2DImode);
  emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
  emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
  DONE;
})

;; xxpermdi for little endian loads and stores.  We need several of
;; these since the form of the PARALLEL differs by mode.
(define_insn "*vsx_xxpermdi2_le_<mode>"
  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
	(vec_select:VSX_D
	  (match_operand:VSX_D 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "xxswapd_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 8) (const_int 9)
		     (const_int 10) (const_int 11)
		     (const_int 12) (const_int 13)
		     (const_int 14) (const_int 15)
		     (const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)
		     (const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)])))]
  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])

(define_insn "xxswapd_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)
		     (const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))]
  "TARGET_VSX"
;; AIX does not support the extended mnemonic xxswapd.  Use the basic
;; mnemonic xxpermdi instead.
  "xxpermdi %x0,%x1,%x1,2"
  [(set_attr "type" "vecperm")])
3009 "xxpermdi %x0,%x1,%x1,2" 3010 [(set_attr "type" "vecperm")]) 3011 3012(define_insn "xxswapd_<mode>" 3013 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 3014 (vec_select:VSX_D 3015 (match_operand:VSX_D 1 "vsx_register_operand" "wa") 3016 (parallel [(const_int 1) (const_int 0)])))] 3017 "TARGET_VSX" 3018;; AIX does not support extended mnemonic xxswapd. Use the basic 3019;; mnemonic xxpermdi instead. 3020 "xxpermdi %x0,%x1,%x1,2" 3021 [(set_attr "type" "vecperm")]) 3022 3023;; lxvd2x for little endian loads. We need several of 3024;; these since the form of the PARALLEL differs by mode. 3025(define_insn "*vsx_lxvd2x2_le_<mode>" 3026 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 3027 (vec_select:VSX_D 3028 (match_operand:VSX_D 1 "memory_operand" "Z") 3029 (parallel [(const_int 1) (const_int 0)])))] 3030 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3031 "lxvd2x %x0,%y1" 3032 [(set_attr "type" "vecload")]) 3033 3034(define_insn "*vsx_lxvd2x4_le_<mode>" 3035 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") 3036 (vec_select:VSX_W 3037 (match_operand:VSX_W 1 "memory_operand" "Z") 3038 (parallel [(const_int 2) (const_int 3) 3039 (const_int 0) (const_int 1)])))] 3040 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR" 3041 "lxvd2x %x0,%y1" 3042 [(set_attr "type" "vecload")]) 3043 3044(define_insn "*vsx_lxvd2x8_le_V8HI" 3045 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") 3046 (vec_select:V8HI 3047 (match_operand:V8HI 1 "memory_operand" "Z") 3048 (parallel [(const_int 4) (const_int 5) 3049 (const_int 6) (const_int 7) 3050 (const_int 0) (const_int 1) 3051 (const_int 2) (const_int 3)])))] 3052 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR" 3053 "lxvd2x %x0,%y1" 3054 [(set_attr "type" "vecload")]) 3055 3056(define_insn "*vsx_lxvd2x16_le_V16QI" 3057 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 3058 (vec_select:V16QI 3059 (match_operand:V16QI 1 "memory_operand" "Z") 3060 (parallel [(const_int 8) (const_int 9) 3061 (const_int 10) (const_int 11) 3062 (const_int 12) (const_int 13) 3063 (const_int 14) (const_int 15) 3064 (const_int 0) (const_int 1) 3065 (const_int 2) (const_int 3) 3066 (const_int 4) (const_int 5) 3067 (const_int 6) (const_int 7)])))] 3068 "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR" 3069 "lxvd2x %x0,%y1" 3070 [(set_attr "type" "vecload")]) 3071 3072;; stxvd2x for little endian stores. We need several of 3073;; these since the form of the PARALLEL differs by mode. 
(define_insn "*vsx_stxvd2x2_le_<mode>"
  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
	(vec_select:VSX_D
	  (match_operand:VSX_D 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 1) (const_int 0)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x4_le_<mode>"
  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
	(vec_select:VSX_W
	  (match_operand:VSX_W 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 2) (const_int 3)
		     (const_int 0) (const_int 1)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x8_le_V8HI"
  [(set (match_operand:V8HI 0 "memory_operand" "=Z")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)
		     (const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V8HImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

(define_insn "*vsx_stxvd2x16_le_V16QI"
  [(set (match_operand:V16QI 0 "memory_operand" "=Z")
	(vec_select:V16QI
	  (match_operand:V16QI 1 "vsx_register_operand" "wa")
	  (parallel [(const_int 8) (const_int 9)
		     (const_int 10) (const_int 11)
		     (const_int 12) (const_int 13)
		     (const_int 14) (const_int 15)
		     (const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)
		     (const_int 4) (const_int 5)
		     (const_int 6) (const_int 7)])))]
  "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (V16QImode) && !TARGET_P9_VECTOR"
  "stxvd2x %x1,%y0"
  [(set_attr "type" "vecstore")])

;; Convert a TImode value into V1TImode
(define_expand "vsx_set_v1ti"
  [(match_operand:V1TI 0 "nonimmediate_operand")
   (match_operand:V1TI 1 "nonimmediate_operand")
   (match_operand:TI 2 "input_operand")
   (match_operand:QI 3 "u5bit_cint_operand")]
  "VECTOR_MEM_VSX_P (V1TImode)"
{
  if (operands[3] != const0_rtx)
    gcc_unreachable ();

  emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1]));
  DONE;
})

;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
(define_expand "vsx_set_<mode>"
  [(use (match_operand:VSX_D 0 "vsx_register_operand"))
   (use (match_operand:VSX_D 1 "vsx_register_operand"))
   (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
   (use (match_operand:QI 3 "const_0_to_1_operand"))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  rtx dest = operands[0];
  rtx vec_reg = operands[1];
  rtx value = operands[2];
  rtx ele = operands[3];
  rtx tmp = gen_reg_rtx (<VS_scalar>mode);

  if (ele == const0_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
      DONE;
    }
  else if (ele == const1_rtx)
    {
      emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
      emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
      DONE;
    }
  else
    gcc_unreachable ();
})

;; Extract a DF/DI element from V2DF/V2DI.
;; Optimize cases where we can do a simple or direct move, or see if we can
;; avoid doing the move at all.

;; There are some unresolved problems with reload that show up if an Altivec
;; register was picked.  Limit the scalar value to FPRs for now.
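;; A hedged usage sketch of the element extract that follows (assuming a
;; 64-bit ISA 2.07+ target; whether mfvsrd, fmr, or xxpermdi is emitted
;; depends on the element index and the register classes chosen):
;;
;;   #include <altivec.h>
;;   long long first (vector signed long long v)
;;   {
;;     return vec_extract (v, 0);   /* often a single mfvsrd  */
;;   }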
(define_insn "vsx_extract_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=d, d, wr, wr")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "gpc_reg_operand" "wa, wa, wa, wa")
	 (parallel
	  [(match_operand:QI 2 "const_0_to_1_operand" "wD, n, wD, n")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
{
  int element = INTVAL (operands[2]);
  int op0_regno = REGNO (operands[0]);
  int op1_regno = REGNO (operands[1]);
  int fldDM;

  gcc_assert (IN_RANGE (element, 0, 1));
  gcc_assert (VSX_REGNO_P (op1_regno));

  if (element == VECTOR_ELEMENT_SCALAR_64BIT)
    {
      if (op0_regno == op1_regno)
	return ASM_COMMENT_START " vec_extract to same register";

      else if (INT_REGNO_P (op0_regno) && TARGET_DIRECT_MOVE
	       && TARGET_POWERPC64)
	return "mfvsrd %0,%x1";

      else if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
	return "fmr %0,%1";

      else if (VSX_REGNO_P (op0_regno))
	return "xxlor %x0,%x1,%x1";

      else
	gcc_unreachable ();
    }

  else if (element == VECTOR_ELEMENT_MFVSRLD_64BIT && INT_REGNO_P (op0_regno)
	   && TARGET_P9_VECTOR && TARGET_POWERPC64 && TARGET_DIRECT_MOVE)
    return "mfvsrld %0,%x1";

  else if (VSX_REGNO_P (op0_regno))
    {
      fldDM = element << 1;
      if (!BYTES_BIG_ENDIAN)
	fldDM = 3 - fldDM;
      operands[3] = GEN_INT (fldDM);
      return "xxpermdi %x0,%x1,%x1,%3";
    }

  else
    gcc_unreachable ();
}
  [(set_attr "type" "veclogical,mftgpr,mftgpr,vecperm")
   (set_attr "isa" "*,*,p8v,p9v")])

;; Optimize extracting a single scalar element from memory.
(define_insn_and_split "*vsx_extract_<P:mode>_<VSX_D:mode>_load"
  [(set (match_operand:<VS_scalar> 0 "register_operand" "=wa,wr")
	(vec_select:<VSX_D:VS_scalar>
	 (match_operand:VSX_D 1 "memory_operand" "m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_1_operand" "n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b"))]
  "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<VSX_D:MODE>mode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
					   operands[2], operands[3],
					   <VSX_D:VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")
   (set_attr "length" "8")])

;; Optimize storing a single scalar element that is in the right location to
;; memory.
(define_insn "*vsx_extract_<mode>_store"
  [(set (match_operand:<VS_scalar> 0 "memory_operand" "=m,Z,wY")
	(vec_select:<VS_scalar>
	 (match_operand:VSX_D 1 "register_operand" "d,v,v")
	 (parallel [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
  "VECTOR_MEM_VSX_P (<MODE>mode)"
  "@
   stfd%U0%X0 %1,%0
   stxsdx %x1,%y0
   stxsd %1,%0"
  [(set_attr "type" "fpstore")
   (set_attr "isa" "*,p7v,p9v")])

;; Variable V2DI/V2DF extract shift
(define_insn "vsx_vslo_<mode>"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:V2DI 2 "gpc_reg_operand" "v")]
			    UNSPEC_VSX_VSLO))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "vslo %0,%1,%2"
  [(set_attr "type" "vecperm")])
;; Variable V2DI/V2DF extract from a register
(define_insn_and_split "vsx_extract_<mode>_var"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=v")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "gpc_reg_operand" "v")
			     (match_operand:DI 2 "gpc_reg_operand" "r")]
			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})

;; Variable V2DI/V2DF extract from memory
(define_insn_and_split "*vsx_extract_<mode>_var_load"
  [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=wa,r")
	(unspec:<VS_scalar> [(match_operand:VSX_D 1 "memory_operand" "Q,Q")
			     (match_operand:DI 2 "gpc_reg_operand" "r,r")]
			    UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=&b,&b"))]
  "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
					   operands[2], operands[3],
					   <VS_scalar>mode);
}
  [(set_attr "type" "fpload,load")])

;; Extract a SF element from V4SF
(define_insn_and_split "vsx_extract_v4sf"
  [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
	(vec_select:SF
	 (match_operand:V4SF 1 "vsx_register_operand" "wa")
	 (parallel [(match_operand:QI 2 "u5bit_cint_operand" "n")])))
   (clobber (match_scratch:V4SF 3 "=0"))]
  "VECTOR_UNIT_VSX_P (V4SFmode)"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx op0 = operands[0];
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx op3 = operands[3];
  rtx tmp;
  HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2);

  if (ele == 0)
    tmp = op1;
  else
    {
      if (GET_CODE (op3) == SCRATCH)
	op3 = gen_reg_rtx (V4SFmode);
      emit_insn (gen_vsx_xxsldwi_v4sf (op3, op1, op1, GEN_INT (ele)));
      tmp = op3;
    }
  emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp));
  DONE;
}
  [(set_attr "length" "8")
   (set_attr "type" "fp")])

(define_insn_and_split "*vsx_extract_v4sf_<mode>_load"
  [(set (match_operand:SF 0 "register_operand" "=f,v,v,?r")
	(vec_select:SF
	 (match_operand:V4SF 1 "memory_operand" "m,Z,m,m")
	 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
   (clobber (match_scratch:P 3 "=&b,&b,&b,&b"))]
  "VECTOR_MEM_VSX_P (V4SFmode)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 4))]
{
  operands[4] = rs6000_adjust_vec_address (operands[0], operands[1],
					   operands[2], operands[3],
					   SFmode);
}
  [(set_attr "type" "fpload,fpload,fpload,load")
   (set_attr "length" "8")
   (set_attr "isa" "*,p7v,p9v,*")])

;; Variable V4SF extract from a register
(define_insn_and_split "vsx_extract_v4sf_var"
  [(set (match_operand:SF 0 "gpc_reg_operand" "=wa")
	(unspec:SF [(match_operand:V4SF 1 "gpc_reg_operand" "v")
		    (match_operand:DI 2 "gpc_reg_operand" "r")]
		   UNSPEC_VSX_EXTRACT))
   (clobber (match_scratch:DI 3 "=r"))
   (clobber (match_scratch:V2DI 4 "=&v"))]
  "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_vec_extract_var (operands[0], operands[1], operands[2],
				operands[3], operands[4]);
  DONE;
})
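;; A hedged sketch of the constant-element SF extract above: rotate the
;; selected word into word 0 with xxsldwi (unless it is already element 0
;; after LE adjustment), then widen it to the scalar DF format with
;; xscvspdp:
;;
;;   #include <altivec.h>
;;   float third (vector float v)
;;   {
;;     return vec_extract (v, 2);   /* roughly xxsldwi + xscvspdp  */
;;   }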
(match_operand:SF 0 "gpc_reg_operand" "=wa,?r") 3378 (unspec:SF [(match_operand:V4SF 1 "memory_operand" "Q,Q") 3379 (match_operand:DI 2 "gpc_reg_operand" "r,r")] 3380 UNSPEC_VSX_EXTRACT)) 3381 (clobber (match_scratch:DI 3 "=&b,&b"))] 3382 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_DIRECT_MOVE_64BIT" 3383 "#" 3384 "&& reload_completed" 3385 [(set (match_dup 0) (match_dup 4))] 3386{ 3387 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], 3388 operands[3], SFmode); 3389} 3390 [(set_attr "type" "fpload,load")]) 3391 3392;; Expand the builtin form of xxpermdi to canonical rtl. 3393(define_expand "vsx_xxpermdi_<mode>" 3394 [(match_operand:VSX_L 0 "vsx_register_operand") 3395 (match_operand:VSX_L 1 "vsx_register_operand") 3396 (match_operand:VSX_L 2 "vsx_register_operand") 3397 (match_operand:QI 3 "u5bit_cint_operand")] 3398 "VECTOR_MEM_VSX_P (<MODE>mode)" 3399{ 3400 rtx target = operands[0]; 3401 rtx op0 = operands[1]; 3402 rtx op1 = operands[2]; 3403 int mask = INTVAL (operands[3]); 3404 rtx perm0 = GEN_INT ((mask >> 1) & 1); 3405 rtx perm1 = GEN_INT ((mask & 1) + 2); 3406 rtx (*gen) (rtx, rtx, rtx, rtx, rtx); 3407 3408 if (<MODE>mode == V2DFmode) 3409 gen = gen_vsx_xxpermdi2_v2df_1; 3410 else 3411 { 3412 gen = gen_vsx_xxpermdi2_v2di_1; 3413 if (<MODE>mode != V2DImode) 3414 { 3415 target = gen_lowpart (V2DImode, target); 3416 op0 = gen_lowpart (V2DImode, op0); 3417 op1 = gen_lowpart (V2DImode, op1); 3418 } 3419 } 3420 emit_insn (gen (target, op0, op1, perm0, perm1)); 3421 DONE; 3422}) 3423 3424;; Special version of xxpermdi that retains big-endian semantics. 3425(define_expand "vsx_xxpermdi_<mode>_be" 3426 [(match_operand:VSX_L 0 "vsx_register_operand") 3427 (match_operand:VSX_L 1 "vsx_register_operand") 3428 (match_operand:VSX_L 2 "vsx_register_operand") 3429 (match_operand:QI 3 "u5bit_cint_operand")] 3430 "VECTOR_MEM_VSX_P (<MODE>mode)" 3431{ 3432 rtx target = operands[0]; 3433 rtx op0 = operands[1]; 3434 rtx op1 = operands[2]; 3435 int mask = INTVAL (operands[3]); 3436 rtx perm0 = GEN_INT ((mask >> 1) & 1); 3437 rtx perm1 = GEN_INT ((mask & 1) + 2); 3438 rtx (*gen) (rtx, rtx, rtx, rtx, rtx); 3439 3440 if (<MODE>mode == V2DFmode) 3441 gen = gen_vsx_xxpermdi2_v2df_1; 3442 else 3443 { 3444 gen = gen_vsx_xxpermdi2_v2di_1; 3445 if (<MODE>mode != V2DImode) 3446 { 3447 target = gen_lowpart (V2DImode, target); 3448 op0 = gen_lowpart (V2DImode, op0); 3449 op1 = gen_lowpart (V2DImode, op1); 3450 } 3451 } 3452 /* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a 3453 transformation we don't want; it is necessary for 3454 rs6000_expand_vec_perm_const_1 but not for this use. So we 3455 prepare for that by reversing the transformation here. */ 3456 if (BYTES_BIG_ENDIAN) 3457 emit_insn (gen (target, op0, op1, perm0, perm1)); 3458 else 3459 { 3460 rtx p0 = GEN_INT (3 - INTVAL (perm1)); 3461 rtx p1 = GEN_INT (3 - INTVAL (perm0)); 3462 emit_insn (gen (target, op1, op0, p0, p1)); 3463 } 3464 DONE; 3465}) 3466 3467(define_insn "vsx_xxpermdi2_<mode>_1" 3468 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 3469 (vec_select:VSX_D 3470 (vec_concat:<VS_double> 3471 (match_operand:VSX_D 1 "vsx_register_operand" "wa") 3472 (match_operand:VSX_D 2 "vsx_register_operand" "wa")) 3473 (parallel [(match_operand 3 "const_0_to_1_operand" "") 3474 (match_operand 4 "const_2_to_3_operand" "")])))] 3475 "VECTOR_MEM_VSX_P (<MODE>mode)" 3476{ 3477 int op3, op4, mask; 3478 3479 /* For little endian, swap operands and invert/swap selectors 3480 to get the correct xxpermdi. 
The operand swap sets up the 3481 inputs as a little endian array. The selectors are swapped 3482 because they are defined to use big endian ordering. The 3483 selectors are inverted to get the correct doublewords for 3484 little endian ordering. */ 3485 if (BYTES_BIG_ENDIAN) 3486 { 3487 op3 = INTVAL (operands[3]); 3488 op4 = INTVAL (operands[4]); 3489 } 3490 else 3491 { 3492 op3 = 3 - INTVAL (operands[4]); 3493 op4 = 3 - INTVAL (operands[3]); 3494 } 3495 3496 mask = (op3 << 1) | (op4 - 2); 3497 operands[3] = GEN_INT (mask); 3498 3499 if (BYTES_BIG_ENDIAN) 3500 return "xxpermdi %x0,%x1,%x2,%3"; 3501 else 3502 return "xxpermdi %x0,%x2,%x1,%3"; 3503} 3504 [(set_attr "type" "vecperm")]) 3505 3506;; Extraction of a single element in a small integer vector. Until ISA 3.0, 3507;; none of the small types were allowed in a vector register, so we had to 3508;; extract to a DImode and either do a direct move or store. 3509(define_expand "vsx_extract_<mode>" 3510 [(parallel [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand") 3511 (vec_select:<VS_scalar> 3512 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand") 3513 (parallel [(match_operand:QI 2 "const_int_operand")]))) 3514 (clobber (match_scratch:VSX_EXTRACT_I 3))])] 3515 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3516{ 3517 /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}. */ 3518 if (TARGET_P9_VECTOR) 3519 { 3520 emit_insn (gen_vsx_extract_<mode>_p9 (operands[0], operands[1], 3521 operands[2])); 3522 DONE; 3523 } 3524}) 3525 3526(define_insn "vsx_extract_<mode>_p9" 3527 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,<VSX_EX>") 3528 (vec_select:<VS_scalar> 3529 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>") 3530 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n,n")]))) 3531 (clobber (match_scratch:SI 3 "=r,X"))] 3532 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" 3533{ 3534 if (which_alternative == 0) 3535 return "#"; 3536 3537 else 3538 { 3539 HOST_WIDE_INT elt = INTVAL (operands[2]); 3540 HOST_WIDE_INT elt_adj = (!BYTES_BIG_ENDIAN 3541 ? GET_MODE_NUNITS (<MODE>mode) - 1 - elt 3542 : elt); 3543 3544 HOST_WIDE_INT unit_size = GET_MODE_UNIT_SIZE (<MODE>mode); 3545 HOST_WIDE_INT offset = unit_size * elt_adj; 3546 3547 operands[2] = GEN_INT (offset); 3548 if (unit_size == 4) 3549 return "xxextractuw %x0,%x1,%2"; 3550 else 3551 return "vextractu<wd> %0,%1,%2"; 3552 } 3553} 3554 [(set_attr "type" "vecsimple") 3555 (set_attr "isa" "p9v,*")]) 3556 3557(define_split 3558 [(set (match_operand:<VS_scalar> 0 "int_reg_operand") 3559 (vec_select:<VS_scalar> 3560 (match_operand:VSX_EXTRACT_I 1 "altivec_register_operand") 3561 (parallel [(match_operand:QI 2 "const_int_operand")]))) 3562 (clobber (match_operand:SI 3 "int_reg_operand"))] 3563 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB && reload_completed" 3564 [(const_int 0)] 3565{ 3566 rtx op0_si = gen_rtx_REG (SImode, REGNO (operands[0])); 3567 rtx op1 = operands[1]; 3568 rtx op2 = operands[2]; 3569 rtx op3 = operands[3]; 3570 HOST_WIDE_INT offset = INTVAL (op2) * GET_MODE_UNIT_SIZE (<MODE>mode); 3571 3572 emit_move_insn (op3, GEN_INT (offset)); 3573 if (BYTES_BIG_ENDIAN) 3574 emit_insn (gen_vextu<wd>lx (op0_si, op3, op1)); 3575 else 3576 emit_insn (gen_vextu<wd>rx (op0_si, op3, op1)); 3577 DONE; 3578}) 3579 3580;; Optimize zero extracts to eliminate the AND after the extract. 
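;; For illustration only (not part of the patterns; assumes GCC's <altivec.h>
;; intrinsics and a made-up function name): a zero-extending extract such as
;; the sketch below should match the pattern that follows, so no separate
;; masking instruction is needed after the vextractu*/xxextractuw:
;;
;;   #include <altivec.h>
;;
;;   unsigned long long
;;   extract_byte_3 (vector unsigned char v)
;;   {
;;     /* The widening of the element to 64 bits is the zero_extend that
;;        the pattern below folds into the extract itself.  */
;;     return vec_extract (v, 3);
;;   }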
3581(define_insn_and_split "*vsx_extract_<mode>_di_p9" 3582 [(set (match_operand:DI 0 "gpc_reg_operand" "=r,<VSX_EX>") 3583 (zero_extend:DI 3584 (vec_select:<VS_scalar> 3585 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,<VSX_EX>") 3586 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")])))) 3587 (clobber (match_scratch:SI 3 "=r,X"))] 3588 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" 3589 "#" 3590 "&& reload_completed" 3591 [(parallel [(set (match_dup 4) 3592 (vec_select:<VS_scalar> 3593 (match_dup 1) 3594 (parallel [(match_dup 2)]))) 3595 (clobber (match_dup 3))])] 3596{ 3597 operands[4] = gen_rtx_REG (<VS_scalar>mode, REGNO (operands[0])); 3598} 3599 [(set_attr "isa" "p9v,*")]) 3600 3601;; Optimize stores to use the ISA 3.0 scalar store instructions 3602(define_insn_and_split "*vsx_extract_<mode>_store_p9" 3603 [(set (match_operand:<VS_scalar> 0 "memory_operand" "=Z,m") 3604 (vec_select:<VS_scalar> 3605 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "<VSX_EX>,v") 3606 (parallel [(match_operand:QI 2 "const_int_operand" "n,n")]))) 3607 (clobber (match_scratch:<VS_scalar> 3 "=<VSX_EX>,&r")) 3608 (clobber (match_scratch:SI 4 "=X,&r"))] 3609 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_VEXTRACTUB" 3610 "#" 3611 "&& reload_completed" 3612 [(parallel [(set (match_dup 3) 3613 (vec_select:<VS_scalar> 3614 (match_dup 1) 3615 (parallel [(match_dup 2)]))) 3616 (clobber (match_dup 4))]) 3617 (set (match_dup 0) 3618 (match_dup 3))]) 3619 3620(define_insn_and_split "*vsx_extract_si" 3621 [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z") 3622 (vec_select:SI 3623 (match_operand:V4SI 1 "gpc_reg_operand" "v,v,v") 3624 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n")]))) 3625 (clobber (match_scratch:V4SI 3 "=v,v,v"))] 3626 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT && !TARGET_P9_VECTOR" 3627 "#" 3628 "&& reload_completed" 3629 [(const_int 0)] 3630{ 3631 rtx dest = operands[0]; 3632 rtx src = operands[1]; 3633 rtx element = operands[2]; 3634 rtx vec_tmp = operands[3]; 3635 int value; 3636 3637 if (!BYTES_BIG_ENDIAN) 3638 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); 3639 3640 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3641 instruction. 
*/ 3642 value = INTVAL (element); 3643 if (value != 1) 3644 emit_insn (gen_altivec_vspltw_direct (vec_tmp, src, element)); 3645 else 3646 vec_tmp = src; 3647 3648 if (MEM_P (operands[0])) 3649 { 3650 if (can_create_pseudo_p ()) 3651 dest = rs6000_force_indexed_or_indirect_mem (dest); 3652 3653 if (TARGET_P8_VECTOR) 3654 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); 3655 else 3656 emit_insn (gen_stfiwx (dest, gen_rtx_REG (DImode, REGNO (vec_tmp)))); 3657 } 3658 3659 else if (TARGET_P8_VECTOR) 3660 emit_move_insn (dest, gen_rtx_REG (SImode, REGNO (vec_tmp))); 3661 else 3662 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), 3663 gen_rtx_REG (DImode, REGNO (vec_tmp))); 3664 3665 DONE; 3666} 3667 [(set_attr "type" "mftgpr,vecperm,fpstore") 3668 (set_attr "length" "8") 3669 (set_attr "isa" "*,p8v,*")]) 3670 3671(define_insn_and_split "*vsx_extract_<mode>_p8" 3672 [(set (match_operand:<VS_scalar> 0 "nonimmediate_operand" "=r") 3673 (vec_select:<VS_scalar> 3674 (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand" "v") 3675 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) 3676 (clobber (match_scratch:VSX_EXTRACT_I2 3 "=v"))] 3677 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT 3678 && !TARGET_P9_VECTOR" 3679 "#" 3680 "&& reload_completed" 3681 [(const_int 0)] 3682{ 3683 rtx dest = operands[0]; 3684 rtx src = operands[1]; 3685 rtx element = operands[2]; 3686 rtx vec_tmp = operands[3]; 3687 int value; 3688 3689 if (!BYTES_BIG_ENDIAN) 3690 element = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1 - INTVAL (element)); 3691 3692 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3693 instruction. */ 3694 value = INTVAL (element); 3695 if (<MODE>mode == V16QImode) 3696 { 3697 if (value != 7) 3698 emit_insn (gen_altivec_vspltb_direct (vec_tmp, src, element)); 3699 else 3700 vec_tmp = src; 3701 } 3702 else if (<MODE>mode == V8HImode) 3703 { 3704 if (value != 3) 3705 emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element)); 3706 else 3707 vec_tmp = src; 3708 } 3709 else 3710 gcc_unreachable (); 3711 3712 emit_move_insn (gen_rtx_REG (DImode, REGNO (dest)), 3713 gen_rtx_REG (DImode, REGNO (vec_tmp))); 3714 DONE; 3715} 3716 [(set_attr "type" "mftgpr")]) 3717 3718;; Optimize extracting a single scalar element from memory. 
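;; A minimal sketch (assuming the usual <altivec.h> intrinsics; the function
;; name is hypothetical) of source code that should split into a scalar load
;; of the addressed element instead of a vector load plus extract:
;;
;;   #include <altivec.h>
;;
;;   short
;;   load_elem_2 (vector short *p)
;;   {
;;     /* Only element 2 is needed, so a single scalar load from the
;;        element's address suffices.  */
;;     return vec_extract (*p, 2);
;;   }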
3719(define_insn_and_split "*vsx_extract_<mode>_load" 3720 [(set (match_operand:<VS_scalar> 0 "register_operand" "=r") 3721 (vec_select:<VS_scalar> 3722 (match_operand:VSX_EXTRACT_I 1 "memory_operand" "m") 3723 (parallel [(match_operand:QI 2 "<VSX_EXTRACT_PREDICATE>" "n")]))) 3724 (clobber (match_scratch:DI 3 "=&b"))] 3725 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3726 "#" 3727 "&& reload_completed" 3728 [(set (match_dup 0) (match_dup 4))] 3729{ 3730 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], 3731 operands[3], <VS_scalar>mode); 3732} 3733 [(set_attr "type" "load") 3734 (set_attr "length" "8")]) 3735 3736;; Variable V16QI/V8HI/V4SI extract from a register 3737(define_insn_and_split "vsx_extract_<mode>_var" 3738 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r,r") 3739 (unspec:<VS_scalar> 3740 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v,v") 3741 (match_operand:DI 2 "gpc_reg_operand" "r,r")] 3742 UNSPEC_VSX_EXTRACT)) 3743 (clobber (match_scratch:DI 3 "=r,r")) 3744 (clobber (match_scratch:V2DI 4 "=X,&v"))] 3745 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3746 "#" 3747 "&& reload_completed" 3748 [(const_int 0)] 3749{ 3750 rs6000_split_vec_extract_var (operands[0], operands[1], operands[2], 3751 operands[3], operands[4]); 3752 DONE; 3753} 3754 [(set_attr "isa" "p9v,*")]) 3755 3756;; Variable V16QI/V8HI/V4SI extract from memory 3757(define_insn_and_split "*vsx_extract_<mode>_var_load" 3758 [(set (match_operand:<VS_scalar> 0 "gpc_reg_operand" "=r") 3759 (unspec:<VS_scalar> 3760 [(match_operand:VSX_EXTRACT_I 1 "memory_operand" "Q") 3761 (match_operand:DI 2 "gpc_reg_operand" "r")] 3762 UNSPEC_VSX_EXTRACT)) 3763 (clobber (match_scratch:DI 3 "=&b"))] 3764 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 3765 "#" 3766 "&& reload_completed" 3767 [(set (match_dup 0) (match_dup 4))] 3768{ 3769 operands[4] = rs6000_adjust_vec_address (operands[0], operands[1], operands[2], 3770 operands[3], <VS_scalar>mode); 3771} 3772 [(set_attr "type" "load")]) 3773 3774;; VSX_EXTRACT optimizations 3775;; Optimize double d = (double) vec_extract (vi, <n>) 3776;; Get the element into the top position and use XVCVSWDP/XVCVUWDP 3777(define_insn_and_split "*vsx_extract_si_<uns>float_df" 3778 [(set (match_operand:DF 0 "gpc_reg_operand" "=wa") 3779 (any_float:DF 3780 (vec_select:SI 3781 (match_operand:V4SI 1 "gpc_reg_operand" "v") 3782 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) 3783 (clobber (match_scratch:V4SI 3 "=v"))] 3784 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" 3785 "#" 3786 "&& 1" 3787 [(const_int 0)] 3788{ 3789 rtx dest = operands[0]; 3790 rtx src = operands[1]; 3791 rtx element = operands[2]; 3792 rtx v4si_tmp = operands[3]; 3793 int value; 3794 3795 if (!BYTES_BIG_ENDIAN) 3796 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); 3797 3798 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3799 instruction. */ 3800 value = INTVAL (element); 3801 if (value != 0) 3802 { 3803 if (GET_CODE (v4si_tmp) == SCRATCH) 3804 v4si_tmp = gen_reg_rtx (V4SImode); 3805 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); 3806 } 3807 else 3808 v4si_tmp = src; 3809 3810 emit_insn (gen_vsx_xvcv<su>xwdp_df (dest, v4si_tmp)); 3811 DONE; 3812}) 3813 3814;; Optimize <type> f = (<type>) vec_extract (vi, <n>) 3815;; where <type> is a floating point type that is supported by the hardware and is 3816;; not double.
First convert the value to double, and then to the desired 3817;; type. 3818(define_insn_and_split "*vsx_extract_si_<uns>float_<mode>" 3819 [(set (match_operand:VSX_EXTRACT_FL 0 "gpc_reg_operand" "=wa") 3820 (any_float:VSX_EXTRACT_FL 3821 (vec_select:SI 3822 (match_operand:V4SI 1 "gpc_reg_operand" "v") 3823 (parallel [(match_operand:QI 2 "const_0_to_3_operand" "n")])))) 3824 (clobber (match_scratch:V4SI 3 "=v")) 3825 (clobber (match_scratch:DF 4 "=wa"))] 3826 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" 3827 "#" 3828 "&& 1" 3829 [(const_int 0)] 3830{ 3831 rtx dest = operands[0]; 3832 rtx src = operands[1]; 3833 rtx element = operands[2]; 3834 rtx v4si_tmp = operands[3]; 3835 rtx df_tmp = operands[4]; 3836 int value; 3837 3838 if (!BYTES_BIG_ENDIAN) 3839 element = GEN_INT (GET_MODE_NUNITS (V4SImode) - 1 - INTVAL (element)); 3840 3841 /* If the value is in the correct position, we can avoid doing the VSPLT<x> 3842 instruction. */ 3843 value = INTVAL (element); 3844 if (value != 0) 3845 { 3846 if (GET_CODE (v4si_tmp) == SCRATCH) 3847 v4si_tmp = gen_reg_rtx (V4SImode); 3848 emit_insn (gen_altivec_vspltw_direct (v4si_tmp, src, element)); 3849 } 3850 else 3851 v4si_tmp = src; 3852 3853 if (GET_CODE (df_tmp) == SCRATCH) 3854 df_tmp = gen_reg_rtx (DFmode); 3855 3856 emit_insn (gen_vsx_xvcv<su>xwdp_df (df_tmp, v4si_tmp)); 3857 3858 if (<MODE>mode == SFmode) 3859 emit_insn (gen_truncdfsf2 (dest, df_tmp)); 3860 else if (<MODE>mode == TFmode && FLOAT128_IBM_P (TFmode)) 3861 emit_insn (gen_extenddftf2_vsx (dest, df_tmp)); 3862 else if (<MODE>mode == TFmode && FLOAT128_IEEE_P (TFmode) 3863 && TARGET_FLOAT128_HW) 3864 emit_insn (gen_extenddftf2_hw (dest, df_tmp)); 3865 else if (<MODE>mode == IFmode && FLOAT128_IBM_P (IFmode)) 3866 emit_insn (gen_extenddfif2 (dest, df_tmp)); 3867 else if (<MODE>mode == KFmode && TARGET_FLOAT128_HW) 3868 emit_insn (gen_extenddfkf2_hw (dest, df_tmp)); 3869 else 3870 gcc_unreachable (); 3871 3872 DONE; 3873}) 3874 3875;; Optimize <type> f = (<ftype>) vec_extract (<vtype>, <n>) 3876;; Where <ftype> is SFmode, DFmode (and KFmode/TFmode if those types are IEEE 3877;; 128-bit hardware types) and <vtype> is vector char, vector unsigned char, 3878;; vector short or vector unsigned short. 
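;; For illustration (hypothetical function, assuming <altivec.h>), the two
;; patterns below are expected to cover conversions such as:
;;
;;   #include <altivec.h>
;;
;;   float
;;   elem_to_float (vector signed char v)
;;   {
;;     /* Extract element 5 and convert it to float without a trip
;;        through a GPR.  */
;;     return (float) vec_extract (v, 5);
;;   }
;;
;; Note that only the signed variant emits an explicit sign_extend:DI after
;; the vector extract; the extract leaves the element zero-extended in the
;; register, which is already the correct input for the unsigned_float case.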
3879(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_fl_<FL_CONV:mode>" 3880 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa") 3881 (float:FL_CONV 3882 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3883 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") 3884 (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) 3885 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] 3886 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT 3887 && TARGET_P9_VECTOR" 3888 "#" 3889 "&& reload_completed" 3890 [(parallel [(set (match_dup 3) 3891 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3892 (match_dup 1) 3893 (parallel [(match_dup 2)]))) 3894 (clobber (scratch:SI))]) 3895 (set (match_dup 4) 3896 (sign_extend:DI (match_dup 3))) 3897 (set (match_dup 0) 3898 (float:<FL_CONV:MODE> (match_dup 4)))] 3899{ 3900 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); 3901} 3902 [(set_attr "isa" "<FL_CONV:VSisa>")]) 3903 3904(define_insn_and_split "*vsx_ext_<VSX_EXTRACT_I:VS_scalar>_ufl_<FL_CONV:mode>" 3905 [(set (match_operand:FL_CONV 0 "gpc_reg_operand" "=wa") 3906 (unsigned_float:FL_CONV 3907 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3908 (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") 3909 (parallel [(match_operand:QI 2 "const_int_operand" "n")])))) 3910 (clobber (match_scratch:<VSX_EXTRACT_I:VS_scalar> 3 "=v"))] 3911 "VECTOR_MEM_VSX_P (<VSX_EXTRACT_I:MODE>mode) && TARGET_DIRECT_MOVE_64BIT 3912 && TARGET_P9_VECTOR" 3913 "#" 3914 "&& reload_completed" 3915 [(parallel [(set (match_dup 3) 3916 (vec_select:<VSX_EXTRACT_I:VS_scalar> 3917 (match_dup 1) 3918 (parallel [(match_dup 2)]))) 3919 (clobber (scratch:SI))]) 3920 (set (match_dup 0) 3921 (float:<FL_CONV:MODE> (match_dup 4)))] 3922{ 3923 operands[4] = gen_rtx_REG (DImode, REGNO (operands[3])); 3924} 3925 [(set_attr "isa" "<FL_CONV:VSisa>")]) 3926 3927;; V4SI/V8HI/V16QI set operation on ISA 3.0 3928(define_insn "vsx_set_<mode>_p9" 3929 [(set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=<VSX_EX>") 3930 (unspec:VSX_EXTRACT_I 3931 [(match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "0") 3932 (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VSX_EX>") 3933 (match_operand:QI 3 "<VSX_EXTRACT_PREDICATE>" "n")] 3934 UNSPEC_VSX_SET))] 3935 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_P9_VECTOR && TARGET_POWERPC64" 3936{ 3937 int ele = INTVAL (operands[3]); 3938 int nunits = GET_MODE_NUNITS (<MODE>mode); 3939 3940 if (!BYTES_BIG_ENDIAN) 3941 ele = nunits - 1 - ele; 3942 3943 operands[3] = GEN_INT (GET_MODE_SIZE (<VS_scalar>mode) * ele); 3944 if (<MODE>mode == V4SImode) 3945 return "xxinsertw %x0,%x2,%3"; 3946 else 3947 return "vinsert<wd> %0,%2,%3"; 3948} 3949 [(set_attr "type" "vecperm")]) 3950 3951(define_insn_and_split "vsx_set_v4sf_p9" 3952 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 3953 (unspec:V4SF 3954 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 3955 (match_operand:SF 2 "gpc_reg_operand" "wa") 3956 (match_operand:QI 3 "const_0_to_3_operand" "n")] 3957 UNSPEC_VSX_SET)) 3958 (clobber (match_scratch:SI 4 "=&wa"))] 3959 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" 3960 "#" 3961 "&& reload_completed" 3962 [(set (match_dup 5) 3963 (unspec:V4SF [(match_dup 2)] 3964 UNSPEC_VSX_CVDPSPN)) 3965 (parallel [(set (match_dup 4) 3966 (vec_select:SI (match_dup 6) 3967 (parallel [(match_dup 7)]))) 3968 (clobber (scratch:SI))]) 3969 (set (match_dup 8) 3970 (unspec:V4SI [(match_dup 8) 3971 (match_dup 4) 3972 (match_dup 3)] 3973 UNSPEC_VSX_SET))] 3974{ 3975 unsigned int tmp_regno = reg_or_subregno 
(operands[4]); 3976 3977 operands[5] = gen_rtx_REG (V4SFmode, tmp_regno); 3978 operands[6] = gen_rtx_REG (V4SImode, tmp_regno); 3979 operands[7] = GEN_INT (BYTES_BIG_ENDIAN ? 0 : 3); 3980 operands[8] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); 3981} 3982 [(set_attr "type" "vecperm") 3983 (set_attr "length" "12") 3984 (set_attr "isa" "p9v")]) 3985 3986;; Special case setting 0.0f to a V4SF element 3987(define_insn_and_split "*vsx_set_v4sf_p9_zero" 3988 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 3989 (unspec:V4SF 3990 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 3991 (match_operand:SF 2 "zero_fp_constant" "j") 3992 (match_operand:QI 3 "const_0_to_3_operand" "n")] 3993 UNSPEC_VSX_SET)) 3994 (clobber (match_scratch:SI 4 "=&wa"))] 3995 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64" 3996 "#" 3997 "&& reload_completed" 3998 [(set (match_dup 4) 3999 (const_int 0)) 4000 (set (match_dup 5) 4001 (unspec:V4SI [(match_dup 5) 4002 (match_dup 4) 4003 (match_dup 3)] 4004 UNSPEC_VSX_SET))] 4005{ 4006 operands[5] = gen_rtx_REG (V4SImode, reg_or_subregno (operands[0])); 4007} 4008 [(set_attr "type" "vecperm") 4009 (set_attr "length" "8") 4010 (set_attr "isa" "p9v")]) 4011 4012;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is the element 4013;; that is in the default scalar position (1 for big endian, 2 for little 4014;; endian). We just need to do an xxinsertw since the element is in the 4015;; correct location. 4016 4017(define_insn "*vsx_insert_extract_v4sf_p9" 4018 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 4019 (unspec:V4SF 4020 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 4021 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") 4022 (parallel 4023 [(match_operand:QI 3 "const_0_to_3_operand" "n")])) 4024 (match_operand:QI 4 "const_0_to_3_operand" "n")] 4025 UNSPEC_VSX_SET))] 4026 "VECTOR_MEM_VSX_P (V4SFmode) && TARGET_P9_VECTOR && TARGET_POWERPC64 4027 && (INTVAL (operands[3]) == (BYTES_BIG_ENDIAN ? 1 : 2))" 4028{ 4029 int ele = INTVAL (operands[4]); 4030 4031 if (!BYTES_BIG_ENDIAN) 4032 ele = GET_MODE_NUNITS (V4SFmode) - 1 - ele; 4033 4034 operands[4] = GEN_INT (GET_MODE_SIZE (SFmode) * ele); 4035 return "xxinsertw %x0,%x2,%4"; 4036} 4037 [(set_attr "type" "vecperm")]) 4038 4039;; Optimize x = vec_insert (vec_extract (v2, n), v1, m) if n is not the element 4040;; that is in the default scalar position (1 for big endian, 2 for little 4041;; endian). Convert the insert/extract to int and avoid doing the conversion. 4042 4043(define_insn_and_split "*vsx_insert_extract_v4sf_p9_2" 4044 [(set (match_operand:V4SF 0 "gpc_reg_operand" "=wa") 4045 (unspec:V4SF 4046 [(match_operand:V4SF 1 "gpc_reg_operand" "0") 4047 (vec_select:SF (match_operand:V4SF 2 "gpc_reg_operand" "wa") 4048 (parallel 4049 [(match_operand:QI 3 "const_0_to_3_operand" "n")])) 4050 (match_operand:QI 4 "const_0_to_3_operand" "n")] 4051 UNSPEC_VSX_SET)) 4052 (clobber (match_scratch:SI 5 "=&wa"))] 4053 "VECTOR_MEM_VSX_P (V4SFmode) && VECTOR_MEM_VSX_P (V4SImode) 4054 && TARGET_P9_VECTOR && TARGET_POWERPC64 4055 && (INTVAL (operands[3]) != (BYTES_BIG_ENDIAN ? 
1 : 2))" 4056 "#" 4057 "&& 1" 4058 [(parallel [(set (match_dup 5) 4059 (vec_select:SI (match_dup 6) 4060 (parallel [(match_dup 3)]))) 4061 (clobber (scratch:SI))]) 4062 (set (match_dup 7) 4063 (unspec:V4SI [(match_dup 8) 4064 (match_dup 5) 4065 (match_dup 4)] 4066 UNSPEC_VSX_SET))] 4067{ 4068 if (GET_CODE (operands[5]) == SCRATCH) 4069 operands[5] = gen_reg_rtx (SImode); 4070 4071 operands[6] = gen_lowpart (V4SImode, operands[2]); 4072 operands[7] = gen_lowpart (V4SImode, operands[0]); 4073 operands[8] = gen_lowpart (V4SImode, operands[1]); 4074} 4075 [(set_attr "type" "vecperm") 4076 (set_attr "isa" "p9v")]) 4077 4078;; Expanders for builtins 4079(define_expand "vsx_mergel_<mode>" 4080 [(use (match_operand:VSX_D 0 "vsx_register_operand")) 4081 (use (match_operand:VSX_D 1 "vsx_register_operand")) 4082 (use (match_operand:VSX_D 2 "vsx_register_operand"))] 4083 "VECTOR_MEM_VSX_P (<MODE>mode)" 4084{ 4085 rtvec v = gen_rtvec (2, GEN_INT (1), GEN_INT (3)); 4086 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); 4087 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); 4088 emit_insn (gen_rtx_SET (operands[0], x)); 4089 DONE; 4090}) 4091 4092(define_expand "vsx_mergeh_<mode>" 4093 [(use (match_operand:VSX_D 0 "vsx_register_operand")) 4094 (use (match_operand:VSX_D 1 "vsx_register_operand")) 4095 (use (match_operand:VSX_D 2 "vsx_register_operand"))] 4096 "VECTOR_MEM_VSX_P (<MODE>mode)" 4097{ 4098 rtvec v = gen_rtvec (2, GEN_INT (0), GEN_INT (2)); 4099 rtx x = gen_rtx_VEC_CONCAT (<VS_double>mode, operands[1], operands[2]); 4100 x = gen_rtx_VEC_SELECT (<MODE>mode, x, gen_rtx_PARALLEL (VOIDmode, v)); 4101 emit_insn (gen_rtx_SET (operands[0], x)); 4102 DONE; 4103}) 4104 4105;; V2DF/V2DI splat 4106;; We separate the register splat insn from the memory splat insn to force the 4107;; register allocator to generate the indexed form of the SPLAT when it is 4108;; given an offsettable memory reference. Otherwise, if the register and 4109;; memory insns were combined into a single insn, the register allocator would 4110;; load the value into a register, and then do a double word permute.
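;; As a usage sketch (function name hypothetical, assuming <altivec.h>),
;; both forms of the splat are reachable from source like:
;;
;;   #include <altivec.h>
;;
;;   vector double
;;   splat_double (double x)
;;   {
;;     /* Becomes xxpermdi/mtvsrdd when x is in a register, or the
;;        indexed lxvdsx when x comes from memory.  */
;;     return vec_splats (x);
;;   }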
4111(define_expand "vsx_splat_<mode>" 4112 [(set (match_operand:VSX_D 0 "vsx_register_operand") 4113 (vec_duplicate:VSX_D 4114 (match_operand:<VS_scalar> 1 "input_operand")))] 4115 "VECTOR_MEM_VSX_P (<MODE>mode)" 4116{ 4117 rtx op1 = operands[1]; 4118 if (MEM_P (op1)) 4119 operands[1] = rs6000_force_indexed_or_indirect_mem (op1); 4120 else if (!REG_P (op1)) 4121 operands[1] = force_reg (<VSX_D:VS_scalar>mode, op1); 4122}) 4123 4124(define_insn "vsx_splat_<mode>_reg" 4125 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we") 4126 (vec_duplicate:VSX_D 4127 (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")))] 4128 "VECTOR_MEM_VSX_P (<MODE>mode)" 4129 "@ 4130 xxpermdi %x0,%x1,%x1,0 4131 mtvsrdd %x0,%1,%1" 4132 [(set_attr "type" "vecperm")]) 4133 4134(define_insn "vsx_splat_<mode>_mem" 4135 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 4136 (vec_duplicate:VSX_D 4137 (match_operand:<VSX_D:VS_scalar> 1 "memory_operand" "Z")))] 4138 "VECTOR_MEM_VSX_P (<MODE>mode)" 4139 "lxvdsx %x0,%y1" 4140 [(set_attr "type" "vecload")]) 4141 4142;; V4SI splat support 4143(define_insn "vsx_splat_v4si" 4144 [(set (match_operand:V4SI 0 "vsx_register_operand" "=we,we") 4145 (vec_duplicate:V4SI 4146 (match_operand:SI 1 "splat_input_operand" "r,Z")))] 4147 "TARGET_P9_VECTOR" 4148 "@ 4149 mtvsrws %x0,%1 4150 lxvwsx %x0,%y1" 4151 [(set_attr "type" "vecperm,vecload")]) 4152 4153;; SImode is not currently allowed in vector registers. This pattern 4154;; allows us to use direct move to get the value in a vector register 4155;; so that we can use XXSPLTW 4156(define_insn "vsx_splat_v4si_di" 4157 [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,we") 4158 (vec_duplicate:V4SI 4159 (truncate:SI 4160 (match_operand:DI 1 "gpc_reg_operand" "wa,r"))))] 4161 "VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT" 4162 "@ 4163 xxspltw %x0,%x1,1 4164 mtvsrws %x0,%1" 4165 [(set_attr "type" "vecperm") 4166 (set_attr "isa" "p8v,*")]) 4167 4168;; V4SF splat (ISA 3.0) 4169(define_insn_and_split "vsx_splat_v4sf" 4170 [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa") 4171 (vec_duplicate:V4SF 4172 (match_operand:SF 1 "splat_input_operand" "Z,wa,r")))] 4173 "TARGET_P9_VECTOR" 4174 "@ 4175 lxvwsx %x0,%y1 4176 # 4177 mtvsrws %x0,%1" 4178 "&& reload_completed && vsx_register_operand (operands[1], SFmode)" 4179 [(set (match_dup 0) 4180 (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN)) 4181 (set (match_dup 0) 4182 (unspec:V4SF [(match_dup 0) 4183 (const_int 0)] UNSPEC_VSX_XXSPLTW))] 4184 "" 4185 [(set_attr "type" "vecload,vecperm,mftgpr") 4186 (set_attr "length" "*,8,*") 4187 (set_attr "isa" "*,p8v,*")]) 4188 4189;; V4SF/V4SI splat from a vector element 4190(define_insn "vsx_xxspltw_<mode>" 4191 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") 4192 (vec_duplicate:VSX_W 4193 (vec_select:<VS_scalar> 4194 (match_operand:VSX_W 1 "vsx_register_operand" "wa") 4195 (parallel 4196 [(match_operand:QI 2 "u5bit_cint_operand" "n")]))))] 4197 "VECTOR_MEM_VSX_P (<MODE>mode)" 4198{ 4199 if (!BYTES_BIG_ENDIAN) 4200 operands[2] = GEN_INT (3 - INTVAL (operands[2])); 4201 4202 return "xxspltw %x0,%x1,%2"; 4203} 4204 [(set_attr "type" "vecperm")]) 4205 4206(define_insn "vsx_xxspltw_<mode>_direct" 4207 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") 4208 (unspec:VSX_W [(match_operand:VSX_W 1 "vsx_register_operand" "wa") 4209 (match_operand:QI 2 "u5bit_cint_operand" "i")] 4210 UNSPEC_VSX_XXSPLTW))] 4211 "VECTOR_MEM_VSX_P (<MODE>mode)" 4212 "xxspltw %x0,%x1,%2" 4213 [(set_attr "type" "vecperm")]) 4214 4215;;
V16QI/V8HI splat support on ISA 2.07 4216(define_insn "vsx_vsplt<VSX_SPLAT_SUFFIX>_di" 4217 [(set (match_operand:VSX_SPLAT_I 0 "altivec_register_operand" "=v") 4218 (vec_duplicate:VSX_SPLAT_I 4219 (truncate:<VS_scalar> 4220 (match_operand:DI 1 "altivec_register_operand" "v"))))] 4221 "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_DIRECT_MOVE_64BIT" 4222 "vsplt<VSX_SPLAT_SUFFIX> %0,%1,<VSX_SPLAT_COUNT>" 4223 [(set_attr "type" "vecperm")]) 4224 4225;; V2DF/V2DI splat for use by vec_splat builtin 4226(define_insn "vsx_xxspltd_<mode>" 4227 [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa") 4228 (unspec:VSX_D [(match_operand:VSX_D 1 "vsx_register_operand" "wa") 4229 (match_operand:QI 2 "u5bit_cint_operand" "i")] 4230 UNSPEC_VSX_XXSPLTD))] 4231 "VECTOR_MEM_VSX_P (<MODE>mode)" 4232{ 4233 if ((BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 0) 4234 || (!BYTES_BIG_ENDIAN && INTVAL (operands[2]) == 1)) 4235 return "xxpermdi %x0,%x1,%x1,0"; 4236 else 4237 return "xxpermdi %x0,%x1,%x1,3"; 4238} 4239 [(set_attr "type" "vecperm")]) 4240 4241;; V4SF/V4SI interleave 4242(define_insn "vsx_xxmrghw_<mode>" 4243 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") 4244 (vec_select:VSX_W 4245 (vec_concat:<VS_double> 4246 (match_operand:VSX_W 1 "vsx_register_operand" "wa") 4247 (match_operand:VSX_W 2 "vsx_register_operand" "wa")) 4248 (parallel [(const_int 0) (const_int 4) 4249 (const_int 1) (const_int 5)])))] 4250 "VECTOR_MEM_VSX_P (<MODE>mode)" 4251{ 4252 if (BYTES_BIG_ENDIAN) 4253 return "xxmrghw %x0,%x1,%x2"; 4254 else 4255 return "xxmrglw %x0,%x2,%x1"; 4256} 4257 [(set_attr "type" "vecperm")]) 4258 4259(define_insn "vsx_xxmrglw_<mode>" 4260 [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa") 4261 (vec_select:VSX_W 4262 (vec_concat:<VS_double> 4263 (match_operand:VSX_W 1 "vsx_register_operand" "wa") 4264 (match_operand:VSX_W 2 "vsx_register_operand" "wa")) 4265 (parallel [(const_int 2) (const_int 6) 4266 (const_int 3) (const_int 7)])))] 4267 "VECTOR_MEM_VSX_P (<MODE>mode)" 4268{ 4269 if (BYTES_BIG_ENDIAN) 4270 return "xxmrglw %x0,%x1,%x2"; 4271 else 4272 return "xxmrghw %x0,%x2,%x1"; 4273} 4274 [(set_attr "type" "vecperm")]) 4275 4276;; Shift left double by word immediate 4277(define_insn "vsx_xxsldwi_<mode>" 4278 [(set (match_operand:VSX_L 0 "vsx_register_operand" "=wa") 4279 (unspec:VSX_L [(match_operand:VSX_L 1 "vsx_register_operand" "wa") 4280 (match_operand:VSX_L 2 "vsx_register_operand" "wa") 4281 (match_operand:QI 3 "u5bit_cint_operand" "i")] 4282 UNSPEC_VSX_SLDWI))] 4283 "VECTOR_MEM_VSX_P (<MODE>mode)" 4284 "xxsldwi %x0,%x1,%x2,%3" 4285 [(set_attr "type" "vecperm") 4286 (set_attr "isa" "<VSisa>")]) 4287 4288 4289;; Vector reduction insns and splitters 4290 4291(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v2df" 4292 [(set (match_operand:V2DF 0 "vfloat_operand" "=&wa,wa") 4293 (VEC_reduc:V2DF 4294 (vec_concat:V2DF 4295 (vec_select:DF 4296 (match_operand:V2DF 1 "vfloat_operand" "wa,wa") 4297 (parallel [(const_int 1)])) 4298 (vec_select:DF 4299 (match_dup 1) 4300 (parallel [(const_int 0)]))) 4301 (match_dup 1))) 4302 (clobber (match_scratch:V2DF 2 "=0,&wa"))] 4303 "VECTOR_UNIT_VSX_P (V2DFmode)" 4304 "#" 4305 "" 4306 [(const_int 0)] 4307{ 4308 rtx tmp = (GET_CODE (operands[2]) == SCRATCH) 4309 ? 
gen_reg_rtx (V2DFmode) 4310 : operands[2]; 4311 emit_insn (gen_vsx_xxsldwi_v2df (tmp, operands[1], operands[1], const2_rtx)); 4312 emit_insn (gen_<VEC_reduc_rtx>v2df3 (operands[0], tmp, operands[1])); 4313 DONE; 4314} 4315 [(set_attr "length" "8") 4316 (set_attr "type" "veccomplex")]) 4317 4318(define_insn_and_split "vsx_reduc_<VEC_reduc_name>_v4sf" 4319 [(set (match_operand:V4SF 0 "vfloat_operand" "=wa") 4320 (VEC_reduc:V4SF 4321 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) 4322 (match_operand:V4SF 1 "vfloat_operand" "wa"))) 4323 (clobber (match_scratch:V4SF 2 "=&wa")) 4324 (clobber (match_scratch:V4SF 3 "=&wa"))] 4325 "VECTOR_UNIT_VSX_P (V4SFmode)" 4326 "#" 4327 "" 4328 [(const_int 0)] 4329{ 4330 rtx op0 = operands[0]; 4331 rtx op1 = operands[1]; 4332 rtx tmp2, tmp3, tmp4; 4333 4334 if (can_create_pseudo_p ()) 4335 { 4336 tmp2 = gen_reg_rtx (V4SFmode); 4337 tmp3 = gen_reg_rtx (V4SFmode); 4338 tmp4 = gen_reg_rtx (V4SFmode); 4339 } 4340 else 4341 { 4342 tmp2 = operands[2]; 4343 tmp3 = operands[3]; 4344 tmp4 = tmp2; 4345 } 4346 4347 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); 4348 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); 4349 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); 4350 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (op0, tmp4, tmp3)); 4351 DONE; 4352} 4353 [(set_attr "length" "16") 4354 (set_attr "type" "veccomplex")]) 4355 4356;; Combiner patterns with the vector reduction patterns that know we can get 4357;; to the top element of the V2DF array without doing an extract. 4358 4359(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v2df_scalar" 4360 [(set (match_operand:DF 0 "vfloat_operand" "=&wa,wa") 4361 (vec_select:DF 4362 (VEC_reduc:V2DF 4363 (vec_concat:V2DF 4364 (vec_select:DF 4365 (match_operand:V2DF 1 "vfloat_operand" "wa,wa") 4366 (parallel [(const_int 1)])) 4367 (vec_select:DF 4368 (match_dup 1) 4369 (parallel [(const_int 0)]))) 4370 (match_dup 1)) 4371 (parallel [(const_int 1)]))) 4372 (clobber (match_scratch:DF 2 "=0,&wa"))] 4373 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V2DFmode)" 4374 "#" 4375 "" 4376 [(const_int 0)] 4377{ 4378 rtx hi = gen_highpart (DFmode, operands[1]); 4379 rtx lo = (GET_CODE (operands[2]) == SCRATCH) 4380 ?
gen_reg_rtx (DFmode) 4381 : operands[2]; 4382 4383 emit_insn (gen_vsx_extract_v2df (lo, operands[1], const1_rtx)); 4384 emit_insn (gen_<VEC_reduc_rtx>df3 (operands[0], hi, lo)); 4385 DONE; 4386} 4387 [(set_attr "length" "8") 4388 (set_attr "type" "veccomplex")]) 4389 4390(define_insn_and_split "*vsx_reduc_<VEC_reduc_name>_v4sf_scalar" 4391 [(set (match_operand:SF 0 "vfloat_operand" "=f") 4392 (vec_select:SF 4393 (VEC_reduc:V4SF 4394 (unspec:V4SF [(const_int 0)] UNSPEC_REDUC) 4395 (match_operand:V4SF 1 "vfloat_operand" "wa")) 4396 (parallel [(const_int 3)]))) 4397 (clobber (match_scratch:V4SF 2 "=&wa")) 4398 (clobber (match_scratch:V4SF 3 "=&wa")) 4399 (clobber (match_scratch:V4SF 4 "=0"))] 4400 "BYTES_BIG_ENDIAN && VECTOR_UNIT_VSX_P (V4SFmode)" 4401 "#" 4402 "" 4403 [(const_int 0)] 4404{ 4405 rtx op0 = operands[0]; 4406 rtx op1 = operands[1]; 4407 rtx tmp2, tmp3, tmp4, tmp5; 4408 4409 if (can_create_pseudo_p ()) 4410 { 4411 tmp2 = gen_reg_rtx (V4SFmode); 4412 tmp3 = gen_reg_rtx (V4SFmode); 4413 tmp4 = gen_reg_rtx (V4SFmode); 4414 tmp5 = gen_reg_rtx (V4SFmode); 4415 } 4416 else 4417 { 4418 tmp2 = operands[2]; 4419 tmp3 = operands[3]; 4420 tmp4 = tmp2; 4421 tmp5 = operands[4]; 4422 } 4423 4424 emit_insn (gen_vsx_xxsldwi_v4sf (tmp2, op1, op1, const2_rtx)); 4425 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp3, tmp2, op1)); 4426 emit_insn (gen_vsx_xxsldwi_v4sf (tmp4, tmp3, tmp3, GEN_INT (3))); 4427 emit_insn (gen_<VEC_reduc_rtx>v4sf3 (tmp5, tmp4, tmp3)); 4428 emit_insn (gen_vsx_xscvspdp_scalar2 (op0, tmp5)); 4429 DONE; 4430} 4431 [(set_attr "length" "20") 4432 (set_attr "type" "veccomplex")]) 4433 4434 4435;; Power8 Vector fusion. The fused ops must be physically adjacent. 4436(define_peephole 4437 [(set (match_operand:P 0 "base_reg_operand") 4438 (match_operand:P 1 "short_cint_operand")) 4439 (set (match_operand:VSX_M 2 "vsx_register_operand") 4440 (mem:VSX_M (plus:P (match_dup 0) 4441 (match_operand:P 3 "int_reg_operand"))))] 4442 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" 4443 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" 4444 [(set_attr "length" "8") 4445 (set_attr "type" "vecload")]) 4446 4447(define_peephole 4448 [(set (match_operand:P 0 "base_reg_operand") 4449 (match_operand:P 1 "short_cint_operand")) 4450 (set (match_operand:VSX_M 2 "vsx_register_operand") 4451 (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand") 4452 (match_dup 0))))] 4453 "TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR" 4454 "li %0,%1\;lx<VSX_M:VSm>x %x2,%0,%3\t\t\t# vector load fusion" 4455 [(set_attr "length" "8") 4456 (set_attr "type" "vecload")]) 4457 4458 4459;; ISA 3.0 vector extend sign support 4460 4461(define_insn "vsx_sign_extend_qi_<mode>" 4462 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") 4463 (unspec:VSINT_84 4464 [(match_operand:V16QI 1 "vsx_register_operand" "v")] 4465 UNSPEC_VSX_SIGN_EXTEND))] 4466 "TARGET_P9_VECTOR" 4467 "vextsb2<wd> %0,%1" 4468 [(set_attr "type" "vecexts")]) 4469 4470(define_insn "vsx_sign_extend_hi_<mode>" 4471 [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v") 4472 (unspec:VSINT_84 4473 [(match_operand:V8HI 1 "vsx_register_operand" "v")] 4474 UNSPEC_VSX_SIGN_EXTEND))] 4475 "TARGET_P9_VECTOR" 4476 "vextsh2<wd> %0,%1" 4477 [(set_attr "type" "vecexts")]) 4478 4479(define_insn "*vsx_sign_extend_si_v2di" 4480 [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") 4481 (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] 4482 UNSPEC_VSX_SIGN_EXTEND))] 4483 "TARGET_P9_VECTOR" 4484 "vextsw2d %0,%1" 4485 [(set_attr 
"type" "vecexts")]) 4486 4487 4488;; ISA 3.0 Binary Floating-Point Support 4489 4490;; VSX Scalar Extract Exponent Quad-Precision 4491(define_insn "xsxexpqp_<mode>" 4492 [(set (match_operand:DI 0 "altivec_register_operand" "=v") 4493 (unspec:DI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] 4494 UNSPEC_VSX_SXEXPDP))] 4495 "TARGET_P9_VECTOR" 4496 "xsxexpqp %0,%1" 4497 [(set_attr "type" "vecmove")]) 4498 4499;; VSX Scalar Extract Exponent Double-Precision 4500(define_insn "xsxexpdp" 4501 [(set (match_operand:DI 0 "register_operand" "=r") 4502 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] 4503 UNSPEC_VSX_SXEXPDP))] 4504 "TARGET_P9_VECTOR && TARGET_64BIT" 4505 "xsxexpdp %0,%x1" 4506 [(set_attr "type" "integer")]) 4507 4508;; VSX Scalar Extract Significand Quad-Precision 4509(define_insn "xsxsigqp_<mode>" 4510 [(set (match_operand:TI 0 "altivec_register_operand" "=v") 4511 (unspec:TI [(match_operand:IEEE128 1 "altivec_register_operand" "v")] 4512 UNSPEC_VSX_SXSIG))] 4513 "TARGET_P9_VECTOR" 4514 "xsxsigqp %0,%1" 4515 [(set_attr "type" "vecmove")]) 4516 4517;; VSX Scalar Extract Significand Double-Precision 4518(define_insn "xsxsigdp" 4519 [(set (match_operand:DI 0 "register_operand" "=r") 4520 (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] 4521 UNSPEC_VSX_SXSIG))] 4522 "TARGET_P9_VECTOR && TARGET_64BIT" 4523 "xsxsigdp %0,%x1" 4524 [(set_attr "type" "integer")]) 4525 4526;; VSX Scalar Insert Exponent Quad-Precision Floating Point Argument 4527(define_insn "xsiexpqpf_<mode>" 4528 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") 4529 (unspec:IEEE128 4530 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4531 (match_operand:DI 2 "altivec_register_operand" "v")] 4532 UNSPEC_VSX_SIEXPQP))] 4533 "TARGET_P9_VECTOR" 4534 "xsiexpqp %0,%1,%2" 4535 [(set_attr "type" "vecmove")]) 4536 4537;; VSX Scalar Insert Exponent Quad-Precision 4538(define_insn "xsiexpqp_<mode>" 4539 [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v") 4540 (unspec:IEEE128 [(match_operand:TI 1 "altivec_register_operand" "v") 4541 (match_operand:DI 2 "altivec_register_operand" "v")] 4542 UNSPEC_VSX_SIEXPQP))] 4543 "TARGET_P9_VECTOR" 4544 "xsiexpqp %0,%1,%2" 4545 [(set_attr "type" "vecmove")]) 4546 4547;; VSX Scalar Insert Exponent Double-Precision 4548(define_insn "xsiexpdp" 4549 [(set (match_operand:DF 0 "vsx_register_operand" "=wa") 4550 (unspec:DF [(match_operand:DI 1 "register_operand" "r") 4551 (match_operand:DI 2 "register_operand" "r")] 4552 UNSPEC_VSX_SIEXPDP))] 4553 "TARGET_P9_VECTOR && TARGET_64BIT" 4554 "xsiexpdp %x0,%1,%2" 4555 [(set_attr "type" "fpsimple")]) 4556 4557;; VSX Scalar Insert Exponent Double-Precision Floating Point Argument 4558(define_insn "xsiexpdpf" 4559 [(set (match_operand:DF 0 "vsx_register_operand" "=wa") 4560 (unspec:DF [(match_operand:DF 1 "register_operand" "r") 4561 (match_operand:DI 2 "register_operand" "r")] 4562 UNSPEC_VSX_SIEXPDP))] 4563 "TARGET_P9_VECTOR && TARGET_64BIT" 4564 "xsiexpdp %x0,%1,%2" 4565 [(set_attr "type" "fpsimple")]) 4566 4567;; VSX Scalar Compare Exponents Double-Precision 4568(define_expand "xscmpexpdp_<code>" 4569 [(set (match_dup 3) 4570 (compare:CCFP 4571 (unspec:DF 4572 [(match_operand:DF 1 "vsx_register_operand" "wa") 4573 (match_operand:DF 2 "vsx_register_operand" "wa")] 4574 UNSPEC_VSX_SCMPEXPDP) 4575 (const_int 0))) 4576 (set (match_operand:SI 0 "register_operand" "=r") 4577 (CMP_TEST:SI (match_dup 3) 4578 (const_int 0)))] 4579 "TARGET_P9_VECTOR" 4580{ 4581 if (<CODE> == UNORDERED && !HONOR_NANS 
(DFmode)) 4582 { 4583 emit_move_insn (operands[0], const0_rtx); 4584 DONE; 4585 } 4586 4587 operands[3] = gen_reg_rtx (CCFPmode); 4588}) 4589 4590(define_insn "*xscmpexpdp" 4591 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") 4592 (compare:CCFP 4593 (unspec:DF [(match_operand:DF 1 "vsx_register_operand" "wa") 4594 (match_operand:DF 2 "vsx_register_operand" "wa")] 4595 UNSPEC_VSX_SCMPEXPDP) 4596 (match_operand:SI 3 "zero_constant" "j")))] 4597 "TARGET_P9_VECTOR" 4598 "xscmpexpdp %0,%x1,%x2" 4599 [(set_attr "type" "fpcompare")]) 4600 4601;; VSX Scalar Compare Exponents Quad-Precision 4602(define_expand "xscmpexpqp_<code>_<mode>" 4603 [(set (match_dup 3) 4604 (compare:CCFP 4605 (unspec:IEEE128 4606 [(match_operand:IEEE128 1 "vsx_register_operand" "v") 4607 (match_operand:IEEE128 2 "vsx_register_operand" "v")] 4608 UNSPEC_VSX_SCMPEXPQP) 4609 (const_int 0))) 4610 (set (match_operand:SI 0 "register_operand" "=r") 4611 (CMP_TEST:SI (match_dup 3) 4612 (const_int 0)))] 4613 "TARGET_P9_VECTOR" 4614{ 4615 if (<CODE> == UNORDERED && !HONOR_NANS (<MODE>mode)) 4616 { 4617 emit_move_insn (operands[0], const0_rtx); 4618 DONE; 4619 } 4620 4621 operands[3] = gen_reg_rtx (CCFPmode); 4622}) 4623 4624(define_insn "*xscmpexpqp" 4625 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") 4626 (compare:CCFP 4627 (unspec:IEEE128 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4628 (match_operand:IEEE128 2 "altivec_register_operand" "v")] 4629 UNSPEC_VSX_SCMPEXPQP) 4630 (match_operand:SI 3 "zero_constant" "j")))] 4631 "TARGET_P9_VECTOR" 4632 "xscmpexpqp %0,%1,%2" 4633 [(set_attr "type" "fpcompare")]) 4634 4635;; VSX Scalar Test Data Class Quad-Precision 4636;; (Expansion for scalar_test_data_class (__ieee128, int)) 4637;; (Has side effect of setting the lt bit if operand 1 is negative, 4638;; setting the eq bit if any of the conditions tested by operand 2 4639;; are satisfied, and clearing the gt and unordered bits to zero.) 4640(define_expand "xststdcqp_<mode>" 4641 [(set (match_dup 3) 4642 (compare:CCFP 4643 (unspec:IEEE128 4644 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4645 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4646 UNSPEC_VSX_STSTDC) 4647 (const_int 0))) 4648 (set (match_operand:SI 0 "register_operand" "=r") 4649 (eq:SI (match_dup 3) 4650 (const_int 0)))] 4651 "TARGET_P9_VECTOR" 4652{ 4653 operands[3] = gen_reg_rtx (CCFPmode); 4654}) 4655 4656;; VSX Scalar Test Data Class Double- and Single-Precision 4657;; (The lt bit is set if operand 1 is negative. The eq bit is set 4658;; if any of the conditions tested by operand 2 are satisfied. 4659;; The gt and unordered bits are cleared to zero.)
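;; A hedged example of the built-in these expanders back (the function name
;; and the mask value 0x7f are illustrative only):
;;
;;   #include <altivec.h>
;;
;;   int
;;   is_special (double x)
;;   {
;;     /* Bits of the second argument select which conditions to test
;;        (NaN, +/-Infinity, +/-0, denormal); the result is read back
;;        from the eq bit of the CR field set by xststdcdp.  */
;;     return scalar_test_data_class (x, 0x7f);
;;   }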
4660(define_expand "xststdc<sd>p" 4661 [(set (match_dup 3) 4662 (compare:CCFP 4663 (unspec:SFDF 4664 [(match_operand:SFDF 1 "vsx_register_operand" "wa") 4665 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4666 UNSPEC_VSX_STSTDC) 4667 (match_dup 4))) 4668 (set (match_operand:SI 0 "register_operand" "=r") 4669 (eq:SI (match_dup 3) 4670 (const_int 0)))] 4671 "TARGET_P9_VECTOR" 4672{ 4673 operands[3] = gen_reg_rtx (CCFPmode); 4674 operands[4] = CONST0_RTX (SImode); 4675}) 4676 4677;; The VSX Scalar Test Negative Quad-Precision 4678(define_expand "xststdcnegqp_<mode>" 4679 [(set (match_dup 2) 4680 (compare:CCFP 4681 (unspec:IEEE128 4682 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4683 (const_int 0)] 4684 UNSPEC_VSX_STSTDC) 4685 (const_int 0))) 4686 (set (match_operand:SI 0 "register_operand" "=r") 4687 (lt:SI (match_dup 2) 4688 (const_int 0)))] 4689 "TARGET_P9_VECTOR" 4690{ 4691 operands[2] = gen_reg_rtx (CCFPmode); 4692}) 4693 4694;; The VSX Scalar Test Negative Double- and Single-Precision 4695(define_expand "xststdcneg<sd>p" 4696 [(set (match_dup 2) 4697 (compare:CCFP 4698 (unspec:SFDF 4699 [(match_operand:SFDF 1 "vsx_register_operand" "wa") 4700 (const_int 0)] 4701 UNSPEC_VSX_STSTDC) 4702 (match_dup 3))) 4703 (set (match_operand:SI 0 "register_operand" "=r") 4704 (lt:SI (match_dup 2) 4705 (const_int 0)))] 4706 "TARGET_P9_VECTOR" 4707{ 4708 operands[2] = gen_reg_rtx (CCFPmode); 4709 operands[3] = CONST0_RTX (SImode); 4710}) 4711 4712(define_insn "*xststdcqp_<mode>" 4713 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") 4714 (compare:CCFP 4715 (unspec:IEEE128 4716 [(match_operand:IEEE128 1 "altivec_register_operand" "v") 4717 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4718 UNSPEC_VSX_STSTDC) 4719 (const_int 0)))] 4720 "TARGET_P9_VECTOR" 4721 "xststdcqp %0,%1,%2" 4722 [(set_attr "type" "fpcompare")]) 4723 4724(define_insn "*xststdc<sd>p" 4725 [(set (match_operand:CCFP 0 "cc_reg_operand" "=y") 4726 (compare:CCFP 4727 (unspec:SFDF [(match_operand:SFDF 1 "vsx_register_operand" "wa") 4728 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4729 UNSPEC_VSX_STSTDC) 4730 (match_operand:SI 3 "zero_constant" "j")))] 4731 "TARGET_P9_VECTOR" 4732 "xststdc<sd>p %0,%x1,%2" 4733 [(set_attr "type" "fpcompare")]) 4734 4735;; VSX Vector Extract Exponent Double and Single Precision 4736(define_insn "xvxexp<sd>p" 4737 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 4738 (unspec:VSX_F 4739 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 4740 UNSPEC_VSX_VXEXP))] 4741 "TARGET_P9_VECTOR" 4742 "xvxexp<sd>p %x0,%x1" 4743 [(set_attr "type" "vecsimple")]) 4744 4745;; VSX Vector Extract Significand Double and Single Precision 4746(define_insn "xvxsig<sd>p" 4747 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 4748 (unspec:VSX_F 4749 [(match_operand:VSX_F 1 "vsx_register_operand" "wa")] 4750 UNSPEC_VSX_VXSIG))] 4751 "TARGET_P9_VECTOR" 4752 "xvxsig<sd>p %x0,%x1" 4753 [(set_attr "type" "vecsimple")]) 4754 4755;; VSX Vector Insert Exponent Double and Single Precision 4756(define_insn "xviexp<sd>p" 4757 [(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa") 4758 (unspec:VSX_F 4759 [(match_operand:VSX_F 1 "vsx_register_operand" "wa") 4760 (match_operand:VSX_F 2 "vsx_register_operand" "wa")] 4761 UNSPEC_VSX_VIEXP))] 4762 "TARGET_P9_VECTOR" 4763 "xviexp<sd>p %x0,%x1,%x2" 4764 [(set_attr "type" "vecsimple")]) 4765 4766;; VSX Vector Test Data Class Double and Single Precision 4767;; The corresponding elements of the result vector are all ones 4768;; if any of the conditions tested by operand 2 are satisfied.
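;; Illustrative use (hypothetical function, assuming <altivec.h>; assuming
;; the usual DCMX bit assignment, 0x40 selects the NaN test):
;;
;;   #include <altivec.h>
;;
;;   vector bool int
;;   find_nans (vector float v)
;;   {
;;     /* Each result element is all ones where the corresponding input
;;        element is a NaN, via xvtstdcsp.  */
;;     return vec_test_data_class (v, 0x40);
;;   }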
4769(define_insn "xvtstdc<sd>p" 4770 [(set (match_operand:<VSI> 0 "vsx_register_operand" "=wa") 4771 (unspec:<VSI> 4772 [(match_operand:VSX_F 1 "vsx_register_operand" "wa") 4773 (match_operand:SI 2 "u7bit_cint_operand" "n")] 4774 UNSPEC_VSX_VTSTDC))] 4775 "TARGET_P9_VECTOR" 4776 "xvtstdc<sd>p %x0,%x1,%2" 4777 [(set_attr "type" "vecsimple")]) 4778 4779;; ISA 3.0 String Operations Support 4780 4781;; Compare vectors producing a vector result and a predicate, setting CR6 4782;; to indicate a combined status. This pattern matches v16qi, v8hi, and 4783;; v4si modes. It does not match v2df, v4sf, or v2di modes; there is no 4784;; need to, because comparisons in those modes are expanded 4785;; to use Power8 instructions. 4786(define_insn "*vsx_ne_<mode>_p" 4787 [(set (reg:CC CR6_REGNO) 4788 (unspec:CC 4789 [(ne:CC (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "v") 4790 (match_operand:VSX_EXTRACT_I 2 "gpc_reg_operand" "v"))] 4791 UNSPEC_PREDICATE)) 4792 (set (match_operand:VSX_EXTRACT_I 0 "gpc_reg_operand" "=v") 4793 (ne:VSX_EXTRACT_I (match_dup 1) 4794 (match_dup 2)))] 4795 "TARGET_P9_VECTOR" 4796 "vcmpne<VSX_EXTRACT_WIDTH>. %0,%1,%2" 4797 [(set_attr "type" "vecsimple")]) 4798 4799(define_insn "*vector_nez_<mode>_p" 4800 [(set (reg:CC CR6_REGNO) 4801 (unspec:CC [(unspec:VI 4802 [(match_operand:VI 1 "gpc_reg_operand" "v") 4803 (match_operand:VI 2 "gpc_reg_operand" "v")] 4804 UNSPEC_NEZ_P)] 4805 UNSPEC_PREDICATE)) 4806 (set (match_operand:VI 0 "gpc_reg_operand" "=v") 4807 (unspec:VI [(match_dup 1) 4808 (match_dup 2)] 4809 UNSPEC_NEZ_P))] 4810 "TARGET_P9_VECTOR" 4811 "vcmpnez<VSX_EXTRACT_WIDTH>. %0,%1,%2" 4812 [(set_attr "type" "vecsimple")]) 4813 4814;; Return first position of match between vectors using natural order 4815;; for both LE and BE execution modes. 4816(define_expand "first_match_index_<mode>" 4817 [(match_operand:SI 0 "register_operand") 4818 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4819 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4820 UNSPEC_VSX_FIRST_MATCH_INDEX)] 4821 "TARGET_P9_VECTOR" 4822{ 4823 int sh; 4824 4825 rtx cmp_result = gen_reg_rtx (<MODE>mode); 4826 rtx not_result = gen_reg_rtx (<MODE>mode); 4827 4828 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmp_result, operands[1], 4829 operands[2])); 4830 emit_insn (gen_one_cmpl<mode>2 (not_result, cmp_result)); 4831 4832 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4833 4834 if (<MODE>mode == V16QImode) 4835 { 4836 if (!BYTES_BIG_ENDIAN) 4837 emit_insn (gen_vctzlsbb_<mode> (operands[0], not_result)); 4838 else 4839 emit_insn (gen_vclzlsbb_<mode> (operands[0], not_result)); 4840 } 4841 else 4842 { 4843 rtx tmp = gen_reg_rtx (SImode); 4844 if (!BYTES_BIG_ENDIAN) 4845 emit_insn (gen_vctzlsbb_<mode> (tmp, not_result)); 4846 else 4847 emit_insn (gen_vclzlsbb_<mode> (tmp, not_result)); 4848 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4849 } 4850 DONE; 4851}) 4852 4853;; Return first position of match between vectors or end of string (EOS) using 4854;; natural element order for both LE and BE execution modes.
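;; Rough usage sketch for the match-or-EOS form (hypothetical function,
;; assuming <altivec.h>), the building block for strchr-style scans that
;; must stop at either a match or a terminating zero:
;;
;;   #include <altivec.h>
;;
;;   int
;;   find_or_eos (vector unsigned char a, vector unsigned char b)
;;   {
;;     /* Index of the first position where a and b match or where either
;;        vector holds a zero byte; 16 when neither occurs.  */
;;     return vec_first_match_or_eos_index (a, b);
;;   }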
4855(define_expand "first_match_or_eos_index_<mode>" 4856 [(match_operand:SI 0 "register_operand") 4857 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4858 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4859 UNSPEC_VSX_FIRST_MATCH_EOS_INDEX)] 4860 "TARGET_P9_VECTOR" 4861{ 4862 int sh; 4863 rtx cmpz1_result = gen_reg_rtx (<MODE>mode); 4864 rtx cmpz2_result = gen_reg_rtx (<MODE>mode); 4865 rtx cmpz_result = gen_reg_rtx (<MODE>mode); 4866 rtx and_result = gen_reg_rtx (<MODE>mode); 4867 rtx result = gen_reg_rtx (<MODE>mode); 4868 rtx vzero = gen_reg_rtx (<MODE>mode); 4869 4870 /* Vector with zeros in elements that correspond to zeros in operands. */ 4871 emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); 4872 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero)); 4873 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero)); 4874 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result)); 4875 4876 /* Vector with ones in elements that do not match. */ 4877 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1], 4878 operands[2])); 4879 4880 /* Create vector with ones in elements where there was a zero in one of 4881 the source elements or the elements match. */ 4882 emit_insn (gen_nand<mode>3 (result, and_result, cmpz_result)); 4883 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4884 4885 if (<MODE>mode == V16QImode) 4886 { 4887 if (!BYTES_BIG_ENDIAN) 4888 emit_insn (gen_vctzlsbb_<mode> (operands[0], result)); 4889 else 4890 emit_insn (gen_vclzlsbb_<mode> (operands[0], result)); 4891 } 4892 else 4893 { 4894 rtx tmp = gen_reg_rtx (SImode); 4895 if (!BYTES_BIG_ENDIAN) 4896 emit_insn (gen_vctzlsbb_<mode> (tmp, result)); 4897 else 4898 emit_insn (gen_vclzlsbb_<mode> (tmp, result)); 4899 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4900 } 4901 DONE; 4902}) 4903 4904;; Return first position of mismatch between vectors using natural 4905;; element order for both LE and BE execution modes. 4906(define_expand "first_mismatch_index_<mode>" 4907 [(match_operand:SI 0 "register_operand") 4908 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4909 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4910 UNSPEC_VSX_FIRST_MISMATCH_INDEX)] 4911 "TARGET_P9_VECTOR" 4912{ 4913 int sh; 4914 rtx cmp_result = gen_reg_rtx (<MODE>mode); 4915 4916 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmp_result, operands[1], 4917 operands[2])); 4918 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4919 4920 if (<MODE>mode == V16QImode) 4921 { 4922 if (!BYTES_BIG_ENDIAN) 4923 emit_insn (gen_vctzlsbb_<mode> (operands[0], cmp_result)); 4924 else 4925 emit_insn (gen_vclzlsbb_<mode> (operands[0], cmp_result)); 4926 } 4927 else 4928 { 4929 rtx tmp = gen_reg_rtx (SImode); 4930 if (!BYTES_BIG_ENDIAN) 4931 emit_insn (gen_vctzlsbb_<mode> (tmp, cmp_result)); 4932 else 4933 emit_insn (gen_vclzlsbb_<mode> (tmp, cmp_result)); 4934 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4935 } 4936 DONE; 4937}) 4938 4939;; Return first position of mismatch between vectors or end of string (EOS) 4940;; using natural element order for both LE and BE execution modes.
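;; A similar sketch for the mismatch-or-EOS form (hypothetical function,
;; assuming <altivec.h>), useful in strcmp/strncmp-style loops:
;;
;;   #include <altivec.h>
;;
;;   int
;;   diff_or_eos (vector unsigned char a, vector unsigned char b)
;;   {
;;     /* First position where a and b differ or where either vector
;;        holds a zero byte.  */
;;     return vec_first_mismatch_or_eos_index (a, b);
;;   }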
4941(define_expand "first_mismatch_or_eos_index_<mode>" 4942 [(match_operand:SI 0 "register_operand") 4943 (unspec:SI [(match_operand:VSX_EXTRACT_I 1 "register_operand") 4944 (match_operand:VSX_EXTRACT_I 2 "register_operand")] 4945 UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX)] 4946 "TARGET_P9_VECTOR" 4947{ 4948 int sh; 4949 rtx cmpz1_result = gen_reg_rtx (<MODE>mode); 4950 rtx cmpz2_result = gen_reg_rtx (<MODE>mode); 4951 rtx cmpz_result = gen_reg_rtx (<MODE>mode); 4952 rtx not_cmpz_result = gen_reg_rtx (<MODE>mode); 4953 rtx and_result = gen_reg_rtx (<MODE>mode); 4954 rtx result = gen_reg_rtx (<MODE>mode); 4955 rtx vzero = gen_reg_rtx (<MODE>mode); 4956 4957 /* Vector with zeros in elements that correspond to zeros in operands. */ 4958 emit_move_insn (vzero, CONST0_RTX (<MODE>mode)); 4959 4960 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz1_result, operands[1], vzero)); 4961 emit_insn (gen_vcmpne<VSX_EXTRACT_WIDTH> (cmpz2_result, operands[2], vzero)); 4962 emit_insn (gen_and<mode>3 (and_result, cmpz1_result, cmpz2_result)); 4963 4964 /* Vector with ones in elements that match. */ 4965 emit_insn (gen_vcmpnez<VSX_EXTRACT_WIDTH> (cmpz_result, operands[1], 4966 operands[2])); 4967 emit_insn (gen_one_cmpl<mode>2 (not_cmpz_result, cmpz_result)); 4968 4969 /* Create vector with ones in elements where there was a zero in one of 4970 the source elements or the elements did not match. */ 4971 emit_insn (gen_nand<mode>3 (result, and_result, not_cmpz_result)); 4972 sh = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) / 2; 4973 4974 if (<MODE>mode == V16QImode) 4975 { 4976 if (!BYTES_BIG_ENDIAN) 4977 emit_insn (gen_vctzlsbb_<mode> (operands[0], result)); 4978 else 4979 emit_insn (gen_vclzlsbb_<mode> (operands[0], result)); 4980 } 4981 else 4982 { 4983 rtx tmp = gen_reg_rtx (SImode); 4984 if (!BYTES_BIG_ENDIAN) 4985 emit_insn (gen_vctzlsbb_<mode> (tmp, result)); 4986 else 4987 emit_insn (gen_vclzlsbb_<mode> (tmp, result)); 4988 emit_insn (gen_lshrsi3 (operands[0], tmp, GEN_INT (sh))); 4989 } 4990 DONE; 4991}) 4992 4993;; Load VSX Vector with Length 4994(define_expand "lxvl" 4995 [(set (match_dup 3) 4996 (ashift:DI (match_operand:DI 2 "register_operand") 4997 (const_int 56))) 4998 (set (match_operand:V16QI 0 "vsx_register_operand") 4999 (unspec:V16QI 5000 [(match_operand:DI 1 "gpc_reg_operand") 5001 (mem:V16QI (match_dup 1)) 5002 (match_dup 3)] 5003 UNSPEC_LXVL))] 5004 "TARGET_P9_VECTOR && TARGET_64BIT" 5005{ 5006 operands[3] = gen_reg_rtx (DImode); 5007}) 5008 5009(define_insn "*lxvl" 5010 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 5011 (unspec:V16QI 5012 [(match_operand:DI 1 "gpc_reg_operand" "b") 5013 (mem:V16QI (match_dup 1)) 5014 (match_operand:DI 2 "register_operand" "r")] 5015 UNSPEC_LXVL))] 5016 "TARGET_P9_VECTOR && TARGET_64BIT" 5017 "lxvl %x0,%1,%2" 5018 [(set_attr "type" "vecload")]) 5019 5020(define_insn "lxvll" 5021 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 5022 (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b") 5023 (mem:V16QI (match_dup 1)) 5024 (match_operand:DI 2 "register_operand" "r")] 5025 UNSPEC_LXVLL))] 5026 "TARGET_P9_VECTOR" 5027 "lxvll %x0,%1,%2" 5028 [(set_attr "type" "vecload")]) 5029 5030;; Expand for builtin xl_len_r 5031(define_expand "xl_len_r" 5032 [(match_operand:V16QI 0 "vsx_register_operand") 5033 (match_operand:DI 1 "register_operand") 5034 (match_operand:DI 2 "register_operand")] 5035 "" 5036{ 5037 rtx shift_mask = gen_reg_rtx (V16QImode); 5038 rtx rtx_vtmp = gen_reg_rtx (V16QImode); 5039 rtx tmp = gen_reg_rtx (DImode); 5040 5041 emit_insn
;; Load VSX Vector with Length
(define_expand "lxvl"
  [(set (match_dup 3)
        (ashift:DI (match_operand:DI 2 "register_operand")
                   (const_int 56)))
   (set (match_operand:V16QI 0 "vsx_register_operand")
        (unspec:V16QI
         [(match_operand:DI 1 "gpc_reg_operand")
          (mem:V16QI (match_dup 1))
          (match_dup 3)]
         UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
{
  operands[3] = gen_reg_rtx (DImode);
})

(define_insn "*lxvl"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI
         [(match_operand:DI 1 "gpc_reg_operand" "b")
          (mem:V16QI (match_dup 1))
          (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_LXVL))]
  "TARGET_P9_VECTOR && TARGET_64BIT"
  "lxvl %x0,%1,%2"
  [(set_attr "type" "vecload")])

(define_insn "lxvll"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:DI 1 "gpc_reg_operand" "b")
                       (mem:V16QI (match_dup 1))
                       (match_operand:DI 2 "register_operand" "r")]
         UNSPEC_LXVLL))]
  "TARGET_P9_VECTOR"
  "lxvll %x0,%1,%2"
  [(set_attr "type" "vecload")])

;; Expand for builtin xl_len_r
(define_expand "xl_len_r"
  [(match_operand:V16QI 0 "vsx_register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:DI 2 "register_operand")]
  ""
{
  rtx shift_mask = gen_reg_rtx (V16QImode);
  rtx rtx_vtmp = gen_reg_rtx (V16QImode);
  rtx tmp = gen_reg_rtx (DImode);

  emit_insn (gen_altivec_lvsl_reg (shift_mask, operands[2]));
  emit_insn (gen_ashldi3 (tmp, operands[2], GEN_INT (56)));
  emit_insn (gen_lxvll (rtx_vtmp, operands[1], tmp));
  emit_insn (gen_altivec_vperm_v8hiv16qi (operands[0], rtx_vtmp, rtx_vtmp,
                                          shift_mask));
  DONE;
})
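;; Usage sketch for the load-with-length patterns above (hedged; assumes
;; the vec_xl_len built-in expands through "lxvl" when compiling with
;; -mcpu=power9 on a 64-bit target):
;;
;;   #include <altivec.h>
;;
;;   vector unsigned char
;;   load_first_n (unsigned char *p, size_t n)  /* loads bytes 0..n-1, n <= 16 */
;;   {
;;     return vec_xl_len (p, n);
;;   }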
%0,%1,%2" 5140 [(set_attr "type" "vecsimple")]) 5141 5142;; Vector Compare Not Equal Half Word (specified/not+eq:) 5143(define_insn "vcmpneh" 5144 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") 5145 (not:V8HI 5146 (eq:V8HI (match_operand:V8HI 1 "altivec_register_operand" "v") 5147 (match_operand:V8HI 2 "altivec_register_operand" "v"))))] 5148 "TARGET_P9_VECTOR" 5149 "vcmpneh %0,%1,%2" 5150 [(set_attr "type" "vecsimple")]) 5151 5152;; Vector Compare Not Equal or Zero Half Word 5153(define_insn "vcmpnezh" 5154 [(set (match_operand:V8HI 0 "altivec_register_operand" "=v") 5155 (unspec:V8HI [(match_operand:V8HI 1 "altivec_register_operand" "v") 5156 (match_operand:V8HI 2 "altivec_register_operand" "v")] 5157 UNSPEC_VCMPNEZH))] 5158 "TARGET_P9_VECTOR" 5159 "vcmpnezh %0,%1,%2" 5160 [(set_attr "type" "vecsimple")]) 5161 5162;; Vector Compare Not Equal Word (specified/not+eq:) 5163(define_insn "vcmpnew" 5164 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") 5165 (not:V4SI 5166 (eq:V4SI (match_operand:V4SI 1 "altivec_register_operand" "v") 5167 (match_operand:V4SI 2 "altivec_register_operand" "v"))))] 5168 "TARGET_P9_VECTOR" 5169 "vcmpnew %0,%1,%2" 5170 [(set_attr "type" "vecsimple")]) 5171 5172;; Vector Compare Not Equal or Zero Word 5173(define_insn "vcmpnezw" 5174 [(set (match_operand:V4SI 0 "altivec_register_operand" "=v") 5175 (unspec:V4SI [(match_operand:V4SI 1 "altivec_register_operand" "v") 5176 (match_operand:V4SI 2 "altivec_register_operand" "v")] 5177 UNSPEC_VCMPNEZW))] 5178 "TARGET_P9_VECTOR" 5179 "vcmpnezw %0,%1,%2" 5180 [(set_attr "type" "vecsimple")]) 5181 5182;; Vector Count Leading Zero Least-Significant Bits Byte 5183(define_insn "vclzlsbb_<mode>" 5184 [(set (match_operand:SI 0 "register_operand" "=r") 5185 (unspec:SI 5186 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] 5187 UNSPEC_VCLZLSBB))] 5188 "TARGET_P9_VECTOR" 5189 "vclzlsbb %0,%1" 5190 [(set_attr "type" "vecsimple")]) 5191 5192;; Vector Count Trailing Zero Least-Significant Bits Byte 5193(define_insn "vctzlsbb_<mode>" 5194 [(set (match_operand:SI 0 "register_operand" "=r") 5195 (unspec:SI 5196 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")] 5197 UNSPEC_VCTZLSBB))] 5198 "TARGET_P9_VECTOR" 5199 "vctzlsbb %0,%1" 5200 [(set_attr "type" "vecsimple")]) 5201 5202;; Vector Extract Unsigned Byte Left-Indexed 5203(define_insn "vextublx" 5204 [(set (match_operand:SI 0 "register_operand" "=r") 5205 (unspec:SI 5206 [(match_operand:SI 1 "register_operand" "r") 5207 (match_operand:V16QI 2 "altivec_register_operand" "v")] 5208 UNSPEC_VEXTUBLX))] 5209 "TARGET_P9_VECTOR" 5210 "vextublx %0,%1,%2" 5211 [(set_attr "type" "vecsimple")]) 5212 5213;; Vector Extract Unsigned Byte Right-Indexed 5214(define_insn "vextubrx" 5215 [(set (match_operand:SI 0 "register_operand" "=r") 5216 (unspec:SI 5217 [(match_operand:SI 1 "register_operand" "r") 5218 (match_operand:V16QI 2 "altivec_register_operand" "v")] 5219 UNSPEC_VEXTUBRX))] 5220 "TARGET_P9_VECTOR" 5221 "vextubrx %0,%1,%2" 5222 [(set_attr "type" "vecsimple")]) 5223 5224;; Vector Extract Unsigned Half Word Left-Indexed 5225(define_insn "vextuhlx" 5226 [(set (match_operand:SI 0 "register_operand" "=r") 5227 (unspec:SI 5228 [(match_operand:SI 1 "register_operand" "r") 5229 (match_operand:V8HI 2 "altivec_register_operand" "v")] 5230 UNSPEC_VEXTUHLX))] 5231 "TARGET_P9_VECTOR" 5232 "vextuhlx %0,%1,%2" 5233 [(set_attr "type" "vecsimple")]) 5234 5235;; Vector Extract Unsigned Half Word Right-Indexed 5236(define_insn "vextuhrx" 5237 [(set 
(match_operand:SI 0 "register_operand" "=r") 5238 (unspec:SI 5239 [(match_operand:SI 1 "register_operand" "r") 5240 (match_operand:V8HI 2 "altivec_register_operand" "v")] 5241 UNSPEC_VEXTUHRX))] 5242 "TARGET_P9_VECTOR" 5243 "vextuhrx %0,%1,%2" 5244 [(set_attr "type" "vecsimple")]) 5245 5246;; Vector Extract Unsigned Word Left-Indexed 5247(define_insn "vextuwlx" 5248 [(set (match_operand:SI 0 "register_operand" "=r") 5249 (unspec:SI 5250 [(match_operand:SI 1 "register_operand" "r") 5251 (match_operand:V4SI 2 "altivec_register_operand" "v")] 5252 UNSPEC_VEXTUWLX))] 5253 "TARGET_P9_VECTOR" 5254 "vextuwlx %0,%1,%2" 5255 [(set_attr "type" "vecsimple")]) 5256 5257;; Vector Extract Unsigned Word Right-Indexed 5258(define_insn "vextuwrx" 5259 [(set (match_operand:SI 0 "register_operand" "=r") 5260 (unspec:SI 5261 [(match_operand:SI 1 "register_operand" "r") 5262 (match_operand:V4SI 2 "altivec_register_operand" "v")] 5263 UNSPEC_VEXTUWRX))] 5264 "TARGET_P9_VECTOR" 5265 "vextuwrx %0,%1,%2" 5266 [(set_attr "type" "vecsimple")]) 5267 5268;; Vector insert/extract word at arbitrary byte values. Note, the little 5269;; endian version needs to adjust the byte number, and the V4SI element in 5270;; vinsert4b. 5271(define_insn "extract4b" 5272 [(set (match_operand:V2DI 0 "vsx_register_operand") 5273 (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa") 5274 (match_operand:QI 2 "const_0_to_12_operand" "n")] 5275 UNSPEC_XXEXTRACTUW))] 5276 "TARGET_P9_VECTOR" 5277{ 5278 if (!BYTES_BIG_ENDIAN) 5279 operands[2] = GEN_INT (12 - INTVAL (operands[2])); 5280 5281 return "xxextractuw %x0,%x1,%2"; 5282}) 5283 5284(define_expand "insert4b" 5285 [(set (match_operand:V16QI 0 "vsx_register_operand") 5286 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand") 5287 (match_operand:V16QI 2 "vsx_register_operand") 5288 (match_operand:QI 3 "const_0_to_12_operand")] 5289 UNSPEC_XXINSERTW))] 5290 "TARGET_P9_VECTOR" 5291{ 5292 if (!BYTES_BIG_ENDIAN) 5293 { 5294 rtx op1 = operands[1]; 5295 rtx v4si_tmp = gen_reg_rtx (V4SImode); 5296 emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx)); 5297 operands[1] = v4si_tmp; 5298 operands[3] = GEN_INT (12 - INTVAL (operands[3])); 5299 } 5300}) 5301 5302(define_insn "*insert4b_internal" 5303 [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") 5304 (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa") 5305 (match_operand:V16QI 2 "vsx_register_operand" "0") 5306 (match_operand:QI 3 "const_0_to_12_operand" "n")] 5307 UNSPEC_XXINSERTW))] 5308 "TARGET_P9_VECTOR" 5309 "xxinsertw %x0,%x1,%3" 5310 [(set_attr "type" "vecperm")]) 5311 5312 5313;; Generate vector extract four float 32 values from left four elements 5314;; of eight element vector of float 16 values. 
;; Vector Count Leading Zero Least-Significant Bits Byte
(define_insn "vclzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
         UNSPEC_VCLZLSBB))]
  "TARGET_P9_VECTOR"
  "vclzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Count Trailing Zero Least-Significant Bits Byte
(define_insn "vctzlsbb_<mode>"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
         UNSPEC_VCTZLSBB))]
  "TARGET_P9_VECTOR"
  "vctzlsbb %0,%1"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Left-Indexed
(define_insn "vextublx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUBLX))]
  "TARGET_P9_VECTOR"
  "vextublx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Byte Right-Indexed
(define_insn "vextubrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V16QI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUBRX))]
  "TARGET_P9_VECTOR"
  "vextubrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Left-Indexed
(define_insn "vextuhlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V8HI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUHLX))]
  "TARGET_P9_VECTOR"
  "vextuhlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Half Word Right-Indexed
(define_insn "vextuhrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V8HI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUHRX))]
  "TARGET_P9_VECTOR"
  "vextuhrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Left-Indexed
(define_insn "vextuwlx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V4SI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUWLX))]
  "TARGET_P9_VECTOR"
  "vextuwlx %0,%1,%2"
  [(set_attr "type" "vecsimple")])

;; Vector Extract Unsigned Word Right-Indexed
(define_insn "vextuwrx"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI
         [(match_operand:SI 1 "register_operand" "r")
          (match_operand:V4SI 2 "altivec_register_operand" "v")]
         UNSPEC_VEXTUWRX))]
  "TARGET_P9_VECTOR"
  "vextuwrx %0,%1,%2"
  [(set_attr "type" "vecsimple")])
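;; How the lsbb counts are typically used on a 0x00/0xff comparison mask
;; (hedged sketch): vclzlsbb returns the number of leading bytes, in
;; register element order, whose least-significant bit is zero, i.e. the
;; index of the first 0xff byte; vctzlsbb counts from the other end:
;;
;;   unsigned int
;;   clz_lsbb (const unsigned char *mask)  /* elements are 0x00 or 0xff */
;;   {
;;     unsigned int i;
;;     for (i = 0; i < 16 && (mask[i] & 1) == 0; i++)
;;       ;
;;     return i;
;;   }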
;; Vector insert/extract word at arbitrary byte values.  Note, the little
;; endian version needs to adjust the byte number, and the V4SI element in
;; vinsert4b.
(define_insn "extract4b"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (unspec:V2DI [(match_operand:V16QI 1 "vsx_register_operand" "wa")
                      (match_operand:QI 2 "const_0_to_12_operand" "n")]
         UNSPEC_XXEXTRACTUW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    operands[2] = GEN_INT (12 - INTVAL (operands[2]));

  return "xxextractuw %x0,%x1,%2";
})

(define_expand "insert4b"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
        (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand")
                       (match_operand:V16QI 2 "vsx_register_operand")
                       (match_operand:QI 3 "const_0_to_12_operand")]
         UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
{
  if (!BYTES_BIG_ENDIAN)
    {
      rtx op1 = operands[1];
      rtx v4si_tmp = gen_reg_rtx (V4SImode);
      emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
      operands[1] = v4si_tmp;
      operands[3] = GEN_INT (12 - INTVAL (operands[3]));
    }
})

(define_insn "*insert4b_internal"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa")
                       (match_operand:V16QI 2 "vsx_register_operand" "0")
                       (match_operand:QI 3 "const_0_to_12_operand" "n")]
         UNSPEC_XXINSERTW))]
  "TARGET_P9_VECTOR"
  "xxinsertw %x0,%x1,%3"
  [(set_attr "type" "vecperm")])
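;; Usage sketch for the 4-byte extract pattern above (hedged; assumes the
;; vec_extract4b built-in is the user-level entry point, taking a byte
;; offset in the range 0..12):
;;
;;   #include <altivec.h>
;;
;;   vector unsigned long long
;;   word_at_offset_4 (vector unsigned char v)
;;   {
;;     return vec_extract4b (v, 4);  /* the 4 bytes starting at offset 4 */
;;   }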
5467(define_expand "revb_v16qi" 5468 [(set (match_operand:V16QI 0 "vsx_register_operand") 5469 (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))] 5470 "" 5471{ 5472 emit_move_insn (operands[0], operands[1]); 5473 DONE; 5474}) 5475 5476;; Swap all bytes in each 16-bit element 5477(define_insn "p9_xxbrh_v8hi" 5478 [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa") 5479 (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))] 5480 "TARGET_P9_VECTOR" 5481 "xxbrh %x0,%x1" 5482 [(set_attr "type" "vecperm")]) 5483 5484 5485;; Operand numbers for the following peephole2 5486(define_constants 5487 [(SFBOOL_TMP_GPR 0) ;; GPR temporary 5488 (SFBOOL_TMP_VSX 1) ;; vector temporary 5489 (SFBOOL_MFVSR_D 2) ;; move to gpr dest 5490 (SFBOOL_MFVSR_A 3) ;; move to gpr src 5491 (SFBOOL_BOOL_D 4) ;; and/ior/xor dest 5492 (SFBOOL_BOOL_A1 5) ;; and/ior/xor arg1 5493 (SFBOOL_BOOL_A2 6) ;; and/ior/xor arg1 5494 (SFBOOL_SHL_D 7) ;; shift left dest 5495 (SFBOOL_SHL_A 8) ;; shift left arg 5496 (SFBOOL_MTVSR_D 9) ;; move to vecter dest 5497 (SFBOOL_MFVSR_A_V4SF 10) ;; SFBOOL_MFVSR_A as V4SFmode 5498 (SFBOOL_BOOL_A_DI 11) ;; SFBOOL_BOOL_A1/A2 as DImode 5499 (SFBOOL_TMP_VSX_DI 12) ;; SFBOOL_TMP_VSX as DImode 5500 (SFBOOL_MTVSR_D_V4SF 13)]) ;; SFBOOL_MTVSRD_D as V4SFmode 5501 5502;; Attempt to optimize some common GLIBC operations using logical operations to 5503;; pick apart SFmode operations. For example, there is code from e_powf.c 5504;; after macro expansion that looks like: 5505;; 5506;; typedef union { 5507;; float value; 5508;; uint32_t word; 5509;; } ieee_float_shape_type; 5510;; 5511;; float t1; 5512;; int32_t is; 5513;; 5514;; do { 5515;; ieee_float_shape_type gf_u; 5516;; gf_u.value = (t1); 5517;; (is) = gf_u.word; 5518;; } while (0); 5519;; 5520;; do { 5521;; ieee_float_shape_type sf_u; 5522;; sf_u.word = (is & 0xfffff000); 5523;; (t1) = sf_u.value; 5524;; } while (0); 5525;; 5526;; 5527;; This would result in two direct move operations (convert to memory format, 5528;; direct move to GPR, do the AND operation, direct move to VSX, convert to 5529;; scalar format). With this peephole, we eliminate the direct move to the 5530;; GPR, and instead move the integer mask value to the vector register after a 5531;; shift and do the VSX logical operation. 
;; Support for ISA 3.0 vector byte reverse

;; Swap all bytes within a vector
(define_insn "p9_xxbrq_v1ti"
  [(set (match_operand:V1TI 0 "vsx_register_operand" "=wa")
        (bswap:V1TI (match_operand:V1TI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrq %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrq_v16qi"
  [(use (match_operand:V16QI 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V16QI 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V1TImode);
  rtx op1 = gen_lowpart (V1TImode, operands[1]);
  emit_insn (gen_p9_xxbrq_v1ti (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V16QImode, op0));
  DONE;
})

;; Swap all bytes in each 64-bit element
(define_insn "p9_xxbrd_v2di"
  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
        (bswap:V2DI (match_operand:V2DI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrd %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrd_v2df"
  [(use (match_operand:V2DF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V2DF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V2DImode);
  rtx op1 = gen_lowpart (V2DImode, operands[1]);
  emit_insn (gen_p9_xxbrd_v2di (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V2DFmode, op0));
  DONE;
})

;; Swap all bytes in each 32-bit element
(define_insn "p9_xxbrw_v4si"
  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
        (bswap:V4SI (match_operand:V4SI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrw %x0,%x1"
  [(set_attr "type" "vecperm")])

(define_expand "p9_xxbrw_v4sf"
  [(use (match_operand:V4SF 0 "vsx_register_operand" "=wa"))
   (use (match_operand:V4SF 1 "vsx_register_operand" "wa"))]
  "TARGET_P9_VECTOR"
{
  rtx op0 = gen_reg_rtx (V4SImode);
  rtx op1 = gen_lowpart (V4SImode, operands[1]);
  emit_insn (gen_p9_xxbrw_v4si (op0, op1));
  emit_move_insn (operands[0], gen_lowpart (V4SFmode, op0));
  DONE;
})

;; Swap all bytes in each element of vector
(define_expand "revb_<mode>"
  [(use (match_operand:VEC_REVB 0 "vsx_register_operand"))
   (use (match_operand:VEC_REVB 1 "vsx_register_operand"))]
  ""
{
  if (TARGET_P9_VECTOR)
    emit_insn (gen_p9_xxbr<VSX_XXBR>_<mode> (operands[0], operands[1]));
  else
    {
      /* We want the elements in reverse order relative to the endian mode
         in use, i.e. in LE mode, put the elements in BE order.  */
      rtx sel = swap_endian_selector_for_mode (<MODE>mode);
      emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
                                           operands[1], sel));
    }

  DONE;
})

;; Reversing bytes in vector char is just a NOP.
(define_expand "revb_v16qi"
  [(set (match_operand:V16QI 0 "vsx_register_operand")
        (bswap:V16QI (match_operand:V16QI 1 "vsx_register_operand")))]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  DONE;
})

;; Swap all bytes in each 16-bit element
(define_insn "p9_xxbrh_v8hi"
  [(set (match_operand:V8HI 0 "vsx_register_operand" "=wa")
        (bswap:V8HI (match_operand:V8HI 1 "vsx_register_operand" "wa")))]
  "TARGET_P9_VECTOR"
  "xxbrh %x0,%x1"
  [(set_attr "type" "vecperm")])
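;; Usage sketch for the byte-reverse patterns above (hedged; vec_revb is
;; assumed to be the built-in that expands through "revb_<mode>"):
;;
;;   #include <altivec.h>
;;
;;   vector unsigned int
;;   bswap_each_word (vector unsigned int v)
;;   {
;;     return vec_revb (v);  /* byte-swap each 32-bit element */
;;   }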
;; Operand numbers for the following peephole2
(define_constants
  [(SFBOOL_TMP_GPR       0)    ;; GPR temporary
   (SFBOOL_TMP_VSX       1)    ;; vector temporary
   (SFBOOL_MFVSR_D       2)    ;; move to gpr dest
   (SFBOOL_MFVSR_A       3)    ;; move to gpr src
   (SFBOOL_BOOL_D        4)    ;; and/ior/xor dest
   (SFBOOL_BOOL_A1       5)    ;; and/ior/xor arg1
   (SFBOOL_BOOL_A2       6)    ;; and/ior/xor arg2
   (SFBOOL_SHL_D         7)    ;; shift left dest
   (SFBOOL_SHL_A         8)    ;; shift left arg
   (SFBOOL_MTVSR_D       9)    ;; move to vector dest
   (SFBOOL_MFVSR_A_V4SF 10)    ;; SFBOOL_MFVSR_A as V4SFmode
   (SFBOOL_BOOL_A_DI    11)    ;; SFBOOL_BOOL_A1/A2 as DImode
   (SFBOOL_TMP_VSX_DI   12)    ;; SFBOOL_TMP_VSX as DImode
   (SFBOOL_MTVSR_D_V4SF 13)])  ;; SFBOOL_MTVSR_D as V4SFmode

;; Attempt to optimize some common GLIBC operations using logical operations
;; to pick apart SFmode operations.  For example, there is code from e_powf.c
;; after macro expansion that looks like:
;;
;;	typedef union {
;;	  float value;
;;	  uint32_t word;
;;	} ieee_float_shape_type;
;;
;;	float t1;
;;	int32_t is;
;;
;;	do {
;;	  ieee_float_shape_type gf_u;
;;	  gf_u.value = (t1);
;;	  (is) = gf_u.word;
;;	} while (0);
;;
;;	do {
;;	  ieee_float_shape_type sf_u;
;;	  sf_u.word = (is & 0xfffff000);
;;	  (t1) = sf_u.value;
;;	} while (0);
;;
;; This would result in two direct move operations (convert to memory format,
;; direct move to GPR, do the AND operation, direct move to VSX, convert to
;; scalar format).  With this peephole, we eliminate the direct move to the
;; GPR, and instead move the integer mask value to the vector register after a
;; shift and do the VSX logical operation.

;; The insns for dealing with SFmode in GPR registers look like:
;; (set (reg:V4SF reg2) (unspec:V4SF [(reg:SF reg1)] UNSPEC_VSX_CVDPSPN))
;;
;; (set (reg:DI reg3) (unspec:DI [(reg:V4SF reg2)] UNSPEC_P8V_RELOAD_FROM_VSX))
;;
;; (set (reg:DI reg4) (and:DI (reg:DI reg3) (reg:DI reg3)))
;;
;; (set (reg:DI reg5) (ashift:DI (reg:DI reg4) (const_int 32)))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:DI reg5)] UNSPEC_P8V_MTVSRD))
;;
;; (set (reg:SF reg6) (unspec:SF [(reg:SF reg6)] UNSPEC_VSX_CVSPDPN))

(define_peephole2
  [(match_scratch:DI SFBOOL_TMP_GPR "r")
   (match_scratch:V4SF SFBOOL_TMP_VSX "wa")

   ;; MFVSRWZ (aka zero_extend)
   (set (match_operand:DI SFBOOL_MFVSR_D "int_reg_operand")
        (zero_extend:DI
         (match_operand:SI SFBOOL_MFVSR_A "vsx_register_operand")))

   ;; AND/IOR/XOR operation on int
   (set (match_operand:SI SFBOOL_BOOL_D "int_reg_operand")
        (and_ior_xor:SI (match_operand:SI SFBOOL_BOOL_A1 "int_reg_operand")
                        (match_operand:SI SFBOOL_BOOL_A2 "reg_or_cint_operand")))

   ;; SLDI
   (set (match_operand:DI SFBOOL_SHL_D "int_reg_operand")
        (ashift:DI (match_operand:DI SFBOOL_SHL_A "int_reg_operand")
                   (const_int 32)))

   ;; MTVSRD
   (set (match_operand:SF SFBOOL_MTVSR_D "vsx_register_operand")
        (unspec:SF [(match_dup SFBOOL_SHL_D)] UNSPEC_P8V_MTVSRD))]

  "TARGET_POWERPC64 && TARGET_DIRECT_MOVE
   /* The REG_P (xxx) tests prevent SUBREGs, which allows us to use REGNO
      to compare registers, when the mode is different.  */
   && REG_P (operands[SFBOOL_MFVSR_D]) && REG_P (operands[SFBOOL_BOOL_D])
   && REG_P (operands[SFBOOL_BOOL_A1]) && REG_P (operands[SFBOOL_SHL_D])
   && REG_P (operands[SFBOOL_SHL_A]) && REG_P (operands[SFBOOL_MTVSR_D])
   && (REG_P (operands[SFBOOL_BOOL_A2])
       || CONST_INT_P (operands[SFBOOL_BOOL_A2]))
   && (REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_MFVSR_D])
       || peep2_reg_dead_p (2, operands[SFBOOL_MFVSR_D]))
   && (REGNO (operands[SFBOOL_MFVSR_D]) == REGNO (operands[SFBOOL_BOOL_A1])
       || (REG_P (operands[SFBOOL_BOOL_A2])
           && REGNO (operands[SFBOOL_MFVSR_D])
                == REGNO (operands[SFBOOL_BOOL_A2])))
   && REGNO (operands[SFBOOL_BOOL_D]) == REGNO (operands[SFBOOL_SHL_A])
   && (REGNO (operands[SFBOOL_SHL_D]) == REGNO (operands[SFBOOL_BOOL_D])
       || peep2_reg_dead_p (3, operands[SFBOOL_BOOL_D]))
   && peep2_reg_dead_p (4, operands[SFBOOL_SHL_D])"
  [(set (match_dup SFBOOL_TMP_GPR)
        (ashift:DI (match_dup SFBOOL_BOOL_A_DI)
                   (const_int 32)))

   (set (match_dup SFBOOL_TMP_VSX_DI)
        (match_dup SFBOOL_TMP_GPR))

   (set (match_dup SFBOOL_MTVSR_D_V4SF)
        (and_ior_xor:V4SF (match_dup SFBOOL_MFVSR_A_V4SF)
                          (match_dup SFBOOL_TMP_VSX)))]
{
  rtx bool_a1 = operands[SFBOOL_BOOL_A1];
  rtx bool_a2 = operands[SFBOOL_BOOL_A2];
  int regno_mfvsr_d = REGNO (operands[SFBOOL_MFVSR_D]);
  int regno_mfvsr_a = REGNO (operands[SFBOOL_MFVSR_A]);
  int regno_tmp_vsx = REGNO (operands[SFBOOL_TMP_VSX]);
  int regno_mtvsr_d = REGNO (operands[SFBOOL_MTVSR_D]);

  if (CONST_INT_P (bool_a2))
    {
      rtx tmp_gpr = operands[SFBOOL_TMP_GPR];
      emit_move_insn (tmp_gpr, bool_a2);
      operands[SFBOOL_BOOL_A_DI] = tmp_gpr;
    }
  else
    {
      int regno_bool_a1 = REGNO (bool_a1);
      int regno_bool_a2 = REGNO (bool_a2);
      int regno_bool_a = (regno_mfvsr_d == regno_bool_a1
                          ? regno_bool_a2 : regno_bool_a1);
      operands[SFBOOL_BOOL_A_DI] = gen_rtx_REG (DImode, regno_bool_a);
    }

  operands[SFBOOL_MFVSR_A_V4SF] = gen_rtx_REG (V4SFmode, regno_mfvsr_a);
  operands[SFBOOL_TMP_VSX_DI] = gen_rtx_REG (DImode, regno_tmp_vsx);
  operands[SFBOOL_MTVSR_D_V4SF] = gen_rtx_REG (V4SFmode, regno_mtvsr_d);
})

;; Support signed/unsigned long long to float conversion vectorization.
;; Note that any_float (pc) here is just for code attribute <su>.
(define_expand "vec_pack<su>_float_v2di"
  [(match_operand:V4SF 0 "vfloat_operand")
   (match_operand:V2DI 1 "vint_operand")
   (match_operand:V2DI 2 "vint_operand")
   (any_float (pc))]
  "TARGET_VSX"
{
  rtx r1 = gen_reg_rtx (V4SFmode);
  rtx r2 = gen_reg_rtx (V4SFmode);
  emit_insn (gen_vsx_xvcv<su>xdsp (r1, operands[1]));
  emit_insn (gen_vsx_xvcv<su>xdsp (r2, operands[2]));
  rs6000_expand_extract_even (operands[0], r1, r2);
  DONE;
})

;; Support float to signed/unsigned long long conversion vectorization.
;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_hi_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

;; Note that any_fix (pc) here is just for code attribute <su>.
(define_expand "vec_unpack_<su>fix_trunc_lo_v4sf"
  [(match_operand:V2DI 0 "vint_operand")
   (match_operand:V4SF 1 "vfloat_operand")
   (any_fix (pc))]
  "TARGET_VSX"
{
  rtx reg = gen_reg_rtx (V4SFmode);
  rs6000_expand_interleave (reg, operands[1], operands[1], !BYTES_BIG_ENDIAN);
  emit_insn (gen_vsx_xvcvsp<su>xds (operands[0], reg));
  DONE;
})

(define_insn "vsx_<xvcvbf16>"
  [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
        (unspec:V16QI [(match_operand:V16QI 1 "vsx_register_operand" "wa")]
         XVCVBF16))]
  "TARGET_POWER10"
  "<xvcvbf16> %x0,%x1"
  [(set_attr "type" "vecfloat")])
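;; Loop sketch that the conversion expanders above let the vectorizer
;; handle (hedged; the function below is illustrative only):
;;
;;   void
;;   to_float (float *out, const long long *in, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       out[i] = (float) in[i];  /* V2DI -> V4SF via vec_pack<su>_float_v2di */
;;   }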