1;; GCC machine description for SSE instructions 2;; Copyright (C) 2005-2013 Free Software Foundation, Inc. 3;; 4;; This file is part of GCC. 5;; 6;; GCC is free software; you can redistribute it and/or modify 7;; it under the terms of the GNU General Public License as published by 8;; the Free Software Foundation; either version 3, or (at your option) 9;; any later version. 10;; 11;; GCC is distributed in the hope that it will be useful, 12;; but WITHOUT ANY WARRANTY; without even the implied warranty of 13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14;; GNU General Public License for more details. 15;; 16;; You should have received a copy of the GNU General Public License 17;; along with GCC; see the file COPYING3. If not see 18;; <http://www.gnu.org/licenses/>. 19 20(define_c_enum "unspec" [ 21 ;; SSE 22 UNSPEC_MOVNT 23 UNSPEC_LOADU 24 UNSPEC_STOREU 25 26 ;; SSE3 27 UNSPEC_LDDQU 28 29 ;; SSSE3 30 UNSPEC_PSHUFB 31 UNSPEC_PSIGN 32 UNSPEC_PALIGNR 33 34 ;; For SSE4A support 35 UNSPEC_EXTRQI 36 UNSPEC_EXTRQ 37 UNSPEC_INSERTQI 38 UNSPEC_INSERTQ 39 40 ;; For SSE4.1 support 41 UNSPEC_BLENDV 42 UNSPEC_INSERTPS 43 UNSPEC_DP 44 UNSPEC_MOVNTDQA 45 UNSPEC_MPSADBW 46 UNSPEC_PHMINPOSUW 47 UNSPEC_PTEST 48 49 ;; For SSE4.2 support 50 UNSPEC_PCMPESTR 51 UNSPEC_PCMPISTR 52 53 ;; For FMA4 support 54 UNSPEC_FMADDSUB 55 UNSPEC_XOP_UNSIGNED_CMP 56 UNSPEC_XOP_TRUEFALSE 57 UNSPEC_XOP_PERMUTE 58 UNSPEC_FRCZ 59 60 ;; For AES support 61 UNSPEC_AESENC 62 UNSPEC_AESENCLAST 63 UNSPEC_AESDEC 64 UNSPEC_AESDECLAST 65 UNSPEC_AESIMC 66 UNSPEC_AESKEYGENASSIST 67 68 ;; For PCLMUL support 69 UNSPEC_PCLMUL 70 71 ;; For AVX support 72 UNSPEC_PCMP 73 UNSPEC_VPERMIL 74 UNSPEC_VPERMIL2 75 UNSPEC_VPERMIL2F128 76 UNSPEC_CAST 77 UNSPEC_VTESTP 78 UNSPEC_VCVTPH2PS 79 UNSPEC_VCVTPS2PH 80 81 ;; For AVX2 support 82 UNSPEC_VPERMVAR 83 UNSPEC_VPERMTI 84 UNSPEC_GATHER 85 UNSPEC_VSIBADDR 86]) 87 88(define_c_enum "unspecv" [ 89 UNSPECV_LDMXCSR 90 UNSPECV_STMXCSR 91 UNSPECV_CLFLUSH 92 UNSPECV_MONITOR 93 UNSPECV_MWAIT 94 UNSPECV_VZEROALL 95 UNSPECV_VZEROUPPER 96]) 97 98;; All vector modes including V?TImode, used in move patterns. 99(define_mode_iterator V16 100 [(V32QI "TARGET_AVX") V16QI 101 (V16HI "TARGET_AVX") V8HI 102 (V8SI "TARGET_AVX") V4SI 103 (V4DI "TARGET_AVX") V2DI 104 (V2TI "TARGET_AVX") V1TI 105 (V8SF "TARGET_AVX") V4SF 106 (V4DF "TARGET_AVX") V2DF]) 107 108;; All vector modes 109(define_mode_iterator V 110 [(V32QI "TARGET_AVX") V16QI 111 (V16HI "TARGET_AVX") V8HI 112 (V8SI "TARGET_AVX") V4SI 113 (V4DI "TARGET_AVX") V2DI 114 (V8SF "TARGET_AVX") V4SF 115 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) 116 117;; All 128bit vector modes 118(define_mode_iterator V_128 119 [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")]) 120 121;; All 256bit vector modes 122(define_mode_iterator V_256 123 [V32QI V16HI V8SI V4DI V8SF V4DF]) 124 125;; All vector float modes 126(define_mode_iterator VF 127 [(V8SF "TARGET_AVX") V4SF 128 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) 129 130;; All SFmode vector float modes 131(define_mode_iterator VF1 132 [(V8SF "TARGET_AVX") V4SF]) 133 134;; All DFmode vector float modes 135(define_mode_iterator VF2 136 [(V4DF "TARGET_AVX") V2DF]) 137 138;; All 128bit vector float modes 139(define_mode_iterator VF_128 140 [V4SF (V2DF "TARGET_SSE2")]) 141 142;; All 256bit vector float modes 143(define_mode_iterator VF_256 144 [V8SF V4DF]) 145 146;; All vector integer modes 147(define_mode_iterator VI 148 [(V32QI "TARGET_AVX") V16QI 149 (V16HI "TARGET_AVX") V8HI 150 (V8SI "TARGET_AVX") V4SI 151 (V4DI "TARGET_AVX") V2DI]) 152 153(define_mode_iterator VI_AVX2 154 [(V32QI "TARGET_AVX2") V16QI 155 (V16HI "TARGET_AVX2") V8HI 156 (V8SI "TARGET_AVX2") V4SI 157 (V4DI "TARGET_AVX2") V2DI]) 158 159;; All QImode vector integer modes 160(define_mode_iterator VI1 161 [(V32QI "TARGET_AVX") V16QI]) 162 163;; All DImode vector integer modes 164(define_mode_iterator VI8 165 [(V4DI "TARGET_AVX") V2DI]) 166 167(define_mode_iterator VI1_AVX2 168 [(V32QI "TARGET_AVX2") V16QI]) 169 170(define_mode_iterator VI2_AVX2 171 [(V16HI "TARGET_AVX2") V8HI]) 172 173(define_mode_iterator VI4_AVX2 174 [(V8SI "TARGET_AVX2") V4SI]) 175 176(define_mode_iterator VI8_AVX2 177 [(V4DI "TARGET_AVX2") V2DI]) 178 179;; ??? We should probably use TImode instead. 180(define_mode_iterator VIMAX_AVX2 181 [(V2TI "TARGET_AVX2") V1TI]) 182 183;; ??? This should probably be dropped in favor of VIMAX_AVX2. 184(define_mode_iterator SSESCALARMODE 185 [(V2TI "TARGET_AVX2") TI]) 186 187(define_mode_iterator VI12_AVX2 188 [(V32QI "TARGET_AVX2") V16QI 189 (V16HI "TARGET_AVX2") V8HI]) 190 191(define_mode_iterator VI24_AVX2 192 [(V16HI "TARGET_AVX2") V8HI 193 (V8SI "TARGET_AVX2") V4SI]) 194 195(define_mode_iterator VI124_AVX2 196 [(V32QI "TARGET_AVX2") V16QI 197 (V16HI "TARGET_AVX2") V8HI 198 (V8SI "TARGET_AVX2") V4SI]) 199 200(define_mode_iterator VI248_AVX2 201 [(V16HI "TARGET_AVX2") V8HI 202 (V8SI "TARGET_AVX2") V4SI 203 (V4DI "TARGET_AVX2") V2DI]) 204 205(define_mode_iterator VI48_AVX2 206 [(V8SI "TARGET_AVX2") V4SI 207 (V4DI "TARGET_AVX2") V2DI]) 208 209(define_mode_iterator V48_AVX2 210 [V4SF V2DF 211 V8SF V4DF 212 (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") 213 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) 214 215(define_mode_attr sse2_avx2 216 [(V16QI "sse2") (V32QI "avx2") 217 (V8HI "sse2") (V16HI "avx2") 218 (V4SI "sse2") (V8SI "avx2") 219 (V2DI "sse2") (V4DI "avx2") 220 (V1TI "sse2") (V2TI "avx2")]) 221 222(define_mode_attr ssse3_avx2 223 [(V16QI "ssse3") (V32QI "avx2") 224 (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") 225 (V4SI "ssse3") (V8SI "avx2") 226 (V2DI "ssse3") (V4DI "avx2") 227 (TI "ssse3") (V2TI "avx2")]) 228 229(define_mode_attr sse4_1_avx2 230 [(V16QI "sse4_1") (V32QI "avx2") 231 (V8HI "sse4_1") (V16HI "avx2") 232 (V4SI "sse4_1") (V8SI "avx2") 233 (V2DI "sse4_1") (V4DI "avx2")]) 234 235(define_mode_attr avx_avx2 236 [(V4SF "avx") (V2DF "avx") 237 (V8SF "avx") (V4DF "avx") 238 (V4SI "avx2") (V2DI "avx2") 239 (V8SI "avx2") (V4DI "avx2")]) 240 241(define_mode_attr vec_avx2 242 [(V16QI "vec") (V32QI "avx2") 243 (V8HI "vec") (V16HI "avx2") 244 (V4SI "vec") (V8SI "avx2") 245 (V2DI "vec") (V4DI "avx2")]) 246 247(define_mode_attr ssedoublemode 248 [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") 249 (V32QI "V32HI") (V16QI "V16HI")]) 250 251(define_mode_attr ssebytemode 252 [(V4DI "V32QI") (V2DI "V16QI")]) 253 254;; All 128bit vector integer modes 255(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI]) 256 257;; All 256bit vector integer modes 258(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI]) 259 260;; Random 128bit vector integer mode combinations 261(define_mode_iterator VI12_128 [V16QI V8HI]) 262(define_mode_iterator VI14_128 [V16QI V4SI]) 263(define_mode_iterator VI124_128 [V16QI V8HI V4SI]) 264(define_mode_iterator VI128_128 [V16QI V8HI V2DI]) 265(define_mode_iterator VI24_128 [V8HI V4SI]) 266(define_mode_iterator VI248_128 [V8HI V4SI V2DI]) 267(define_mode_iterator VI48_128 [V4SI V2DI]) 268 269;; Random 256bit vector integer mode combinations 270(define_mode_iterator VI124_256 [V32QI V16HI V8SI]) 271(define_mode_iterator VI48_256 [V8SI V4DI]) 272 273;; Int-float size matches 274(define_mode_iterator VI4F_128 [V4SI V4SF]) 275(define_mode_iterator VI8F_128 [V2DI V2DF]) 276(define_mode_iterator VI4F_256 [V8SI V8SF]) 277(define_mode_iterator VI8F_256 [V4DI V4DF]) 278 279;; Mapping from float mode to required SSE level 280(define_mode_attr sse 281 [(SF "sse") (DF "sse2") 282 (V4SF "sse") (V2DF "sse2") 283 (V8SF "avx") (V4DF "avx")]) 284 285(define_mode_attr sse2 286 [(V16QI "sse2") (V32QI "avx") 287 (V2DI "sse2") (V4DI "avx")]) 288 289(define_mode_attr sse3 290 [(V16QI "sse3") (V32QI "avx")]) 291 292(define_mode_attr sse4_1 293 [(V4SF "sse4_1") (V2DF "sse4_1") 294 (V8SF "avx") (V4DF "avx")]) 295 296(define_mode_attr avxsizesuffix 297 [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256") 298 (V16QI "") (V8HI "") (V4SI "") (V2DI "") 299 (V8SF "256") (V4DF "256") 300 (V4SF "") (V2DF "")]) 301 302;; SSE instruction mode 303(define_mode_attr sseinsnmode 304 [(V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI") 305 (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") 306 (V8SF "V8SF") (V4DF "V4DF") 307 (V4SF "V4SF") (V2DF "V2DF") 308 (TI "TI")]) 309 310;; Mapping of vector float modes to an integer mode of the same size 311(define_mode_attr sseintvecmode 312 [(V8SF "V8SI") (V4DF "V4DI") 313 (V4SF "V4SI") (V2DF "V2DI") 314 (V8SI "V8SI") (V4DI "V4DI") 315 (V4SI "V4SI") (V2DI "V2DI") 316 (V16HI "V16HI") (V8HI "V8HI") 317 (V32QI "V32QI") (V16QI "V16QI")]) 318 319(define_mode_attr sseintvecmodelower 320 [(V8SF "v8si") (V4DF "v4di") 321 (V4SF "v4si") (V2DF "v2di") 322 (V8SI "v8si") (V4DI "v4di") 323 (V4SI "v4si") (V2DI "v2di") 324 (V16HI "v16hi") (V8HI "v8hi") 325 (V32QI "v32qi") (V16QI "v16qi")]) 326 327;; Mapping of vector modes to a vector mode of double size 328(define_mode_attr ssedoublevecmode 329 [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI") 330 (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI") 331 (V8SF "V16SF") (V4DF "V8DF") 332 (V4SF "V8SF") (V2DF "V4DF")]) 333 334;; Mapping of vector modes to a vector mode of half size 335(define_mode_attr ssehalfvecmode 336 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI") 337 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") 338 (V8SF "V4SF") (V4DF "V2DF") 339 (V4SF "V2SF")]) 340 341;; Mapping of vector modes ti packed single mode of the same size 342(define_mode_attr ssePSmode 343 [(V32QI "V8SF") (V16QI "V4SF") 344 (V16HI "V8SF") (V8HI "V4SF") 345 (V8SI "V8SF") (V4SI "V4SF") 346 (V4DI "V8SF") (V2DI "V4SF") 347 (V2TI "V8SF") (V1TI "V4SF") 348 (V8SF "V8SF") (V4SF "V4SF") 349 (V4DF "V8SF") (V2DF "V4SF")]) 350 351;; Mapping of vector modes back to the scalar modes 352(define_mode_attr ssescalarmode 353 [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") 354 (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") 355 (V8SF "SF") (V4DF "DF") 356 (V4SF "SF") (V2DF "DF")]) 357 358;; Number of scalar elements in each vector type 359(define_mode_attr ssescalarnum 360 [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4") 361 (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") 362 (V8SF "8") (V4DF "4") 363 (V4SF "4") (V2DF "2")]) 364 365;; SSE prefix for integer vector modes 366(define_mode_attr sseintprefix 367 [(V2DI "p") (V2DF "") 368 (V4DI "p") (V4DF "") 369 (V4SI "p") (V4SF "") 370 (V8SI "p") (V8SF "")]) 371 372;; SSE scalar suffix for vector modes 373(define_mode_attr ssescalarmodesuffix 374 [(SF "ss") (DF "sd") 375 (V8SF "ss") (V4DF "sd") 376 (V4SF "ss") (V2DF "sd") 377 (V8SI "ss") (V4DI "sd") 378 (V4SI "d")]) 379 380;; Pack/unpack vector modes 381(define_mode_attr sseunpackmode 382 [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI") 383 (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")]) 384 385(define_mode_attr ssepackmode 386 [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI") 387 (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")]) 388 389;; Mapping of the max integer size for xop rotate immediate constraint 390(define_mode_attr sserotatemax 391 [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) 392 393;; Mapping of mode to cast intrinsic name 394(define_mode_attr castmode [(V8SI "si") (V8SF "ps") (V4DF "pd")]) 395 396;; Instruction suffix for sign and zero extensions. 397(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")]) 398 399;; i128 for integer vectors and TARGET_AVX2, f128 otherwise. 400(define_mode_attr i128 401 [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128") 402 (V8SI "%~128") (V4DI "%~128")]) 403 404;; Mix-n-match 405(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) 406 407;; Mapping of immediate bits for blend instructions 408(define_mode_attr blendbits 409 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) 410 411;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. 412 413;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 414;; 415;; Move patterns 416;; 417;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 418 419;; All of these patterns are enabled for SSE1 as well as SSE2. 420;; This is essential for maintaining stable calling conventions. 421 422(define_expand "mov<mode>" 423 [(set (match_operand:V16 0 "nonimmediate_operand") 424 (match_operand:V16 1 "nonimmediate_operand"))] 425 "TARGET_SSE" 426{ 427 ix86_expand_vector_move (<MODE>mode, operands); 428 DONE; 429}) 430 431(define_insn "*mov<mode>_internal" 432 [(set (match_operand:V16 0 "nonimmediate_operand" "=x,x ,m") 433 (match_operand:V16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] 434 "TARGET_SSE 435 && (register_operand (operands[0], <MODE>mode) 436 || register_operand (operands[1], <MODE>mode))" 437{ 438 switch (which_alternative) 439 { 440 case 0: 441 return standard_sse_constant_opcode (insn, operands[1]); 442 case 1: 443 case 2: 444 switch (get_attr_mode (insn)) 445 { 446 case MODE_V8SF: 447 case MODE_V4SF: 448 if (TARGET_AVX 449 && (misaligned_operand (operands[0], <MODE>mode) 450 || misaligned_operand (operands[1], <MODE>mode))) 451 return "vmovups\t{%1, %0|%0, %1}"; 452 else 453 return "%vmovaps\t{%1, %0|%0, %1}"; 454 455 case MODE_V4DF: 456 case MODE_V2DF: 457 if (TARGET_AVX 458 && (misaligned_operand (operands[0], <MODE>mode) 459 || misaligned_operand (operands[1], <MODE>mode))) 460 return "vmovupd\t{%1, %0|%0, %1}"; 461 else 462 return "%vmovapd\t{%1, %0|%0, %1}"; 463 464 case MODE_OI: 465 case MODE_TI: 466 if (TARGET_AVX 467 && (misaligned_operand (operands[0], <MODE>mode) 468 || misaligned_operand (operands[1], <MODE>mode))) 469 return "vmovdqu\t{%1, %0|%0, %1}"; 470 else 471 return "%vmovdqa\t{%1, %0|%0, %1}"; 472 473 default: 474 gcc_unreachable (); 475 } 476 default: 477 gcc_unreachable (); 478 } 479} 480 [(set_attr "type" "sselog1,ssemov,ssemov") 481 (set_attr "prefix" "maybe_vex") 482 (set (attr "mode") 483 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 484 (const_string "<ssePSmode>") 485 (and (eq_attr "alternative" "2") 486 (match_test "TARGET_SSE_TYPELESS_STORES")) 487 (const_string "<ssePSmode>") 488 (match_test "TARGET_AVX") 489 (const_string "<sseinsnmode>") 490 (ior (not (match_test "TARGET_SSE2")) 491 (match_test "optimize_function_for_size_p (cfun)")) 492 (const_string "V4SF") 493 (and (eq_attr "alternative" "0") 494 (match_test "TARGET_SSE_LOAD0_BY_PXOR")) 495 (const_string "TI") 496 ] 497 (const_string "<sseinsnmode>")))]) 498 499(define_insn "sse2_movq128" 500 [(set (match_operand:V2DI 0 "register_operand" "=x") 501 (vec_concat:V2DI 502 (vec_select:DI 503 (match_operand:V2DI 1 "nonimmediate_operand" "xm") 504 (parallel [(const_int 0)])) 505 (const_int 0)))] 506 "TARGET_SSE2" 507 "%vmovq\t{%1, %0|%0, %1}" 508 [(set_attr "type" "ssemov") 509 (set_attr "prefix" "maybe_vex") 510 (set_attr "mode" "TI")]) 511 512;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm. 513;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded 514;; from memory, we'd prefer to load the memory directly into the %xmm 515;; register. To facilitate this happy circumstance, this pattern won't 516;; split until after register allocation. If the 64-bit value didn't 517;; come from memory, this is the best we can do. This is much better 518;; than storing %edx:%eax into a stack temporary and loading an %xmm 519;; from there. 520 521(define_insn_and_split "movdi_to_sse" 522 [(parallel 523 [(set (match_operand:V4SI 0 "register_operand" "=?x,x") 524 (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0)) 525 (clobber (match_scratch:V4SI 2 "=&x,X"))])] 526 "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES" 527 "#" 528 "&& reload_completed" 529 [(const_int 0)] 530{ 531 if (register_operand (operands[1], DImode)) 532 { 533 /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax). 534 Assemble the 64-bit DImode value in an xmm register. */ 535 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), 536 gen_rtx_SUBREG (SImode, operands[1], 0))); 537 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), 538 gen_rtx_SUBREG (SImode, operands[1], 4))); 539 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], 540 operands[2])); 541 } 542 else if (memory_operand (operands[1], DImode)) 543 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), 544 operands[1], const0_rtx)); 545 else 546 gcc_unreachable (); 547}) 548 549(define_split 550 [(set (match_operand:V4SF 0 "register_operand") 551 (match_operand:V4SF 1 "zero_extended_scalar_load_operand"))] 552 "TARGET_SSE && reload_completed" 553 [(set (match_dup 0) 554 (vec_merge:V4SF 555 (vec_duplicate:V4SF (match_dup 1)) 556 (match_dup 2) 557 (const_int 1)))] 558{ 559 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0); 560 operands[2] = CONST0_RTX (V4SFmode); 561}) 562 563(define_split 564 [(set (match_operand:V2DF 0 "register_operand") 565 (match_operand:V2DF 1 "zero_extended_scalar_load_operand"))] 566 "TARGET_SSE2 && reload_completed" 567 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))] 568{ 569 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0); 570 operands[2] = CONST0_RTX (DFmode); 571}) 572 573(define_expand "push<mode>1" 574 [(match_operand:V16 0 "register_operand")] 575 "TARGET_SSE" 576{ 577 ix86_expand_push (<MODE>mode, operands[0]); 578 DONE; 579}) 580 581(define_expand "movmisalign<mode>" 582 [(set (match_operand:V16 0 "nonimmediate_operand") 583 (match_operand:V16 1 "nonimmediate_operand"))] 584 "TARGET_SSE" 585{ 586 ix86_expand_vector_move_misalign (<MODE>mode, operands); 587 DONE; 588}) 589 590(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>" 591 [(set (match_operand:VF 0 "register_operand" "=x") 592 (unspec:VF 593 [(match_operand:VF 1 "memory_operand" "m")] 594 UNSPEC_LOADU))] 595 "TARGET_SSE" 596{ 597 switch (get_attr_mode (insn)) 598 { 599 case MODE_V8SF: 600 case MODE_V4SF: 601 return "%vmovups\t{%1, %0|%0, %1}"; 602 default: 603 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"; 604 } 605} 606 [(set_attr "type" "ssemov") 607 (set_attr "movu" "1") 608 (set_attr "ssememalign" "8") 609 (set_attr "prefix" "maybe_vex") 610 (set (attr "mode") 611 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 612 (const_string "<ssePSmode>") 613 (match_test "TARGET_AVX") 614 (const_string "<MODE>") 615 (match_test "optimize_function_for_size_p (cfun)") 616 (const_string "V4SF") 617 ] 618 (const_string "<MODE>")))]) 619 620(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>" 621 [(set (match_operand:VF 0 "memory_operand" "=m") 622 (unspec:VF 623 [(match_operand:VF 1 "register_operand" "x")] 624 UNSPEC_STOREU))] 625 "TARGET_SSE" 626{ 627 switch (get_attr_mode (insn)) 628 { 629 case MODE_V8SF: 630 case MODE_V4SF: 631 return "%vmovups\t{%1, %0|%0, %1}"; 632 default: 633 return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"; 634 } 635} 636 [(set_attr "type" "ssemov") 637 (set_attr "movu" "1") 638 (set_attr "ssememalign" "8") 639 (set_attr "prefix" "maybe_vex") 640 (set (attr "mode") 641 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 642 (match_test "TARGET_SSE_TYPELESS_STORES")) 643 (const_string "<ssePSmode>") 644 (match_test "TARGET_AVX") 645 (const_string "<MODE>") 646 (match_test "optimize_function_for_size_p (cfun)") 647 (const_string "V4SF") 648 ] 649 (const_string "<MODE>")))]) 650 651(define_insn "<sse2>_loaddqu<avxsizesuffix>" 652 [(set (match_operand:VI1 0 "register_operand" "=x") 653 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] 654 UNSPEC_LOADU))] 655 "TARGET_SSE2" 656{ 657 switch (get_attr_mode (insn)) 658 { 659 case MODE_V8SF: 660 case MODE_V4SF: 661 return "%vmovups\t{%1, %0|%0, %1}"; 662 default: 663 return "%vmovdqu\t{%1, %0|%0, %1}"; 664 } 665} 666 [(set_attr "type" "ssemov") 667 (set_attr "movu" "1") 668 (set_attr "ssememalign" "8") 669 (set (attr "prefix_data16") 670 (if_then_else 671 (match_test "TARGET_AVX") 672 (const_string "*") 673 (const_string "1"))) 674 (set_attr "prefix" "maybe_vex") 675 (set (attr "mode") 676 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 677 (const_string "<ssePSmode>") 678 (match_test "TARGET_AVX") 679 (const_string "<sseinsnmode>") 680 (match_test "optimize_function_for_size_p (cfun)") 681 (const_string "V4SF") 682 ] 683 (const_string "<sseinsnmode>")))]) 684 685(define_insn "<sse2>_storedqu<avxsizesuffix>" 686 [(set (match_operand:VI1 0 "memory_operand" "=m") 687 (unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")] 688 UNSPEC_STOREU))] 689 "TARGET_SSE2" 690{ 691 switch (get_attr_mode (insn)) 692 { 693 case MODE_V8SF: 694 case MODE_V4SF: 695 return "%vmovups\t{%1, %0|%0, %1}"; 696 default: 697 return "%vmovdqu\t{%1, %0|%0, %1}"; 698 } 699} 700 [(set_attr "type" "ssemov") 701 (set_attr "movu" "1") 702 (set_attr "ssememalign" "8") 703 (set (attr "prefix_data16") 704 (if_then_else 705 (match_test "TARGET_AVX") 706 (const_string "*") 707 (const_string "1"))) 708 (set_attr "prefix" "maybe_vex") 709 (set (attr "mode") 710 (cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 711 (match_test "TARGET_SSE_TYPELESS_STORES")) 712 (const_string "<ssePSmode>") 713 (match_test "TARGET_AVX") 714 (const_string "<sseinsnmode>") 715 (match_test "optimize_function_for_size_p (cfun)") 716 (const_string "V4SF") 717 ] 718 (const_string "<sseinsnmode>")))]) 719 720(define_insn "<sse3>_lddqu<avxsizesuffix>" 721 [(set (match_operand:VI1 0 "register_operand" "=x") 722 (unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")] 723 UNSPEC_LDDQU))] 724 "TARGET_SSE3" 725 "%vlddqu\t{%1, %0|%0, %1}" 726 [(set_attr "type" "ssemov") 727 (set_attr "movu" "1") 728 (set_attr "ssememalign" "8") 729 (set (attr "prefix_data16") 730 (if_then_else 731 (match_test "TARGET_AVX") 732 (const_string "*") 733 (const_string "0"))) 734 (set (attr "prefix_rep") 735 (if_then_else 736 (match_test "TARGET_AVX") 737 (const_string "*") 738 (const_string "1"))) 739 (set_attr "prefix" "maybe_vex") 740 (set_attr "mode" "<sseinsnmode>")]) 741 742(define_insn "sse2_movnti<mode>" 743 [(set (match_operand:SWI48 0 "memory_operand" "=m") 744 (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")] 745 UNSPEC_MOVNT))] 746 "TARGET_SSE2" 747 "movnti\t{%1, %0|%0, %1}" 748 [(set_attr "type" "ssemov") 749 (set_attr "prefix_data16" "0") 750 (set_attr "mode" "<MODE>")]) 751 752(define_insn "<sse>_movnt<mode>" 753 [(set (match_operand:VF 0 "memory_operand" "=m") 754 (unspec:VF [(match_operand:VF 1 "register_operand" "x")] 755 UNSPEC_MOVNT))] 756 "TARGET_SSE" 757 "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}" 758 [(set_attr "type" "ssemov") 759 (set_attr "prefix" "maybe_vex") 760 (set_attr "mode" "<MODE>")]) 761 762(define_insn "<sse2>_movnt<mode>" 763 [(set (match_operand:VI8 0 "memory_operand" "=m") 764 (unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")] 765 UNSPEC_MOVNT))] 766 "TARGET_SSE2" 767 "%vmovntdq\t{%1, %0|%0, %1}" 768 [(set_attr "type" "ssecvt") 769 (set (attr "prefix_data16") 770 (if_then_else 771 (match_test "TARGET_AVX") 772 (const_string "*") 773 (const_string "1"))) 774 (set_attr "prefix" "maybe_vex") 775 (set_attr "mode" "<sseinsnmode>")]) 776 777; Expand patterns for non-temporal stores. At the moment, only those 778; that directly map to insns are defined; it would be possible to 779; define patterns for other modes that would expand to several insns. 780 781;; Modes handled by storent patterns. 782(define_mode_iterator STORENT_MODE 783 [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2") 784 (SF "TARGET_SSE4A") (DF "TARGET_SSE4A") 785 (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2") 786 (V8SF "TARGET_AVX") V4SF 787 (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) 788 789(define_expand "storent<mode>" 790 [(set (match_operand:STORENT_MODE 0 "memory_operand") 791 (unspec:STORENT_MODE 792 [(match_operand:STORENT_MODE 1 "register_operand")] 793 UNSPEC_MOVNT))] 794 "TARGET_SSE") 795 796;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 797;; 798;; Parallel floating point arithmetic 799;; 800;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 801 802(define_expand "<code><mode>2" 803 [(set (match_operand:VF 0 "register_operand") 804 (absneg:VF 805 (match_operand:VF 1 "register_operand")))] 806 "TARGET_SSE" 807 "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;") 808 809(define_insn_and_split "*absneg<mode>2" 810 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x") 811 (match_operator:VF 3 "absneg_operator" 812 [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")])) 813 (use (match_operand:VF 2 "nonimmediate_operand" "xm,0, xm,x"))] 814 "TARGET_SSE" 815 "#" 816 "&& reload_completed" 817 [(const_int 0)] 818{ 819 enum rtx_code absneg_op; 820 rtx op1, op2; 821 rtx t; 822 823 if (TARGET_AVX) 824 { 825 if (MEM_P (operands[1])) 826 op1 = operands[2], op2 = operands[1]; 827 else 828 op1 = operands[1], op2 = operands[2]; 829 } 830 else 831 { 832 op1 = operands[0]; 833 if (rtx_equal_p (operands[0], operands[1])) 834 op2 = operands[2]; 835 else 836 op2 = operands[1]; 837 } 838 839 absneg_op = GET_CODE (operands[3]) == NEG ? XOR : AND; 840 t = gen_rtx_fmt_ee (absneg_op, <MODE>mode, op1, op2); 841 t = gen_rtx_SET (VOIDmode, operands[0], t); 842 emit_insn (t); 843 DONE; 844} 845 [(set_attr "isa" "noavx,noavx,avx,avx")]) 846 847(define_expand "<plusminus_insn><mode>3" 848 [(set (match_operand:VF 0 "register_operand") 849 (plusminus:VF 850 (match_operand:VF 1 "nonimmediate_operand") 851 (match_operand:VF 2 "nonimmediate_operand")))] 852 "TARGET_SSE" 853 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 854 855(define_insn "*<plusminus_insn><mode>3" 856 [(set (match_operand:VF 0 "register_operand" "=x,x") 857 (plusminus:VF 858 (match_operand:VF 1 "nonimmediate_operand" "<comm>0,x") 859 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] 860 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 861 "@ 862 <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} 863 v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 864 [(set_attr "isa" "noavx,avx") 865 (set_attr "type" "sseadd") 866 (set_attr "prefix" "orig,vex") 867 (set_attr "mode" "<MODE>")]) 868 869(define_insn "<sse>_vm<plusminus_insn><mode>3" 870 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 871 (vec_merge:VF_128 872 (plusminus:VF_128 873 (match_operand:VF_128 1 "register_operand" "0,x") 874 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) 875 (match_dup 1) 876 (const_int 1)))] 877 "TARGET_SSE" 878 "@ 879 <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2} 880 v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" 881 [(set_attr "isa" "noavx,avx") 882 (set_attr "type" "sseadd") 883 (set_attr "prefix" "orig,vex") 884 (set_attr "mode" "<ssescalarmode>")]) 885 886(define_expand "mul<mode>3" 887 [(set (match_operand:VF 0 "register_operand") 888 (mult:VF 889 (match_operand:VF 1 "nonimmediate_operand") 890 (match_operand:VF 2 "nonimmediate_operand")))] 891 "TARGET_SSE" 892 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") 893 894(define_insn "*mul<mode>3" 895 [(set (match_operand:VF 0 "register_operand" "=x,x") 896 (mult:VF 897 (match_operand:VF 1 "nonimmediate_operand" "%0,x") 898 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] 899 "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" 900 "@ 901 mul<ssemodesuffix>\t{%2, %0|%0, %2} 902 vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 903 [(set_attr "isa" "noavx,avx") 904 (set_attr "type" "ssemul") 905 (set_attr "prefix" "orig,vex") 906 (set_attr "btver2_decode" "direct,double") 907 (set_attr "mode" "<MODE>")]) 908 909(define_insn "<sse>_vmmul<mode>3" 910 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 911 (vec_merge:VF_128 912 (mult:VF_128 913 (match_operand:VF_128 1 "register_operand" "0,x") 914 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) 915 (match_dup 1) 916 (const_int 1)))] 917 "TARGET_SSE" 918 "@ 919 mul<ssescalarmodesuffix>\t{%2, %0|%0, %2} 920 vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" 921 [(set_attr "isa" "noavx,avx") 922 (set_attr "type" "ssemul") 923 (set_attr "prefix" "orig,vex") 924 (set_attr "mode" "<ssescalarmode>")]) 925 926(define_expand "div<mode>3" 927 [(set (match_operand:VF2 0 "register_operand") 928 (div:VF2 (match_operand:VF2 1 "register_operand") 929 (match_operand:VF2 2 "nonimmediate_operand")))] 930 "TARGET_SSE2" 931 "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);") 932 933(define_expand "div<mode>3" 934 [(set (match_operand:VF1 0 "register_operand") 935 (div:VF1 (match_operand:VF1 1 "register_operand") 936 (match_operand:VF1 2 "nonimmediate_operand")))] 937 "TARGET_SSE" 938{ 939 ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands); 940 941 if (TARGET_SSE_MATH 942 && TARGET_RECIP_VEC_DIV 943 && !optimize_insn_for_size_p () 944 && flag_finite_math_only && !flag_trapping_math 945 && flag_unsafe_math_optimizations) 946 { 947 ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode); 948 DONE; 949 } 950}) 951 952(define_insn "<sse>_div<mode>3" 953 [(set (match_operand:VF 0 "register_operand" "=x,x") 954 (div:VF 955 (match_operand:VF 1 "register_operand" "0,x") 956 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] 957 "TARGET_SSE" 958 "@ 959 div<ssemodesuffix>\t{%2, %0|%0, %2} 960 vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 961 [(set_attr "isa" "noavx,avx") 962 (set_attr "type" "ssediv") 963 (set_attr "prefix" "orig,vex") 964 (set_attr "mode" "<MODE>")]) 965 966(define_insn "<sse>_vmdiv<mode>3" 967 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 968 (vec_merge:VF_128 969 (div:VF_128 970 (match_operand:VF_128 1 "register_operand" "0,x") 971 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) 972 (match_dup 1) 973 (const_int 1)))] 974 "TARGET_SSE" 975 "@ 976 div<ssescalarmodesuffix>\t{%2, %0|%0, %2} 977 vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" 978 [(set_attr "isa" "noavx,avx") 979 (set_attr "type" "ssediv") 980 (set_attr "prefix" "orig,vex") 981 (set_attr "btver2_decode" "direct,double") 982 (set_attr "mode" "<ssescalarmode>")]) 983 984(define_insn "<sse>_rcp<mode>2" 985 [(set (match_operand:VF1 0 "register_operand" "=x") 986 (unspec:VF1 987 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] 988 "TARGET_SSE" 989 "%vrcpps\t{%1, %0|%0, %1}" 990 [(set_attr "type" "sse") 991 (set_attr "atom_sse_attr" "rcp") 992 (set_attr "btver2_sse_attr" "rcp") 993 (set_attr "prefix" "maybe_vex") 994 (set_attr "mode" "<MODE>")]) 995 996(define_insn "sse_vmrcpv4sf2" 997 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 998 (vec_merge:V4SF 999 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] 1000 UNSPEC_RCP) 1001 (match_operand:V4SF 2 "register_operand" "0,x") 1002 (const_int 1)))] 1003 "TARGET_SSE" 1004 "@ 1005 rcpss\t{%1, %0|%0, %1} 1006 vrcpss\t{%1, %2, %0|%0, %2, %1}" 1007 [(set_attr "isa" "noavx,avx") 1008 (set_attr "type" "sse") 1009 (set_attr "ssememalign" "32") 1010 (set_attr "atom_sse_attr" "rcp") 1011 (set_attr "btver2_sse_attr" "rcp") 1012 (set_attr "prefix" "orig,vex") 1013 (set_attr "mode" "SF")]) 1014 1015(define_expand "sqrt<mode>2" 1016 [(set (match_operand:VF2 0 "register_operand") 1017 (sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))] 1018 "TARGET_SSE2") 1019 1020(define_expand "sqrt<mode>2" 1021 [(set (match_operand:VF1 0 "register_operand") 1022 (sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))] 1023 "TARGET_SSE" 1024{ 1025 if (TARGET_SSE_MATH 1026 && TARGET_RECIP_VEC_SQRT 1027 && !optimize_insn_for_size_p () 1028 && flag_finite_math_only && !flag_trapping_math 1029 && flag_unsafe_math_optimizations) 1030 { 1031 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false); 1032 DONE; 1033 } 1034}) 1035 1036(define_insn "<sse>_sqrt<mode>2" 1037 [(set (match_operand:VF 0 "register_operand" "=x") 1038 (sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "xm")))] 1039 "TARGET_SSE" 1040 "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}" 1041 [(set_attr "type" "sse") 1042 (set_attr "atom_sse_attr" "sqrt") 1043 (set_attr "btver2_sse_attr" "sqrt") 1044 (set_attr "prefix" "maybe_vex") 1045 (set_attr "mode" "<MODE>")]) 1046 1047(define_insn "<sse>_vmsqrt<mode>2" 1048 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 1049 (vec_merge:VF_128 1050 (sqrt:VF_128 1051 (match_operand:VF_128 1 "nonimmediate_operand" "xm,xm")) 1052 (match_operand:VF_128 2 "register_operand" "0,x") 1053 (const_int 1)))] 1054 "TARGET_SSE" 1055 "@ 1056 sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1} 1057 vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" 1058 [(set_attr "isa" "noavx,avx") 1059 (set_attr "type" "sse") 1060 (set_attr "atom_sse_attr" "sqrt") 1061 (set_attr "btver2_sse_attr" "sqrt") 1062 (set_attr "prefix" "orig,vex") 1063 (set_attr "mode" "<ssescalarmode>")]) 1064 1065(define_expand "rsqrt<mode>2" 1066 [(set (match_operand:VF1 0 "register_operand") 1067 (unspec:VF1 1068 [(match_operand:VF1 1 "nonimmediate_operand")] UNSPEC_RSQRT))] 1069 "TARGET_SSE_MATH" 1070{ 1071 ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true); 1072 DONE; 1073}) 1074 1075(define_insn "<sse>_rsqrt<mode>2" 1076 [(set (match_operand:VF1 0 "register_operand" "=x") 1077 (unspec:VF1 1078 [(match_operand:VF1 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))] 1079 "TARGET_SSE" 1080 "%vrsqrtps\t{%1, %0|%0, %1}" 1081 [(set_attr "type" "sse") 1082 (set_attr "prefix" "maybe_vex") 1083 (set_attr "mode" "<MODE>")]) 1084 1085(define_insn "sse_vmrsqrtv4sf2" 1086 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 1087 (vec_merge:V4SF 1088 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")] 1089 UNSPEC_RSQRT) 1090 (match_operand:V4SF 2 "register_operand" "0,x") 1091 (const_int 1)))] 1092 "TARGET_SSE" 1093 "@ 1094 rsqrtss\t{%1, %0|%0, %1} 1095 vrsqrtss\t{%1, %2, %0|%0, %2, %1}" 1096 [(set_attr "isa" "noavx,avx") 1097 (set_attr "type" "sse") 1098 (set_attr "ssememalign" "32") 1099 (set_attr "prefix" "orig,vex") 1100 (set_attr "mode" "SF")]) 1101 1102;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX 1103;; isn't really correct, as those rtl operators aren't defined when 1104;; applied to NaNs. Hopefully the optimizers won't get too smart on us. 1105 1106(define_expand "<code><mode>3" 1107 [(set (match_operand:VF 0 "register_operand") 1108 (smaxmin:VF 1109 (match_operand:VF 1 "nonimmediate_operand") 1110 (match_operand:VF 2 "nonimmediate_operand")))] 1111 "TARGET_SSE" 1112{ 1113 if (!flag_finite_math_only) 1114 operands[1] = force_reg (<MODE>mode, operands[1]); 1115 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); 1116}) 1117 1118(define_insn "*<code><mode>3_finite" 1119 [(set (match_operand:VF 0 "register_operand" "=x,x") 1120 (smaxmin:VF 1121 (match_operand:VF 1 "nonimmediate_operand" "%0,x") 1122 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] 1123 "TARGET_SSE && flag_finite_math_only 1124 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 1125 "@ 1126 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} 1127 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1128 [(set_attr "isa" "noavx,avx") 1129 (set_attr "type" "sseadd") 1130 (set_attr "btver2_sse_attr" "maxmin") 1131 (set_attr "prefix" "orig,vex") 1132 (set_attr "mode" "<MODE>")]) 1133 1134(define_insn "*<code><mode>3" 1135 [(set (match_operand:VF 0 "register_operand" "=x,x") 1136 (smaxmin:VF 1137 (match_operand:VF 1 "register_operand" "0,x") 1138 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] 1139 "TARGET_SSE && !flag_finite_math_only" 1140 "@ 1141 <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2} 1142 v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1143 [(set_attr "isa" "noavx,avx") 1144 (set_attr "type" "sseadd") 1145 (set_attr "btver2_sse_attr" "maxmin") 1146 (set_attr "prefix" "orig,vex") 1147 (set_attr "mode" "<MODE>")]) 1148 1149(define_insn "<sse>_vm<code><mode>3" 1150 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 1151 (vec_merge:VF_128 1152 (smaxmin:VF_128 1153 (match_operand:VF_128 1 "register_operand" "0,x") 1154 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")) 1155 (match_dup 1) 1156 (const_int 1)))] 1157 "TARGET_SSE" 1158 "@ 1159 <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %2} 1160 v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1161 [(set_attr "isa" "noavx,avx") 1162 (set_attr "type" "sse") 1163 (set_attr "btver2_sse_attr" "maxmin") 1164 (set_attr "prefix" "orig,vex") 1165 (set_attr "mode" "<ssescalarmode>")]) 1166 1167;; These versions of the min/max patterns implement exactly the operations 1168;; min = (op1 < op2 ? op1 : op2) 1169;; max = (!(op1 < op2) ? op1 : op2) 1170;; Their operands are not commutative, and thus they may be used in the 1171;; presence of -0.0 and NaN. 1172 1173(define_insn "*ieee_smin<mode>3" 1174 [(set (match_operand:VF 0 "register_operand" "=x,x") 1175 (unspec:VF 1176 [(match_operand:VF 1 "register_operand" "0,x") 1177 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] 1178 UNSPEC_IEEE_MIN))] 1179 "TARGET_SSE" 1180 "@ 1181 min<ssemodesuffix>\t{%2, %0|%0, %2} 1182 vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1183 [(set_attr "isa" "noavx,avx") 1184 (set_attr "type" "sseadd") 1185 (set_attr "prefix" "orig,vex") 1186 (set_attr "mode" "<MODE>")]) 1187 1188(define_insn "*ieee_smax<mode>3" 1189 [(set (match_operand:VF 0 "register_operand" "=x,x") 1190 (unspec:VF 1191 [(match_operand:VF 1 "register_operand" "0,x") 1192 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")] 1193 UNSPEC_IEEE_MAX))] 1194 "TARGET_SSE" 1195 "@ 1196 max<ssemodesuffix>\t{%2, %0|%0, %2} 1197 vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1198 [(set_attr "isa" "noavx,avx") 1199 (set_attr "type" "sseadd") 1200 (set_attr "prefix" "orig,vex") 1201 (set_attr "mode" "<MODE>")]) 1202 1203(define_insn "avx_addsubv4df3" 1204 [(set (match_operand:V4DF 0 "register_operand" "=x") 1205 (vec_merge:V4DF 1206 (plus:V4DF 1207 (match_operand:V4DF 1 "register_operand" "x") 1208 (match_operand:V4DF 2 "nonimmediate_operand" "xm")) 1209 (minus:V4DF (match_dup 1) (match_dup 2)) 1210 (const_int 10)))] 1211 "TARGET_AVX" 1212 "vaddsubpd\t{%2, %1, %0|%0, %1, %2}" 1213 [(set_attr "type" "sseadd") 1214 (set_attr "prefix" "vex") 1215 (set_attr "mode" "V4DF")]) 1216 1217(define_insn "sse3_addsubv2df3" 1218 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 1219 (vec_merge:V2DF 1220 (plus:V2DF 1221 (match_operand:V2DF 1 "register_operand" "0,x") 1222 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")) 1223 (minus:V2DF (match_dup 1) (match_dup 2)) 1224 (const_int 2)))] 1225 "TARGET_SSE3" 1226 "@ 1227 addsubpd\t{%2, %0|%0, %2} 1228 vaddsubpd\t{%2, %1, %0|%0, %1, %2}" 1229 [(set_attr "isa" "noavx,avx") 1230 (set_attr "type" "sseadd") 1231 (set_attr "atom_unit" "complex") 1232 (set_attr "prefix" "orig,vex") 1233 (set_attr "mode" "V2DF")]) 1234 1235(define_insn "avx_addsubv8sf3" 1236 [(set (match_operand:V8SF 0 "register_operand" "=x") 1237 (vec_merge:V8SF 1238 (plus:V8SF 1239 (match_operand:V8SF 1 "register_operand" "x") 1240 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 1241 (minus:V8SF (match_dup 1) (match_dup 2)) 1242 (const_int 170)))] 1243 "TARGET_AVX" 1244 "vaddsubps\t{%2, %1, %0|%0, %1, %2}" 1245 [(set_attr "type" "sseadd") 1246 (set_attr "prefix" "vex") 1247 (set_attr "mode" "V8SF")]) 1248 1249(define_insn "sse3_addsubv4sf3" 1250 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 1251 (vec_merge:V4SF 1252 (plus:V4SF 1253 (match_operand:V4SF 1 "register_operand" "0,x") 1254 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) 1255 (minus:V4SF (match_dup 1) (match_dup 2)) 1256 (const_int 10)))] 1257 "TARGET_SSE3" 1258 "@ 1259 addsubps\t{%2, %0|%0, %2} 1260 vaddsubps\t{%2, %1, %0|%0, %1, %2}" 1261 [(set_attr "isa" "noavx,avx") 1262 (set_attr "type" "sseadd") 1263 (set_attr "prefix" "orig,vex") 1264 (set_attr "prefix_rep" "1,*") 1265 (set_attr "mode" "V4SF")]) 1266 1267(define_insn "avx_h<plusminus_insn>v4df3" 1268 [(set (match_operand:V4DF 0 "register_operand" "=x") 1269 (vec_concat:V4DF 1270 (vec_concat:V2DF 1271 (plusminus:DF 1272 (vec_select:DF 1273 (match_operand:V4DF 1 "register_operand" "x") 1274 (parallel [(const_int 0)])) 1275 (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) 1276 (plusminus:DF 1277 (vec_select:DF 1278 (match_operand:V4DF 2 "nonimmediate_operand" "xm") 1279 (parallel [(const_int 0)])) 1280 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))) 1281 (vec_concat:V2DF 1282 (plusminus:DF 1283 (vec_select:DF (match_dup 1) (parallel [(const_int 2)])) 1284 (vec_select:DF (match_dup 1) (parallel [(const_int 3)]))) 1285 (plusminus:DF 1286 (vec_select:DF (match_dup 2) (parallel [(const_int 2)])) 1287 (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))] 1288 "TARGET_AVX" 1289 "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}" 1290 [(set_attr "type" "sseadd") 1291 (set_attr "prefix" "vex") 1292 (set_attr "mode" "V4DF")]) 1293 1294(define_expand "sse3_haddv2df3" 1295 [(set (match_operand:V2DF 0 "register_operand") 1296 (vec_concat:V2DF 1297 (plus:DF 1298 (vec_select:DF 1299 (match_operand:V2DF 1 "register_operand") 1300 (parallel [(const_int 0)])) 1301 (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) 1302 (plus:DF 1303 (vec_select:DF 1304 (match_operand:V2DF 2 "nonimmediate_operand") 1305 (parallel [(const_int 0)])) 1306 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] 1307 "TARGET_SSE3") 1308 1309(define_insn "*sse3_haddv2df3" 1310 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 1311 (vec_concat:V2DF 1312 (plus:DF 1313 (vec_select:DF 1314 (match_operand:V2DF 1 "register_operand" "0,x") 1315 (parallel [(match_operand:SI 3 "const_0_to_1_operand")])) 1316 (vec_select:DF 1317 (match_dup 1) 1318 (parallel [(match_operand:SI 4 "const_0_to_1_operand")]))) 1319 (plus:DF 1320 (vec_select:DF 1321 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") 1322 (parallel [(match_operand:SI 5 "const_0_to_1_operand")])) 1323 (vec_select:DF 1324 (match_dup 2) 1325 (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))] 1326 "TARGET_SSE3 1327 && INTVAL (operands[3]) != INTVAL (operands[4]) 1328 && INTVAL (operands[5]) != INTVAL (operands[6])" 1329 "@ 1330 haddpd\t{%2, %0|%0, %2} 1331 vhaddpd\t{%2, %1, %0|%0, %1, %2}" 1332 [(set_attr "isa" "noavx,avx") 1333 (set_attr "type" "sseadd") 1334 (set_attr "prefix" "orig,vex") 1335 (set_attr "mode" "V2DF")]) 1336 1337(define_insn "sse3_hsubv2df3" 1338 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 1339 (vec_concat:V2DF 1340 (minus:DF 1341 (vec_select:DF 1342 (match_operand:V2DF 1 "register_operand" "0,x") 1343 (parallel [(const_int 0)])) 1344 (vec_select:DF (match_dup 1) (parallel [(const_int 1)]))) 1345 (minus:DF 1346 (vec_select:DF 1347 (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm") 1348 (parallel [(const_int 0)])) 1349 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))] 1350 "TARGET_SSE3" 1351 "@ 1352 hsubpd\t{%2, %0|%0, %2} 1353 vhsubpd\t{%2, %1, %0|%0, %1, %2}" 1354 [(set_attr "isa" "noavx,avx") 1355 (set_attr "type" "sseadd") 1356 (set_attr "prefix" "orig,vex") 1357 (set_attr "mode" "V2DF")]) 1358 1359(define_insn "*sse3_haddv2df3_low" 1360 [(set (match_operand:DF 0 "register_operand" "=x,x") 1361 (plus:DF 1362 (vec_select:DF 1363 (match_operand:V2DF 1 "register_operand" "0,x") 1364 (parallel [(match_operand:SI 2 "const_0_to_1_operand")])) 1365 (vec_select:DF 1366 (match_dup 1) 1367 (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))] 1368 "TARGET_SSE3 1369 && INTVAL (operands[2]) != INTVAL (operands[3])" 1370 "@ 1371 haddpd\t{%0, %0|%0, %0} 1372 vhaddpd\t{%1, %1, %0|%0, %1, %1}" 1373 [(set_attr "isa" "noavx,avx") 1374 (set_attr "type" "sseadd1") 1375 (set_attr "prefix" "orig,vex") 1376 (set_attr "mode" "V2DF")]) 1377 1378(define_insn "*sse3_hsubv2df3_low" 1379 [(set (match_operand:DF 0 "register_operand" "=x,x") 1380 (minus:DF 1381 (vec_select:DF 1382 (match_operand:V2DF 1 "register_operand" "0,x") 1383 (parallel [(const_int 0)])) 1384 (vec_select:DF 1385 (match_dup 1) 1386 (parallel [(const_int 1)]))))] 1387 "TARGET_SSE3" 1388 "@ 1389 hsubpd\t{%0, %0|%0, %0} 1390 vhsubpd\t{%1, %1, %0|%0, %1, %1}" 1391 [(set_attr "isa" "noavx,avx") 1392 (set_attr "type" "sseadd1") 1393 (set_attr "prefix" "orig,vex") 1394 (set_attr "mode" "V2DF")]) 1395 1396(define_insn "avx_h<plusminus_insn>v8sf3" 1397 [(set (match_operand:V8SF 0 "register_operand" "=x") 1398 (vec_concat:V8SF 1399 (vec_concat:V4SF 1400 (vec_concat:V2SF 1401 (plusminus:SF 1402 (vec_select:SF 1403 (match_operand:V8SF 1 "register_operand" "x") 1404 (parallel [(const_int 0)])) 1405 (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) 1406 (plusminus:SF 1407 (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) 1408 (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) 1409 (vec_concat:V2SF 1410 (plusminus:SF 1411 (vec_select:SF 1412 (match_operand:V8SF 2 "nonimmediate_operand" "xm") 1413 (parallel [(const_int 0)])) 1414 (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) 1415 (plusminus:SF 1416 (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) 1417 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))) 1418 (vec_concat:V4SF 1419 (vec_concat:V2SF 1420 (plusminus:SF 1421 (vec_select:SF (match_dup 1) (parallel [(const_int 4)])) 1422 (vec_select:SF (match_dup 1) (parallel [(const_int 5)]))) 1423 (plusminus:SF 1424 (vec_select:SF (match_dup 1) (parallel [(const_int 6)])) 1425 (vec_select:SF (match_dup 1) (parallel [(const_int 7)])))) 1426 (vec_concat:V2SF 1427 (plusminus:SF 1428 (vec_select:SF (match_dup 2) (parallel [(const_int 4)])) 1429 (vec_select:SF (match_dup 2) (parallel [(const_int 5)]))) 1430 (plusminus:SF 1431 (vec_select:SF (match_dup 2) (parallel [(const_int 6)])) 1432 (vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))] 1433 "TARGET_AVX" 1434 "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}" 1435 [(set_attr "type" "sseadd") 1436 (set_attr "prefix" "vex") 1437 (set_attr "mode" "V8SF")]) 1438 1439(define_insn "sse3_h<plusminus_insn>v4sf3" 1440 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 1441 (vec_concat:V4SF 1442 (vec_concat:V2SF 1443 (plusminus:SF 1444 (vec_select:SF 1445 (match_operand:V4SF 1 "register_operand" "0,x") 1446 (parallel [(const_int 0)])) 1447 (vec_select:SF (match_dup 1) (parallel [(const_int 1)]))) 1448 (plusminus:SF 1449 (vec_select:SF (match_dup 1) (parallel [(const_int 2)])) 1450 (vec_select:SF (match_dup 1) (parallel [(const_int 3)])))) 1451 (vec_concat:V2SF 1452 (plusminus:SF 1453 (vec_select:SF 1454 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm") 1455 (parallel [(const_int 0)])) 1456 (vec_select:SF (match_dup 2) (parallel [(const_int 1)]))) 1457 (plusminus:SF 1458 (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) 1459 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] 1460 "TARGET_SSE3" 1461 "@ 1462 h<plusminus_mnemonic>ps\t{%2, %0|%0, %2} 1463 vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}" 1464 [(set_attr "isa" "noavx,avx") 1465 (set_attr "type" "sseadd") 1466 (set_attr "atom_unit" "complex") 1467 (set_attr "prefix" "orig,vex") 1468 (set_attr "prefix_rep" "1,*") 1469 (set_attr "mode" "V4SF")]) 1470 1471(define_expand "reduc_splus_v4df" 1472 [(match_operand:V4DF 0 "register_operand") 1473 (match_operand:V4DF 1 "register_operand")] 1474 "TARGET_AVX" 1475{ 1476 rtx tmp = gen_reg_rtx (V4DFmode); 1477 rtx tmp2 = gen_reg_rtx (V4DFmode); 1478 emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1])); 1479 emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1))); 1480 emit_insn (gen_addv4df3 (operands[0], tmp, tmp2)); 1481 DONE; 1482}) 1483 1484(define_expand "reduc_splus_v2df" 1485 [(match_operand:V2DF 0 "register_operand") 1486 (match_operand:V2DF 1 "register_operand")] 1487 "TARGET_SSE3" 1488{ 1489 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1])); 1490 DONE; 1491}) 1492 1493(define_expand "reduc_splus_v8sf" 1494 [(match_operand:V8SF 0 "register_operand") 1495 (match_operand:V8SF 1 "register_operand")] 1496 "TARGET_AVX" 1497{ 1498 rtx tmp = gen_reg_rtx (V8SFmode); 1499 rtx tmp2 = gen_reg_rtx (V8SFmode); 1500 emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1])); 1501 emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp)); 1502 emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1))); 1503 emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2)); 1504 DONE; 1505}) 1506 1507(define_expand "reduc_splus_v4sf" 1508 [(match_operand:V4SF 0 "register_operand") 1509 (match_operand:V4SF 1 "register_operand")] 1510 "TARGET_SSE" 1511{ 1512 if (TARGET_SSE3) 1513 { 1514 rtx tmp = gen_reg_rtx (V4SFmode); 1515 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1])); 1516 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp)); 1517 } 1518 else 1519 ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]); 1520 DONE; 1521}) 1522 1523;; Modes handled by reduc_sm{in,ax}* patterns. 1524(define_mode_iterator REDUC_SMINMAX_MODE 1525 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") 1526 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") 1527 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") 1528 (V4SF "TARGET_SSE")]) 1529 1530(define_expand "reduc_<code>_<mode>" 1531 [(smaxmin:REDUC_SMINMAX_MODE 1532 (match_operand:REDUC_SMINMAX_MODE 0 "register_operand") 1533 (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))] 1534 "" 1535{ 1536 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]); 1537 DONE; 1538}) 1539 1540(define_expand "reduc_<code>_<mode>" 1541 [(umaxmin:VI_256 1542 (match_operand:VI_256 0 "register_operand") 1543 (match_operand:VI_256 1 "register_operand"))] 1544 "TARGET_AVX2" 1545{ 1546 ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]); 1547 DONE; 1548}) 1549 1550(define_expand "reduc_umin_v8hi" 1551 [(umin:V8HI 1552 (match_operand:V8HI 0 "register_operand") 1553 (match_operand:V8HI 1 "register_operand"))] 1554 "TARGET_SSE4_1" 1555{ 1556 ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]); 1557 DONE; 1558}) 1559 1560;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1561;; 1562;; Parallel floating point comparisons 1563;; 1564;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1565 1566(define_insn "avx_cmp<mode>3" 1567 [(set (match_operand:VF 0 "register_operand" "=x") 1568 (unspec:VF 1569 [(match_operand:VF 1 "register_operand" "x") 1570 (match_operand:VF 2 "nonimmediate_operand" "xm") 1571 (match_operand:SI 3 "const_0_to_31_operand" "n")] 1572 UNSPEC_PCMP))] 1573 "TARGET_AVX" 1574 "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 1575 [(set_attr "type" "ssecmp") 1576 (set_attr "length_immediate" "1") 1577 (set_attr "prefix" "vex") 1578 (set_attr "mode" "<MODE>")]) 1579 1580(define_insn "avx_vmcmp<mode>3" 1581 [(set (match_operand:VF_128 0 "register_operand" "=x") 1582 (vec_merge:VF_128 1583 (unspec:VF_128 1584 [(match_operand:VF_128 1 "register_operand" "x") 1585 (match_operand:VF_128 2 "nonimmediate_operand" "xm") 1586 (match_operand:SI 3 "const_0_to_31_operand" "n")] 1587 UNSPEC_PCMP) 1588 (match_dup 1) 1589 (const_int 1)))] 1590 "TARGET_AVX" 1591 "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 1592 [(set_attr "type" "ssecmp") 1593 (set_attr "length_immediate" "1") 1594 (set_attr "prefix" "vex") 1595 (set_attr "mode" "<ssescalarmode>")]) 1596 1597(define_insn "*<sse>_maskcmp<mode>3_comm" 1598 [(set (match_operand:VF 0 "register_operand" "=x,x") 1599 (match_operator:VF 3 "sse_comparison_operator" 1600 [(match_operand:VF 1 "register_operand" "%0,x") 1601 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))] 1602 "TARGET_SSE 1603 && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE" 1604 "@ 1605 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2} 1606 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1607 [(set_attr "isa" "noavx,avx") 1608 (set_attr "type" "ssecmp") 1609 (set_attr "length_immediate" "1") 1610 (set_attr "prefix" "orig,vex") 1611 (set_attr "mode" "<MODE>")]) 1612 1613(define_insn "<sse>_maskcmp<mode>3" 1614 [(set (match_operand:VF 0 "register_operand" "=x,x") 1615 (match_operator:VF 3 "sse_comparison_operator" 1616 [(match_operand:VF 1 "register_operand" "0,x") 1617 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")]))] 1618 "TARGET_SSE" 1619 "@ 1620 cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2} 1621 vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1622 [(set_attr "isa" "noavx,avx") 1623 (set_attr "type" "ssecmp") 1624 (set_attr "length_immediate" "1") 1625 (set_attr "prefix" "orig,vex") 1626 (set_attr "mode" "<MODE>")]) 1627 1628(define_insn "<sse>_vmmaskcmp<mode>3" 1629 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 1630 (vec_merge:VF_128 1631 (match_operator:VF_128 3 "sse_comparison_operator" 1632 [(match_operand:VF_128 1 "register_operand" "0,x") 1633 (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")]) 1634 (match_dup 1) 1635 (const_int 1)))] 1636 "TARGET_SSE" 1637 "@ 1638 cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %2} 1639 vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" 1640 [(set_attr "isa" "noavx,avx") 1641 (set_attr "type" "ssecmp") 1642 (set_attr "length_immediate" "1,*") 1643 (set_attr "prefix" "orig,vex") 1644 (set_attr "mode" "<ssescalarmode>")]) 1645 1646(define_insn "<sse>_comi" 1647 [(set (reg:CCFP FLAGS_REG) 1648 (compare:CCFP 1649 (vec_select:MODEF 1650 (match_operand:<ssevecmode> 0 "register_operand" "x") 1651 (parallel [(const_int 0)])) 1652 (vec_select:MODEF 1653 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm") 1654 (parallel [(const_int 0)]))))] 1655 "SSE_FLOAT_MODE_P (<MODE>mode)" 1656 "%vcomi<ssemodesuffix>\t{%1, %0|%0, %1}" 1657 [(set_attr "type" "ssecomi") 1658 (set_attr "prefix" "maybe_vex") 1659 (set_attr "prefix_rep" "0") 1660 (set (attr "prefix_data16") 1661 (if_then_else (eq_attr "mode" "DF") 1662 (const_string "1") 1663 (const_string "0"))) 1664 (set_attr "mode" "<MODE>")]) 1665 1666(define_insn "<sse>_ucomi" 1667 [(set (reg:CCFPU FLAGS_REG) 1668 (compare:CCFPU 1669 (vec_select:MODEF 1670 (match_operand:<ssevecmode> 0 "register_operand" "x") 1671 (parallel [(const_int 0)])) 1672 (vec_select:MODEF 1673 (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm") 1674 (parallel [(const_int 0)]))))] 1675 "SSE_FLOAT_MODE_P (<MODE>mode)" 1676 "%vucomi<ssemodesuffix>\t{%1, %0|%0, %1}" 1677 [(set_attr "type" "ssecomi") 1678 (set_attr "prefix" "maybe_vex") 1679 (set_attr "prefix_rep" "0") 1680 (set (attr "prefix_data16") 1681 (if_then_else (eq_attr "mode" "DF") 1682 (const_string "1") 1683 (const_string "0"))) 1684 (set_attr "mode" "<MODE>")]) 1685 1686(define_expand "vcond<V_256:mode><VF_256:mode>" 1687 [(set (match_operand:V_256 0 "register_operand") 1688 (if_then_else:V_256 1689 (match_operator 3 "" 1690 [(match_operand:VF_256 4 "nonimmediate_operand") 1691 (match_operand:VF_256 5 "nonimmediate_operand")]) 1692 (match_operand:V_256 1 "general_operand") 1693 (match_operand:V_256 2 "general_operand")))] 1694 "TARGET_AVX 1695 && (GET_MODE_NUNITS (<V_256:MODE>mode) 1696 == GET_MODE_NUNITS (<VF_256:MODE>mode))" 1697{ 1698 bool ok = ix86_expand_fp_vcond (operands); 1699 gcc_assert (ok); 1700 DONE; 1701}) 1702 1703(define_expand "vcond<V_128:mode><VF_128:mode>" 1704 [(set (match_operand:V_128 0 "register_operand") 1705 (if_then_else:V_128 1706 (match_operator 3 "" 1707 [(match_operand:VF_128 4 "nonimmediate_operand") 1708 (match_operand:VF_128 5 "nonimmediate_operand")]) 1709 (match_operand:V_128 1 "general_operand") 1710 (match_operand:V_128 2 "general_operand")))] 1711 "TARGET_SSE 1712 && (GET_MODE_NUNITS (<V_128:MODE>mode) 1713 == GET_MODE_NUNITS (<VF_128:MODE>mode))" 1714{ 1715 bool ok = ix86_expand_fp_vcond (operands); 1716 gcc_assert (ok); 1717 DONE; 1718}) 1719 1720;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1721;; 1722;; Parallel floating point logical operations 1723;; 1724;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1725 1726(define_insn "<sse>_andnot<mode>3" 1727 [(set (match_operand:VF 0 "register_operand" "=x,x") 1728 (and:VF 1729 (not:VF 1730 (match_operand:VF 1 "register_operand" "0,x")) 1731 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] 1732 "TARGET_SSE" 1733{ 1734 static char buf[32]; 1735 const char *ops; 1736 const char *suffix; 1737 1738 switch (get_attr_mode (insn)) 1739 { 1740 case MODE_V8SF: 1741 case MODE_V4SF: 1742 suffix = "ps"; 1743 break; 1744 default: 1745 suffix = "<ssemodesuffix>"; 1746 } 1747 1748 switch (which_alternative) 1749 { 1750 case 0: 1751 ops = "andn%s\t{%%2, %%0|%%0, %%2}"; 1752 break; 1753 case 1: 1754 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 1755 break; 1756 default: 1757 gcc_unreachable (); 1758 } 1759 1760 snprintf (buf, sizeof (buf), ops, suffix); 1761 return buf; 1762} 1763 [(set_attr "isa" "noavx,avx") 1764 (set_attr "type" "sselog") 1765 (set_attr "prefix" "orig,vex") 1766 (set (attr "mode") 1767 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 1768 (const_string "<ssePSmode>") 1769 (match_test "TARGET_AVX") 1770 (const_string "<MODE>") 1771 (match_test "optimize_function_for_size_p (cfun)") 1772 (const_string "V4SF") 1773 ] 1774 (const_string "<MODE>")))]) 1775 1776(define_expand "<code><mode>3" 1777 [(set (match_operand:VF 0 "register_operand") 1778 (any_logic:VF 1779 (match_operand:VF 1 "nonimmediate_operand") 1780 (match_operand:VF 2 "nonimmediate_operand")))] 1781 "TARGET_SSE" 1782 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 1783 1784(define_insn "*<code><mode>3" 1785 [(set (match_operand:VF 0 "register_operand" "=x,x") 1786 (any_logic:VF 1787 (match_operand:VF 1 "nonimmediate_operand" "%0,x") 1788 (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))] 1789 "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 1790{ 1791 static char buf[32]; 1792 const char *ops; 1793 const char *suffix; 1794 1795 switch (get_attr_mode (insn)) 1796 { 1797 case MODE_V8SF: 1798 case MODE_V4SF: 1799 suffix = "ps"; 1800 break; 1801 default: 1802 suffix = "<ssemodesuffix>"; 1803 } 1804 1805 switch (which_alternative) 1806 { 1807 case 0: 1808 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; 1809 break; 1810 case 1: 1811 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 1812 break; 1813 default: 1814 gcc_unreachable (); 1815 } 1816 1817 snprintf (buf, sizeof (buf), ops, suffix); 1818 return buf; 1819} 1820 [(set_attr "isa" "noavx,avx") 1821 (set_attr "type" "sselog") 1822 (set_attr "prefix" "orig,vex") 1823 (set (attr "mode") 1824 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 1825 (const_string "<ssePSmode>") 1826 (match_test "TARGET_AVX") 1827 (const_string "<MODE>") 1828 (match_test "optimize_function_for_size_p (cfun)") 1829 (const_string "V4SF") 1830 ] 1831 (const_string "<MODE>")))]) 1832 1833(define_expand "copysign<mode>3" 1834 [(set (match_dup 4) 1835 (and:VF 1836 (not:VF (match_dup 3)) 1837 (match_operand:VF 1 "nonimmediate_operand"))) 1838 (set (match_dup 5) 1839 (and:VF (match_dup 3) 1840 (match_operand:VF 2 "nonimmediate_operand"))) 1841 (set (match_operand:VF 0 "register_operand") 1842 (ior:VF (match_dup 4) (match_dup 5)))] 1843 "TARGET_SSE" 1844{ 1845 operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0); 1846 1847 operands[4] = gen_reg_rtx (<MODE>mode); 1848 operands[5] = gen_reg_rtx (<MODE>mode); 1849}) 1850 1851;; Also define scalar versions. These are used for abs, neg, and 1852;; conditional move. Using subregs into vector modes causes register 1853;; allocation lossage. These patterns do not allow memory operands 1854;; because the native instructions read the full 128-bits. 1855 1856(define_insn "*andnot<mode>3" 1857 [(set (match_operand:MODEF 0 "register_operand" "=x,x") 1858 (and:MODEF 1859 (not:MODEF 1860 (match_operand:MODEF 1 "register_operand" "0,x")) 1861 (match_operand:MODEF 2 "register_operand" "x,x")))] 1862 "SSE_FLOAT_MODE_P (<MODE>mode)" 1863{ 1864 static char buf[32]; 1865 const char *ops; 1866 const char *suffix 1867 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; 1868 1869 switch (which_alternative) 1870 { 1871 case 0: 1872 ops = "andn%s\t{%%2, %%0|%%0, %%2}"; 1873 break; 1874 case 1: 1875 ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 1876 break; 1877 default: 1878 gcc_unreachable (); 1879 } 1880 1881 snprintf (buf, sizeof (buf), ops, suffix); 1882 return buf; 1883} 1884 [(set_attr "isa" "noavx,avx") 1885 (set_attr "type" "sselog") 1886 (set_attr "prefix" "orig,vex") 1887 (set (attr "mode") 1888 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 1889 (const_string "V4SF") 1890 (match_test "TARGET_AVX") 1891 (const_string "<ssevecmode>") 1892 (match_test "optimize_function_for_size_p (cfun)") 1893 (const_string "V4SF") 1894 ] 1895 (const_string "<ssevecmode>")))]) 1896 1897(define_insn "*andnottf3" 1898 [(set (match_operand:TF 0 "register_operand" "=x,x") 1899 (and:TF 1900 (not:TF (match_operand:TF 1 "register_operand" "0,x")) 1901 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] 1902 "TARGET_SSE" 1903{ 1904 static char buf[32]; 1905 const char *ops; 1906 const char *tmp 1907 = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn"; 1908 1909 switch (which_alternative) 1910 { 1911 case 0: 1912 ops = "%s\t{%%2, %%0|%%0, %%2}"; 1913 break; 1914 case 1: 1915 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 1916 break; 1917 default: 1918 gcc_unreachable (); 1919 } 1920 1921 snprintf (buf, sizeof (buf), ops, tmp); 1922 return buf; 1923} 1924 [(set_attr "isa" "noavx,avx") 1925 (set_attr "type" "sselog") 1926 (set (attr "prefix_data16") 1927 (if_then_else 1928 (and (eq_attr "alternative" "0") 1929 (eq_attr "mode" "TI")) 1930 (const_string "1") 1931 (const_string "*"))) 1932 (set_attr "prefix" "orig,vex") 1933 (set (attr "mode") 1934 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 1935 (const_string "V4SF") 1936 (match_test "TARGET_AVX") 1937 (const_string "TI") 1938 (ior (not (match_test "TARGET_SSE2")) 1939 (match_test "optimize_function_for_size_p (cfun)")) 1940 (const_string "V4SF") 1941 ] 1942 (const_string "TI")))]) 1943 1944(define_insn "*<code><mode>3" 1945 [(set (match_operand:MODEF 0 "register_operand" "=x,x") 1946 (any_logic:MODEF 1947 (match_operand:MODEF 1 "register_operand" "%0,x") 1948 (match_operand:MODEF 2 "register_operand" "x,x")))] 1949 "SSE_FLOAT_MODE_P (<MODE>mode)" 1950{ 1951 static char buf[32]; 1952 const char *ops; 1953 const char *suffix 1954 = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>"; 1955 1956 switch (which_alternative) 1957 { 1958 case 0: 1959 ops = "<logic>%s\t{%%2, %%0|%%0, %%2}"; 1960 break; 1961 case 1: 1962 ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 1963 break; 1964 default: 1965 gcc_unreachable (); 1966 } 1967 1968 snprintf (buf, sizeof (buf), ops, suffix); 1969 return buf; 1970} 1971 [(set_attr "isa" "noavx,avx") 1972 (set_attr "type" "sselog") 1973 (set_attr "prefix" "orig,vex") 1974 (set (attr "mode") 1975 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 1976 (const_string "V4SF") 1977 (match_test "TARGET_AVX") 1978 (const_string "<ssevecmode>") 1979 (match_test "optimize_function_for_size_p (cfun)") 1980 (const_string "V4SF") 1981 ] 1982 (const_string "<ssevecmode>")))]) 1983 1984(define_expand "<code>tf3" 1985 [(set (match_operand:TF 0 "register_operand") 1986 (any_logic:TF 1987 (match_operand:TF 1 "nonimmediate_operand") 1988 (match_operand:TF 2 "nonimmediate_operand")))] 1989 "TARGET_SSE" 1990 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") 1991 1992(define_insn "*<code>tf3" 1993 [(set (match_operand:TF 0 "register_operand" "=x,x") 1994 (any_logic:TF 1995 (match_operand:TF 1 "nonimmediate_operand" "%0,x") 1996 (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))] 1997 "TARGET_SSE 1998 && ix86_binary_operator_ok (<CODE>, TFmode, operands)" 1999{ 2000 static char buf[32]; 2001 const char *ops; 2002 const char *tmp 2003 = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>"; 2004 2005 switch (which_alternative) 2006 { 2007 case 0: 2008 ops = "%s\t{%%2, %%0|%%0, %%2}"; 2009 break; 2010 case 1: 2011 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 2012 break; 2013 default: 2014 gcc_unreachable (); 2015 } 2016 2017 snprintf (buf, sizeof (buf), ops, tmp); 2018 return buf; 2019} 2020 [(set_attr "isa" "noavx,avx") 2021 (set_attr "type" "sselog") 2022 (set (attr "prefix_data16") 2023 (if_then_else 2024 (and (eq_attr "alternative" "0") 2025 (eq_attr "mode" "TI")) 2026 (const_string "1") 2027 (const_string "*"))) 2028 (set_attr "prefix" "orig,vex") 2029 (set (attr "mode") 2030 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 2031 (const_string "V4SF") 2032 (match_test "TARGET_AVX") 2033 (const_string "TI") 2034 (ior (not (match_test "TARGET_SSE2")) 2035 (match_test "optimize_function_for_size_p (cfun)")) 2036 (const_string "V4SF") 2037 ] 2038 (const_string "TI")))]) 2039 2040;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2041;; 2042;; FMA floating point multiply/accumulate instructions. These include 2043;; scalar versions of the instructions as well as vector versions. 2044;; 2045;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2046 2047;; The standard names for scalar FMA are only available with SSE math enabled. 2048(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH") 2049 (DF "TARGET_SSE_MATH") 2050 V4SF V2DF V8SF V4DF]) 2051 2052(define_expand "fma<mode>4" 2053 [(set (match_operand:FMAMODEM 0 "register_operand") 2054 (fma:FMAMODEM 2055 (match_operand:FMAMODEM 1 "nonimmediate_operand") 2056 (match_operand:FMAMODEM 2 "nonimmediate_operand") 2057 (match_operand:FMAMODEM 3 "nonimmediate_operand")))] 2058 "TARGET_FMA || TARGET_FMA4") 2059 2060(define_expand "fms<mode>4" 2061 [(set (match_operand:FMAMODEM 0 "register_operand") 2062 (fma:FMAMODEM 2063 (match_operand:FMAMODEM 1 "nonimmediate_operand") 2064 (match_operand:FMAMODEM 2 "nonimmediate_operand") 2065 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] 2066 "TARGET_FMA || TARGET_FMA4") 2067 2068(define_expand "fnma<mode>4" 2069 [(set (match_operand:FMAMODEM 0 "register_operand") 2070 (fma:FMAMODEM 2071 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) 2072 (match_operand:FMAMODEM 2 "nonimmediate_operand") 2073 (match_operand:FMAMODEM 3 "nonimmediate_operand")))] 2074 "TARGET_FMA || TARGET_FMA4") 2075 2076(define_expand "fnms<mode>4" 2077 [(set (match_operand:FMAMODEM 0 "register_operand") 2078 (fma:FMAMODEM 2079 (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand")) 2080 (match_operand:FMAMODEM 2 "nonimmediate_operand") 2081 (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))] 2082 "TARGET_FMA || TARGET_FMA4") 2083 2084;; The builtins for intrinsics are not constrained by SSE math enabled. 2085(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) 2086 2087(define_expand "fma4i_fmadd_<mode>" 2088 [(set (match_operand:FMAMODE 0 "register_operand") 2089 (fma:FMAMODE 2090 (match_operand:FMAMODE 1 "nonimmediate_operand") 2091 (match_operand:FMAMODE 2 "nonimmediate_operand") 2092 (match_operand:FMAMODE 3 "nonimmediate_operand")))] 2093 "TARGET_FMA || TARGET_FMA4") 2094 2095(define_insn "*fma_fmadd_<mode>" 2096 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") 2097 (fma:FMAMODE 2098 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") 2099 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") 2100 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] 2101 "TARGET_FMA || TARGET_FMA4" 2102 "@ 2103 vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} 2104 vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 2105 vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} 2106 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} 2107 vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2108 [(set_attr "isa" "fma,fma,fma,fma4,fma4") 2109 (set_attr "type" "ssemuladd") 2110 (set_attr "mode" "<MODE>")]) 2111 2112(define_insn "*fma_fmsub_<mode>" 2113 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") 2114 (fma:FMAMODE 2115 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x") 2116 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") 2117 (neg:FMAMODE 2118 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] 2119 "TARGET_FMA || TARGET_FMA4" 2120 "@ 2121 vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} 2122 vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 2123 vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} 2124 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} 2125 vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2126 [(set_attr "isa" "fma,fma,fma,fma4,fma4") 2127 (set_attr "type" "ssemuladd") 2128 (set_attr "mode" "<MODE>")]) 2129 2130(define_insn "*fma_fnmadd_<mode>" 2131 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") 2132 (fma:FMAMODE 2133 (neg:FMAMODE 2134 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) 2135 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") 2136 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x")))] 2137 "TARGET_FMA || TARGET_FMA4" 2138 "@ 2139 vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} 2140 vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 2141 vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} 2142 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} 2143 vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2144 [(set_attr "isa" "fma,fma,fma,fma4,fma4") 2145 (set_attr "type" "ssemuladd") 2146 (set_attr "mode" "<MODE>")]) 2147 2148(define_insn "*fma_fnmsub_<mode>" 2149 [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x,x,x") 2150 (fma:FMAMODE 2151 (neg:FMAMODE 2152 (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x, x,x")) 2153 (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm,x,m") 2154 (neg:FMAMODE 2155 (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0,xm,x"))))] 2156 "TARGET_FMA || TARGET_FMA4" 2157 "@ 2158 vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} 2159 vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 2160 vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} 2161 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} 2162 vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2163 [(set_attr "isa" "fma,fma,fma,fma4,fma4") 2164 (set_attr "type" "ssemuladd") 2165 (set_attr "mode" "<MODE>")]) 2166 2167;; FMA parallel floating point multiply addsub and subadd operations. 2168 2169;; It would be possible to represent these without the UNSPEC as 2170;; 2171;; (vec_merge 2172;; (fma op1 op2 op3) 2173;; (fma op1 op2 (neg op3)) 2174;; (merge-const)) 2175;; 2176;; But this doesn't seem useful in practice. 2177 2178(define_expand "fmaddsub_<mode>" 2179 [(set (match_operand:VF 0 "register_operand") 2180 (unspec:VF 2181 [(match_operand:VF 1 "nonimmediate_operand") 2182 (match_operand:VF 2 "nonimmediate_operand") 2183 (match_operand:VF 3 "nonimmediate_operand")] 2184 UNSPEC_FMADDSUB))] 2185 "TARGET_FMA || TARGET_FMA4") 2186 2187(define_insn "*fma_fmaddsub_<mode>" 2188 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") 2189 (unspec:VF 2190 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") 2191 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") 2192 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x")] 2193 UNSPEC_FMADDSUB))] 2194 "TARGET_FMA || TARGET_FMA4" 2195 "@ 2196 vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} 2197 vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 2198 vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} 2199 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} 2200 vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2201 [(set_attr "isa" "fma,fma,fma,fma4,fma4") 2202 (set_attr "type" "ssemuladd") 2203 (set_attr "mode" "<MODE>")]) 2204 2205(define_insn "*fma_fmsubadd_<mode>" 2206 [(set (match_operand:VF 0 "register_operand" "=x,x,x,x,x") 2207 (unspec:VF 2208 [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x, x,x") 2209 (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm,x,m") 2210 (neg:VF 2211 (match_operand:VF 3 "nonimmediate_operand" " x,xm,0,xm,x"))] 2212 UNSPEC_FMADDSUB))] 2213 "TARGET_FMA || TARGET_FMA4" 2214 "@ 2215 vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} 2216 vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 2217 vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2} 2218 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} 2219 vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2220 [(set_attr "isa" "fma,fma,fma,fma4,fma4") 2221 (set_attr "type" "ssemuladd") 2222 (set_attr "mode" "<MODE>")]) 2223 2224;; FMA3 floating point scalar intrinsics. These merge result with 2225;; high-order elements from the destination register. 2226 2227(define_expand "fmai_vmfmadd_<mode>" 2228 [(set (match_operand:VF_128 0 "register_operand") 2229 (vec_merge:VF_128 2230 (fma:VF_128 2231 (match_operand:VF_128 1 "nonimmediate_operand") 2232 (match_operand:VF_128 2 "nonimmediate_operand") 2233 (match_operand:VF_128 3 "nonimmediate_operand")) 2234 (match_dup 1) 2235 (const_int 1)))] 2236 "TARGET_FMA") 2237 2238(define_insn "*fmai_fmadd_<mode>" 2239 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2240 (vec_merge:VF_128 2241 (fma:VF_128 2242 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") 2243 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x") 2244 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) 2245 (match_dup 1) 2246 (const_int 1)))] 2247 "TARGET_FMA" 2248 "@ 2249 vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} 2250 vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" 2251 [(set_attr "type" "ssemuladd") 2252 (set_attr "mode" "<MODE>")]) 2253 2254(define_insn "*fmai_fmsub_<mode>" 2255 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2256 (vec_merge:VF_128 2257 (fma:VF_128 2258 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") 2259 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x") 2260 (neg:VF_128 2261 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))) 2262 (match_dup 1) 2263 (const_int 1)))] 2264 "TARGET_FMA" 2265 "@ 2266 vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} 2267 vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" 2268 [(set_attr "type" "ssemuladd") 2269 (set_attr "mode" "<MODE>")]) 2270 2271(define_insn "*fmai_fnmadd_<mode>" 2272 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2273 (vec_merge:VF_128 2274 (fma:VF_128 2275 (neg:VF_128 2276 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) 2277 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") 2278 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm")) 2279 (match_dup 1) 2280 (const_int 1)))] 2281 "TARGET_FMA" 2282 "@ 2283 vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} 2284 vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" 2285 [(set_attr "type" "ssemuladd") 2286 (set_attr "mode" "<MODE>")]) 2287 2288(define_insn "*fmai_fnmsub_<mode>" 2289 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2290 (vec_merge:VF_128 2291 (fma:VF_128 2292 (neg:VF_128 2293 (match_operand:VF_128 2 "nonimmediate_operand" "xm, x")) 2294 (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0") 2295 (neg:VF_128 2296 (match_operand:VF_128 3 "nonimmediate_operand" " x,xm"))) 2297 (match_dup 1) 2298 (const_int 1)))] 2299 "TARGET_FMA" 2300 "@ 2301 vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2} 2302 vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}" 2303 [(set_attr "type" "ssemuladd") 2304 (set_attr "mode" "<MODE>")]) 2305 2306;; FMA4 floating point scalar intrinsics. These write the 2307;; entire destination register, with the high-order elements zeroed. 2308 2309(define_expand "fma4i_vmfmadd_<mode>" 2310 [(set (match_operand:VF_128 0 "register_operand") 2311 (vec_merge:VF_128 2312 (fma:VF_128 2313 (match_operand:VF_128 1 "nonimmediate_operand") 2314 (match_operand:VF_128 2 "nonimmediate_operand") 2315 (match_operand:VF_128 3 "nonimmediate_operand")) 2316 (match_dup 4) 2317 (const_int 1)))] 2318 "TARGET_FMA4" 2319 "operands[4] = CONST0_RTX (<MODE>mode);") 2320 2321(define_insn "*fma4i_vmfmadd_<mode>" 2322 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2323 (vec_merge:VF_128 2324 (fma:VF_128 2325 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") 2326 (match_operand:VF_128 2 "nonimmediate_operand" " x,m") 2327 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) 2328 (match_operand:VF_128 4 "const0_operand") 2329 (const_int 1)))] 2330 "TARGET_FMA4" 2331 "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2332 [(set_attr "type" "ssemuladd") 2333 (set_attr "mode" "<MODE>")]) 2334 2335(define_insn "*fma4i_vmfmsub_<mode>" 2336 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2337 (vec_merge:VF_128 2338 (fma:VF_128 2339 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x") 2340 (match_operand:VF_128 2 "nonimmediate_operand" " x,m") 2341 (neg:VF_128 2342 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) 2343 (match_operand:VF_128 4 "const0_operand") 2344 (const_int 1)))] 2345 "TARGET_FMA4" 2346 "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2347 [(set_attr "type" "ssemuladd") 2348 (set_attr "mode" "<MODE>")]) 2349 2350(define_insn "*fma4i_vmfnmadd_<mode>" 2351 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2352 (vec_merge:VF_128 2353 (fma:VF_128 2354 (neg:VF_128 2355 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) 2356 (match_operand:VF_128 2 "nonimmediate_operand" " x,m") 2357 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")) 2358 (match_operand:VF_128 4 "const0_operand") 2359 (const_int 1)))] 2360 "TARGET_FMA4" 2361 "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2362 [(set_attr "type" "ssemuladd") 2363 (set_attr "mode" "<MODE>")]) 2364 2365(define_insn "*fma4i_vmfnmsub_<mode>" 2366 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 2367 (vec_merge:VF_128 2368 (fma:VF_128 2369 (neg:VF_128 2370 (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")) 2371 (match_operand:VF_128 2 "nonimmediate_operand" " x,m") 2372 (neg:VF_128 2373 (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))) 2374 (match_operand:VF_128 4 "const0_operand") 2375 (const_int 1)))] 2376 "TARGET_FMA4" 2377 "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2378 [(set_attr "type" "ssemuladd") 2379 (set_attr "mode" "<MODE>")]) 2380 2381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2382;; 2383;; Parallel single-precision floating point conversion operations 2384;; 2385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2386 2387(define_insn "sse_cvtpi2ps" 2388 [(set (match_operand:V4SF 0 "register_operand" "=x") 2389 (vec_merge:V4SF 2390 (vec_duplicate:V4SF 2391 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym"))) 2392 (match_operand:V4SF 1 "register_operand" "0") 2393 (const_int 3)))] 2394 "TARGET_SSE" 2395 "cvtpi2ps\t{%2, %0|%0, %2}" 2396 [(set_attr "type" "ssecvt") 2397 (set_attr "mode" "V4SF")]) 2398 2399(define_insn "sse_cvtps2pi" 2400 [(set (match_operand:V2SI 0 "register_operand" "=y") 2401 (vec_select:V2SI 2402 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 2403 UNSPEC_FIX_NOTRUNC) 2404 (parallel [(const_int 0) (const_int 1)])))] 2405 "TARGET_SSE" 2406 "cvtps2pi\t{%1, %0|%0, %1}" 2407 [(set_attr "type" "ssecvt") 2408 (set_attr "unit" "mmx") 2409 (set_attr "mode" "DI")]) 2410 2411(define_insn "sse_cvttps2pi" 2412 [(set (match_operand:V2SI 0 "register_operand" "=y") 2413 (vec_select:V2SI 2414 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")) 2415 (parallel [(const_int 0) (const_int 1)])))] 2416 "TARGET_SSE" 2417 "cvttps2pi\t{%1, %0|%0, %1}" 2418 [(set_attr "type" "ssecvt") 2419 (set_attr "unit" "mmx") 2420 (set_attr "prefix_rep" "0") 2421 (set_attr "mode" "SF")]) 2422 2423(define_insn "sse_cvtsi2ss" 2424 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") 2425 (vec_merge:V4SF 2426 (vec_duplicate:V4SF 2427 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm"))) 2428 (match_operand:V4SF 1 "register_operand" "0,0,x") 2429 (const_int 1)))] 2430 "TARGET_SSE" 2431 "@ 2432 cvtsi2ss\t{%2, %0|%0, %2} 2433 cvtsi2ss\t{%2, %0|%0, %2} 2434 vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}" 2435 [(set_attr "isa" "noavx,noavx,avx") 2436 (set_attr "type" "sseicvt") 2437 (set_attr "athlon_decode" "vector,double,*") 2438 (set_attr "amdfam10_decode" "vector,double,*") 2439 (set_attr "bdver1_decode" "double,direct,*") 2440 (set_attr "btver2_decode" "double,double,double") 2441 (set_attr "prefix" "orig,orig,vex") 2442 (set_attr "mode" "SF")]) 2443 2444(define_insn "sse_cvtsi2ssq" 2445 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") 2446 (vec_merge:V4SF 2447 (vec_duplicate:V4SF 2448 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm"))) 2449 (match_operand:V4SF 1 "register_operand" "0,0,x") 2450 (const_int 1)))] 2451 "TARGET_SSE && TARGET_64BIT" 2452 "@ 2453 cvtsi2ssq\t{%2, %0|%0, %2} 2454 cvtsi2ssq\t{%2, %0|%0, %2} 2455 vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}" 2456 [(set_attr "isa" "noavx,noavx,avx") 2457 (set_attr "type" "sseicvt") 2458 (set_attr "athlon_decode" "vector,double,*") 2459 (set_attr "amdfam10_decode" "vector,double,*") 2460 (set_attr "bdver1_decode" "double,direct,*") 2461 (set_attr "btver2_decode" "double,double,double") 2462 (set_attr "length_vex" "*,*,4") 2463 (set_attr "prefix_rex" "1,1,*") 2464 (set_attr "prefix" "orig,orig,vex") 2465 (set_attr "mode" "SF")]) 2466 2467(define_insn "sse_cvtss2si" 2468 [(set (match_operand:SI 0 "register_operand" "=r,r") 2469 (unspec:SI 2470 [(vec_select:SF 2471 (match_operand:V4SF 1 "nonimmediate_operand" "x,m") 2472 (parallel [(const_int 0)]))] 2473 UNSPEC_FIX_NOTRUNC))] 2474 "TARGET_SSE" 2475 "%vcvtss2si\t{%1, %0|%0, %1}" 2476 [(set_attr "type" "sseicvt") 2477 (set_attr "athlon_decode" "double,vector") 2478 (set_attr "bdver1_decode" "double,double") 2479 (set_attr "prefix_rep" "1") 2480 (set_attr "prefix" "maybe_vex") 2481 (set_attr "mode" "SI")]) 2482 2483(define_insn "sse_cvtss2si_2" 2484 [(set (match_operand:SI 0 "register_operand" "=r,r") 2485 (unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] 2486 UNSPEC_FIX_NOTRUNC))] 2487 "TARGET_SSE" 2488 "%vcvtss2si\t{%1, %0|%0, %1}" 2489 [(set_attr "type" "sseicvt") 2490 (set_attr "athlon_decode" "double,vector") 2491 (set_attr "amdfam10_decode" "double,double") 2492 (set_attr "bdver1_decode" "double,double") 2493 (set_attr "prefix_rep" "1") 2494 (set_attr "prefix" "maybe_vex") 2495 (set_attr "mode" "SI")]) 2496 2497(define_insn "sse_cvtss2siq" 2498 [(set (match_operand:DI 0 "register_operand" "=r,r") 2499 (unspec:DI 2500 [(vec_select:SF 2501 (match_operand:V4SF 1 "nonimmediate_operand" "x,m") 2502 (parallel [(const_int 0)]))] 2503 UNSPEC_FIX_NOTRUNC))] 2504 "TARGET_SSE && TARGET_64BIT" 2505 "%vcvtss2si{q}\t{%1, %0|%0, %1}" 2506 [(set_attr "type" "sseicvt") 2507 (set_attr "athlon_decode" "double,vector") 2508 (set_attr "bdver1_decode" "double,double") 2509 (set_attr "prefix_rep" "1") 2510 (set_attr "prefix" "maybe_vex") 2511 (set_attr "mode" "DI")]) 2512 2513(define_insn "sse_cvtss2siq_2" 2514 [(set (match_operand:DI 0 "register_operand" "=r,r") 2515 (unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")] 2516 UNSPEC_FIX_NOTRUNC))] 2517 "TARGET_SSE && TARGET_64BIT" 2518 "%vcvtss2si{q}\t{%1, %0|%0, %1}" 2519 [(set_attr "type" "sseicvt") 2520 (set_attr "athlon_decode" "double,vector") 2521 (set_attr "amdfam10_decode" "double,double") 2522 (set_attr "bdver1_decode" "double,double") 2523 (set_attr "prefix_rep" "1") 2524 (set_attr "prefix" "maybe_vex") 2525 (set_attr "mode" "DI")]) 2526 2527(define_insn "sse_cvttss2si" 2528 [(set (match_operand:SI 0 "register_operand" "=r,r") 2529 (fix:SI 2530 (vec_select:SF 2531 (match_operand:V4SF 1 "nonimmediate_operand" "x,m") 2532 (parallel [(const_int 0)]))))] 2533 "TARGET_SSE" 2534 "%vcvttss2si\t{%1, %0|%0, %1}" 2535 [(set_attr "type" "sseicvt") 2536 (set_attr "athlon_decode" "double,vector") 2537 (set_attr "amdfam10_decode" "double,double") 2538 (set_attr "bdver1_decode" "double,double") 2539 (set_attr "prefix_rep" "1") 2540 (set_attr "prefix" "maybe_vex") 2541 (set_attr "mode" "SI")]) 2542 2543(define_insn "sse_cvttss2siq" 2544 [(set (match_operand:DI 0 "register_operand" "=r,r") 2545 (fix:DI 2546 (vec_select:SF 2547 (match_operand:V4SF 1 "nonimmediate_operand" "x,m") 2548 (parallel [(const_int 0)]))))] 2549 "TARGET_SSE && TARGET_64BIT" 2550 "%vcvttss2si{q}\t{%1, %0|%0, %1}" 2551 [(set_attr "type" "sseicvt") 2552 (set_attr "athlon_decode" "double,vector") 2553 (set_attr "amdfam10_decode" "double,double") 2554 (set_attr "bdver1_decode" "double,double") 2555 (set_attr "prefix_rep" "1") 2556 (set_attr "prefix" "maybe_vex") 2557 (set_attr "mode" "DI")]) 2558 2559(define_insn "float<sseintvecmodelower><mode>2" 2560 [(set (match_operand:VF1 0 "register_operand" "=x") 2561 (float:VF1 2562 (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))] 2563 "TARGET_SSE2" 2564 "%vcvtdq2ps\t{%1, %0|%0, %1}" 2565 [(set_attr "type" "ssecvt") 2566 (set_attr "prefix" "maybe_vex") 2567 (set_attr "mode" "<sseinsnmode>")]) 2568 2569(define_expand "floatuns<sseintvecmodelower><mode>2" 2570 [(match_operand:VF1 0 "register_operand") 2571 (match_operand:<sseintvecmode> 1 "register_operand")] 2572 "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)" 2573{ 2574 ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]); 2575 DONE; 2576}) 2577 2578(define_insn "avx_cvtps2dq256" 2579 [(set (match_operand:V8SI 0 "register_operand" "=x") 2580 (unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")] 2581 UNSPEC_FIX_NOTRUNC))] 2582 "TARGET_AVX" 2583 "vcvtps2dq\t{%1, %0|%0, %1}" 2584 [(set_attr "type" "ssecvt") 2585 (set_attr "prefix" "vex") 2586 (set_attr "mode" "OI")]) 2587 2588(define_insn "sse2_cvtps2dq" 2589 [(set (match_operand:V4SI 0 "register_operand" "=x") 2590 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] 2591 UNSPEC_FIX_NOTRUNC))] 2592 "TARGET_SSE2" 2593 "%vcvtps2dq\t{%1, %0|%0, %1}" 2594 [(set_attr "type" "ssecvt") 2595 (set (attr "prefix_data16") 2596 (if_then_else 2597 (match_test "TARGET_AVX") 2598 (const_string "*") 2599 (const_string "1"))) 2600 (set_attr "prefix" "maybe_vex") 2601 (set_attr "mode" "TI")]) 2602 2603(define_insn "fix_truncv8sfv8si2" 2604 [(set (match_operand:V8SI 0 "register_operand" "=x") 2605 (fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))] 2606 "TARGET_AVX" 2607 "vcvttps2dq\t{%1, %0|%0, %1}" 2608 [(set_attr "type" "ssecvt") 2609 (set_attr "prefix" "vex") 2610 (set_attr "mode" "OI")]) 2611 2612(define_insn "fix_truncv4sfv4si2" 2613 [(set (match_operand:V4SI 0 "register_operand" "=x") 2614 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] 2615 "TARGET_SSE2" 2616 "%vcvttps2dq\t{%1, %0|%0, %1}" 2617 [(set_attr "type" "ssecvt") 2618 (set (attr "prefix_rep") 2619 (if_then_else 2620 (match_test "TARGET_AVX") 2621 (const_string "*") 2622 (const_string "1"))) 2623 (set (attr "prefix_data16") 2624 (if_then_else 2625 (match_test "TARGET_AVX") 2626 (const_string "*") 2627 (const_string "0"))) 2628 (set_attr "prefix_data16" "0") 2629 (set_attr "prefix" "maybe_vex") 2630 (set_attr "mode" "TI")]) 2631 2632(define_expand "fixuns_trunc<mode><sseintvecmodelower>2" 2633 [(match_operand:<sseintvecmode> 0 "register_operand") 2634 (match_operand:VF1 1 "register_operand")] 2635 "TARGET_SSE2" 2636{ 2637 rtx tmp[3]; 2638 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); 2639 tmp[1] = gen_reg_rtx (<sseintvecmode>mode); 2640 emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0])); 2641 emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2])); 2642 DONE; 2643}) 2644 2645;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2646;; 2647;; Parallel double-precision floating point conversion operations 2648;; 2649;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2650 2651(define_insn "sse2_cvtpi2pd" 2652 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 2653 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] 2654 "TARGET_SSE2" 2655 "cvtpi2pd\t{%1, %0|%0, %1}" 2656 [(set_attr "type" "ssecvt") 2657 (set_attr "unit" "mmx,*") 2658 (set_attr "prefix_data16" "1,*") 2659 (set_attr "mode" "V2DF")]) 2660 2661(define_insn "sse2_cvtpd2pi" 2662 [(set (match_operand:V2SI 0 "register_operand" "=y") 2663 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 2664 UNSPEC_FIX_NOTRUNC))] 2665 "TARGET_SSE2" 2666 "cvtpd2pi\t{%1, %0|%0, %1}" 2667 [(set_attr "type" "ssecvt") 2668 (set_attr "unit" "mmx") 2669 (set_attr "bdver1_decode" "double") 2670 (set_attr "btver2_decode" "direct") 2671 (set_attr "prefix_data16" "1") 2672 (set_attr "mode" "DI")]) 2673 2674(define_insn "sse2_cvttpd2pi" 2675 [(set (match_operand:V2SI 0 "register_operand" "=y") 2676 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] 2677 "TARGET_SSE2" 2678 "cvttpd2pi\t{%1, %0|%0, %1}" 2679 [(set_attr "type" "ssecvt") 2680 (set_attr "unit" "mmx") 2681 (set_attr "bdver1_decode" "double") 2682 (set_attr "prefix_data16" "1") 2683 (set_attr "mode" "TI")]) 2684 2685(define_insn "sse2_cvtsi2sd" 2686 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x") 2687 (vec_merge:V2DF 2688 (vec_duplicate:V2DF 2689 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm"))) 2690 (match_operand:V2DF 1 "register_operand" "0,0,x") 2691 (const_int 1)))] 2692 "TARGET_SSE2" 2693 "@ 2694 cvtsi2sd\t{%2, %0|%0, %2} 2695 cvtsi2sd\t{%2, %0|%0, %2} 2696 vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}" 2697 [(set_attr "isa" "noavx,noavx,avx") 2698 (set_attr "type" "sseicvt") 2699 (set_attr "athlon_decode" "double,direct,*") 2700 (set_attr "amdfam10_decode" "vector,double,*") 2701 (set_attr "bdver1_decode" "double,direct,*") 2702 (set_attr "btver2_decode" "double,double,double") 2703 (set_attr "prefix" "orig,orig,vex") 2704 (set_attr "mode" "DF")]) 2705 2706(define_insn "sse2_cvtsi2sdq" 2707 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x") 2708 (vec_merge:V2DF 2709 (vec_duplicate:V2DF 2710 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm"))) 2711 (match_operand:V2DF 1 "register_operand" "0,0,x") 2712 (const_int 1)))] 2713 "TARGET_SSE2 && TARGET_64BIT" 2714 "@ 2715 cvtsi2sdq\t{%2, %0|%0, %2} 2716 cvtsi2sdq\t{%2, %0|%0, %2} 2717 vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}" 2718 [(set_attr "isa" "noavx,noavx,avx") 2719 (set_attr "type" "sseicvt") 2720 (set_attr "athlon_decode" "double,direct,*") 2721 (set_attr "amdfam10_decode" "vector,double,*") 2722 (set_attr "bdver1_decode" "double,direct,*") 2723 (set_attr "length_vex" "*,*,4") 2724 (set_attr "prefix_rex" "1,1,*") 2725 (set_attr "prefix" "orig,orig,vex") 2726 (set_attr "mode" "DF")]) 2727 2728(define_insn "sse2_cvtsd2si" 2729 [(set (match_operand:SI 0 "register_operand" "=r,r") 2730 (unspec:SI 2731 [(vec_select:DF 2732 (match_operand:V2DF 1 "nonimmediate_operand" "x,m") 2733 (parallel [(const_int 0)]))] 2734 UNSPEC_FIX_NOTRUNC))] 2735 "TARGET_SSE2" 2736 "%vcvtsd2si\t{%1, %0|%0, %1}" 2737 [(set_attr "type" "sseicvt") 2738 (set_attr "athlon_decode" "double,vector") 2739 (set_attr "bdver1_decode" "double,double") 2740 (set_attr "btver2_decode" "double,double") 2741 (set_attr "prefix_rep" "1") 2742 (set_attr "prefix" "maybe_vex") 2743 (set_attr "mode" "SI")]) 2744 2745(define_insn "sse2_cvtsd2si_2" 2746 [(set (match_operand:SI 0 "register_operand" "=r,r") 2747 (unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] 2748 UNSPEC_FIX_NOTRUNC))] 2749 "TARGET_SSE2" 2750 "%vcvtsd2si\t{%1, %0|%0, %1}" 2751 [(set_attr "type" "sseicvt") 2752 (set_attr "athlon_decode" "double,vector") 2753 (set_attr "amdfam10_decode" "double,double") 2754 (set_attr "bdver1_decode" "double,double") 2755 (set_attr "prefix_rep" "1") 2756 (set_attr "prefix" "maybe_vex") 2757 (set_attr "mode" "SI")]) 2758 2759(define_insn "sse2_cvtsd2siq" 2760 [(set (match_operand:DI 0 "register_operand" "=r,r") 2761 (unspec:DI 2762 [(vec_select:DF 2763 (match_operand:V2DF 1 "nonimmediate_operand" "x,m") 2764 (parallel [(const_int 0)]))] 2765 UNSPEC_FIX_NOTRUNC))] 2766 "TARGET_SSE2 && TARGET_64BIT" 2767 "%vcvtsd2si{q}\t{%1, %0|%0, %1}" 2768 [(set_attr "type" "sseicvt") 2769 (set_attr "athlon_decode" "double,vector") 2770 (set_attr "bdver1_decode" "double,double") 2771 (set_attr "prefix_rep" "1") 2772 (set_attr "prefix" "maybe_vex") 2773 (set_attr "mode" "DI")]) 2774 2775(define_insn "sse2_cvtsd2siq_2" 2776 [(set (match_operand:DI 0 "register_operand" "=r,r") 2777 (unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "x,m")] 2778 UNSPEC_FIX_NOTRUNC))] 2779 "TARGET_SSE2 && TARGET_64BIT" 2780 "%vcvtsd2si{q}\t{%1, %0|%0, %1}" 2781 [(set_attr "type" "sseicvt") 2782 (set_attr "athlon_decode" "double,vector") 2783 (set_attr "amdfam10_decode" "double,double") 2784 (set_attr "bdver1_decode" "double,double") 2785 (set_attr "prefix_rep" "1") 2786 (set_attr "prefix" "maybe_vex") 2787 (set_attr "mode" "DI")]) 2788 2789(define_insn "sse2_cvttsd2si" 2790 [(set (match_operand:SI 0 "register_operand" "=r,r") 2791 (fix:SI 2792 (vec_select:DF 2793 (match_operand:V2DF 1 "nonimmediate_operand" "x,m") 2794 (parallel [(const_int 0)]))))] 2795 "TARGET_SSE2" 2796 "%vcvttsd2si\t{%1, %0|%0, %1}" 2797 [(set_attr "type" "sseicvt") 2798 (set_attr "athlon_decode" "double,vector") 2799 (set_attr "amdfam10_decode" "double,double") 2800 (set_attr "bdver1_decode" "double,double") 2801 (set_attr "btver2_decode" "double,double") 2802 (set_attr "prefix_rep" "1") 2803 (set_attr "prefix" "maybe_vex") 2804 (set_attr "mode" "SI")]) 2805 2806(define_insn "sse2_cvttsd2siq" 2807 [(set (match_operand:DI 0 "register_operand" "=r,r") 2808 (fix:DI 2809 (vec_select:DF 2810 (match_operand:V2DF 1 "nonimmediate_operand" "x,m") 2811 (parallel [(const_int 0)]))))] 2812 "TARGET_SSE2 && TARGET_64BIT" 2813 "%vcvttsd2si{q}\t{%1, %0|%0, %1}" 2814 [(set_attr "type" "sseicvt") 2815 (set_attr "athlon_decode" "double,vector") 2816 (set_attr "amdfam10_decode" "double,double") 2817 (set_attr "bdver1_decode" "double,double") 2818 (set_attr "prefix_rep" "1") 2819 (set_attr "prefix" "maybe_vex") 2820 (set_attr "mode" "DI")]) 2821 2822(define_insn "floatv4siv4df2" 2823 [(set (match_operand:V4DF 0 "register_operand" "=x") 2824 (float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] 2825 "TARGET_AVX" 2826 "vcvtdq2pd\t{%1, %0|%0, %1}" 2827 [(set_attr "type" "ssecvt") 2828 (set_attr "prefix" "vex") 2829 (set_attr "mode" "V4DF")]) 2830 2831(define_insn "avx_cvtdq2pd256_2" 2832 [(set (match_operand:V4DF 0 "register_operand" "=x") 2833 (float:V4DF 2834 (vec_select:V4SI 2835 (match_operand:V8SI 1 "nonimmediate_operand" "xm") 2836 (parallel [(const_int 0) (const_int 1) 2837 (const_int 2) (const_int 3)]))))] 2838 "TARGET_AVX" 2839 "vcvtdq2pd\t{%x1, %0|%0, %x1}" 2840 [(set_attr "type" "ssecvt") 2841 (set_attr "prefix" "vex") 2842 (set_attr "mode" "V4DF")]) 2843 2844(define_insn "sse2_cvtdq2pd" 2845 [(set (match_operand:V2DF 0 "register_operand" "=x") 2846 (float:V2DF 2847 (vec_select:V2SI 2848 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 2849 (parallel [(const_int 0) (const_int 1)]))))] 2850 "TARGET_SSE2" 2851 "%vcvtdq2pd\t{%1, %0|%0, %q1}" 2852 [(set_attr "type" "ssecvt") 2853 (set_attr "prefix" "maybe_vex") 2854 (set_attr "ssememalign" "64") 2855 (set_attr "mode" "V2DF")]) 2856 2857(define_insn "avx_cvtpd2dq256" 2858 [(set (match_operand:V4SI 0 "register_operand" "=x") 2859 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")] 2860 UNSPEC_FIX_NOTRUNC))] 2861 "TARGET_AVX" 2862 "vcvtpd2dq{y}\t{%1, %0|%0, %1}" 2863 [(set_attr "type" "ssecvt") 2864 (set_attr "prefix" "vex") 2865 (set_attr "mode" "OI")]) 2866 2867(define_expand "avx_cvtpd2dq256_2" 2868 [(set (match_operand:V8SI 0 "register_operand") 2869 (vec_concat:V8SI 2870 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")] 2871 UNSPEC_FIX_NOTRUNC) 2872 (match_dup 2)))] 2873 "TARGET_AVX" 2874 "operands[2] = CONST0_RTX (V4SImode);") 2875 2876(define_insn "*avx_cvtpd2dq256_2" 2877 [(set (match_operand:V8SI 0 "register_operand" "=x") 2878 (vec_concat:V8SI 2879 (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")] 2880 UNSPEC_FIX_NOTRUNC) 2881 (match_operand:V4SI 2 "const0_operand")))] 2882 "TARGET_AVX" 2883 "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}" 2884 [(set_attr "type" "ssecvt") 2885 (set_attr "prefix" "vex") 2886 (set_attr "btver2_decode" "vector") 2887 (set_attr "mode" "OI")]) 2888 2889(define_expand "sse2_cvtpd2dq" 2890 [(set (match_operand:V4SI 0 "register_operand") 2891 (vec_concat:V4SI 2892 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand")] 2893 UNSPEC_FIX_NOTRUNC) 2894 (match_dup 2)))] 2895 "TARGET_SSE2" 2896 "operands[2] = CONST0_RTX (V2SImode);") 2897 2898(define_insn "*sse2_cvtpd2dq" 2899 [(set (match_operand:V4SI 0 "register_operand" "=x") 2900 (vec_concat:V4SI 2901 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 2902 UNSPEC_FIX_NOTRUNC) 2903 (match_operand:V2SI 2 "const0_operand")))] 2904 "TARGET_SSE2" 2905{ 2906 if (TARGET_AVX) 2907 return "vcvtpd2dq{x}\t{%1, %0|%0, %1}"; 2908 else 2909 return "cvtpd2dq\t{%1, %0|%0, %1}"; 2910} 2911 [(set_attr "type" "ssecvt") 2912 (set_attr "prefix_rep" "1") 2913 (set_attr "prefix_data16" "0") 2914 (set_attr "prefix" "maybe_vex") 2915 (set_attr "mode" "TI") 2916 (set_attr "amdfam10_decode" "double") 2917 (set_attr "athlon_decode" "vector") 2918 (set_attr "bdver1_decode" "double")]) 2919 2920(define_insn "fix_truncv4dfv4si2" 2921 [(set (match_operand:V4SI 0 "register_operand" "=x") 2922 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))] 2923 "TARGET_AVX" 2924 "vcvttpd2dq{y}\t{%1, %0|%0, %1}" 2925 [(set_attr "type" "ssecvt") 2926 (set_attr "prefix" "vex") 2927 (set_attr "mode" "OI")]) 2928 2929(define_expand "avx_cvttpd2dq256_2" 2930 [(set (match_operand:V8SI 0 "register_operand") 2931 (vec_concat:V8SI 2932 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")) 2933 (match_dup 2)))] 2934 "TARGET_AVX" 2935 "operands[2] = CONST0_RTX (V4SImode);") 2936 2937(define_insn "*avx_cvttpd2dq256_2" 2938 [(set (match_operand:V8SI 0 "register_operand" "=x") 2939 (vec_concat:V8SI 2940 (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")) 2941 (match_operand:V4SI 2 "const0_operand")))] 2942 "TARGET_AVX" 2943 "vcvttpd2dq{y}\t{%1, %x0|%x0, %1}" 2944 [(set_attr "type" "ssecvt") 2945 (set_attr "prefix" "vex") 2946 (set_attr "btver2_decode" "vector") 2947 (set_attr "mode" "OI")]) 2948 2949(define_expand "sse2_cvttpd2dq" 2950 [(set (match_operand:V4SI 0 "register_operand") 2951 (vec_concat:V4SI 2952 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand")) 2953 (match_dup 2)))] 2954 "TARGET_SSE2" 2955 "operands[2] = CONST0_RTX (V2SImode);") 2956 2957(define_insn "*sse2_cvttpd2dq" 2958 [(set (match_operand:V4SI 0 "register_operand" "=x") 2959 (vec_concat:V4SI 2960 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 2961 (match_operand:V2SI 2 "const0_operand")))] 2962 "TARGET_SSE2" 2963{ 2964 if (TARGET_AVX) 2965 return "vcvttpd2dq{x}\t{%1, %0|%0, %1}"; 2966 else 2967 return "cvttpd2dq\t{%1, %0|%0, %1}"; 2968} 2969 [(set_attr "type" "ssecvt") 2970 (set_attr "amdfam10_decode" "double") 2971 (set_attr "athlon_decode" "vector") 2972 (set_attr "bdver1_decode" "double") 2973 (set_attr "prefix" "maybe_vex") 2974 (set_attr "mode" "TI")]) 2975 2976(define_insn "sse2_cvtsd2ss" 2977 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") 2978 (vec_merge:V4SF 2979 (vec_duplicate:V4SF 2980 (float_truncate:V2SF 2981 (match_operand:V2DF 2 "nonimmediate_operand" "x,m,xm"))) 2982 (match_operand:V4SF 1 "register_operand" "0,0,x") 2983 (const_int 1)))] 2984 "TARGET_SSE2" 2985 "@ 2986 cvtsd2ss\t{%2, %0|%0, %2} 2987 cvtsd2ss\t{%2, %0|%0, %2} 2988 vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}" 2989 [(set_attr "isa" "noavx,noavx,avx") 2990 (set_attr "type" "ssecvt") 2991 (set_attr "athlon_decode" "vector,double,*") 2992 (set_attr "amdfam10_decode" "vector,double,*") 2993 (set_attr "bdver1_decode" "direct,direct,*") 2994 (set_attr "btver2_decode" "double,double,double") 2995 (set_attr "prefix" "orig,orig,vex") 2996 (set_attr "mode" "SF")]) 2997 2998(define_insn "sse2_cvtss2sd" 2999 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x") 3000 (vec_merge:V2DF 3001 (float_extend:V2DF 3002 (vec_select:V2SF 3003 (match_operand:V4SF 2 "nonimmediate_operand" "x,m,xm") 3004 (parallel [(const_int 0) (const_int 1)]))) 3005 (match_operand:V2DF 1 "register_operand" "0,0,x") 3006 (const_int 1)))] 3007 "TARGET_SSE2" 3008 "@ 3009 cvtss2sd\t{%2, %0|%0, %2} 3010 cvtss2sd\t{%2, %0|%0, %2} 3011 vcvtss2sd\t{%2, %1, %0|%0, %1, %2}" 3012 [(set_attr "isa" "noavx,noavx,avx") 3013 (set_attr "type" "ssecvt") 3014 (set_attr "amdfam10_decode" "vector,double,*") 3015 (set_attr "athlon_decode" "direct,direct,*") 3016 (set_attr "bdver1_decode" "direct,direct,*") 3017 (set_attr "btver2_decode" "double,double,double") 3018 (set_attr "prefix" "orig,orig,vex") 3019 (set_attr "mode" "DF")]) 3020 3021(define_insn "avx_cvtpd2ps256" 3022 [(set (match_operand:V4SF 0 "register_operand" "=x") 3023 (float_truncate:V4SF 3024 (match_operand:V4DF 1 "nonimmediate_operand" "xm")))] 3025 "TARGET_AVX" 3026 "vcvtpd2ps{y}\t{%1, %0|%0, %1}" 3027 [(set_attr "type" "ssecvt") 3028 (set_attr "prefix" "vex") 3029 (set_attr "btver2_decode" "vector") 3030 (set_attr "mode" "V4SF")]) 3031 3032(define_expand "sse2_cvtpd2ps" 3033 [(set (match_operand:V4SF 0 "register_operand") 3034 (vec_concat:V4SF 3035 (float_truncate:V2SF 3036 (match_operand:V2DF 1 "nonimmediate_operand")) 3037 (match_dup 2)))] 3038 "TARGET_SSE2" 3039 "operands[2] = CONST0_RTX (V2SFmode);") 3040 3041(define_insn "*sse2_cvtpd2ps" 3042 [(set (match_operand:V4SF 0 "register_operand" "=x") 3043 (vec_concat:V4SF 3044 (float_truncate:V2SF 3045 (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 3046 (match_operand:V2SF 2 "const0_operand")))] 3047 "TARGET_SSE2" 3048{ 3049 if (TARGET_AVX) 3050 return "vcvtpd2ps{x}\t{%1, %0|%0, %1}"; 3051 else 3052 return "cvtpd2ps\t{%1, %0|%0, %1}"; 3053} 3054 [(set_attr "type" "ssecvt") 3055 (set_attr "amdfam10_decode" "double") 3056 (set_attr "athlon_decode" "vector") 3057 (set_attr "bdver1_decode" "double") 3058 (set_attr "prefix_data16" "1") 3059 (set_attr "prefix" "maybe_vex") 3060 (set_attr "mode" "V4SF")]) 3061 3062(define_insn "avx_cvtps2pd256" 3063 [(set (match_operand:V4DF 0 "register_operand" "=x") 3064 (float_extend:V4DF 3065 (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] 3066 "TARGET_AVX" 3067 "vcvtps2pd\t{%1, %0|%0, %1}" 3068 [(set_attr "type" "ssecvt") 3069 (set_attr "prefix" "vex") 3070 (set_attr "mode" "V4DF")]) 3071 3072(define_insn "*avx_cvtps2pd256_2" 3073 [(set (match_operand:V4DF 0 "register_operand" "=x") 3074 (float_extend:V4DF 3075 (vec_select:V4SF 3076 (match_operand:V8SF 1 "nonimmediate_operand" "xm") 3077 (parallel [(const_int 0) (const_int 1) 3078 (const_int 2) (const_int 3)]))))] 3079 "TARGET_AVX" 3080 "vcvtps2pd\t{%x1, %0|%0, %x1}" 3081 [(set_attr "type" "ssecvt") 3082 (set_attr "prefix" "vex") 3083 (set_attr "mode" "V4DF")]) 3084 3085(define_insn "sse2_cvtps2pd" 3086 [(set (match_operand:V2DF 0 "register_operand" "=x") 3087 (float_extend:V2DF 3088 (vec_select:V2SF 3089 (match_operand:V4SF 1 "nonimmediate_operand" "xm") 3090 (parallel [(const_int 0) (const_int 1)]))))] 3091 "TARGET_SSE2" 3092 "%vcvtps2pd\t{%1, %0|%0, %q1}" 3093 [(set_attr "type" "ssecvt") 3094 (set_attr "amdfam10_decode" "direct") 3095 (set_attr "athlon_decode" "double") 3096 (set_attr "bdver1_decode" "double") 3097 (set_attr "prefix_data16" "0") 3098 (set_attr "prefix" "maybe_vex") 3099 (set_attr "mode" "V2DF")]) 3100 3101(define_expand "vec_unpacks_hi_v4sf" 3102 [(set (match_dup 2) 3103 (vec_select:V4SF 3104 (vec_concat:V8SF 3105 (match_dup 2) 3106 (match_operand:V4SF 1 "nonimmediate_operand")) 3107 (parallel [(const_int 6) (const_int 7) 3108 (const_int 2) (const_int 3)]))) 3109 (set (match_operand:V2DF 0 "register_operand") 3110 (float_extend:V2DF 3111 (vec_select:V2SF 3112 (match_dup 2) 3113 (parallel [(const_int 0) (const_int 1)]))))] 3114 "TARGET_SSE2" 3115 "operands[2] = gen_reg_rtx (V4SFmode);") 3116 3117(define_expand "vec_unpacks_hi_v8sf" 3118 [(set (match_dup 2) 3119 (vec_select:V4SF 3120 (match_operand:V8SF 1 "register_operand") 3121 (parallel [(const_int 4) (const_int 5) 3122 (const_int 6) (const_int 7)]))) 3123 (set (match_operand:V4DF 0 "register_operand") 3124 (float_extend:V4DF 3125 (match_dup 2)))] 3126 "TARGET_AVX" 3127 "operands[2] = gen_reg_rtx (V4SFmode);") 3128 3129(define_expand "vec_unpacks_lo_v4sf" 3130 [(set (match_operand:V2DF 0 "register_operand") 3131 (float_extend:V2DF 3132 (vec_select:V2SF 3133 (match_operand:V4SF 1 "nonimmediate_operand") 3134 (parallel [(const_int 0) (const_int 1)]))))] 3135 "TARGET_SSE2") 3136 3137(define_expand "vec_unpacks_lo_v8sf" 3138 [(set (match_operand:V4DF 0 "register_operand") 3139 (float_extend:V4DF 3140 (vec_select:V4SF 3141 (match_operand:V8SF 1 "nonimmediate_operand") 3142 (parallel [(const_int 0) (const_int 1) 3143 (const_int 2) (const_int 3)]))))] 3144 "TARGET_AVX") 3145 3146(define_mode_attr sseunpackfltmode 3147 [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")]) 3148 3149(define_expand "vec_unpacks_float_hi_<mode>" 3150 [(match_operand:<sseunpackfltmode> 0 "register_operand") 3151 (match_operand:VI2_AVX2 1 "register_operand")] 3152 "TARGET_SSE2" 3153{ 3154 rtx tmp = gen_reg_rtx (<sseunpackmode>mode); 3155 3156 emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1])); 3157 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 3158 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); 3159 DONE; 3160}) 3161 3162(define_expand "vec_unpacks_float_lo_<mode>" 3163 [(match_operand:<sseunpackfltmode> 0 "register_operand") 3164 (match_operand:VI2_AVX2 1 "register_operand")] 3165 "TARGET_SSE2" 3166{ 3167 rtx tmp = gen_reg_rtx (<sseunpackmode>mode); 3168 3169 emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1])); 3170 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 3171 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); 3172 DONE; 3173}) 3174 3175(define_expand "vec_unpacku_float_hi_<mode>" 3176 [(match_operand:<sseunpackfltmode> 0 "register_operand") 3177 (match_operand:VI2_AVX2 1 "register_operand")] 3178 "TARGET_SSE2" 3179{ 3180 rtx tmp = gen_reg_rtx (<sseunpackmode>mode); 3181 3182 emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1])); 3183 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 3184 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); 3185 DONE; 3186}) 3187 3188(define_expand "vec_unpacku_float_lo_<mode>" 3189 [(match_operand:<sseunpackfltmode> 0 "register_operand") 3190 (match_operand:VI2_AVX2 1 "register_operand")] 3191 "TARGET_SSE2" 3192{ 3193 rtx tmp = gen_reg_rtx (<sseunpackmode>mode); 3194 3195 emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1])); 3196 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 3197 gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp))); 3198 DONE; 3199}) 3200 3201(define_expand "vec_unpacks_float_hi_v4si" 3202 [(set (match_dup 2) 3203 (vec_select:V4SI 3204 (match_operand:V4SI 1 "nonimmediate_operand") 3205 (parallel [(const_int 2) (const_int 3) 3206 (const_int 2) (const_int 3)]))) 3207 (set (match_operand:V2DF 0 "register_operand") 3208 (float:V2DF 3209 (vec_select:V2SI 3210 (match_dup 2) 3211 (parallel [(const_int 0) (const_int 1)]))))] 3212 "TARGET_SSE2" 3213 "operands[2] = gen_reg_rtx (V4SImode);") 3214 3215(define_expand "vec_unpacks_float_lo_v4si" 3216 [(set (match_operand:V2DF 0 "register_operand") 3217 (float:V2DF 3218 (vec_select:V2SI 3219 (match_operand:V4SI 1 "nonimmediate_operand") 3220 (parallel [(const_int 0) (const_int 1)]))))] 3221 "TARGET_SSE2") 3222 3223(define_expand "vec_unpacks_float_hi_v8si" 3224 [(set (match_dup 2) 3225 (vec_select:V4SI 3226 (match_operand:V8SI 1 "nonimmediate_operand") 3227 (parallel [(const_int 4) (const_int 5) 3228 (const_int 6) (const_int 7)]))) 3229 (set (match_operand:V4DF 0 "register_operand") 3230 (float:V4DF 3231 (match_dup 2)))] 3232 "TARGET_AVX" 3233 "operands[2] = gen_reg_rtx (V4SImode);") 3234 3235(define_expand "vec_unpacks_float_lo_v8si" 3236 [(set (match_operand:V4DF 0 "register_operand") 3237 (float:V4DF 3238 (vec_select:V4SI 3239 (match_operand:V8SI 1 "nonimmediate_operand") 3240 (parallel [(const_int 0) (const_int 1) 3241 (const_int 2) (const_int 3)]))))] 3242 "TARGET_AVX") 3243 3244(define_expand "vec_unpacku_float_hi_v4si" 3245 [(set (match_dup 5) 3246 (vec_select:V4SI 3247 (match_operand:V4SI 1 "nonimmediate_operand") 3248 (parallel [(const_int 2) (const_int 3) 3249 (const_int 2) (const_int 3)]))) 3250 (set (match_dup 6) 3251 (float:V2DF 3252 (vec_select:V2SI 3253 (match_dup 5) 3254 (parallel [(const_int 0) (const_int 1)])))) 3255 (set (match_dup 7) 3256 (lt:V2DF (match_dup 6) (match_dup 3))) 3257 (set (match_dup 8) 3258 (and:V2DF (match_dup 7) (match_dup 4))) 3259 (set (match_operand:V2DF 0 "register_operand") 3260 (plus:V2DF (match_dup 6) (match_dup 8)))] 3261 "TARGET_SSE2" 3262{ 3263 REAL_VALUE_TYPE TWO32r; 3264 rtx x; 3265 int i; 3266 3267 real_ldexp (&TWO32r, &dconst1, 32); 3268 x = const_double_from_real_value (TWO32r, DFmode); 3269 3270 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); 3271 operands[4] = force_reg (V2DFmode, 3272 ix86_build_const_vector (V2DFmode, 1, x)); 3273 3274 operands[5] = gen_reg_rtx (V4SImode); 3275 3276 for (i = 6; i < 9; i++) 3277 operands[i] = gen_reg_rtx (V2DFmode); 3278}) 3279 3280(define_expand "vec_unpacku_float_lo_v4si" 3281 [(set (match_dup 5) 3282 (float:V2DF 3283 (vec_select:V2SI 3284 (match_operand:V4SI 1 "nonimmediate_operand") 3285 (parallel [(const_int 0) (const_int 1)])))) 3286 (set (match_dup 6) 3287 (lt:V2DF (match_dup 5) (match_dup 3))) 3288 (set (match_dup 7) 3289 (and:V2DF (match_dup 6) (match_dup 4))) 3290 (set (match_operand:V2DF 0 "register_operand") 3291 (plus:V2DF (match_dup 5) (match_dup 7)))] 3292 "TARGET_SSE2" 3293{ 3294 REAL_VALUE_TYPE TWO32r; 3295 rtx x; 3296 int i; 3297 3298 real_ldexp (&TWO32r, &dconst1, 32); 3299 x = const_double_from_real_value (TWO32r, DFmode); 3300 3301 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); 3302 operands[4] = force_reg (V2DFmode, 3303 ix86_build_const_vector (V2DFmode, 1, x)); 3304 3305 for (i = 5; i < 8; i++) 3306 operands[i] = gen_reg_rtx (V2DFmode); 3307}) 3308 3309(define_expand "vec_unpacku_float_hi_v8si" 3310 [(match_operand:V4DF 0 "register_operand") 3311 (match_operand:V8SI 1 "register_operand")] 3312 "TARGET_AVX" 3313{ 3314 REAL_VALUE_TYPE TWO32r; 3315 rtx x, tmp[6]; 3316 int i; 3317 3318 real_ldexp (&TWO32r, &dconst1, 32); 3319 x = const_double_from_real_value (TWO32r, DFmode); 3320 3321 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode)); 3322 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x)); 3323 tmp[5] = gen_reg_rtx (V4SImode); 3324 3325 for (i = 2; i < 5; i++) 3326 tmp[i] = gen_reg_rtx (V4DFmode); 3327 emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1])); 3328 emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5])); 3329 emit_insn (gen_rtx_SET (VOIDmode, tmp[3], 3330 gen_rtx_LT (V4DFmode, tmp[2], tmp[0]))); 3331 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1])); 3332 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4])); 3333 DONE; 3334}) 3335 3336(define_expand "vec_unpacku_float_lo_v8si" 3337 [(match_operand:V4DF 0 "register_operand") 3338 (match_operand:V8SI 1 "nonimmediate_operand")] 3339 "TARGET_AVX" 3340{ 3341 REAL_VALUE_TYPE TWO32r; 3342 rtx x, tmp[5]; 3343 int i; 3344 3345 real_ldexp (&TWO32r, &dconst1, 32); 3346 x = const_double_from_real_value (TWO32r, DFmode); 3347 3348 tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode)); 3349 tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x)); 3350 3351 for (i = 2; i < 5; i++) 3352 tmp[i] = gen_reg_rtx (V4DFmode); 3353 emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1])); 3354 emit_insn (gen_rtx_SET (VOIDmode, tmp[3], 3355 gen_rtx_LT (V4DFmode, tmp[2], tmp[0]))); 3356 emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1])); 3357 emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4])); 3358 DONE; 3359}) 3360 3361(define_expand "vec_pack_trunc_v4df" 3362 [(set (match_dup 3) 3363 (float_truncate:V4SF 3364 (match_operand:V4DF 1 "nonimmediate_operand"))) 3365 (set (match_dup 4) 3366 (float_truncate:V4SF 3367 (match_operand:V4DF 2 "nonimmediate_operand"))) 3368 (set (match_operand:V8SF 0 "register_operand") 3369 (vec_concat:V8SF 3370 (match_dup 3) 3371 (match_dup 4)))] 3372 "TARGET_AVX" 3373{ 3374 operands[3] = gen_reg_rtx (V4SFmode); 3375 operands[4] = gen_reg_rtx (V4SFmode); 3376}) 3377 3378(define_expand "vec_pack_trunc_v2df" 3379 [(match_operand:V4SF 0 "register_operand") 3380 (match_operand:V2DF 1 "nonimmediate_operand") 3381 (match_operand:V2DF 2 "nonimmediate_operand")] 3382 "TARGET_SSE2" 3383{ 3384 rtx tmp0, tmp1; 3385 3386 if (TARGET_AVX && !TARGET_PREFER_AVX128) 3387 { 3388 tmp0 = gen_reg_rtx (V4DFmode); 3389 tmp1 = force_reg (V2DFmode, operands[1]); 3390 3391 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 3392 emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0)); 3393 } 3394 else 3395 { 3396 tmp0 = gen_reg_rtx (V4SFmode); 3397 tmp1 = gen_reg_rtx (V4SFmode); 3398 3399 emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1])); 3400 emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2])); 3401 emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1)); 3402 } 3403 DONE; 3404}) 3405 3406(define_expand "vec_pack_sfix_trunc_v4df" 3407 [(match_operand:V8SI 0 "register_operand") 3408 (match_operand:V4DF 1 "nonimmediate_operand") 3409 (match_operand:V4DF 2 "nonimmediate_operand")] 3410 "TARGET_AVX" 3411{ 3412 rtx r1, r2; 3413 3414 r1 = gen_reg_rtx (V4SImode); 3415 r2 = gen_reg_rtx (V4SImode); 3416 3417 emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1])); 3418 emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2])); 3419 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); 3420 DONE; 3421}) 3422 3423(define_expand "vec_pack_sfix_trunc_v2df" 3424 [(match_operand:V4SI 0 "register_operand") 3425 (match_operand:V2DF 1 "nonimmediate_operand") 3426 (match_operand:V2DF 2 "nonimmediate_operand")] 3427 "TARGET_SSE2" 3428{ 3429 rtx tmp0, tmp1; 3430 3431 if (TARGET_AVX && !TARGET_PREFER_AVX128) 3432 { 3433 tmp0 = gen_reg_rtx (V4DFmode); 3434 tmp1 = force_reg (V2DFmode, operands[1]); 3435 3436 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 3437 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0)); 3438 } 3439 else 3440 { 3441 tmp0 = gen_reg_rtx (V4SImode); 3442 tmp1 = gen_reg_rtx (V4SImode); 3443 3444 emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1])); 3445 emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2])); 3446 emit_insn 3447 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), 3448 gen_lowpart (V2DImode, tmp0), 3449 gen_lowpart (V2DImode, tmp1))); 3450 } 3451 DONE; 3452}) 3453 3454(define_mode_attr ssepackfltmode 3455 [(V4DF "V8SI") (V2DF "V4SI")]) 3456 3457(define_expand "vec_pack_ufix_trunc_<mode>" 3458 [(match_operand:<ssepackfltmode> 0 "register_operand") 3459 (match_operand:VF2 1 "register_operand") 3460 (match_operand:VF2 2 "register_operand")] 3461 "TARGET_SSE2" 3462{ 3463 rtx tmp[7]; 3464 tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]); 3465 tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]); 3466 tmp[4] = gen_reg_rtx (<ssepackfltmode>mode); 3467 emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1])); 3468 if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2) 3469 { 3470 tmp[5] = gen_reg_rtx (<ssepackfltmode>mode); 3471 ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0); 3472 } 3473 else 3474 { 3475 tmp[5] = gen_reg_rtx (V8SFmode); 3476 ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]), 3477 gen_lowpart (V8SFmode, tmp[3]), 0); 3478 tmp[5] = gen_lowpart (V8SImode, tmp[5]); 3479 } 3480 tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5], 3481 operands[0], 0, OPTAB_DIRECT); 3482 if (tmp[6] != operands[0]) 3483 emit_move_insn (operands[0], tmp[6]); 3484 DONE; 3485}) 3486 3487(define_expand "vec_pack_sfix_v4df" 3488 [(match_operand:V8SI 0 "register_operand") 3489 (match_operand:V4DF 1 "nonimmediate_operand") 3490 (match_operand:V4DF 2 "nonimmediate_operand")] 3491 "TARGET_AVX" 3492{ 3493 rtx r1, r2; 3494 3495 r1 = gen_reg_rtx (V4SImode); 3496 r2 = gen_reg_rtx (V4SImode); 3497 3498 emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1])); 3499 emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2])); 3500 emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); 3501 DONE; 3502}) 3503 3504(define_expand "vec_pack_sfix_v2df" 3505 [(match_operand:V4SI 0 "register_operand") 3506 (match_operand:V2DF 1 "nonimmediate_operand") 3507 (match_operand:V2DF 2 "nonimmediate_operand")] 3508 "TARGET_SSE2" 3509{ 3510 rtx tmp0, tmp1; 3511 3512 if (TARGET_AVX && !TARGET_PREFER_AVX128) 3513 { 3514 tmp0 = gen_reg_rtx (V4DFmode); 3515 tmp1 = force_reg (V2DFmode, operands[1]); 3516 3517 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 3518 emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0)); 3519 } 3520 else 3521 { 3522 tmp0 = gen_reg_rtx (V4SImode); 3523 tmp1 = gen_reg_rtx (V4SImode); 3524 3525 emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1])); 3526 emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2])); 3527 emit_insn 3528 (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), 3529 gen_lowpart (V2DImode, tmp0), 3530 gen_lowpart (V2DImode, tmp1))); 3531 } 3532 DONE; 3533}) 3534 3535;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3536;; 3537;; Parallel single-precision floating point element swizzling 3538;; 3539;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3540 3541(define_expand "sse_movhlps_exp" 3542 [(set (match_operand:V4SF 0 "nonimmediate_operand") 3543 (vec_select:V4SF 3544 (vec_concat:V8SF 3545 (match_operand:V4SF 1 "nonimmediate_operand") 3546 (match_operand:V4SF 2 "nonimmediate_operand")) 3547 (parallel [(const_int 6) 3548 (const_int 7) 3549 (const_int 2) 3550 (const_int 3)])))] 3551 "TARGET_SSE" 3552{ 3553 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); 3554 3555 emit_insn (gen_sse_movhlps (dst, operands[1], operands[2])); 3556 3557 /* Fix up the destination if needed. */ 3558 if (dst != operands[0]) 3559 emit_move_insn (operands[0], dst); 3560 3561 DONE; 3562}) 3563 3564(define_insn "sse_movhlps" 3565 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") 3566 (vec_select:V4SF 3567 (vec_concat:V8SF 3568 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") 3569 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x")) 3570 (parallel [(const_int 6) 3571 (const_int 7) 3572 (const_int 2) 3573 (const_int 3)])))] 3574 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 3575 "@ 3576 movhlps\t{%2, %0|%0, %2} 3577 vmovhlps\t{%2, %1, %0|%0, %1, %2} 3578 movlps\t{%H2, %0|%0, %H2} 3579 vmovlps\t{%H2, %1, %0|%0, %1, %H2} 3580 %vmovhps\t{%2, %0|%0, %2}" 3581 [(set_attr "isa" "noavx,avx,noavx,avx,*") 3582 (set_attr "type" "ssemov") 3583 (set_attr "ssememalign" "64") 3584 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") 3585 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) 3586 3587(define_expand "sse_movlhps_exp" 3588 [(set (match_operand:V4SF 0 "nonimmediate_operand") 3589 (vec_select:V4SF 3590 (vec_concat:V8SF 3591 (match_operand:V4SF 1 "nonimmediate_operand") 3592 (match_operand:V4SF 2 "nonimmediate_operand")) 3593 (parallel [(const_int 0) 3594 (const_int 1) 3595 (const_int 4) 3596 (const_int 5)])))] 3597 "TARGET_SSE" 3598{ 3599 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); 3600 3601 emit_insn (gen_sse_movlhps (dst, operands[1], operands[2])); 3602 3603 /* Fix up the destination if needed. */ 3604 if (dst != operands[0]) 3605 emit_move_insn (operands[0], dst); 3606 3607 DONE; 3608}) 3609 3610(define_insn "sse_movlhps" 3611 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") 3612 (vec_select:V4SF 3613 (vec_concat:V8SF 3614 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") 3615 (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x")) 3616 (parallel [(const_int 0) 3617 (const_int 1) 3618 (const_int 4) 3619 (const_int 5)])))] 3620 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" 3621 "@ 3622 movlhps\t{%2, %0|%0, %2} 3623 vmovlhps\t{%2, %1, %0|%0, %1, %2} 3624 movhps\t{%2, %0|%0, %2} 3625 vmovhps\t{%2, %1, %0|%0, %1, %2} 3626 %vmovlps\t{%2, %H0|%H0, %2}" 3627 [(set_attr "isa" "noavx,avx,noavx,avx,*") 3628 (set_attr "type" "ssemov") 3629 (set_attr "ssememalign" "64") 3630 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") 3631 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) 3632 3633;; Recall that the 256-bit unpck insns only shuffle within their lanes. 3634(define_insn "avx_unpckhps256" 3635 [(set (match_operand:V8SF 0 "register_operand" "=x") 3636 (vec_select:V8SF 3637 (vec_concat:V16SF 3638 (match_operand:V8SF 1 "register_operand" "x") 3639 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 3640 (parallel [(const_int 2) (const_int 10) 3641 (const_int 3) (const_int 11) 3642 (const_int 6) (const_int 14) 3643 (const_int 7) (const_int 15)])))] 3644 "TARGET_AVX" 3645 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" 3646 [(set_attr "type" "sselog") 3647 (set_attr "prefix" "vex") 3648 (set_attr "mode" "V8SF")]) 3649 3650(define_expand "vec_interleave_highv8sf" 3651 [(set (match_dup 3) 3652 (vec_select:V8SF 3653 (vec_concat:V16SF 3654 (match_operand:V8SF 1 "register_operand" "x") 3655 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 3656 (parallel [(const_int 0) (const_int 8) 3657 (const_int 1) (const_int 9) 3658 (const_int 4) (const_int 12) 3659 (const_int 5) (const_int 13)]))) 3660 (set (match_dup 4) 3661 (vec_select:V8SF 3662 (vec_concat:V16SF 3663 (match_dup 1) 3664 (match_dup 2)) 3665 (parallel [(const_int 2) (const_int 10) 3666 (const_int 3) (const_int 11) 3667 (const_int 6) (const_int 14) 3668 (const_int 7) (const_int 15)]))) 3669 (set (match_operand:V8SF 0 "register_operand") 3670 (vec_select:V8SF 3671 (vec_concat:V16SF 3672 (match_dup 3) 3673 (match_dup 4)) 3674 (parallel [(const_int 4) (const_int 5) 3675 (const_int 6) (const_int 7) 3676 (const_int 12) (const_int 13) 3677 (const_int 14) (const_int 15)])))] 3678 "TARGET_AVX" 3679{ 3680 operands[3] = gen_reg_rtx (V8SFmode); 3681 operands[4] = gen_reg_rtx (V8SFmode); 3682}) 3683 3684(define_insn "vec_interleave_highv4sf" 3685 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 3686 (vec_select:V4SF 3687 (vec_concat:V8SF 3688 (match_operand:V4SF 1 "register_operand" "0,x") 3689 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) 3690 (parallel [(const_int 2) (const_int 6) 3691 (const_int 3) (const_int 7)])))] 3692 "TARGET_SSE" 3693 "@ 3694 unpckhps\t{%2, %0|%0, %2} 3695 vunpckhps\t{%2, %1, %0|%0, %1, %2}" 3696 [(set_attr "isa" "noavx,avx") 3697 (set_attr "type" "sselog") 3698 (set_attr "prefix" "orig,vex") 3699 (set_attr "mode" "V4SF")]) 3700 3701;; Recall that the 256-bit unpck insns only shuffle within their lanes. 3702(define_insn "avx_unpcklps256" 3703 [(set (match_operand:V8SF 0 "register_operand" "=x") 3704 (vec_select:V8SF 3705 (vec_concat:V16SF 3706 (match_operand:V8SF 1 "register_operand" "x") 3707 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 3708 (parallel [(const_int 0) (const_int 8) 3709 (const_int 1) (const_int 9) 3710 (const_int 4) (const_int 12) 3711 (const_int 5) (const_int 13)])))] 3712 "TARGET_AVX" 3713 "vunpcklps\t{%2, %1, %0|%0, %1, %2}" 3714 [(set_attr "type" "sselog") 3715 (set_attr "prefix" "vex") 3716 (set_attr "mode" "V8SF")]) 3717 3718(define_expand "vec_interleave_lowv8sf" 3719 [(set (match_dup 3) 3720 (vec_select:V8SF 3721 (vec_concat:V16SF 3722 (match_operand:V8SF 1 "register_operand" "x") 3723 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 3724 (parallel [(const_int 0) (const_int 8) 3725 (const_int 1) (const_int 9) 3726 (const_int 4) (const_int 12) 3727 (const_int 5) (const_int 13)]))) 3728 (set (match_dup 4) 3729 (vec_select:V8SF 3730 (vec_concat:V16SF 3731 (match_dup 1) 3732 (match_dup 2)) 3733 (parallel [(const_int 2) (const_int 10) 3734 (const_int 3) (const_int 11) 3735 (const_int 6) (const_int 14) 3736 (const_int 7) (const_int 15)]))) 3737 (set (match_operand:V8SF 0 "register_operand") 3738 (vec_select:V8SF 3739 (vec_concat:V16SF 3740 (match_dup 3) 3741 (match_dup 4)) 3742 (parallel [(const_int 0) (const_int 1) 3743 (const_int 2) (const_int 3) 3744 (const_int 8) (const_int 9) 3745 (const_int 10) (const_int 11)])))] 3746 "TARGET_AVX" 3747{ 3748 operands[3] = gen_reg_rtx (V8SFmode); 3749 operands[4] = gen_reg_rtx (V8SFmode); 3750}) 3751 3752(define_insn "vec_interleave_lowv4sf" 3753 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 3754 (vec_select:V4SF 3755 (vec_concat:V8SF 3756 (match_operand:V4SF 1 "register_operand" "0,x") 3757 (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")) 3758 (parallel [(const_int 0) (const_int 4) 3759 (const_int 1) (const_int 5)])))] 3760 "TARGET_SSE" 3761 "@ 3762 unpcklps\t{%2, %0|%0, %2} 3763 vunpcklps\t{%2, %1, %0|%0, %1, %2}" 3764 [(set_attr "isa" "noavx,avx") 3765 (set_attr "type" "sselog") 3766 (set_attr "prefix" "orig,vex") 3767 (set_attr "mode" "V4SF")]) 3768 3769;; These are modeled with the same vec_concat as the others so that we 3770;; capture users of shufps that can use the new instructions 3771(define_insn "avx_movshdup256" 3772 [(set (match_operand:V8SF 0 "register_operand" "=x") 3773 (vec_select:V8SF 3774 (vec_concat:V16SF 3775 (match_operand:V8SF 1 "nonimmediate_operand" "xm") 3776 (match_dup 1)) 3777 (parallel [(const_int 1) (const_int 1) 3778 (const_int 3) (const_int 3) 3779 (const_int 5) (const_int 5) 3780 (const_int 7) (const_int 7)])))] 3781 "TARGET_AVX" 3782 "vmovshdup\t{%1, %0|%0, %1}" 3783 [(set_attr "type" "sse") 3784 (set_attr "prefix" "vex") 3785 (set_attr "mode" "V8SF")]) 3786 3787(define_insn "sse3_movshdup" 3788 [(set (match_operand:V4SF 0 "register_operand" "=x") 3789 (vec_select:V4SF 3790 (vec_concat:V8SF 3791 (match_operand:V4SF 1 "nonimmediate_operand" "xm") 3792 (match_dup 1)) 3793 (parallel [(const_int 1) 3794 (const_int 1) 3795 (const_int 7) 3796 (const_int 7)])))] 3797 "TARGET_SSE3" 3798 "%vmovshdup\t{%1, %0|%0, %1}" 3799 [(set_attr "type" "sse") 3800 (set_attr "prefix_rep" "1") 3801 (set_attr "prefix" "maybe_vex") 3802 (set_attr "mode" "V4SF")]) 3803 3804(define_insn "avx_movsldup256" 3805 [(set (match_operand:V8SF 0 "register_operand" "=x") 3806 (vec_select:V8SF 3807 (vec_concat:V16SF 3808 (match_operand:V8SF 1 "nonimmediate_operand" "xm") 3809 (match_dup 1)) 3810 (parallel [(const_int 0) (const_int 0) 3811 (const_int 2) (const_int 2) 3812 (const_int 4) (const_int 4) 3813 (const_int 6) (const_int 6)])))] 3814 "TARGET_AVX" 3815 "vmovsldup\t{%1, %0|%0, %1}" 3816 [(set_attr "type" "sse") 3817 (set_attr "prefix" "vex") 3818 (set_attr "mode" "V8SF")]) 3819 3820(define_insn "sse3_movsldup" 3821 [(set (match_operand:V4SF 0 "register_operand" "=x") 3822 (vec_select:V4SF 3823 (vec_concat:V8SF 3824 (match_operand:V4SF 1 "nonimmediate_operand" "xm") 3825 (match_dup 1)) 3826 (parallel [(const_int 0) 3827 (const_int 0) 3828 (const_int 6) 3829 (const_int 6)])))] 3830 "TARGET_SSE3" 3831 "%vmovsldup\t{%1, %0|%0, %1}" 3832 [(set_attr "type" "sse") 3833 (set_attr "prefix_rep" "1") 3834 (set_attr "prefix" "maybe_vex") 3835 (set_attr "mode" "V4SF")]) 3836 3837(define_expand "avx_shufps256" 3838 [(match_operand:V8SF 0 "register_operand") 3839 (match_operand:V8SF 1 "register_operand") 3840 (match_operand:V8SF 2 "nonimmediate_operand") 3841 (match_operand:SI 3 "const_int_operand")] 3842 "TARGET_AVX" 3843{ 3844 int mask = INTVAL (operands[3]); 3845 emit_insn (gen_avx_shufps256_1 (operands[0], operands[1], operands[2], 3846 GEN_INT ((mask >> 0) & 3), 3847 GEN_INT ((mask >> 2) & 3), 3848 GEN_INT (((mask >> 4) & 3) + 8), 3849 GEN_INT (((mask >> 6) & 3) + 8), 3850 GEN_INT (((mask >> 0) & 3) + 4), 3851 GEN_INT (((mask >> 2) & 3) + 4), 3852 GEN_INT (((mask >> 4) & 3) + 12), 3853 GEN_INT (((mask >> 6) & 3) + 12))); 3854 DONE; 3855}) 3856 3857;; One bit in mask selects 2 elements. 3858(define_insn "avx_shufps256_1" 3859 [(set (match_operand:V8SF 0 "register_operand" "=x") 3860 (vec_select:V8SF 3861 (vec_concat:V16SF 3862 (match_operand:V8SF 1 "register_operand" "x") 3863 (match_operand:V8SF 2 "nonimmediate_operand" "xm")) 3864 (parallel [(match_operand 3 "const_0_to_3_operand" ) 3865 (match_operand 4 "const_0_to_3_operand" ) 3866 (match_operand 5 "const_8_to_11_operand" ) 3867 (match_operand 6 "const_8_to_11_operand" ) 3868 (match_operand 7 "const_4_to_7_operand" ) 3869 (match_operand 8 "const_4_to_7_operand" ) 3870 (match_operand 9 "const_12_to_15_operand") 3871 (match_operand 10 "const_12_to_15_operand")])))] 3872 "TARGET_AVX 3873 && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4) 3874 && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4) 3875 && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4) 3876 && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))" 3877{ 3878 int mask; 3879 mask = INTVAL (operands[3]); 3880 mask |= INTVAL (operands[4]) << 2; 3881 mask |= (INTVAL (operands[5]) - 8) << 4; 3882 mask |= (INTVAL (operands[6]) - 8) << 6; 3883 operands[3] = GEN_INT (mask); 3884 3885 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 3886} 3887 [(set_attr "type" "sseshuf") 3888 (set_attr "length_immediate" "1") 3889 (set_attr "prefix" "vex") 3890 (set_attr "mode" "V8SF")]) 3891 3892(define_expand "sse_shufps" 3893 [(match_operand:V4SF 0 "register_operand") 3894 (match_operand:V4SF 1 "register_operand") 3895 (match_operand:V4SF 2 "nonimmediate_operand") 3896 (match_operand:SI 3 "const_int_operand")] 3897 "TARGET_SSE" 3898{ 3899 int mask = INTVAL (operands[3]); 3900 emit_insn (gen_sse_shufps_v4sf (operands[0], operands[1], operands[2], 3901 GEN_INT ((mask >> 0) & 3), 3902 GEN_INT ((mask >> 2) & 3), 3903 GEN_INT (((mask >> 4) & 3) + 4), 3904 GEN_INT (((mask >> 6) & 3) + 4))); 3905 DONE; 3906}) 3907 3908(define_insn "sse_shufps_<mode>" 3909 [(set (match_operand:VI4F_128 0 "register_operand" "=x,x") 3910 (vec_select:VI4F_128 3911 (vec_concat:<ssedoublevecmode> 3912 (match_operand:VI4F_128 1 "register_operand" "0,x") 3913 (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm")) 3914 (parallel [(match_operand 3 "const_0_to_3_operand") 3915 (match_operand 4 "const_0_to_3_operand") 3916 (match_operand 5 "const_4_to_7_operand") 3917 (match_operand 6 "const_4_to_7_operand")])))] 3918 "TARGET_SSE" 3919{ 3920 int mask = 0; 3921 mask |= INTVAL (operands[3]) << 0; 3922 mask |= INTVAL (operands[4]) << 2; 3923 mask |= (INTVAL (operands[5]) - 4) << 4; 3924 mask |= (INTVAL (operands[6]) - 4) << 6; 3925 operands[3] = GEN_INT (mask); 3926 3927 switch (which_alternative) 3928 { 3929 case 0: 3930 return "shufps\t{%3, %2, %0|%0, %2, %3}"; 3931 case 1: 3932 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 3933 default: 3934 gcc_unreachable (); 3935 } 3936} 3937 [(set_attr "isa" "noavx,avx") 3938 (set_attr "type" "sseshuf") 3939 (set_attr "length_immediate" "1") 3940 (set_attr "prefix" "orig,vex") 3941 (set_attr "mode" "V4SF")]) 3942 3943(define_insn "sse_storehps" 3944 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") 3945 (vec_select:V2SF 3946 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") 3947 (parallel [(const_int 2) (const_int 3)])))] 3948 "TARGET_SSE" 3949 "@ 3950 %vmovhps\t{%1, %0|%0, %1} 3951 %vmovhlps\t{%1, %d0|%d0, %1} 3952 %vmovlps\t{%H1, %d0|%d0, %H1}" 3953 [(set_attr "type" "ssemov") 3954 (set_attr "ssememalign" "64") 3955 (set_attr "prefix" "maybe_vex") 3956 (set_attr "mode" "V2SF,V4SF,V2SF")]) 3957 3958(define_expand "sse_loadhps_exp" 3959 [(set (match_operand:V4SF 0 "nonimmediate_operand") 3960 (vec_concat:V4SF 3961 (vec_select:V2SF 3962 (match_operand:V4SF 1 "nonimmediate_operand") 3963 (parallel [(const_int 0) (const_int 1)])) 3964 (match_operand:V2SF 2 "nonimmediate_operand")))] 3965 "TARGET_SSE" 3966{ 3967 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); 3968 3969 emit_insn (gen_sse_loadhps (dst, operands[1], operands[2])); 3970 3971 /* Fix up the destination if needed. */ 3972 if (dst != operands[0]) 3973 emit_move_insn (operands[0], dst); 3974 3975 DONE; 3976}) 3977 3978(define_insn "sse_loadhps" 3979 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,o") 3980 (vec_concat:V4SF 3981 (vec_select:V2SF 3982 (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0") 3983 (parallel [(const_int 0) (const_int 1)])) 3984 (match_operand:V2SF 2 "nonimmediate_operand" " m,m,x,x,x")))] 3985 "TARGET_SSE" 3986 "@ 3987 movhps\t{%2, %0|%0, %2} 3988 vmovhps\t{%2, %1, %0|%0, %1, %2} 3989 movlhps\t{%2, %0|%0, %2} 3990 vmovlhps\t{%2, %1, %0|%0, %1, %2} 3991 %vmovlps\t{%2, %H0|%H0, %2}" 3992 [(set_attr "isa" "noavx,avx,noavx,avx,*") 3993 (set_attr "type" "ssemov") 3994 (set_attr "ssememalign" "64") 3995 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") 3996 (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")]) 3997 3998(define_insn "sse_storelps" 3999 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") 4000 (vec_select:V2SF 4001 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m") 4002 (parallel [(const_int 0) (const_int 1)])))] 4003 "TARGET_SSE" 4004 "@ 4005 %vmovlps\t{%1, %0|%0, %1} 4006 %vmovaps\t{%1, %0|%0, %1} 4007 %vmovlps\t{%1, %d0|%d0, %1}" 4008 [(set_attr "type" "ssemov") 4009 (set_attr "prefix" "maybe_vex") 4010 (set_attr "mode" "V2SF,V4SF,V2SF")]) 4011 4012(define_expand "sse_loadlps_exp" 4013 [(set (match_operand:V4SF 0 "nonimmediate_operand") 4014 (vec_concat:V4SF 4015 (match_operand:V2SF 2 "nonimmediate_operand") 4016 (vec_select:V2SF 4017 (match_operand:V4SF 1 "nonimmediate_operand") 4018 (parallel [(const_int 2) (const_int 3)]))))] 4019 "TARGET_SSE" 4020{ 4021 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands); 4022 4023 emit_insn (gen_sse_loadlps (dst, operands[1], operands[2])); 4024 4025 /* Fix up the destination if needed. */ 4026 if (dst != operands[0]) 4027 emit_move_insn (operands[0], dst); 4028 4029 DONE; 4030}) 4031 4032(define_insn "sse_loadlps" 4033 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,x,m") 4034 (vec_concat:V4SF 4035 (match_operand:V2SF 2 "nonimmediate_operand" " 0,x,m,m,x") 4036 (vec_select:V2SF 4037 (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0") 4038 (parallel [(const_int 2) (const_int 3)]))))] 4039 "TARGET_SSE" 4040 "@ 4041 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} 4042 vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4} 4043 movlps\t{%2, %0|%0, %2} 4044 vmovlps\t{%2, %1, %0|%0, %1, %2} 4045 %vmovlps\t{%2, %0|%0, %2}" 4046 [(set_attr "isa" "noavx,avx,noavx,avx,*") 4047 (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov") 4048 (set_attr "ssememalign" "64") 4049 (set_attr "length_immediate" "1,1,*,*,*") 4050 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex") 4051 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")]) 4052 4053(define_insn "sse_movss" 4054 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 4055 (vec_merge:V4SF 4056 (match_operand:V4SF 2 "register_operand" " x,x") 4057 (match_operand:V4SF 1 "register_operand" " 0,x") 4058 (const_int 1)))] 4059 "TARGET_SSE" 4060 "@ 4061 movss\t{%2, %0|%0, %2} 4062 vmovss\t{%2, %1, %0|%0, %1, %2}" 4063 [(set_attr "isa" "noavx,avx") 4064 (set_attr "type" "ssemov") 4065 (set_attr "prefix" "orig,vex") 4066 (set_attr "mode" "SF")]) 4067 4068(define_insn "avx2_vec_dup<mode>" 4069 [(set (match_operand:VF1 0 "register_operand" "=x") 4070 (vec_duplicate:VF1 4071 (vec_select:SF 4072 (match_operand:V4SF 1 "register_operand" "x") 4073 (parallel [(const_int 0)]))))] 4074 "TARGET_AVX2" 4075 "vbroadcastss\t{%1, %0|%0, %1}" 4076 [(set_attr "type" "sselog1") 4077 (set_attr "prefix" "vex") 4078 (set_attr "mode" "<MODE>")]) 4079 4080(define_insn "avx2_vec_dupv8sf_1" 4081 [(set (match_operand:V8SF 0 "register_operand" "=x") 4082 (vec_duplicate:V8SF 4083 (vec_select:SF 4084 (match_operand:V8SF 1 "register_operand" "x") 4085 (parallel [(const_int 0)]))))] 4086 "TARGET_AVX2" 4087 "vbroadcastss\t{%x1, %0|%0, %x1}" 4088 [(set_attr "type" "sselog1") 4089 (set_attr "prefix" "vex") 4090 (set_attr "mode" "V8SF")]) 4091 4092(define_insn "vec_dupv4sf" 4093 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") 4094 (vec_duplicate:V4SF 4095 (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))] 4096 "TARGET_SSE" 4097 "@ 4098 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} 4099 vbroadcastss\t{%1, %0|%0, %1} 4100 shufps\t{$0, %0, %0|%0, %0, 0}" 4101 [(set_attr "isa" "avx,avx,noavx") 4102 (set_attr "type" "sseshuf1,ssemov,sseshuf1") 4103 (set_attr "length_immediate" "1,0,1") 4104 (set_attr "prefix_extra" "0,1,*") 4105 (set_attr "prefix" "vex,vex,orig") 4106 (set_attr "mode" "V4SF")]) 4107 4108;; Although insertps takes register source, we prefer 4109;; unpcklps with register source since it is shorter. 4110(define_insn "*vec_concatv2sf_sse4_1" 4111 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,x,x,*y ,*y") 4112 (vec_concat:V2SF 4113 (match_operand:SF 1 "nonimmediate_operand" " 0,x,0,x,m, 0 , m") 4114 (match_operand:SF 2 "vector_move_operand" " x,x,m,m,C,*ym, C")))] 4115 "TARGET_SSE4_1" 4116 "@ 4117 unpcklps\t{%2, %0|%0, %2} 4118 vunpcklps\t{%2, %1, %0|%0, %1, %2} 4119 insertps\t{$0x10, %2, %0|%0, %2, 0x10} 4120 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} 4121 %vmovss\t{%1, %0|%0, %1} 4122 punpckldq\t{%2, %0|%0, %2} 4123 movd\t{%1, %0|%0, %1}" 4124 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") 4125 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") 4126 (set_attr "prefix_data16" "*,*,1,*,*,*,*") 4127 (set_attr "prefix_extra" "*,*,1,1,*,*,*") 4128 (set_attr "length_immediate" "*,*,1,1,*,*,*") 4129 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig") 4130 (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,SF,DI,DI")]) 4131 4132;; ??? In theory we can match memory for the MMX alternative, but allowing 4133;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE 4134;; alternatives pretty much forces the MMX alternative to be chosen. 4135(define_insn "*vec_concatv2sf_sse" 4136 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y") 4137 (vec_concat:V2SF 4138 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m") 4139 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))] 4140 "TARGET_SSE" 4141 "@ 4142 unpcklps\t{%2, %0|%0, %2} 4143 movss\t{%1, %0|%0, %1} 4144 punpckldq\t{%2, %0|%0, %2} 4145 movd\t{%1, %0|%0, %1}" 4146 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") 4147 (set_attr "mode" "V4SF,SF,DI,DI")]) 4148 4149(define_insn "*vec_concatv4sf" 4150 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x,x") 4151 (vec_concat:V4SF 4152 (match_operand:V2SF 1 "register_operand" " 0,x,0,x") 4153 (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))] 4154 "TARGET_SSE" 4155 "@ 4156 movlhps\t{%2, %0|%0, %2} 4157 vmovlhps\t{%2, %1, %0|%0, %1, %2} 4158 movhps\t{%2, %0|%0, %2} 4159 vmovhps\t{%2, %1, %0|%0, %1, %2}" 4160 [(set_attr "isa" "noavx,avx,noavx,avx") 4161 (set_attr "type" "ssemov") 4162 (set_attr "prefix" "orig,vex,orig,vex") 4163 (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) 4164 4165(define_expand "vec_init<mode>" 4166 [(match_operand:V_128 0 "register_operand") 4167 (match_operand 1)] 4168 "TARGET_SSE" 4169{ 4170 ix86_expand_vector_init (false, operands[0], operands[1]); 4171 DONE; 4172}) 4173 4174;; Avoid combining registers from different units in a single alternative, 4175;; see comment above inline_secondary_memory_needed function in i386.c 4176(define_insn "vec_set<mode>_0" 4177 [(set (match_operand:VI4F_128 0 "nonimmediate_operand" 4178 "=x,x,x ,x,x,x,x ,x ,m ,m ,m") 4179 (vec_merge:VI4F_128 4180 (vec_duplicate:VI4F_128 4181 (match_operand:<ssescalarmode> 2 "general_operand" 4182 " x,m,*r,m,x,x,*rm,*rm,!x,!*re,!*fF")) 4183 (match_operand:VI4F_128 1 "vector_move_operand" 4184 " C,C,C ,C,0,x,0 ,x ,0 ,0 ,0") 4185 (const_int 1)))] 4186 "TARGET_SSE" 4187 "@ 4188 %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe} 4189 %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2} 4190 %vmovd\t{%2, %0|%0, %2} 4191 movss\t{%2, %0|%0, %2} 4192 movss\t{%2, %0|%0, %2} 4193 vmovss\t{%2, %1, %0|%0, %1, %2} 4194 pinsrd\t{$0, %2, %0|%0, %2, 0} 4195 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} 4196 # 4197 # 4198 #" 4199 [(set_attr "isa" "sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,avx,*,*,*") 4200 (set (attr "type") 4201 (cond [(eq_attr "alternative" "0,6,7") 4202 (const_string "sselog") 4203 (eq_attr "alternative" "9") 4204 (const_string "imov") 4205 (eq_attr "alternative" "10") 4206 (const_string "fmov") 4207 ] 4208 (const_string "ssemov"))) 4209 (set_attr "prefix_extra" "*,*,*,*,*,*,1,1,*,*,*") 4210 (set_attr "length_immediate" "*,*,*,*,*,*,1,1,*,*,*") 4211 (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex,*,*,*") 4212 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,*,*,*")]) 4213 4214;; A subset is vec_setv4sf. 4215(define_insn "*vec_setv4sf_sse4_1" 4216 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 4217 (vec_merge:V4SF 4218 (vec_duplicate:V4SF 4219 (match_operand:SF 2 "nonimmediate_operand" "xm,xm")) 4220 (match_operand:V4SF 1 "register_operand" "0,x") 4221 (match_operand:SI 3 "const_int_operand")))] 4222 "TARGET_SSE4_1 4223 && ((unsigned) exact_log2 (INTVAL (operands[3])) 4224 < GET_MODE_NUNITS (V4SFmode))" 4225{ 4226 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); 4227 switch (which_alternative) 4228 { 4229 case 0: 4230 return "insertps\t{%3, %2, %0|%0, %2, %3}"; 4231 case 1: 4232 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4233 default: 4234 gcc_unreachable (); 4235 } 4236} 4237 [(set_attr "isa" "noavx,avx") 4238 (set_attr "type" "sselog") 4239 (set_attr "prefix_data16" "1,*") 4240 (set_attr "prefix_extra" "1") 4241 (set_attr "length_immediate" "1") 4242 (set_attr "prefix" "orig,vex") 4243 (set_attr "mode" "V4SF")]) 4244 4245(define_insn "sse4_1_insertps" 4246 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 4247 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm") 4248 (match_operand:V4SF 1 "register_operand" "0,x") 4249 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 4250 UNSPEC_INSERTPS))] 4251 "TARGET_SSE4_1" 4252{ 4253 if (MEM_P (operands[2])) 4254 { 4255 unsigned count_s = INTVAL (operands[3]) >> 6; 4256 if (count_s) 4257 operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f); 4258 operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4); 4259 } 4260 switch (which_alternative) 4261 { 4262 case 0: 4263 return "insertps\t{%3, %2, %0|%0, %2, %3}"; 4264 case 1: 4265 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4266 default: 4267 gcc_unreachable (); 4268 } 4269} 4270 [(set_attr "isa" "noavx,avx") 4271 (set_attr "type" "sselog") 4272 (set_attr "prefix_data16" "1,*") 4273 (set_attr "prefix_extra" "1") 4274 (set_attr "length_immediate" "1") 4275 (set_attr "prefix" "orig,vex") 4276 (set_attr "mode" "V4SF")]) 4277 4278(define_split 4279 [(set (match_operand:VI4F_128 0 "memory_operand") 4280 (vec_merge:VI4F_128 4281 (vec_duplicate:VI4F_128 4282 (match_operand:<ssescalarmode> 1 "nonmemory_operand")) 4283 (match_dup 0) 4284 (const_int 1)))] 4285 "TARGET_SSE && reload_completed" 4286 [(const_int 0)] 4287{ 4288 emit_move_insn (adjust_address (operands[0], <ssescalarmode>mode, 0), 4289 operands[1]); 4290 DONE; 4291}) 4292 4293(define_expand "vec_set<mode>" 4294 [(match_operand:V 0 "register_operand") 4295 (match_operand:<ssescalarmode> 1 "register_operand") 4296 (match_operand 2 "const_int_operand")] 4297 "TARGET_SSE" 4298{ 4299 ix86_expand_vector_set (false, operands[0], operands[1], 4300 INTVAL (operands[2])); 4301 DONE; 4302}) 4303 4304(define_insn_and_split "*vec_extractv4sf_0" 4305 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r") 4306 (vec_select:SF 4307 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m") 4308 (parallel [(const_int 0)])))] 4309 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4310 "#" 4311 "&& reload_completed" 4312 [(const_int 0)] 4313{ 4314 rtx op1 = operands[1]; 4315 if (REG_P (op1)) 4316 op1 = gen_rtx_REG (SFmode, REGNO (op1)); 4317 else 4318 op1 = gen_lowpart (SFmode, op1); 4319 emit_move_insn (operands[0], op1); 4320 DONE; 4321}) 4322 4323(define_insn_and_split "*sse4_1_extractps" 4324 [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,x,x") 4325 (vec_select:SF 4326 (match_operand:V4SF 1 "register_operand" "x,0,x") 4327 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))] 4328 "TARGET_SSE4_1" 4329 "@ 4330 %vextractps\t{%2, %1, %0|%0, %1, %2} 4331 # 4332 #" 4333 "&& reload_completed && SSE_REG_P (operands[0])" 4334 [(const_int 0)] 4335{ 4336 rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0])); 4337 switch (INTVAL (operands[2])) 4338 { 4339 case 1: 4340 case 3: 4341 emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1], 4342 operands[2], operands[2], 4343 GEN_INT (INTVAL (operands[2]) + 4), 4344 GEN_INT (INTVAL (operands[2]) + 4))); 4345 break; 4346 case 2: 4347 emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1])); 4348 break; 4349 default: 4350 /* 0 should be handled by the *vec_extractv4sf_0 pattern above. */ 4351 gcc_unreachable (); 4352 } 4353 DONE; 4354} 4355 [(set_attr "isa" "*,noavx,avx") 4356 (set_attr "type" "sselog,*,*") 4357 (set_attr "prefix_data16" "1,*,*") 4358 (set_attr "prefix_extra" "1,*,*") 4359 (set_attr "length_immediate" "1,*,*") 4360 (set_attr "prefix" "maybe_vex,*,*") 4361 (set_attr "mode" "V4SF,*,*")]) 4362 4363(define_insn_and_split "*vec_extract_v4sf_mem" 4364 [(set (match_operand:SF 0 "register_operand" "=x,*r,f") 4365 (vec_select:SF 4366 (match_operand:V4SF 1 "memory_operand" "o,o,o") 4367 (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))] 4368 "TARGET_SSE" 4369 "#" 4370 "&& reload_completed" 4371 [(const_int 0)] 4372{ 4373 int i = INTVAL (operands[2]); 4374 4375 emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4)); 4376 DONE; 4377}) 4378 4379(define_expand "avx_vextractf128<mode>" 4380 [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand") 4381 (match_operand:V_256 1 "register_operand") 4382 (match_operand:SI 2 "const_0_to_1_operand")] 4383 "TARGET_AVX" 4384{ 4385 rtx (*insn)(rtx, rtx); 4386 4387 switch (INTVAL (operands[2])) 4388 { 4389 case 0: 4390 insn = gen_vec_extract_lo_<mode>; 4391 break; 4392 case 1: 4393 insn = gen_vec_extract_hi_<mode>; 4394 break; 4395 default: 4396 gcc_unreachable (); 4397 } 4398 4399 emit_insn (insn (operands[0], operands[1])); 4400 DONE; 4401}) 4402 4403(define_insn_and_split "vec_extract_lo_<mode>" 4404 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4405 (vec_select:<ssehalfvecmode> 4406 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x") 4407 (parallel [(const_int 0) (const_int 1)])))] 4408 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4409 "#" 4410 "&& reload_completed" 4411 [(const_int 0)] 4412{ 4413 rtx op1 = operands[1]; 4414 if (REG_P (op1)) 4415 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); 4416 else 4417 op1 = gen_lowpart (<ssehalfvecmode>mode, op1); 4418 emit_move_insn (operands[0], op1); 4419 DONE; 4420}) 4421 4422(define_insn "vec_extract_hi_<mode>" 4423 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4424 (vec_select:<ssehalfvecmode> 4425 (match_operand:VI8F_256 1 "register_operand" "x,x") 4426 (parallel [(const_int 2) (const_int 3)])))] 4427 "TARGET_AVX" 4428 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}" 4429 [(set_attr "type" "sselog") 4430 (set_attr "prefix_extra" "1") 4431 (set_attr "length_immediate" "1") 4432 (set_attr "memory" "none,store") 4433 (set_attr "prefix" "vex") 4434 (set_attr "mode" "<sseinsnmode>")]) 4435 4436(define_insn_and_split "vec_extract_lo_<mode>" 4437 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4438 (vec_select:<ssehalfvecmode> 4439 (match_operand:VI4F_256 1 "nonimmediate_operand" "xm,x") 4440 (parallel [(const_int 0) (const_int 1) 4441 (const_int 2) (const_int 3)])))] 4442 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4443 "#" 4444 "&& reload_completed" 4445 [(const_int 0)] 4446{ 4447 rtx op1 = operands[1]; 4448 if (REG_P (op1)) 4449 op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1)); 4450 else 4451 op1 = gen_lowpart (<ssehalfvecmode>mode, op1); 4452 emit_move_insn (operands[0], op1); 4453 DONE; 4454}) 4455 4456(define_insn "vec_extract_hi_<mode>" 4457 [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m") 4458 (vec_select:<ssehalfvecmode> 4459 (match_operand:VI4F_256 1 "register_operand" "x,x") 4460 (parallel [(const_int 4) (const_int 5) 4461 (const_int 6) (const_int 7)])))] 4462 "TARGET_AVX" 4463 "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}" 4464 [(set_attr "type" "sselog") 4465 (set_attr "prefix_extra" "1") 4466 (set_attr "length_immediate" "1") 4467 (set_attr "memory" "none,store") 4468 (set_attr "prefix" "vex") 4469 (set_attr "mode" "<sseinsnmode>")]) 4470 4471(define_insn_and_split "vec_extract_lo_v16hi" 4472 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") 4473 (vec_select:V8HI 4474 (match_operand:V16HI 1 "nonimmediate_operand" "xm,x") 4475 (parallel [(const_int 0) (const_int 1) 4476 (const_int 2) (const_int 3) 4477 (const_int 4) (const_int 5) 4478 (const_int 6) (const_int 7)])))] 4479 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4480 "#" 4481 "&& reload_completed" 4482 [(const_int 0)] 4483{ 4484 rtx op1 = operands[1]; 4485 if (REG_P (op1)) 4486 op1 = gen_rtx_REG (V8HImode, REGNO (op1)); 4487 else 4488 op1 = gen_lowpart (V8HImode, op1); 4489 emit_move_insn (operands[0], op1); 4490 DONE; 4491}) 4492 4493(define_insn "vec_extract_hi_v16hi" 4494 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m") 4495 (vec_select:V8HI 4496 (match_operand:V16HI 1 "register_operand" "x,x") 4497 (parallel [(const_int 8) (const_int 9) 4498 (const_int 10) (const_int 11) 4499 (const_int 12) (const_int 13) 4500 (const_int 14) (const_int 15)])))] 4501 "TARGET_AVX" 4502 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" 4503 [(set_attr "type" "sselog") 4504 (set_attr "prefix_extra" "1") 4505 (set_attr "length_immediate" "1") 4506 (set_attr "memory" "none,store") 4507 (set_attr "prefix" "vex") 4508 (set_attr "mode" "OI")]) 4509 4510(define_insn_and_split "vec_extract_lo_v32qi" 4511 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") 4512 (vec_select:V16QI 4513 (match_operand:V32QI 1 "nonimmediate_operand" "xm,x") 4514 (parallel [(const_int 0) (const_int 1) 4515 (const_int 2) (const_int 3) 4516 (const_int 4) (const_int 5) 4517 (const_int 6) (const_int 7) 4518 (const_int 8) (const_int 9) 4519 (const_int 10) (const_int 11) 4520 (const_int 12) (const_int 13) 4521 (const_int 14) (const_int 15)])))] 4522 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4523 "#" 4524 "&& reload_completed" 4525 [(const_int 0)] 4526{ 4527 rtx op1 = operands[1]; 4528 if (REG_P (op1)) 4529 op1 = gen_rtx_REG (V16QImode, REGNO (op1)); 4530 else 4531 op1 = gen_lowpart (V16QImode, op1); 4532 emit_move_insn (operands[0], op1); 4533 DONE; 4534}) 4535 4536(define_insn "vec_extract_hi_v32qi" 4537 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") 4538 (vec_select:V16QI 4539 (match_operand:V32QI 1 "register_operand" "x,x") 4540 (parallel [(const_int 16) (const_int 17) 4541 (const_int 18) (const_int 19) 4542 (const_int 20) (const_int 21) 4543 (const_int 22) (const_int 23) 4544 (const_int 24) (const_int 25) 4545 (const_int 26) (const_int 27) 4546 (const_int 28) (const_int 29) 4547 (const_int 30) (const_int 31)])))] 4548 "TARGET_AVX" 4549 "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}" 4550 [(set_attr "type" "sselog") 4551 (set_attr "prefix_extra" "1") 4552 (set_attr "length_immediate" "1") 4553 (set_attr "memory" "none,store") 4554 (set_attr "prefix" "vex") 4555 (set_attr "mode" "OI")]) 4556 4557;; Modes handled by vec_extract patterns. 4558(define_mode_iterator VEC_EXTRACT_MODE 4559 [(V32QI "TARGET_AVX") V16QI 4560 (V16HI "TARGET_AVX") V8HI 4561 (V8SI "TARGET_AVX") V4SI 4562 (V4DI "TARGET_AVX") V2DI 4563 (V8SF "TARGET_AVX") V4SF 4564 (V4DF "TARGET_AVX") V2DF]) 4565 4566(define_expand "vec_extract<mode>" 4567 [(match_operand:<ssescalarmode> 0 "register_operand") 4568 (match_operand:VEC_EXTRACT_MODE 1 "register_operand") 4569 (match_operand 2 "const_int_operand")] 4570 "TARGET_SSE" 4571{ 4572 ix86_expand_vector_extract (false, operands[0], operands[1], 4573 INTVAL (operands[2])); 4574 DONE; 4575}) 4576 4577;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4578;; 4579;; Parallel double-precision floating point element swizzling 4580;; 4581;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4582 4583;; Recall that the 256-bit unpck insns only shuffle within their lanes. 4584(define_insn "avx_unpckhpd256" 4585 [(set (match_operand:V4DF 0 "register_operand" "=x") 4586 (vec_select:V4DF 4587 (vec_concat:V8DF 4588 (match_operand:V4DF 1 "register_operand" "x") 4589 (match_operand:V4DF 2 "nonimmediate_operand" "xm")) 4590 (parallel [(const_int 1) (const_int 5) 4591 (const_int 3) (const_int 7)])))] 4592 "TARGET_AVX" 4593 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}" 4594 [(set_attr "type" "sselog") 4595 (set_attr "prefix" "vex") 4596 (set_attr "mode" "V4DF")]) 4597 4598(define_expand "vec_interleave_highv4df" 4599 [(set (match_dup 3) 4600 (vec_select:V4DF 4601 (vec_concat:V8DF 4602 (match_operand:V4DF 1 "register_operand" "x") 4603 (match_operand:V4DF 2 "nonimmediate_operand" "xm")) 4604 (parallel [(const_int 0) (const_int 4) 4605 (const_int 2) (const_int 6)]))) 4606 (set (match_dup 4) 4607 (vec_select:V4DF 4608 (vec_concat:V8DF 4609 (match_dup 1) 4610 (match_dup 2)) 4611 (parallel [(const_int 1) (const_int 5) 4612 (const_int 3) (const_int 7)]))) 4613 (set (match_operand:V4DF 0 "register_operand") 4614 (vec_select:V4DF 4615 (vec_concat:V8DF 4616 (match_dup 3) 4617 (match_dup 4)) 4618 (parallel [(const_int 2) (const_int 3) 4619 (const_int 6) (const_int 7)])))] 4620 "TARGET_AVX" 4621{ 4622 operands[3] = gen_reg_rtx (V4DFmode); 4623 operands[4] = gen_reg_rtx (V4DFmode); 4624}) 4625 4626 4627(define_expand "vec_interleave_highv2df" 4628 [(set (match_operand:V2DF 0 "register_operand") 4629 (vec_select:V2DF 4630 (vec_concat:V4DF 4631 (match_operand:V2DF 1 "nonimmediate_operand") 4632 (match_operand:V2DF 2 "nonimmediate_operand")) 4633 (parallel [(const_int 1) 4634 (const_int 3)])))] 4635 "TARGET_SSE2" 4636{ 4637 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1)) 4638 operands[2] = force_reg (V2DFmode, operands[2]); 4639}) 4640 4641(define_insn "*vec_interleave_highv2df" 4642 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") 4643 (vec_select:V2DF 4644 (vec_concat:V4DF 4645 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x") 4646 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0")) 4647 (parallel [(const_int 1) 4648 (const_int 3)])))] 4649 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" 4650 "@ 4651 unpckhpd\t{%2, %0|%0, %2} 4652 vunpckhpd\t{%2, %1, %0|%0, %1, %2} 4653 %vmovddup\t{%H1, %0|%0, %H1} 4654 movlpd\t{%H1, %0|%0, %H1} 4655 vmovlpd\t{%H1, %2, %0|%0, %2, %H1} 4656 %vmovhpd\t{%1, %0|%0, %1}" 4657 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") 4658 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") 4659 (set_attr "ssememalign" "64") 4660 (set_attr "prefix_data16" "*,*,*,1,*,1") 4661 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") 4662 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) 4663 4664;; Recall that the 256-bit unpck insns only shuffle within their lanes. 4665(define_expand "avx_movddup256" 4666 [(set (match_operand:V4DF 0 "register_operand") 4667 (vec_select:V4DF 4668 (vec_concat:V8DF 4669 (match_operand:V4DF 1 "nonimmediate_operand") 4670 (match_dup 1)) 4671 (parallel [(const_int 0) (const_int 4) 4672 (const_int 2) (const_int 6)])))] 4673 "TARGET_AVX") 4674 4675(define_expand "avx_unpcklpd256" 4676 [(set (match_operand:V4DF 0 "register_operand") 4677 (vec_select:V4DF 4678 (vec_concat:V8DF 4679 (match_operand:V4DF 1 "register_operand") 4680 (match_operand:V4DF 2 "nonimmediate_operand")) 4681 (parallel [(const_int 0) (const_int 4) 4682 (const_int 2) (const_int 6)])))] 4683 "TARGET_AVX") 4684 4685(define_insn "*avx_unpcklpd256" 4686 [(set (match_operand:V4DF 0 "register_operand" "=x,x") 4687 (vec_select:V4DF 4688 (vec_concat:V8DF 4689 (match_operand:V4DF 1 "nonimmediate_operand" " x,m") 4690 (match_operand:V4DF 2 "nonimmediate_operand" "xm,1")) 4691 (parallel [(const_int 0) (const_int 4) 4692 (const_int 2) (const_int 6)])))] 4693 "TARGET_AVX" 4694 "@ 4695 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 4696 vmovddup\t{%1, %0|%0, %1}" 4697 [(set_attr "type" "sselog") 4698 (set_attr "prefix" "vex") 4699 (set_attr "mode" "V4DF")]) 4700 4701(define_expand "vec_interleave_lowv4df" 4702 [(set (match_dup 3) 4703 (vec_select:V4DF 4704 (vec_concat:V8DF 4705 (match_operand:V4DF 1 "register_operand" "x") 4706 (match_operand:V4DF 2 "nonimmediate_operand" "xm")) 4707 (parallel [(const_int 0) (const_int 4) 4708 (const_int 2) (const_int 6)]))) 4709 (set (match_dup 4) 4710 (vec_select:V4DF 4711 (vec_concat:V8DF 4712 (match_dup 1) 4713 (match_dup 2)) 4714 (parallel [(const_int 1) (const_int 5) 4715 (const_int 3) (const_int 7)]))) 4716 (set (match_operand:V4DF 0 "register_operand") 4717 (vec_select:V4DF 4718 (vec_concat:V8DF 4719 (match_dup 3) 4720 (match_dup 4)) 4721 (parallel [(const_int 0) (const_int 1) 4722 (const_int 4) (const_int 5)])))] 4723 "TARGET_AVX" 4724{ 4725 operands[3] = gen_reg_rtx (V4DFmode); 4726 operands[4] = gen_reg_rtx (V4DFmode); 4727}) 4728 4729(define_expand "vec_interleave_lowv2df" 4730 [(set (match_operand:V2DF 0 "register_operand") 4731 (vec_select:V2DF 4732 (vec_concat:V4DF 4733 (match_operand:V2DF 1 "nonimmediate_operand") 4734 (match_operand:V2DF 2 "nonimmediate_operand")) 4735 (parallel [(const_int 0) 4736 (const_int 2)])))] 4737 "TARGET_SSE2" 4738{ 4739 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0)) 4740 operands[1] = force_reg (V2DFmode, operands[1]); 4741}) 4742 4743(define_insn "*vec_interleave_lowv2df" 4744 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,o") 4745 (vec_select:V2DF 4746 (vec_concat:V4DF 4747 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0") 4748 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x")) 4749 (parallel [(const_int 0) 4750 (const_int 2)])))] 4751 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" 4752 "@ 4753 unpcklpd\t{%2, %0|%0, %2} 4754 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 4755 %vmovddup\t{%1, %0|%0, %1} 4756 movhpd\t{%2, %0|%0, %2} 4757 vmovhpd\t{%2, %1, %0|%0, %1, %2} 4758 %vmovlpd\t{%2, %H0|%H0, %2}" 4759 [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*") 4760 (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov") 4761 (set_attr "ssememalign" "64") 4762 (set_attr "prefix_data16" "*,*,*,1,*,1") 4763 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex") 4764 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")]) 4765 4766(define_split 4767 [(set (match_operand:V2DF 0 "memory_operand") 4768 (vec_select:V2DF 4769 (vec_concat:V4DF 4770 (match_operand:V2DF 1 "register_operand") 4771 (match_dup 1)) 4772 (parallel [(const_int 0) 4773 (const_int 2)])))] 4774 "TARGET_SSE3 && reload_completed" 4775 [(const_int 0)] 4776{ 4777 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1])); 4778 emit_move_insn (adjust_address (operands[0], DFmode, 0), low); 4779 emit_move_insn (adjust_address (operands[0], DFmode, 8), low); 4780 DONE; 4781}) 4782 4783(define_split 4784 [(set (match_operand:V2DF 0 "register_operand") 4785 (vec_select:V2DF 4786 (vec_concat:V4DF 4787 (match_operand:V2DF 1 "memory_operand") 4788 (match_dup 1)) 4789 (parallel [(match_operand:SI 2 "const_0_to_1_operand") 4790 (match_operand:SI 3 "const_int_operand")])))] 4791 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])" 4792 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))] 4793{ 4794 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8); 4795}) 4796 4797(define_expand "avx_shufpd256" 4798 [(match_operand:V4DF 0 "register_operand") 4799 (match_operand:V4DF 1 "register_operand") 4800 (match_operand:V4DF 2 "nonimmediate_operand") 4801 (match_operand:SI 3 "const_int_operand")] 4802 "TARGET_AVX" 4803{ 4804 int mask = INTVAL (operands[3]); 4805 emit_insn (gen_avx_shufpd256_1 (operands[0], operands[1], operands[2], 4806 GEN_INT (mask & 1), 4807 GEN_INT (mask & 2 ? 5 : 4), 4808 GEN_INT (mask & 4 ? 3 : 2), 4809 GEN_INT (mask & 8 ? 7 : 6))); 4810 DONE; 4811}) 4812 4813(define_insn "avx_shufpd256_1" 4814 [(set (match_operand:V4DF 0 "register_operand" "=x") 4815 (vec_select:V4DF 4816 (vec_concat:V8DF 4817 (match_operand:V4DF 1 "register_operand" "x") 4818 (match_operand:V4DF 2 "nonimmediate_operand" "xm")) 4819 (parallel [(match_operand 3 "const_0_to_1_operand") 4820 (match_operand 4 "const_4_to_5_operand") 4821 (match_operand 5 "const_2_to_3_operand") 4822 (match_operand 6 "const_6_to_7_operand")])))] 4823 "TARGET_AVX" 4824{ 4825 int mask; 4826 mask = INTVAL (operands[3]); 4827 mask |= (INTVAL (operands[4]) - 4) << 1; 4828 mask |= (INTVAL (operands[5]) - 2) << 2; 4829 mask |= (INTVAL (operands[6]) - 6) << 3; 4830 operands[3] = GEN_INT (mask); 4831 4832 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4833} 4834 [(set_attr "type" "sseshuf") 4835 (set_attr "length_immediate" "1") 4836 (set_attr "prefix" "vex") 4837 (set_attr "mode" "V4DF")]) 4838 4839(define_expand "sse2_shufpd" 4840 [(match_operand:V2DF 0 "register_operand") 4841 (match_operand:V2DF 1 "register_operand") 4842 (match_operand:V2DF 2 "nonimmediate_operand") 4843 (match_operand:SI 3 "const_int_operand")] 4844 "TARGET_SSE2" 4845{ 4846 int mask = INTVAL (operands[3]); 4847 emit_insn (gen_sse2_shufpd_v2df (operands[0], operands[1], operands[2], 4848 GEN_INT (mask & 1), 4849 GEN_INT (mask & 2 ? 3 : 2))); 4850 DONE; 4851}) 4852 4853;; punpcklqdq and punpckhqdq are shorter than shufpd. 4854(define_insn "avx2_interleave_highv4di" 4855 [(set (match_operand:V4DI 0 "register_operand" "=x") 4856 (vec_select:V4DI 4857 (vec_concat:V8DI 4858 (match_operand:V4DI 1 "register_operand" "x") 4859 (match_operand:V4DI 2 "nonimmediate_operand" "xm")) 4860 (parallel [(const_int 1) 4861 (const_int 5) 4862 (const_int 3) 4863 (const_int 7)])))] 4864 "TARGET_AVX2" 4865 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" 4866 [(set_attr "type" "sselog") 4867 (set_attr "prefix" "vex") 4868 (set_attr "mode" "OI")]) 4869 4870(define_insn "vec_interleave_highv2di" 4871 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 4872 (vec_select:V2DI 4873 (vec_concat:V4DI 4874 (match_operand:V2DI 1 "register_operand" "0,x") 4875 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")) 4876 (parallel [(const_int 1) 4877 (const_int 3)])))] 4878 "TARGET_SSE2" 4879 "@ 4880 punpckhqdq\t{%2, %0|%0, %2} 4881 vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" 4882 [(set_attr "isa" "noavx,avx") 4883 (set_attr "type" "sselog") 4884 (set_attr "prefix_data16" "1,*") 4885 (set_attr "prefix" "orig,vex") 4886 (set_attr "mode" "TI")]) 4887 4888(define_insn "avx2_interleave_lowv4di" 4889 [(set (match_operand:V4DI 0 "register_operand" "=x") 4890 (vec_select:V4DI 4891 (vec_concat:V8DI 4892 (match_operand:V4DI 1 "register_operand" "x") 4893 (match_operand:V4DI 2 "nonimmediate_operand" "xm")) 4894 (parallel [(const_int 0) 4895 (const_int 4) 4896 (const_int 2) 4897 (const_int 6)])))] 4898 "TARGET_AVX2" 4899 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" 4900 [(set_attr "type" "sselog") 4901 (set_attr "prefix" "vex") 4902 (set_attr "mode" "OI")]) 4903 4904(define_insn "vec_interleave_lowv2di" 4905 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 4906 (vec_select:V2DI 4907 (vec_concat:V4DI 4908 (match_operand:V2DI 1 "register_operand" "0,x") 4909 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")) 4910 (parallel [(const_int 0) 4911 (const_int 2)])))] 4912 "TARGET_SSE2" 4913 "@ 4914 punpcklqdq\t{%2, %0|%0, %2} 4915 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" 4916 [(set_attr "isa" "noavx,avx") 4917 (set_attr "type" "sselog") 4918 (set_attr "prefix_data16" "1,*") 4919 (set_attr "prefix" "orig,vex") 4920 (set_attr "mode" "TI")]) 4921 4922(define_insn "sse2_shufpd_<mode>" 4923 [(set (match_operand:VI8F_128 0 "register_operand" "=x,x") 4924 (vec_select:VI8F_128 4925 (vec_concat:<ssedoublevecmode> 4926 (match_operand:VI8F_128 1 "register_operand" "0,x") 4927 (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm")) 4928 (parallel [(match_operand 3 "const_0_to_1_operand") 4929 (match_operand 4 "const_2_to_3_operand")])))] 4930 "TARGET_SSE2" 4931{ 4932 int mask; 4933 mask = INTVAL (operands[3]); 4934 mask |= (INTVAL (operands[4]) - 2) << 1; 4935 operands[3] = GEN_INT (mask); 4936 4937 switch (which_alternative) 4938 { 4939 case 0: 4940 return "shufpd\t{%3, %2, %0|%0, %2, %3}"; 4941 case 1: 4942 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4943 default: 4944 gcc_unreachable (); 4945 } 4946} 4947 [(set_attr "isa" "noavx,avx") 4948 (set_attr "type" "sseshuf") 4949 (set_attr "length_immediate" "1") 4950 (set_attr "prefix" "orig,vex") 4951 (set_attr "mode" "V2DF")]) 4952 4953;; Avoid combining registers from different units in a single alternative, 4954;; see comment above inline_secondary_memory_needed function in i386.c 4955(define_insn "sse2_storehpd" 4956 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,x,*f,r") 4957 (vec_select:DF 4958 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o") 4959 (parallel [(const_int 1)])))] 4960 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4961 "@ 4962 %vmovhpd\t{%1, %0|%0, %1} 4963 unpckhpd\t%0, %0 4964 vunpckhpd\t{%d1, %0|%0, %d1} 4965 # 4966 # 4967 #" 4968 [(set_attr "isa" "*,noavx,avx,*,*,*") 4969 (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov") 4970 (set (attr "prefix_data16") 4971 (if_then_else 4972 (and (eq_attr "alternative" "0") 4973 (not (match_test "TARGET_AVX"))) 4974 (const_string "1") 4975 (const_string "*"))) 4976 (set_attr "prefix" "maybe_vex,orig,vex,*,*,*") 4977 (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")]) 4978 4979(define_split 4980 [(set (match_operand:DF 0 "register_operand") 4981 (vec_select:DF 4982 (match_operand:V2DF 1 "memory_operand") 4983 (parallel [(const_int 1)])))] 4984 "TARGET_SSE2 && reload_completed" 4985 [(set (match_dup 0) (match_dup 1))] 4986 "operands[1] = adjust_address (operands[1], DFmode, 8);") 4987 4988(define_insn "*vec_extractv2df_1_sse" 4989 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 4990 (vec_select:DF 4991 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") 4992 (parallel [(const_int 1)])))] 4993 "!TARGET_SSE2 && TARGET_SSE 4994 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4995 "@ 4996 movhps\t{%1, %0|%0, %1} 4997 movhlps\t{%1, %0|%0, %1} 4998 movlps\t{%H1, %0|%0, %H1}" 4999 [(set_attr "type" "ssemov") 5000 (set_attr "ssememalign" "64") 5001 (set_attr "mode" "V2SF,V4SF,V2SF")]) 5002 5003;; Avoid combining registers from different units in a single alternative, 5004;; see comment above inline_secondary_memory_needed function in i386.c 5005(define_insn "sse2_storelpd" 5006 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x,*f,r") 5007 (vec_select:DF 5008 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m") 5009 (parallel [(const_int 0)])))] 5010 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 5011 "@ 5012 %vmovlpd\t{%1, %0|%0, %1} 5013 # 5014 # 5015 # 5016 #" 5017 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") 5018 (set_attr "prefix_data16" "1,*,*,*,*") 5019 (set_attr "prefix" "maybe_vex") 5020 (set_attr "mode" "V1DF,DF,DF,DF,DF")]) 5021 5022(define_split 5023 [(set (match_operand:DF 0 "register_operand") 5024 (vec_select:DF 5025 (match_operand:V2DF 1 "nonimmediate_operand") 5026 (parallel [(const_int 0)])))] 5027 "TARGET_SSE2 && reload_completed" 5028 [(const_int 0)] 5029{ 5030 rtx op1 = operands[1]; 5031 if (REG_P (op1)) 5032 op1 = gen_rtx_REG (DFmode, REGNO (op1)); 5033 else 5034 op1 = gen_lowpart (DFmode, op1); 5035 emit_move_insn (operands[0], op1); 5036 DONE; 5037}) 5038 5039(define_insn "*vec_extractv2df_0_sse" 5040 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 5041 (vec_select:DF 5042 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") 5043 (parallel [(const_int 0)])))] 5044 "!TARGET_SSE2 && TARGET_SSE 5045 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 5046 "@ 5047 movlps\t{%1, %0|%0, %1} 5048 movaps\t{%1, %0|%0, %1} 5049 movlps\t{%1, %0|%0, %1}" 5050 [(set_attr "type" "ssemov") 5051 (set_attr "mode" "V2SF,V4SF,V2SF")]) 5052 5053(define_expand "sse2_loadhpd_exp" 5054 [(set (match_operand:V2DF 0 "nonimmediate_operand") 5055 (vec_concat:V2DF 5056 (vec_select:DF 5057 (match_operand:V2DF 1 "nonimmediate_operand") 5058 (parallel [(const_int 0)])) 5059 (match_operand:DF 2 "nonimmediate_operand")))] 5060 "TARGET_SSE2" 5061{ 5062 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); 5063 5064 emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2])); 5065 5066 /* Fix up the destination if needed. */ 5067 if (dst != operands[0]) 5068 emit_move_insn (operands[0], dst); 5069 5070 DONE; 5071}) 5072 5073;; Avoid combining registers from different units in a single alternative, 5074;; see comment above inline_secondary_memory_needed function in i386.c 5075(define_insn "sse2_loadhpd" 5076 [(set (match_operand:V2DF 0 "nonimmediate_operand" 5077 "=x,x,x,x,o,o ,o") 5078 (vec_concat:V2DF 5079 (vec_select:DF 5080 (match_operand:V2DF 1 "nonimmediate_operand" 5081 " 0,x,0,x,0,0 ,0") 5082 (parallel [(const_int 0)])) 5083 (match_operand:DF 2 "nonimmediate_operand" 5084 " m,m,x,x,x,*f,r")))] 5085 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 5086 "@ 5087 movhpd\t{%2, %0|%0, %2} 5088 vmovhpd\t{%2, %1, %0|%0, %1, %2} 5089 unpcklpd\t{%2, %0|%0, %2} 5090 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 5091 # 5092 # 5093 #" 5094 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") 5095 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov") 5096 (set_attr "ssememalign" "64") 5097 (set_attr "prefix_data16" "1,*,*,*,*,*,*") 5098 (set_attr "prefix" "orig,vex,orig,vex,*,*,*") 5099 (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")]) 5100 5101(define_split 5102 [(set (match_operand:V2DF 0 "memory_operand") 5103 (vec_concat:V2DF 5104 (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) 5105 (match_operand:DF 1 "register_operand")))] 5106 "TARGET_SSE2 && reload_completed" 5107 [(set (match_dup 0) (match_dup 1))] 5108 "operands[0] = adjust_address (operands[0], DFmode, 8);") 5109 5110(define_expand "sse2_loadlpd_exp" 5111 [(set (match_operand:V2DF 0 "nonimmediate_operand") 5112 (vec_concat:V2DF 5113 (match_operand:DF 2 "nonimmediate_operand") 5114 (vec_select:DF 5115 (match_operand:V2DF 1 "nonimmediate_operand") 5116 (parallel [(const_int 1)]))))] 5117 "TARGET_SSE2" 5118{ 5119 rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands); 5120 5121 emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2])); 5122 5123 /* Fix up the destination if needed. */ 5124 if (dst != operands[0]) 5125 emit_move_insn (operands[0], dst); 5126 5127 DONE; 5128}) 5129 5130;; Avoid combining registers from different units in a single alternative, 5131;; see comment above inline_secondary_memory_needed function in i386.c 5132(define_insn "sse2_loadlpd" 5133 [(set (match_operand:V2DF 0 "nonimmediate_operand" 5134 "=x,x,x,x,x,x,x,x,m,m ,m") 5135 (vec_concat:V2DF 5136 (match_operand:DF 2 "nonimmediate_operand" 5137 " m,m,m,x,x,0,0,x,x,*f,r") 5138 (vec_select:DF 5139 (match_operand:V2DF 1 "vector_move_operand" 5140 " C,0,x,0,x,x,o,o,0,0 ,0") 5141 (parallel [(const_int 1)]))))] 5142 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 5143 "@ 5144 %vmovsd\t{%2, %0|%0, %2} 5145 movlpd\t{%2, %0|%0, %2} 5146 vmovlpd\t{%2, %1, %0|%0, %1, %2} 5147 movsd\t{%2, %0|%0, %2} 5148 vmovsd\t{%2, %1, %0|%0, %1, %2} 5149 shufpd\t{$2, %1, %0|%0, %1, 2} 5150 movhpd\t{%H1, %0|%0, %H1} 5151 vmovhpd\t{%H1, %2, %0|%0, %2, %H1} 5152 # 5153 # 5154 #" 5155 [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*") 5156 (set (attr "type") 5157 (cond [(eq_attr "alternative" "5") 5158 (const_string "sselog") 5159 (eq_attr "alternative" "9") 5160 (const_string "fmov") 5161 (eq_attr "alternative" "10") 5162 (const_string "imov") 5163 ] 5164 (const_string "ssemov"))) 5165 (set_attr "ssememalign" "64") 5166 (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*") 5167 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*") 5168 (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*") 5169 (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")]) 5170 5171(define_split 5172 [(set (match_operand:V2DF 0 "memory_operand") 5173 (vec_concat:V2DF 5174 (match_operand:DF 1 "register_operand") 5175 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))] 5176 "TARGET_SSE2 && reload_completed" 5177 [(set (match_dup 0) (match_dup 1))] 5178 "operands[0] = adjust_address (operands[0], DFmode, 0);") 5179 5180(define_insn "sse2_movsd" 5181 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,m,x,x,x,o") 5182 (vec_merge:V2DF 5183 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0") 5184 (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x") 5185 (const_int 1)))] 5186 "TARGET_SSE2" 5187 "@ 5188 movsd\t{%2, %0|%0, %2} 5189 vmovsd\t{%2, %1, %0|%0, %1, %2} 5190 movlpd\t{%2, %0|%0, %2} 5191 vmovlpd\t{%2, %1, %0|%0, %1, %2} 5192 %vmovlpd\t{%2, %0|%0, %2} 5193 shufpd\t{$2, %1, %0|%0, %1, 2} 5194 movhps\t{%H1, %0|%0, %H1} 5195 vmovhps\t{%H1, %2, %0|%0, %2, %H1} 5196 %vmovhps\t{%1, %H0|%H0, %1}" 5197 [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*") 5198 (set (attr "type") 5199 (if_then_else 5200 (eq_attr "alternative" "5") 5201 (const_string "sselog") 5202 (const_string "ssemov"))) 5203 (set (attr "prefix_data16") 5204 (if_then_else 5205 (and (eq_attr "alternative" "2,4") 5206 (not (match_test "TARGET_AVX"))) 5207 (const_string "1") 5208 (const_string "*"))) 5209 (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*") 5210 (set_attr "ssememalign" "64") 5211 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex") 5212 (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")]) 5213 5214(define_insn "vec_dupv2df" 5215 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 5216 (vec_duplicate:V2DF 5217 (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))] 5218 "TARGET_SSE2" 5219 "@ 5220 unpcklpd\t%0, %0 5221 %vmovddup\t{%1, %0|%0, %1}" 5222 [(set_attr "isa" "noavx,sse3") 5223 (set_attr "type" "sselog1") 5224 (set_attr "prefix" "orig,maybe_vex") 5225 (set_attr "mode" "V2DF,DF")]) 5226 5227(define_insn "*vec_concatv2df" 5228 [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x") 5229 (vec_concat:V2DF 5230 (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0") 5231 (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))] 5232 "TARGET_SSE" 5233 "@ 5234 unpcklpd\t{%2, %0|%0, %2} 5235 vunpcklpd\t{%2, %1, %0|%0, %1, %2} 5236 %vmovddup\t{%1, %0|%0, %1} 5237 movhpd\t{%2, %0|%0, %2} 5238 vmovhpd\t{%2, %1, %0|%0, %1, %2} 5239 %vmovsd\t{%1, %0|%0, %1} 5240 movlhps\t{%2, %0|%0, %2} 5241 movhps\t{%2, %0|%0, %2}" 5242 [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx") 5243 (set (attr "type") 5244 (if_then_else 5245 (eq_attr "alternative" "0,1,2") 5246 (const_string "sselog") 5247 (const_string "ssemov"))) 5248 (set_attr "prefix_data16" "*,*,*,1,*,*,*,*") 5249 (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig") 5250 (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")]) 5251 5252;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 5253;; 5254;; Parallel integral arithmetic 5255;; 5256;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 5257 5258(define_expand "neg<mode>2" 5259 [(set (match_operand:VI_AVX2 0 "register_operand") 5260 (minus:VI_AVX2 5261 (match_dup 2) 5262 (match_operand:VI_AVX2 1 "nonimmediate_operand")))] 5263 "TARGET_SSE2" 5264 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") 5265 5266(define_expand "<plusminus_insn><mode>3" 5267 [(set (match_operand:VI_AVX2 0 "register_operand") 5268 (plusminus:VI_AVX2 5269 (match_operand:VI_AVX2 1 "nonimmediate_operand") 5270 (match_operand:VI_AVX2 2 "nonimmediate_operand")))] 5271 "TARGET_SSE2" 5272 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 5273 5274(define_insn "*<plusminus_insn><mode>3" 5275 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,x") 5276 (plusminus:VI_AVX2 5277 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,x") 5278 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,xm")))] 5279 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 5280 "@ 5281 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} 5282 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5283 [(set_attr "isa" "noavx,avx") 5284 (set_attr "type" "sseiadd") 5285 (set_attr "prefix_data16" "1,*") 5286 (set_attr "prefix" "orig,vex") 5287 (set_attr "mode" "<sseinsnmode>")]) 5288 5289(define_expand "<sse2_avx2>_<plusminus_insn><mode>3" 5290 [(set (match_operand:VI12_AVX2 0 "register_operand") 5291 (sat_plusminus:VI12_AVX2 5292 (match_operand:VI12_AVX2 1 "nonimmediate_operand") 5293 (match_operand:VI12_AVX2 2 "nonimmediate_operand")))] 5294 "TARGET_SSE2" 5295 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 5296 5297(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3" 5298 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x") 5299 (sat_plusminus:VI12_AVX2 5300 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,x") 5301 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm")))] 5302 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 5303 "@ 5304 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} 5305 vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5306 [(set_attr "isa" "noavx,avx") 5307 (set_attr "type" "sseiadd") 5308 (set_attr "prefix_data16" "1,*") 5309 (set_attr "prefix" "orig,vex") 5310 (set_attr "mode" "TI")]) 5311 5312(define_expand "mul<mode>3" 5313 [(set (match_operand:VI1_AVX2 0 "register_operand") 5314 (mult:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand") 5315 (match_operand:VI1_AVX2 2 "register_operand")))] 5316 "TARGET_SSE2" 5317{ 5318 ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]); 5319 DONE; 5320}) 5321 5322(define_expand "mul<mode>3" 5323 [(set (match_operand:VI2_AVX2 0 "register_operand") 5324 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand") 5325 (match_operand:VI2_AVX2 2 "nonimmediate_operand")))] 5326 "TARGET_SSE2" 5327 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") 5328 5329(define_insn "*mul<mode>3" 5330 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") 5331 (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x") 5332 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))] 5333 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" 5334 "@ 5335 pmullw\t{%2, %0|%0, %2} 5336 vpmullw\t{%2, %1, %0|%0, %1, %2}" 5337 [(set_attr "isa" "noavx,avx") 5338 (set_attr "type" "sseimul") 5339 (set_attr "prefix_data16" "1,*") 5340 (set_attr "prefix" "orig,vex") 5341 (set_attr "mode" "<sseinsnmode>")]) 5342 5343(define_expand "<s>mul<mode>3_highpart" 5344 [(set (match_operand:VI2_AVX2 0 "register_operand") 5345 (truncate:VI2_AVX2 5346 (lshiftrt:<ssedoublemode> 5347 (mult:<ssedoublemode> 5348 (any_extend:<ssedoublemode> 5349 (match_operand:VI2_AVX2 1 "nonimmediate_operand")) 5350 (any_extend:<ssedoublemode> 5351 (match_operand:VI2_AVX2 2 "nonimmediate_operand"))) 5352 (const_int 16))))] 5353 "TARGET_SSE2" 5354 "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);") 5355 5356(define_insn "*<s>mul<mode>3_highpart" 5357 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") 5358 (truncate:VI2_AVX2 5359 (lshiftrt:<ssedoublemode> 5360 (mult:<ssedoublemode> 5361 (any_extend:<ssedoublemode> 5362 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")) 5363 (any_extend:<ssedoublemode> 5364 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm"))) 5365 (const_int 16))))] 5366 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" 5367 "@ 5368 pmulh<u>w\t{%2, %0|%0, %2} 5369 vpmulh<u>w\t{%2, %1, %0|%0, %1, %2}" 5370 [(set_attr "isa" "noavx,avx") 5371 (set_attr "type" "sseimul") 5372 (set_attr "prefix_data16" "1,*") 5373 (set_attr "prefix" "orig,vex") 5374 (set_attr "mode" "<sseinsnmode>")]) 5375 5376(define_expand "vec_widen_umult_even_v8si" 5377 [(set (match_operand:V4DI 0 "register_operand") 5378 (mult:V4DI 5379 (zero_extend:V4DI 5380 (vec_select:V4SI 5381 (match_operand:V8SI 1 "nonimmediate_operand") 5382 (parallel [(const_int 0) (const_int 2) 5383 (const_int 4) (const_int 6)]))) 5384 (zero_extend:V4DI 5385 (vec_select:V4SI 5386 (match_operand:V8SI 2 "nonimmediate_operand") 5387 (parallel [(const_int 0) (const_int 2) 5388 (const_int 4) (const_int 6)])))))] 5389 "TARGET_AVX2" 5390 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") 5391 5392(define_insn "*vec_widen_umult_even_v8si" 5393 [(set (match_operand:V4DI 0 "register_operand" "=x") 5394 (mult:V4DI 5395 (zero_extend:V4DI 5396 (vec_select:V4SI 5397 (match_operand:V8SI 1 "nonimmediate_operand" "%x") 5398 (parallel [(const_int 0) (const_int 2) 5399 (const_int 4) (const_int 6)]))) 5400 (zero_extend:V4DI 5401 (vec_select:V4SI 5402 (match_operand:V8SI 2 "nonimmediate_operand" "xm") 5403 (parallel [(const_int 0) (const_int 2) 5404 (const_int 4) (const_int 6)])))))] 5405 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)" 5406 "vpmuludq\t{%2, %1, %0|%0, %1, %2}" 5407 [(set_attr "type" "sseimul") 5408 (set_attr "prefix" "vex") 5409 (set_attr "mode" "OI")]) 5410 5411(define_expand "vec_widen_umult_even_v4si" 5412 [(set (match_operand:V2DI 0 "register_operand") 5413 (mult:V2DI 5414 (zero_extend:V2DI 5415 (vec_select:V2SI 5416 (match_operand:V4SI 1 "nonimmediate_operand") 5417 (parallel [(const_int 0) (const_int 2)]))) 5418 (zero_extend:V2DI 5419 (vec_select:V2SI 5420 (match_operand:V4SI 2 "nonimmediate_operand") 5421 (parallel [(const_int 0) (const_int 2)])))))] 5422 "TARGET_SSE2" 5423 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") 5424 5425(define_insn "*vec_widen_umult_even_v4si" 5426 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 5427 (mult:V2DI 5428 (zero_extend:V2DI 5429 (vec_select:V2SI 5430 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x") 5431 (parallel [(const_int 0) (const_int 2)]))) 5432 (zero_extend:V2DI 5433 (vec_select:V2SI 5434 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") 5435 (parallel [(const_int 0) (const_int 2)])))))] 5436 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)" 5437 "@ 5438 pmuludq\t{%2, %0|%0, %2} 5439 vpmuludq\t{%2, %1, %0|%0, %1, %2}" 5440 [(set_attr "isa" "noavx,avx") 5441 (set_attr "type" "sseimul") 5442 (set_attr "prefix_data16" "1,*") 5443 (set_attr "prefix" "orig,vex") 5444 (set_attr "mode" "TI")]) 5445 5446(define_expand "vec_widen_smult_even_v8si" 5447 [(set (match_operand:V4DI 0 "register_operand") 5448 (mult:V4DI 5449 (sign_extend:V4DI 5450 (vec_select:V4SI 5451 (match_operand:V8SI 1 "nonimmediate_operand") 5452 (parallel [(const_int 0) (const_int 2) 5453 (const_int 4) (const_int 6)]))) 5454 (sign_extend:V4DI 5455 (vec_select:V4SI 5456 (match_operand:V8SI 2 "nonimmediate_operand") 5457 (parallel [(const_int 0) (const_int 2) 5458 (const_int 4) (const_int 6)])))))] 5459 "TARGET_AVX2" 5460 "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);") 5461 5462(define_insn "*vec_widen_smult_even_v8si" 5463 [(set (match_operand:V4DI 0 "register_operand" "=x") 5464 (mult:V4DI 5465 (sign_extend:V4DI 5466 (vec_select:V4SI 5467 (match_operand:V8SI 1 "nonimmediate_operand" "x") 5468 (parallel [(const_int 0) (const_int 2) 5469 (const_int 4) (const_int 6)]))) 5470 (sign_extend:V4DI 5471 (vec_select:V4SI 5472 (match_operand:V8SI 2 "nonimmediate_operand" "xm") 5473 (parallel [(const_int 0) (const_int 2) 5474 (const_int 4) (const_int 6)])))))] 5475 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V8SImode, operands)" 5476 "vpmuldq\t{%2, %1, %0|%0, %1, %2}" 5477 [(set_attr "isa" "avx") 5478 (set_attr "type" "sseimul") 5479 (set_attr "prefix_extra" "1") 5480 (set_attr "prefix" "vex") 5481 (set_attr "mode" "OI")]) 5482 5483(define_expand "sse4_1_mulv2siv2di3" 5484 [(set (match_operand:V2DI 0 "register_operand") 5485 (mult:V2DI 5486 (sign_extend:V2DI 5487 (vec_select:V2SI 5488 (match_operand:V4SI 1 "nonimmediate_operand") 5489 (parallel [(const_int 0) (const_int 2)]))) 5490 (sign_extend:V2DI 5491 (vec_select:V2SI 5492 (match_operand:V4SI 2 "nonimmediate_operand") 5493 (parallel [(const_int 0) (const_int 2)])))))] 5494 "TARGET_SSE4_1" 5495 "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);") 5496 5497(define_insn "*sse4_1_mulv2siv2di3" 5498 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 5499 (mult:V2DI 5500 (sign_extend:V2DI 5501 (vec_select:V2SI 5502 (match_operand:V4SI 1 "nonimmediate_operand" "%0,x") 5503 (parallel [(const_int 0) (const_int 2)]))) 5504 (sign_extend:V2DI 5505 (vec_select:V2SI 5506 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") 5507 (parallel [(const_int 0) (const_int 2)])))))] 5508 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)" 5509 "@ 5510 pmuldq\t{%2, %0|%0, %2} 5511 vpmuldq\t{%2, %1, %0|%0, %1, %2}" 5512 [(set_attr "isa" "noavx,avx") 5513 (set_attr "type" "sseimul") 5514 (set_attr "prefix_data16" "1,*") 5515 (set_attr "prefix_extra" "1") 5516 (set_attr "prefix" "orig,vex") 5517 (set_attr "mode" "TI")]) 5518 5519(define_expand "avx2_pmaddwd" 5520 [(set (match_operand:V8SI 0 "register_operand") 5521 (plus:V8SI 5522 (mult:V8SI 5523 (sign_extend:V8SI 5524 (vec_select:V8HI 5525 (match_operand:V16HI 1 "nonimmediate_operand") 5526 (parallel [(const_int 0) (const_int 2) 5527 (const_int 4) (const_int 6) 5528 (const_int 8) (const_int 10) 5529 (const_int 12) (const_int 14)]))) 5530 (sign_extend:V8SI 5531 (vec_select:V8HI 5532 (match_operand:V16HI 2 "nonimmediate_operand") 5533 (parallel [(const_int 0) (const_int 2) 5534 (const_int 4) (const_int 6) 5535 (const_int 8) (const_int 10) 5536 (const_int 12) (const_int 14)])))) 5537 (mult:V8SI 5538 (sign_extend:V8SI 5539 (vec_select:V8HI (match_dup 1) 5540 (parallel [(const_int 1) (const_int 3) 5541 (const_int 5) (const_int 7) 5542 (const_int 9) (const_int 11) 5543 (const_int 13) (const_int 15)]))) 5544 (sign_extend:V8SI 5545 (vec_select:V8HI (match_dup 2) 5546 (parallel [(const_int 1) (const_int 3) 5547 (const_int 5) (const_int 7) 5548 (const_int 9) (const_int 11) 5549 (const_int 13) (const_int 15)]))))))] 5550 "TARGET_AVX2" 5551 "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") 5552 5553(define_insn "*avx2_pmaddwd" 5554 [(set (match_operand:V8SI 0 "register_operand" "=x") 5555 (plus:V8SI 5556 (mult:V8SI 5557 (sign_extend:V8SI 5558 (vec_select:V8HI 5559 (match_operand:V16HI 1 "nonimmediate_operand" "%x") 5560 (parallel [(const_int 0) (const_int 2) 5561 (const_int 4) (const_int 6) 5562 (const_int 8) (const_int 10) 5563 (const_int 12) (const_int 14)]))) 5564 (sign_extend:V8SI 5565 (vec_select:V8HI 5566 (match_operand:V16HI 2 "nonimmediate_operand" "xm") 5567 (parallel [(const_int 0) (const_int 2) 5568 (const_int 4) (const_int 6) 5569 (const_int 8) (const_int 10) 5570 (const_int 12) (const_int 14)])))) 5571 (mult:V8SI 5572 (sign_extend:V8SI 5573 (vec_select:V8HI (match_dup 1) 5574 (parallel [(const_int 1) (const_int 3) 5575 (const_int 5) (const_int 7) 5576 (const_int 9) (const_int 11) 5577 (const_int 13) (const_int 15)]))) 5578 (sign_extend:V8SI 5579 (vec_select:V8HI (match_dup 2) 5580 (parallel [(const_int 1) (const_int 3) 5581 (const_int 5) (const_int 7) 5582 (const_int 9) (const_int 11) 5583 (const_int 13) (const_int 15)]))))))] 5584 "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)" 5585 "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" 5586 [(set_attr "type" "sseiadd") 5587 (set_attr "prefix" "vex") 5588 (set_attr "mode" "OI")]) 5589 5590(define_expand "sse2_pmaddwd" 5591 [(set (match_operand:V4SI 0 "register_operand") 5592 (plus:V4SI 5593 (mult:V4SI 5594 (sign_extend:V4SI 5595 (vec_select:V4HI 5596 (match_operand:V8HI 1 "nonimmediate_operand") 5597 (parallel [(const_int 0) (const_int 2) 5598 (const_int 4) (const_int 6)]))) 5599 (sign_extend:V4SI 5600 (vec_select:V4HI 5601 (match_operand:V8HI 2 "nonimmediate_operand") 5602 (parallel [(const_int 0) (const_int 2) 5603 (const_int 4) (const_int 6)])))) 5604 (mult:V4SI 5605 (sign_extend:V4SI 5606 (vec_select:V4HI (match_dup 1) 5607 (parallel [(const_int 1) (const_int 3) 5608 (const_int 5) (const_int 7)]))) 5609 (sign_extend:V4SI 5610 (vec_select:V4HI (match_dup 2) 5611 (parallel [(const_int 1) (const_int 3) 5612 (const_int 5) (const_int 7)]))))))] 5613 "TARGET_SSE2" 5614 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") 5615 5616(define_insn "*sse2_pmaddwd" 5617 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 5618 (plus:V4SI 5619 (mult:V4SI 5620 (sign_extend:V4SI 5621 (vec_select:V4HI 5622 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") 5623 (parallel [(const_int 0) (const_int 2) 5624 (const_int 4) (const_int 6)]))) 5625 (sign_extend:V4SI 5626 (vec_select:V4HI 5627 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") 5628 (parallel [(const_int 0) (const_int 2) 5629 (const_int 4) (const_int 6)])))) 5630 (mult:V4SI 5631 (sign_extend:V4SI 5632 (vec_select:V4HI (match_dup 1) 5633 (parallel [(const_int 1) (const_int 3) 5634 (const_int 5) (const_int 7)]))) 5635 (sign_extend:V4SI 5636 (vec_select:V4HI (match_dup 2) 5637 (parallel [(const_int 1) (const_int 3) 5638 (const_int 5) (const_int 7)]))))))] 5639 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 5640 "@ 5641 pmaddwd\t{%2, %0|%0, %2} 5642 vpmaddwd\t{%2, %1, %0|%0, %1, %2}" 5643 [(set_attr "isa" "noavx,avx") 5644 (set_attr "type" "sseiadd") 5645 (set_attr "atom_unit" "simul") 5646 (set_attr "prefix_data16" "1,*") 5647 (set_attr "prefix" "orig,vex") 5648 (set_attr "mode" "TI")]) 5649 5650(define_expand "mul<mode>3" 5651 [(set (match_operand:VI4_AVX2 0 "register_operand") 5652 (mult:VI4_AVX2 5653 (match_operand:VI4_AVX2 1 "nonimmediate_operand") 5654 (match_operand:VI4_AVX2 2 "nonimmediate_operand")))] 5655 "TARGET_SSE2" 5656{ 5657 if (TARGET_SSE4_1) 5658 { 5659 if (CONSTANT_P (operands[2])) 5660 operands[2] = validize_mem (force_const_mem (<MODE>mode, operands[2])); 5661 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); 5662 } 5663 else 5664 { 5665 ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]); 5666 DONE; 5667 } 5668}) 5669 5670(define_insn "*<sse4_1_avx2>_mul<mode>3" 5671 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,x") 5672 (mult:VI4_AVX2 5673 (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,x") 5674 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,xm")))] 5675 "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" 5676 "@ 5677 pmulld\t{%2, %0|%0, %2} 5678 vpmulld\t{%2, %1, %0|%0, %1, %2}" 5679 [(set_attr "isa" "noavx,avx") 5680 (set_attr "type" "sseimul") 5681 (set_attr "prefix_extra" "1") 5682 (set_attr "prefix" "orig,vex") 5683 (set_attr "btver2_decode" "vector,vector") 5684 (set_attr "mode" "<sseinsnmode>")]) 5685 5686(define_expand "mul<mode>3" 5687 [(set (match_operand:VI8_AVX2 0 "register_operand") 5688 (mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand") 5689 (match_operand:VI8_AVX2 2 "register_operand")))] 5690 "TARGET_SSE2" 5691{ 5692 ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]); 5693 DONE; 5694}) 5695 5696(define_expand "vec_widen_<s>mult_hi_<mode>" 5697 [(match_operand:<sseunpackmode> 0 "register_operand") 5698 (any_extend:<sseunpackmode> 5699 (match_operand:VI124_AVX2 1 "register_operand")) 5700 (match_operand:VI124_AVX2 2 "register_operand")] 5701 "TARGET_SSE2" 5702{ 5703 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2], 5704 <u_bool>, true); 5705 DONE; 5706}) 5707 5708(define_expand "vec_widen_<s>mult_lo_<mode>" 5709 [(match_operand:<sseunpackmode> 0 "register_operand") 5710 (any_extend:<sseunpackmode> 5711 (match_operand:VI124_AVX2 1 "register_operand")) 5712 (match_operand:VI124_AVX2 2 "register_operand")] 5713 "TARGET_SSE2" 5714{ 5715 ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2], 5716 <u_bool>, false); 5717 DONE; 5718}) 5719 5720;; Most widen_<s>mult_even_<mode> can be handled directly from other 5721;; named patterns, but signed V4SI needs special help for plain SSE2. 5722(define_expand "vec_widen_smult_even_v4si" 5723 [(match_operand:V2DI 0 "register_operand") 5724 (match_operand:V4SI 1 "register_operand") 5725 (match_operand:V4SI 2 "register_operand")] 5726 "TARGET_SSE2" 5727{ 5728 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2], 5729 false, false); 5730 DONE; 5731}) 5732 5733(define_expand "vec_widen_<s>mult_odd_<mode>" 5734 [(match_operand:<sseunpackmode> 0 "register_operand") 5735 (any_extend:<sseunpackmode> 5736 (match_operand:VI4_AVX2 1 "register_operand")) 5737 (match_operand:VI4_AVX2 2 "register_operand")] 5738 "TARGET_SSE2" 5739{ 5740 ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2], 5741 <u_bool>, true); 5742 DONE; 5743}) 5744 5745(define_expand "sdot_prod<mode>" 5746 [(match_operand:<sseunpackmode> 0 "register_operand") 5747 (match_operand:VI2_AVX2 1 "register_operand") 5748 (match_operand:VI2_AVX2 2 "register_operand") 5749 (match_operand:<sseunpackmode> 3 "register_operand")] 5750 "TARGET_SSE2" 5751{ 5752 rtx t = gen_reg_rtx (<sseunpackmode>mode); 5753 emit_insn (gen_<sse2_avx2>_pmaddwd (t, operands[1], operands[2])); 5754 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 5755 gen_rtx_PLUS (<sseunpackmode>mode, 5756 operands[3], t))); 5757 DONE; 5758}) 5759 5760;; Normally we use widen_mul_even/odd, but combine can't quite get it all 5761;; back together when madd is available. 5762(define_expand "sdot_prodv4si" 5763 [(match_operand:V2DI 0 "register_operand") 5764 (match_operand:V4SI 1 "register_operand") 5765 (match_operand:V4SI 2 "register_operand") 5766 (match_operand:V2DI 3 "register_operand")] 5767 "TARGET_XOP" 5768{ 5769 rtx t = gen_reg_rtx (V2DImode); 5770 emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3])); 5771 emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t)); 5772 DONE; 5773}) 5774 5775(define_insn "ashr<mode>3" 5776 [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") 5777 (ashiftrt:VI24_AVX2 5778 (match_operand:VI24_AVX2 1 "register_operand" "0,x") 5779 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] 5780 "TARGET_SSE2" 5781 "@ 5782 psra<ssemodesuffix>\t{%2, %0|%0, %2} 5783 vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5784 [(set_attr "isa" "noavx,avx") 5785 (set_attr "type" "sseishft") 5786 (set (attr "length_immediate") 5787 (if_then_else (match_operand 2 "const_int_operand") 5788 (const_string "1") 5789 (const_string "0"))) 5790 (set_attr "prefix_data16" "1,*") 5791 (set_attr "prefix" "orig,vex") 5792 (set_attr "mode" "<sseinsnmode>")]) 5793 5794(define_insn "<shift_insn><mode>3" 5795 [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") 5796 (any_lshift:VI248_AVX2 5797 (match_operand:VI248_AVX2 1 "register_operand" "0,x") 5798 (match_operand:SI 2 "nonmemory_operand" "xN,xN")))] 5799 "TARGET_SSE2" 5800 "@ 5801 p<vshift><ssemodesuffix>\t{%2, %0|%0, %2} 5802 vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5803 [(set_attr "isa" "noavx,avx") 5804 (set_attr "type" "sseishft") 5805 (set (attr "length_immediate") 5806 (if_then_else (match_operand 2 "const_int_operand") 5807 (const_string "1") 5808 (const_string "0"))) 5809 (set_attr "prefix_data16" "1,*") 5810 (set_attr "prefix" "orig,vex") 5811 (set_attr "mode" "<sseinsnmode>")]) 5812 5813(define_expand "vec_shl_<mode>" 5814 [(set (match_operand:VI_128 0 "register_operand") 5815 (ashift:V1TI 5816 (match_operand:VI_128 1 "register_operand") 5817 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))] 5818 "TARGET_SSE2" 5819{ 5820 operands[0] = gen_lowpart (V1TImode, operands[0]); 5821 operands[1] = gen_lowpart (V1TImode, operands[1]); 5822}) 5823 5824(define_insn "<sse2_avx2>_ashl<mode>3" 5825 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x") 5826 (ashift:VIMAX_AVX2 5827 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x") 5828 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] 5829 "TARGET_SSE2" 5830{ 5831 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); 5832 5833 switch (which_alternative) 5834 { 5835 case 0: 5836 return "pslldq\t{%2, %0|%0, %2}"; 5837 case 1: 5838 return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; 5839 default: 5840 gcc_unreachable (); 5841 } 5842} 5843 [(set_attr "isa" "noavx,avx") 5844 (set_attr "type" "sseishft") 5845 (set_attr "length_immediate" "1") 5846 (set_attr "prefix_data16" "1,*") 5847 (set_attr "prefix" "orig,vex") 5848 (set_attr "mode" "<sseinsnmode>")]) 5849 5850(define_expand "vec_shr_<mode>" 5851 [(set (match_operand:VI_128 0 "register_operand") 5852 (lshiftrt:V1TI 5853 (match_operand:VI_128 1 "register_operand") 5854 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))] 5855 "TARGET_SSE2" 5856{ 5857 operands[0] = gen_lowpart (V1TImode, operands[0]); 5858 operands[1] = gen_lowpart (V1TImode, operands[1]); 5859}) 5860 5861(define_insn "<sse2_avx2>_lshr<mode>3" 5862 [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,x") 5863 (lshiftrt:VIMAX_AVX2 5864 (match_operand:VIMAX_AVX2 1 "register_operand" "0,x") 5865 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] 5866 "TARGET_SSE2" 5867{ 5868 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); 5869 5870 switch (which_alternative) 5871 { 5872 case 0: 5873 return "psrldq\t{%2, %0|%0, %2}"; 5874 case 1: 5875 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; 5876 default: 5877 gcc_unreachable (); 5878 } 5879} 5880 [(set_attr "isa" "noavx,avx") 5881 (set_attr "type" "sseishft") 5882 (set_attr "length_immediate" "1") 5883 (set_attr "atom_unit" "sishuf") 5884 (set_attr "prefix_data16" "1,*") 5885 (set_attr "prefix" "orig,vex") 5886 (set_attr "mode" "<sseinsnmode>")]) 5887 5888 5889(define_expand "<code><mode>3" 5890 [(set (match_operand:VI124_256 0 "register_operand") 5891 (maxmin:VI124_256 5892 (match_operand:VI124_256 1 "nonimmediate_operand") 5893 (match_operand:VI124_256 2 "nonimmediate_operand")))] 5894 "TARGET_AVX2" 5895 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 5896 5897(define_insn "*avx2_<code><mode>3" 5898 [(set (match_operand:VI124_256 0 "register_operand" "=x") 5899 (maxmin:VI124_256 5900 (match_operand:VI124_256 1 "nonimmediate_operand" "%x") 5901 (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))] 5902 "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 5903 "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5904 [(set_attr "type" "sseiadd") 5905 (set_attr "prefix_extra" "1") 5906 (set_attr "prefix" "vex") 5907 (set_attr "mode" "OI")]) 5908 5909(define_expand "<code><mode>3" 5910 [(set (match_operand:VI8_AVX2 0 "register_operand") 5911 (maxmin:VI8_AVX2 5912 (match_operand:VI8_AVX2 1 "register_operand") 5913 (match_operand:VI8_AVX2 2 "register_operand")))] 5914 "TARGET_SSE4_2" 5915{ 5916 enum rtx_code code; 5917 rtx xops[6]; 5918 bool ok; 5919 5920 xops[0] = operands[0]; 5921 5922 if (<CODE> == SMAX || <CODE> == UMAX) 5923 { 5924 xops[1] = operands[1]; 5925 xops[2] = operands[2]; 5926 } 5927 else 5928 { 5929 xops[1] = operands[2]; 5930 xops[2] = operands[1]; 5931 } 5932 5933 code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT; 5934 5935 xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]); 5936 xops[4] = operands[1]; 5937 xops[5] = operands[2]; 5938 5939 ok = ix86_expand_int_vcond (xops); 5940 gcc_assert (ok); 5941 DONE; 5942}) 5943 5944(define_expand "<code><mode>3" 5945 [(set (match_operand:VI124_128 0 "register_operand") 5946 (smaxmin:VI124_128 5947 (match_operand:VI124_128 1 "nonimmediate_operand") 5948 (match_operand:VI124_128 2 "nonimmediate_operand")))] 5949 "TARGET_SSE2" 5950{ 5951 if (TARGET_SSE4_1 || <MODE>mode == V8HImode) 5952 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); 5953 else 5954 { 5955 rtx xops[6]; 5956 bool ok; 5957 5958 xops[0] = operands[0]; 5959 operands[1] = force_reg (<MODE>mode, operands[1]); 5960 operands[2] = force_reg (<MODE>mode, operands[2]); 5961 5962 if (<CODE> == SMAX) 5963 { 5964 xops[1] = operands[1]; 5965 xops[2] = operands[2]; 5966 } 5967 else 5968 { 5969 xops[1] = operands[2]; 5970 xops[2] = operands[1]; 5971 } 5972 5973 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); 5974 xops[4] = operands[1]; 5975 xops[5] = operands[2]; 5976 5977 ok = ix86_expand_int_vcond (xops); 5978 gcc_assert (ok); 5979 DONE; 5980 } 5981}) 5982 5983(define_insn "*sse4_1_<code><mode>3" 5984 [(set (match_operand:VI14_128 0 "register_operand" "=x,x") 5985 (smaxmin:VI14_128 5986 (match_operand:VI14_128 1 "nonimmediate_operand" "%0,x") 5987 (match_operand:VI14_128 2 "nonimmediate_operand" "xm,xm")))] 5988 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 5989 "@ 5990 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} 5991 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 5992 [(set_attr "isa" "noavx,avx") 5993 (set_attr "type" "sseiadd") 5994 (set_attr "prefix_extra" "1,*") 5995 (set_attr "prefix" "orig,vex") 5996 (set_attr "mode" "TI")]) 5997 5998(define_insn "*<code>v8hi3" 5999 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6000 (smaxmin:V8HI 6001 (match_operand:V8HI 1 "nonimmediate_operand" "%0,x") 6002 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))] 6003 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)" 6004 "@ 6005 p<maxmin_int>w\t{%2, %0|%0, %2} 6006 vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}" 6007 [(set_attr "isa" "noavx,avx") 6008 (set_attr "type" "sseiadd") 6009 (set_attr "prefix_data16" "1,*") 6010 (set_attr "prefix_extra" "*,1") 6011 (set_attr "prefix" "orig,vex") 6012 (set_attr "mode" "TI")]) 6013 6014(define_expand "<code><mode>3" 6015 [(set (match_operand:VI124_128 0 "register_operand") 6016 (umaxmin:VI124_128 6017 (match_operand:VI124_128 1 "nonimmediate_operand") 6018 (match_operand:VI124_128 2 "nonimmediate_operand")))] 6019 "TARGET_SSE2" 6020{ 6021 if (TARGET_SSE4_1 || <MODE>mode == V16QImode) 6022 ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands); 6023 else if (<CODE> == UMAX && <MODE>mode == V8HImode) 6024 { 6025 rtx op0 = operands[0], op2 = operands[2], op3 = op0; 6026 operands[1] = force_reg (<MODE>mode, operands[1]); 6027 if (rtx_equal_p (op3, op2)) 6028 op3 = gen_reg_rtx (V8HImode); 6029 emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2)); 6030 emit_insn (gen_addv8hi3 (op0, op3, op2)); 6031 DONE; 6032 } 6033 else 6034 { 6035 rtx xops[6]; 6036 bool ok; 6037 6038 operands[1] = force_reg (<MODE>mode, operands[1]); 6039 operands[2] = force_reg (<MODE>mode, operands[2]); 6040 6041 xops[0] = operands[0]; 6042 6043 if (<CODE> == UMAX) 6044 { 6045 xops[1] = operands[1]; 6046 xops[2] = operands[2]; 6047 } 6048 else 6049 { 6050 xops[1] = operands[2]; 6051 xops[2] = operands[1]; 6052 } 6053 6054 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 6055 xops[4] = operands[1]; 6056 xops[5] = operands[2]; 6057 6058 ok = ix86_expand_int_vcond (xops); 6059 gcc_assert (ok); 6060 DONE; 6061 } 6062}) 6063 6064(define_insn "*sse4_1_<code><mode>3" 6065 [(set (match_operand:VI24_128 0 "register_operand" "=x,x") 6066 (umaxmin:VI24_128 6067 (match_operand:VI24_128 1 "nonimmediate_operand" "%0,x") 6068 (match_operand:VI24_128 2 "nonimmediate_operand" "xm,xm")))] 6069 "TARGET_SSE4_1 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6070 "@ 6071 p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2} 6072 vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6073 [(set_attr "isa" "noavx,avx") 6074 (set_attr "type" "sseiadd") 6075 (set_attr "prefix_extra" "1,*") 6076 (set_attr "prefix" "orig,vex") 6077 (set_attr "mode" "TI")]) 6078 6079(define_insn "*<code>v16qi3" 6080 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6081 (umaxmin:V16QI 6082 (match_operand:V16QI 1 "nonimmediate_operand" "%0,x") 6083 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))] 6084 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)" 6085 "@ 6086 p<maxmin_int>b\t{%2, %0|%0, %2} 6087 vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}" 6088 [(set_attr "isa" "noavx,avx") 6089 (set_attr "type" "sseiadd") 6090 (set_attr "prefix_data16" "1,*") 6091 (set_attr "prefix_extra" "*,1") 6092 (set_attr "prefix" "orig,vex") 6093 (set_attr "mode" "TI")]) 6094 6095;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6096;; 6097;; Parallel integral comparisons 6098;; 6099;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6100 6101(define_expand "avx2_eq<mode>3" 6102 [(set (match_operand:VI_256 0 "register_operand") 6103 (eq:VI_256 6104 (match_operand:VI_256 1 "nonimmediate_operand") 6105 (match_operand:VI_256 2 "nonimmediate_operand")))] 6106 "TARGET_AVX2" 6107 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") 6108 6109(define_insn "*avx2_eq<mode>3" 6110 [(set (match_operand:VI_256 0 "register_operand" "=x") 6111 (eq:VI_256 6112 (match_operand:VI_256 1 "nonimmediate_operand" "%x") 6113 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] 6114 "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6115 "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6116 [(set_attr "type" "ssecmp") 6117 (set_attr "prefix_extra" "1") 6118 (set_attr "prefix" "vex") 6119 (set_attr "mode" "OI")]) 6120 6121(define_insn "*sse4_1_eqv2di3" 6122 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 6123 (eq:V2DI 6124 (match_operand:V2DI 1 "nonimmediate_operand" "%0,x") 6125 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] 6126 "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)" 6127 "@ 6128 pcmpeqq\t{%2, %0|%0, %2} 6129 vpcmpeqq\t{%2, %1, %0|%0, %1, %2}" 6130 [(set_attr "isa" "noavx,avx") 6131 (set_attr "type" "ssecmp") 6132 (set_attr "prefix_extra" "1") 6133 (set_attr "prefix" "orig,vex") 6134 (set_attr "mode" "TI")]) 6135 6136(define_insn "*sse2_eq<mode>3" 6137 [(set (match_operand:VI124_128 0 "register_operand" "=x,x") 6138 (eq:VI124_128 6139 (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x") 6140 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] 6141 "TARGET_SSE2 && !TARGET_XOP 6142 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6143 "@ 6144 pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2} 6145 vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6146 [(set_attr "isa" "noavx,avx") 6147 (set_attr "type" "ssecmp") 6148 (set_attr "prefix_data16" "1,*") 6149 (set_attr "prefix" "orig,vex") 6150 (set_attr "mode" "TI")]) 6151 6152(define_expand "sse2_eq<mode>3" 6153 [(set (match_operand:VI124_128 0 "register_operand") 6154 (eq:VI124_128 6155 (match_operand:VI124_128 1 "nonimmediate_operand") 6156 (match_operand:VI124_128 2 "nonimmediate_operand")))] 6157 "TARGET_SSE2 && !TARGET_XOP " 6158 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") 6159 6160(define_expand "sse4_1_eqv2di3" 6161 [(set (match_operand:V2DI 0 "register_operand") 6162 (eq:V2DI 6163 (match_operand:V2DI 1 "nonimmediate_operand") 6164 (match_operand:V2DI 2 "nonimmediate_operand")))] 6165 "TARGET_SSE4_1" 6166 "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);") 6167 6168(define_insn "sse4_2_gtv2di3" 6169 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 6170 (gt:V2DI 6171 (match_operand:V2DI 1 "register_operand" "0,x") 6172 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")))] 6173 "TARGET_SSE4_2" 6174 "@ 6175 pcmpgtq\t{%2, %0|%0, %2} 6176 vpcmpgtq\t{%2, %1, %0|%0, %1, %2}" 6177 [(set_attr "isa" "noavx,avx") 6178 (set_attr "type" "ssecmp") 6179 (set_attr "prefix_extra" "1") 6180 (set_attr "prefix" "orig,vex") 6181 (set_attr "mode" "TI")]) 6182 6183(define_insn "avx2_gt<mode>3" 6184 [(set (match_operand:VI_256 0 "register_operand" "=x") 6185 (gt:VI_256 6186 (match_operand:VI_256 1 "register_operand" "x") 6187 (match_operand:VI_256 2 "nonimmediate_operand" "xm")))] 6188 "TARGET_AVX2" 6189 "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6190 [(set_attr "type" "ssecmp") 6191 (set_attr "prefix_extra" "1") 6192 (set_attr "prefix" "vex") 6193 (set_attr "mode" "OI")]) 6194 6195(define_insn "sse2_gt<mode>3" 6196 [(set (match_operand:VI124_128 0 "register_operand" "=x,x") 6197 (gt:VI124_128 6198 (match_operand:VI124_128 1 "register_operand" "0,x") 6199 (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))] 6200 "TARGET_SSE2 && !TARGET_XOP" 6201 "@ 6202 pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2} 6203 vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 6204 [(set_attr "isa" "noavx,avx") 6205 (set_attr "type" "ssecmp") 6206 (set_attr "prefix_data16" "1,*") 6207 (set_attr "prefix" "orig,vex") 6208 (set_attr "mode" "TI")]) 6209 6210(define_expand "vcond<V_256:mode><VI_256:mode>" 6211 [(set (match_operand:V_256 0 "register_operand") 6212 (if_then_else:V_256 6213 (match_operator 3 "" 6214 [(match_operand:VI_256 4 "nonimmediate_operand") 6215 (match_operand:VI_256 5 "general_operand")]) 6216 (match_operand:V_256 1) 6217 (match_operand:V_256 2)))] 6218 "TARGET_AVX2 6219 && (GET_MODE_NUNITS (<V_256:MODE>mode) 6220 == GET_MODE_NUNITS (<VI_256:MODE>mode))" 6221{ 6222 bool ok = ix86_expand_int_vcond (operands); 6223 gcc_assert (ok); 6224 DONE; 6225}) 6226 6227(define_expand "vcond<V_128:mode><VI124_128:mode>" 6228 [(set (match_operand:V_128 0 "register_operand") 6229 (if_then_else:V_128 6230 (match_operator 3 "" 6231 [(match_operand:VI124_128 4 "nonimmediate_operand") 6232 (match_operand:VI124_128 5 "general_operand")]) 6233 (match_operand:V_128 1) 6234 (match_operand:V_128 2)))] 6235 "TARGET_SSE2 6236 && (GET_MODE_NUNITS (<V_128:MODE>mode) 6237 == GET_MODE_NUNITS (<VI124_128:MODE>mode))" 6238{ 6239 bool ok = ix86_expand_int_vcond (operands); 6240 gcc_assert (ok); 6241 DONE; 6242}) 6243 6244(define_expand "vcond<VI8F_128:mode>v2di" 6245 [(set (match_operand:VI8F_128 0 "register_operand") 6246 (if_then_else:VI8F_128 6247 (match_operator 3 "" 6248 [(match_operand:V2DI 4 "nonimmediate_operand") 6249 (match_operand:V2DI 5 "general_operand")]) 6250 (match_operand:VI8F_128 1) 6251 (match_operand:VI8F_128 2)))] 6252 "TARGET_SSE4_2" 6253{ 6254 bool ok = ix86_expand_int_vcond (operands); 6255 gcc_assert (ok); 6256 DONE; 6257}) 6258 6259(define_expand "vcondu<V_256:mode><VI_256:mode>" 6260 [(set (match_operand:V_256 0 "register_operand") 6261 (if_then_else:V_256 6262 (match_operator 3 "" 6263 [(match_operand:VI_256 4 "nonimmediate_operand") 6264 (match_operand:VI_256 5 "nonimmediate_operand")]) 6265 (match_operand:V_256 1 "general_operand") 6266 (match_operand:V_256 2 "general_operand")))] 6267 "TARGET_AVX2 6268 && (GET_MODE_NUNITS (<V_256:MODE>mode) 6269 == GET_MODE_NUNITS (<VI_256:MODE>mode))" 6270{ 6271 bool ok = ix86_expand_int_vcond (operands); 6272 gcc_assert (ok); 6273 DONE; 6274}) 6275 6276(define_expand "vcondu<V_128:mode><VI124_128:mode>" 6277 [(set (match_operand:V_128 0 "register_operand") 6278 (if_then_else:V_128 6279 (match_operator 3 "" 6280 [(match_operand:VI124_128 4 "nonimmediate_operand") 6281 (match_operand:VI124_128 5 "nonimmediate_operand")]) 6282 (match_operand:V_128 1 "general_operand") 6283 (match_operand:V_128 2 "general_operand")))] 6284 "TARGET_SSE2 6285 && (GET_MODE_NUNITS (<V_128:MODE>mode) 6286 == GET_MODE_NUNITS (<VI124_128:MODE>mode))" 6287{ 6288 bool ok = ix86_expand_int_vcond (operands); 6289 gcc_assert (ok); 6290 DONE; 6291}) 6292 6293(define_expand "vcondu<VI8F_128:mode>v2di" 6294 [(set (match_operand:VI8F_128 0 "register_operand") 6295 (if_then_else:VI8F_128 6296 (match_operator 3 "" 6297 [(match_operand:V2DI 4 "nonimmediate_operand") 6298 (match_operand:V2DI 5 "nonimmediate_operand")]) 6299 (match_operand:VI8F_128 1 "general_operand") 6300 (match_operand:VI8F_128 2 "general_operand")))] 6301 "TARGET_SSE4_2" 6302{ 6303 bool ok = ix86_expand_int_vcond (operands); 6304 gcc_assert (ok); 6305 DONE; 6306}) 6307 6308(define_mode_iterator VEC_PERM_AVX2 6309 [V16QI V8HI V4SI V2DI V4SF V2DF 6310 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2") 6311 (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2") 6312 (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")]) 6313 6314(define_expand "vec_perm<mode>" 6315 [(match_operand:VEC_PERM_AVX2 0 "register_operand") 6316 (match_operand:VEC_PERM_AVX2 1 "register_operand") 6317 (match_operand:VEC_PERM_AVX2 2 "register_operand") 6318 (match_operand:<sseintvecmode> 3 "register_operand")] 6319 "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP" 6320{ 6321 ix86_expand_vec_perm (operands); 6322 DONE; 6323}) 6324 6325(define_mode_iterator VEC_PERM_CONST 6326 [(V4SF "TARGET_SSE") (V4SI "TARGET_SSE") 6327 (V2DF "TARGET_SSE") (V2DI "TARGET_SSE") 6328 (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2") 6329 (V8SF "TARGET_AVX") (V4DF "TARGET_AVX") 6330 (V8SI "TARGET_AVX") (V4DI "TARGET_AVX") 6331 (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")]) 6332 6333(define_expand "vec_perm_const<mode>" 6334 [(match_operand:VEC_PERM_CONST 0 "register_operand") 6335 (match_operand:VEC_PERM_CONST 1 "register_operand") 6336 (match_operand:VEC_PERM_CONST 2 "register_operand") 6337 (match_operand:<sseintvecmode> 3)] 6338 "" 6339{ 6340 if (ix86_expand_vec_perm_const (operands)) 6341 DONE; 6342 else 6343 FAIL; 6344}) 6345 6346;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6347;; 6348;; Parallel bitwise logical operations 6349;; 6350;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6351 6352(define_expand "one_cmpl<mode>2" 6353 [(set (match_operand:VI 0 "register_operand") 6354 (xor:VI (match_operand:VI 1 "nonimmediate_operand") 6355 (match_dup 2)))] 6356 "TARGET_SSE" 6357{ 6358 int i, n = GET_MODE_NUNITS (<MODE>mode); 6359 rtvec v = rtvec_alloc (n); 6360 6361 for (i = 0; i < n; ++i) 6362 RTVEC_ELT (v, i) = constm1_rtx; 6363 6364 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); 6365}) 6366 6367(define_expand "<sse2_avx2>_andnot<mode>3" 6368 [(set (match_operand:VI_AVX2 0 "register_operand") 6369 (and:VI_AVX2 6370 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand")) 6371 (match_operand:VI_AVX2 2 "nonimmediate_operand")))] 6372 "TARGET_SSE2") 6373 6374(define_insn "*andnot<mode>3" 6375 [(set (match_operand:VI 0 "register_operand" "=x,x") 6376 (and:VI 6377 (not:VI (match_operand:VI 1 "register_operand" "0,x")) 6378 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] 6379 "TARGET_SSE" 6380{ 6381 static char buf[32]; 6382 const char *ops; 6383 const char *tmp; 6384 6385 switch (get_attr_mode (insn)) 6386 { 6387 case MODE_OI: 6388 gcc_assert (TARGET_AVX2); 6389 case MODE_TI: 6390 gcc_assert (TARGET_SSE2); 6391 6392 tmp = "pandn"; 6393 break; 6394 6395 case MODE_V8SF: 6396 gcc_assert (TARGET_AVX); 6397 case MODE_V4SF: 6398 gcc_assert (TARGET_SSE); 6399 6400 tmp = "andnps"; 6401 break; 6402 6403 default: 6404 gcc_unreachable (); 6405 } 6406 6407 switch (which_alternative) 6408 { 6409 case 0: 6410 ops = "%s\t{%%2, %%0|%%0, %%2}"; 6411 break; 6412 case 1: 6413 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 6414 break; 6415 default: 6416 gcc_unreachable (); 6417 } 6418 6419 snprintf (buf, sizeof (buf), ops, tmp); 6420 return buf; 6421} 6422 [(set_attr "isa" "noavx,avx") 6423 (set_attr "type" "sselog") 6424 (set (attr "prefix_data16") 6425 (if_then_else 6426 (and (eq_attr "alternative" "0") 6427 (eq_attr "mode" "TI")) 6428 (const_string "1") 6429 (const_string "*"))) 6430 (set_attr "prefix" "orig,vex") 6431 (set (attr "mode") 6432 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 6433 (const_string "<ssePSmode>") 6434 (match_test "TARGET_AVX2") 6435 (const_string "<sseinsnmode>") 6436 (match_test "TARGET_AVX") 6437 (if_then_else 6438 (match_test "GET_MODE_SIZE (<MODE>mode) > 16") 6439 (const_string "V8SF") 6440 (const_string "<sseinsnmode>")) 6441 (ior (not (match_test "TARGET_SSE2")) 6442 (match_test "optimize_function_for_size_p (cfun)")) 6443 (const_string "V4SF") 6444 ] 6445 (const_string "<sseinsnmode>")))]) 6446 6447(define_expand "<code><mode>3" 6448 [(set (match_operand:VI 0 "register_operand") 6449 (any_logic:VI 6450 (match_operand:VI 1 "nonimmediate_or_const_vector_operand") 6451 (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))] 6452 "TARGET_SSE" 6453{ 6454 ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands); 6455 DONE; 6456}) 6457 6458(define_insn "*<code><mode>3" 6459 [(set (match_operand:VI 0 "register_operand" "=x,x") 6460 (any_logic:VI 6461 (match_operand:VI 1 "nonimmediate_operand" "%0,x") 6462 (match_operand:VI 2 "nonimmediate_operand" "xm,xm")))] 6463 "TARGET_SSE 6464 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6465{ 6466 static char buf[32]; 6467 const char *ops; 6468 const char *tmp; 6469 6470 switch (get_attr_mode (insn)) 6471 { 6472 case MODE_OI: 6473 gcc_assert (TARGET_AVX2); 6474 case MODE_TI: 6475 gcc_assert (TARGET_SSE2); 6476 6477 tmp = "p<logic>"; 6478 break; 6479 6480 case MODE_V8SF: 6481 gcc_assert (TARGET_AVX); 6482 case MODE_V4SF: 6483 gcc_assert (TARGET_SSE); 6484 6485 tmp = "<logic>ps"; 6486 break; 6487 6488 default: 6489 gcc_unreachable (); 6490 } 6491 6492 switch (which_alternative) 6493 { 6494 case 0: 6495 ops = "%s\t{%%2, %%0|%%0, %%2}"; 6496 break; 6497 case 1: 6498 ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; 6499 break; 6500 default: 6501 gcc_unreachable (); 6502 } 6503 6504 snprintf (buf, sizeof (buf), ops, tmp); 6505 return buf; 6506} 6507 [(set_attr "isa" "noavx,avx") 6508 (set_attr "type" "sselog") 6509 (set (attr "prefix_data16") 6510 (if_then_else 6511 (and (eq_attr "alternative" "0") 6512 (eq_attr "mode" "TI")) 6513 (const_string "1") 6514 (const_string "*"))) 6515 (set_attr "prefix" "orig,vex") 6516 (set (attr "mode") 6517 (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL") 6518 (const_string "<ssePSmode>") 6519 (match_test "TARGET_AVX2") 6520 (const_string "<sseinsnmode>") 6521 (match_test "TARGET_AVX") 6522 (if_then_else 6523 (match_test "GET_MODE_SIZE (<MODE>mode) > 16") 6524 (const_string "V8SF") 6525 (const_string "<sseinsnmode>")) 6526 (ior (not (match_test "TARGET_SSE2")) 6527 (match_test "optimize_function_for_size_p (cfun)")) 6528 (const_string "V4SF") 6529 ] 6530 (const_string "<sseinsnmode>")))]) 6531 6532;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6533;; 6534;; Parallel integral element swizzling 6535;; 6536;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6537 6538(define_expand "vec_pack_trunc_<mode>" 6539 [(match_operand:<ssepackmode> 0 "register_operand") 6540 (match_operand:VI248_AVX2 1 "register_operand") 6541 (match_operand:VI248_AVX2 2 "register_operand")] 6542 "TARGET_SSE2" 6543{ 6544 rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]); 6545 rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]); 6546 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); 6547 DONE; 6548}) 6549 6550(define_insn "<sse2_avx2>_packsswb" 6551 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 6552 (vec_concat:VI1_AVX2 6553 (ss_truncate:<ssehalfvecmode> 6554 (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) 6555 (ss_truncate:<ssehalfvecmode> 6556 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6557 "TARGET_SSE2" 6558 "@ 6559 packsswb\t{%2, %0|%0, %2} 6560 vpacksswb\t{%2, %1, %0|%0, %1, %2}" 6561 [(set_attr "isa" "noavx,avx") 6562 (set_attr "type" "sselog") 6563 (set_attr "prefix_data16" "1,*") 6564 (set_attr "prefix" "orig,vex") 6565 (set_attr "mode" "<sseinsnmode>")]) 6566 6567(define_insn "<sse2_avx2>_packssdw" 6568 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") 6569 (vec_concat:VI2_AVX2 6570 (ss_truncate:<ssehalfvecmode> 6571 (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) 6572 (ss_truncate:<ssehalfvecmode> 6573 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6574 "TARGET_SSE2" 6575 "@ 6576 packssdw\t{%2, %0|%0, %2} 6577 vpackssdw\t{%2, %1, %0|%0, %1, %2}" 6578 [(set_attr "isa" "noavx,avx") 6579 (set_attr "type" "sselog") 6580 (set_attr "prefix_data16" "1,*") 6581 (set_attr "prefix" "orig,vex") 6582 (set_attr "mode" "<sseinsnmode>")]) 6583 6584(define_insn "<sse2_avx2>_packuswb" 6585 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 6586 (vec_concat:VI1_AVX2 6587 (us_truncate:<ssehalfvecmode> 6588 (match_operand:<sseunpackmode> 1 "register_operand" "0,x")) 6589 (us_truncate:<ssehalfvecmode> 6590 (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,xm"))))] 6591 "TARGET_SSE2" 6592 "@ 6593 packuswb\t{%2, %0|%0, %2} 6594 vpackuswb\t{%2, %1, %0|%0, %1, %2}" 6595 [(set_attr "isa" "noavx,avx") 6596 (set_attr "type" "sselog") 6597 (set_attr "prefix_data16" "1,*") 6598 (set_attr "prefix" "orig,vex") 6599 (set_attr "mode" "<sseinsnmode>")]) 6600 6601(define_insn "avx2_interleave_highv32qi" 6602 [(set (match_operand:V32QI 0 "register_operand" "=x") 6603 (vec_select:V32QI 6604 (vec_concat:V64QI 6605 (match_operand:V32QI 1 "register_operand" "x") 6606 (match_operand:V32QI 2 "nonimmediate_operand" "xm")) 6607 (parallel [(const_int 8) (const_int 40) 6608 (const_int 9) (const_int 41) 6609 (const_int 10) (const_int 42) 6610 (const_int 11) (const_int 43) 6611 (const_int 12) (const_int 44) 6612 (const_int 13) (const_int 45) 6613 (const_int 14) (const_int 46) 6614 (const_int 15) (const_int 47) 6615 (const_int 24) (const_int 56) 6616 (const_int 25) (const_int 57) 6617 (const_int 26) (const_int 58) 6618 (const_int 27) (const_int 59) 6619 (const_int 28) (const_int 60) 6620 (const_int 29) (const_int 61) 6621 (const_int 30) (const_int 62) 6622 (const_int 31) (const_int 63)])))] 6623 "TARGET_AVX2" 6624 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" 6625 [(set_attr "type" "sselog") 6626 (set_attr "prefix" "vex") 6627 (set_attr "mode" "OI")]) 6628 6629(define_insn "vec_interleave_highv16qi" 6630 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6631 (vec_select:V16QI 6632 (vec_concat:V32QI 6633 (match_operand:V16QI 1 "register_operand" "0,x") 6634 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")) 6635 (parallel [(const_int 8) (const_int 24) 6636 (const_int 9) (const_int 25) 6637 (const_int 10) (const_int 26) 6638 (const_int 11) (const_int 27) 6639 (const_int 12) (const_int 28) 6640 (const_int 13) (const_int 29) 6641 (const_int 14) (const_int 30) 6642 (const_int 15) (const_int 31)])))] 6643 "TARGET_SSE2" 6644 "@ 6645 punpckhbw\t{%2, %0|%0, %2} 6646 vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" 6647 [(set_attr "isa" "noavx,avx") 6648 (set_attr "type" "sselog") 6649 (set_attr "prefix_data16" "1,*") 6650 (set_attr "prefix" "orig,vex") 6651 (set_attr "mode" "TI")]) 6652 6653(define_insn "avx2_interleave_lowv32qi" 6654 [(set (match_operand:V32QI 0 "register_operand" "=x") 6655 (vec_select:V32QI 6656 (vec_concat:V64QI 6657 (match_operand:V32QI 1 "register_operand" "x") 6658 (match_operand:V32QI 2 "nonimmediate_operand" "xm")) 6659 (parallel [(const_int 0) (const_int 32) 6660 (const_int 1) (const_int 33) 6661 (const_int 2) (const_int 34) 6662 (const_int 3) (const_int 35) 6663 (const_int 4) (const_int 36) 6664 (const_int 5) (const_int 37) 6665 (const_int 6) (const_int 38) 6666 (const_int 7) (const_int 39) 6667 (const_int 16) (const_int 48) 6668 (const_int 17) (const_int 49) 6669 (const_int 18) (const_int 50) 6670 (const_int 19) (const_int 51) 6671 (const_int 20) (const_int 52) 6672 (const_int 21) (const_int 53) 6673 (const_int 22) (const_int 54) 6674 (const_int 23) (const_int 55)])))] 6675 "TARGET_AVX2" 6676 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" 6677 [(set_attr "type" "sselog") 6678 (set_attr "prefix" "vex") 6679 (set_attr "mode" "OI")]) 6680 6681(define_insn "vec_interleave_lowv16qi" 6682 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 6683 (vec_select:V16QI 6684 (vec_concat:V32QI 6685 (match_operand:V16QI 1 "register_operand" "0,x") 6686 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")) 6687 (parallel [(const_int 0) (const_int 16) 6688 (const_int 1) (const_int 17) 6689 (const_int 2) (const_int 18) 6690 (const_int 3) (const_int 19) 6691 (const_int 4) (const_int 20) 6692 (const_int 5) (const_int 21) 6693 (const_int 6) (const_int 22) 6694 (const_int 7) (const_int 23)])))] 6695 "TARGET_SSE2" 6696 "@ 6697 punpcklbw\t{%2, %0|%0, %2} 6698 vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" 6699 [(set_attr "isa" "noavx,avx") 6700 (set_attr "type" "sselog") 6701 (set_attr "prefix_data16" "1,*") 6702 (set_attr "prefix" "orig,vex") 6703 (set_attr "mode" "TI")]) 6704 6705(define_insn "avx2_interleave_highv16hi" 6706 [(set (match_operand:V16HI 0 "register_operand" "=x") 6707 (vec_select:V16HI 6708 (vec_concat:V32HI 6709 (match_operand:V16HI 1 "register_operand" "x") 6710 (match_operand:V16HI 2 "nonimmediate_operand" "xm")) 6711 (parallel [(const_int 4) (const_int 20) 6712 (const_int 5) (const_int 21) 6713 (const_int 6) (const_int 22) 6714 (const_int 7) (const_int 23) 6715 (const_int 12) (const_int 28) 6716 (const_int 13) (const_int 29) 6717 (const_int 14) (const_int 30) 6718 (const_int 15) (const_int 31)])))] 6719 "TARGET_AVX2" 6720 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" 6721 [(set_attr "type" "sselog") 6722 (set_attr "prefix" "vex") 6723 (set_attr "mode" "OI")]) 6724 6725(define_insn "vec_interleave_highv8hi" 6726 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6727 (vec_select:V8HI 6728 (vec_concat:V16HI 6729 (match_operand:V8HI 1 "register_operand" "0,x") 6730 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")) 6731 (parallel [(const_int 4) (const_int 12) 6732 (const_int 5) (const_int 13) 6733 (const_int 6) (const_int 14) 6734 (const_int 7) (const_int 15)])))] 6735 "TARGET_SSE2" 6736 "@ 6737 punpckhwd\t{%2, %0|%0, %2} 6738 vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" 6739 [(set_attr "isa" "noavx,avx") 6740 (set_attr "type" "sselog") 6741 (set_attr "prefix_data16" "1,*") 6742 (set_attr "prefix" "orig,vex") 6743 (set_attr "mode" "TI")]) 6744 6745(define_insn "avx2_interleave_lowv16hi" 6746 [(set (match_operand:V16HI 0 "register_operand" "=x") 6747 (vec_select:V16HI 6748 (vec_concat:V32HI 6749 (match_operand:V16HI 1 "register_operand" "x") 6750 (match_operand:V16HI 2 "nonimmediate_operand" "xm")) 6751 (parallel [(const_int 0) (const_int 16) 6752 (const_int 1) (const_int 17) 6753 (const_int 2) (const_int 18) 6754 (const_int 3) (const_int 19) 6755 (const_int 8) (const_int 24) 6756 (const_int 9) (const_int 25) 6757 (const_int 10) (const_int 26) 6758 (const_int 11) (const_int 27)])))] 6759 "TARGET_AVX2" 6760 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" 6761 [(set_attr "type" "sselog") 6762 (set_attr "prefix" "vex") 6763 (set_attr "mode" "OI")]) 6764 6765(define_insn "vec_interleave_lowv8hi" 6766 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 6767 (vec_select:V8HI 6768 (vec_concat:V16HI 6769 (match_operand:V8HI 1 "register_operand" "0,x") 6770 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")) 6771 (parallel [(const_int 0) (const_int 8) 6772 (const_int 1) (const_int 9) 6773 (const_int 2) (const_int 10) 6774 (const_int 3) (const_int 11)])))] 6775 "TARGET_SSE2" 6776 "@ 6777 punpcklwd\t{%2, %0|%0, %2} 6778 vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" 6779 [(set_attr "isa" "noavx,avx") 6780 (set_attr "type" "sselog") 6781 (set_attr "prefix_data16" "1,*") 6782 (set_attr "prefix" "orig,vex") 6783 (set_attr "mode" "TI")]) 6784 6785(define_insn "avx2_interleave_highv8si" 6786 [(set (match_operand:V8SI 0 "register_operand" "=x") 6787 (vec_select:V8SI 6788 (vec_concat:V16SI 6789 (match_operand:V8SI 1 "register_operand" "x") 6790 (match_operand:V8SI 2 "nonimmediate_operand" "xm")) 6791 (parallel [(const_int 2) (const_int 10) 6792 (const_int 3) (const_int 11) 6793 (const_int 6) (const_int 14) 6794 (const_int 7) (const_int 15)])))] 6795 "TARGET_AVX2" 6796 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" 6797 [(set_attr "type" "sselog") 6798 (set_attr "prefix" "vex") 6799 (set_attr "mode" "OI")]) 6800 6801(define_insn "vec_interleave_highv4si" 6802 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 6803 (vec_select:V4SI 6804 (vec_concat:V8SI 6805 (match_operand:V4SI 1 "register_operand" "0,x") 6806 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) 6807 (parallel [(const_int 2) (const_int 6) 6808 (const_int 3) (const_int 7)])))] 6809 "TARGET_SSE2" 6810 "@ 6811 punpckhdq\t{%2, %0|%0, %2} 6812 vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" 6813 [(set_attr "isa" "noavx,avx") 6814 (set_attr "type" "sselog") 6815 (set_attr "prefix_data16" "1,*") 6816 (set_attr "prefix" "orig,vex") 6817 (set_attr "mode" "TI")]) 6818 6819(define_insn "avx2_interleave_lowv8si" 6820 [(set (match_operand:V8SI 0 "register_operand" "=x") 6821 (vec_select:V8SI 6822 (vec_concat:V16SI 6823 (match_operand:V8SI 1 "register_operand" "x") 6824 (match_operand:V8SI 2 "nonimmediate_operand" "xm")) 6825 (parallel [(const_int 0) (const_int 8) 6826 (const_int 1) (const_int 9) 6827 (const_int 4) (const_int 12) 6828 (const_int 5) (const_int 13)])))] 6829 "TARGET_AVX2" 6830 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" 6831 [(set_attr "type" "sselog") 6832 (set_attr "prefix" "vex") 6833 (set_attr "mode" "OI")]) 6834 6835(define_insn "vec_interleave_lowv4si" 6836 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 6837 (vec_select:V4SI 6838 (vec_concat:V8SI 6839 (match_operand:V4SI 1 "register_operand" "0,x") 6840 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")) 6841 (parallel [(const_int 0) (const_int 4) 6842 (const_int 1) (const_int 5)])))] 6843 "TARGET_SSE2" 6844 "@ 6845 punpckldq\t{%2, %0|%0, %2} 6846 vpunpckldq\t{%2, %1, %0|%0, %1, %2}" 6847 [(set_attr "isa" "noavx,avx") 6848 (set_attr "type" "sselog") 6849 (set_attr "prefix_data16" "1,*") 6850 (set_attr "prefix" "orig,vex") 6851 (set_attr "mode" "TI")]) 6852 6853(define_expand "vec_interleave_high<mode>" 6854 [(match_operand:VI_256 0 "register_operand" "=x") 6855 (match_operand:VI_256 1 "register_operand" "x") 6856 (match_operand:VI_256 2 "nonimmediate_operand" "xm")] 6857 "TARGET_AVX2" 6858{ 6859 rtx t1 = gen_reg_rtx (<MODE>mode); 6860 rtx t2 = gen_reg_rtx (<MODE>mode); 6861 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); 6862 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); 6863 emit_insn (gen_avx2_permv2ti 6864 (gen_lowpart (V4DImode, operands[0]), 6865 gen_lowpart (V4DImode, t1), 6866 gen_lowpart (V4DImode, t2), GEN_INT (1 + (3 << 4)))); 6867 DONE; 6868}) 6869 6870(define_expand "vec_interleave_low<mode>" 6871 [(match_operand:VI_256 0 "register_operand" "=x") 6872 (match_operand:VI_256 1 "register_operand" "x") 6873 (match_operand:VI_256 2 "nonimmediate_operand" "xm")] 6874 "TARGET_AVX2" 6875{ 6876 rtx t1 = gen_reg_rtx (<MODE>mode); 6877 rtx t2 = gen_reg_rtx (<MODE>mode); 6878 emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2])); 6879 emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2])); 6880 emit_insn (gen_avx2_permv2ti 6881 (gen_lowpart (V4DImode, operands[0]), 6882 gen_lowpart (V4DImode, t1), 6883 gen_lowpart (V4DImode, t2), GEN_INT (0 + (2 << 4)))); 6884 DONE; 6885}) 6886 6887;; Modes handled by pinsr patterns. 6888(define_mode_iterator PINSR_MODE 6889 [(V16QI "TARGET_SSE4_1") V8HI 6890 (V4SI "TARGET_SSE4_1") 6891 (V2DI "TARGET_SSE4_1 && TARGET_64BIT")]) 6892 6893(define_mode_attr sse2p4_1 6894 [(V16QI "sse4_1") (V8HI "sse2") 6895 (V4SI "sse4_1") (V2DI "sse4_1")]) 6896 6897;; sse4_1_pinsrd must come before sse2_loadld since it is preferred. 6898(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>" 6899 [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x") 6900 (vec_merge:PINSR_MODE 6901 (vec_duplicate:PINSR_MODE 6902 (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m")) 6903 (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x") 6904 (match_operand:SI 3 "const_int_operand")))] 6905 "TARGET_SSE2 6906 && ((unsigned) exact_log2 (INTVAL (operands[3])) 6907 < GET_MODE_NUNITS (<MODE>mode))" 6908{ 6909 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 6910 6911 switch (which_alternative) 6912 { 6913 case 0: 6914 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) 6915 return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}"; 6916 /* FALLTHRU */ 6917 case 1: 6918 return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"; 6919 case 2: 6920 if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode)) 6921 return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; 6922 /* FALLTHRU */ 6923 case 3: 6924 return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 6925 default: 6926 gcc_unreachable (); 6927 } 6928} 6929 [(set_attr "isa" "noavx,noavx,avx,avx") 6930 (set_attr "type" "sselog") 6931 (set (attr "prefix_rex") 6932 (if_then_else 6933 (and (not (match_test "TARGET_AVX")) 6934 (eq (const_string "<MODE>mode") (const_string "V2DImode"))) 6935 (const_string "1") 6936 (const_string "*"))) 6937 (set (attr "prefix_data16") 6938 (if_then_else 6939 (and (not (match_test "TARGET_AVX")) 6940 (eq (const_string "<MODE>mode") (const_string "V8HImode"))) 6941 (const_string "1") 6942 (const_string "*"))) 6943 (set (attr "prefix_extra") 6944 (if_then_else 6945 (and (not (match_test "TARGET_AVX")) 6946 (eq (const_string "<MODE>mode") (const_string "V8HImode"))) 6947 (const_string "*") 6948 (const_string "1"))) 6949 (set_attr "length_immediate" "1") 6950 (set_attr "prefix" "orig,orig,vex,vex") 6951 (set_attr "mode" "TI")]) 6952 6953(define_insn "*sse4_1_pextrb_<mode>" 6954 [(set (match_operand:SWI48 0 "register_operand" "=r") 6955 (zero_extend:SWI48 6956 (vec_select:QI 6957 (match_operand:V16QI 1 "register_operand" "x") 6958 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))] 6959 "TARGET_SSE4_1" 6960 "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" 6961 [(set_attr "type" "sselog") 6962 (set_attr "prefix_extra" "1") 6963 (set_attr "length_immediate" "1") 6964 (set_attr "prefix" "maybe_vex") 6965 (set_attr "mode" "TI")]) 6966 6967(define_insn "*sse4_1_pextrb_memory" 6968 [(set (match_operand:QI 0 "memory_operand" "=m") 6969 (vec_select:QI 6970 (match_operand:V16QI 1 "register_operand" "x") 6971 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))] 6972 "TARGET_SSE4_1" 6973 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" 6974 [(set_attr "type" "sselog") 6975 (set_attr "prefix_extra" "1") 6976 (set_attr "length_immediate" "1") 6977 (set_attr "prefix" "maybe_vex") 6978 (set_attr "mode" "TI")]) 6979 6980(define_insn "*sse2_pextrw_<mode>" 6981 [(set (match_operand:SWI48 0 "register_operand" "=r") 6982 (zero_extend:SWI48 6983 (vec_select:HI 6984 (match_operand:V8HI 1 "register_operand" "x") 6985 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] 6986 "TARGET_SSE2" 6987 "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}" 6988 [(set_attr "type" "sselog") 6989 (set_attr "prefix_data16" "1") 6990 (set_attr "length_immediate" "1") 6991 (set_attr "prefix" "maybe_vex") 6992 (set_attr "mode" "TI")]) 6993 6994(define_insn "*sse4_1_pextrw_memory" 6995 [(set (match_operand:HI 0 "memory_operand" "=m") 6996 (vec_select:HI 6997 (match_operand:V8HI 1 "register_operand" "x") 6998 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))] 6999 "TARGET_SSE4_1" 7000 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" 7001 [(set_attr "type" "sselog") 7002 (set_attr "prefix_extra" "1") 7003 (set_attr "length_immediate" "1") 7004 (set_attr "prefix" "maybe_vex") 7005 (set_attr "mode" "TI")]) 7006 7007(define_insn "*sse4_1_pextrd" 7008 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") 7009 (vec_select:SI 7010 (match_operand:V4SI 1 "register_operand" "x") 7011 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] 7012 "TARGET_SSE4_1" 7013 "%vpextrd\t{%2, %1, %0|%0, %1, %2}" 7014 [(set_attr "type" "sselog") 7015 (set_attr "prefix_extra" "1") 7016 (set_attr "length_immediate" "1") 7017 (set_attr "prefix" "maybe_vex") 7018 (set_attr "mode" "TI")]) 7019 7020(define_insn "*sse4_1_pextrd_zext" 7021 [(set (match_operand:DI 0 "register_operand" "=r") 7022 (zero_extend:DI 7023 (vec_select:SI 7024 (match_operand:V4SI 1 "register_operand" "x") 7025 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))] 7026 "TARGET_64BIT && TARGET_SSE4_1" 7027 "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}" 7028 [(set_attr "type" "sselog") 7029 (set_attr "prefix_extra" "1") 7030 (set_attr "length_immediate" "1") 7031 (set_attr "prefix" "maybe_vex") 7032 (set_attr "mode" "TI")]) 7033 7034;; It must come before *vec_extractv2di_1_rex64 since it is preferred. 7035(define_insn "*sse4_1_pextrq" 7036 [(set (match_operand:DI 0 "nonimmediate_operand" "=rm") 7037 (vec_select:DI 7038 (match_operand:V2DI 1 "register_operand" "x") 7039 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))] 7040 "TARGET_SSE4_1 && TARGET_64BIT" 7041 "%vpextrq\t{%2, %1, %0|%0, %1, %2}" 7042 [(set_attr "type" "sselog") 7043 (set_attr "prefix_rex" "1") 7044 (set_attr "prefix_extra" "1") 7045 (set_attr "length_immediate" "1") 7046 (set_attr "prefix" "maybe_vex") 7047 (set_attr "mode" "TI")]) 7048 7049(define_expand "avx2_pshufdv3" 7050 [(match_operand:V8SI 0 "register_operand") 7051 (match_operand:V8SI 1 "nonimmediate_operand") 7052 (match_operand:SI 2 "const_0_to_255_operand")] 7053 "TARGET_AVX2" 7054{ 7055 int mask = INTVAL (operands[2]); 7056 emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1], 7057 GEN_INT ((mask >> 0) & 3), 7058 GEN_INT ((mask >> 2) & 3), 7059 GEN_INT ((mask >> 4) & 3), 7060 GEN_INT ((mask >> 6) & 3), 7061 GEN_INT (((mask >> 0) & 3) + 4), 7062 GEN_INT (((mask >> 2) & 3) + 4), 7063 GEN_INT (((mask >> 4) & 3) + 4), 7064 GEN_INT (((mask >> 6) & 3) + 4))); 7065 DONE; 7066}) 7067 7068(define_insn "avx2_pshufd_1" 7069 [(set (match_operand:V8SI 0 "register_operand" "=x") 7070 (vec_select:V8SI 7071 (match_operand:V8SI 1 "nonimmediate_operand" "xm") 7072 (parallel [(match_operand 2 "const_0_to_3_operand") 7073 (match_operand 3 "const_0_to_3_operand") 7074 (match_operand 4 "const_0_to_3_operand") 7075 (match_operand 5 "const_0_to_3_operand") 7076 (match_operand 6 "const_4_to_7_operand") 7077 (match_operand 7 "const_4_to_7_operand") 7078 (match_operand 8 "const_4_to_7_operand") 7079 (match_operand 9 "const_4_to_7_operand")])))] 7080 "TARGET_AVX2 7081 && INTVAL (operands[2]) + 4 == INTVAL (operands[6]) 7082 && INTVAL (operands[3]) + 4 == INTVAL (operands[7]) 7083 && INTVAL (operands[4]) + 4 == INTVAL (operands[8]) 7084 && INTVAL (operands[5]) + 4 == INTVAL (operands[9])" 7085{ 7086 int mask = 0; 7087 mask |= INTVAL (operands[2]) << 0; 7088 mask |= INTVAL (operands[3]) << 2; 7089 mask |= INTVAL (operands[4]) << 4; 7090 mask |= INTVAL (operands[5]) << 6; 7091 operands[2] = GEN_INT (mask); 7092 7093 return "vpshufd\t{%2, %1, %0|%0, %1, %2}"; 7094} 7095 [(set_attr "type" "sselog1") 7096 (set_attr "prefix" "vex") 7097 (set_attr "length_immediate" "1") 7098 (set_attr "mode" "OI")]) 7099 7100(define_expand "sse2_pshufd" 7101 [(match_operand:V4SI 0 "register_operand") 7102 (match_operand:V4SI 1 "nonimmediate_operand") 7103 (match_operand:SI 2 "const_int_operand")] 7104 "TARGET_SSE2" 7105{ 7106 int mask = INTVAL (operands[2]); 7107 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1], 7108 GEN_INT ((mask >> 0) & 3), 7109 GEN_INT ((mask >> 2) & 3), 7110 GEN_INT ((mask >> 4) & 3), 7111 GEN_INT ((mask >> 6) & 3))); 7112 DONE; 7113}) 7114 7115(define_insn "sse2_pshufd_1" 7116 [(set (match_operand:V4SI 0 "register_operand" "=x") 7117 (vec_select:V4SI 7118 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 7119 (parallel [(match_operand 2 "const_0_to_3_operand") 7120 (match_operand 3 "const_0_to_3_operand") 7121 (match_operand 4 "const_0_to_3_operand") 7122 (match_operand 5 "const_0_to_3_operand")])))] 7123 "TARGET_SSE2" 7124{ 7125 int mask = 0; 7126 mask |= INTVAL (operands[2]) << 0; 7127 mask |= INTVAL (operands[3]) << 2; 7128 mask |= INTVAL (operands[4]) << 4; 7129 mask |= INTVAL (operands[5]) << 6; 7130 operands[2] = GEN_INT (mask); 7131 7132 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}"; 7133} 7134 [(set_attr "type" "sselog1") 7135 (set_attr "prefix_data16" "1") 7136 (set_attr "prefix" "maybe_vex") 7137 (set_attr "length_immediate" "1") 7138 (set_attr "mode" "TI")]) 7139 7140(define_expand "avx2_pshuflwv3" 7141 [(match_operand:V16HI 0 "register_operand") 7142 (match_operand:V16HI 1 "nonimmediate_operand") 7143 (match_operand:SI 2 "const_0_to_255_operand")] 7144 "TARGET_AVX2" 7145{ 7146 int mask = INTVAL (operands[2]); 7147 emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1], 7148 GEN_INT ((mask >> 0) & 3), 7149 GEN_INT ((mask >> 2) & 3), 7150 GEN_INT ((mask >> 4) & 3), 7151 GEN_INT ((mask >> 6) & 3), 7152 GEN_INT (((mask >> 0) & 3) + 8), 7153 GEN_INT (((mask >> 2) & 3) + 8), 7154 GEN_INT (((mask >> 4) & 3) + 8), 7155 GEN_INT (((mask >> 6) & 3) + 8))); 7156 DONE; 7157}) 7158 7159(define_insn "avx2_pshuflw_1" 7160 [(set (match_operand:V16HI 0 "register_operand" "=x") 7161 (vec_select:V16HI 7162 (match_operand:V16HI 1 "nonimmediate_operand" "xm") 7163 (parallel [(match_operand 2 "const_0_to_3_operand") 7164 (match_operand 3 "const_0_to_3_operand") 7165 (match_operand 4 "const_0_to_3_operand") 7166 (match_operand 5 "const_0_to_3_operand") 7167 (const_int 4) 7168 (const_int 5) 7169 (const_int 6) 7170 (const_int 7) 7171 (match_operand 6 "const_8_to_11_operand") 7172 (match_operand 7 "const_8_to_11_operand") 7173 (match_operand 8 "const_8_to_11_operand") 7174 (match_operand 9 "const_8_to_11_operand") 7175 (const_int 12) 7176 (const_int 13) 7177 (const_int 14) 7178 (const_int 15)])))] 7179 "TARGET_AVX2 7180 && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) 7181 && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) 7182 && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) 7183 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])" 7184{ 7185 int mask = 0; 7186 mask |= INTVAL (operands[2]) << 0; 7187 mask |= INTVAL (operands[3]) << 2; 7188 mask |= INTVAL (operands[4]) << 4; 7189 mask |= INTVAL (operands[5]) << 6; 7190 operands[2] = GEN_INT (mask); 7191 7192 return "vpshuflw\t{%2, %1, %0|%0, %1, %2}"; 7193} 7194 [(set_attr "type" "sselog") 7195 (set_attr "prefix" "vex") 7196 (set_attr "length_immediate" "1") 7197 (set_attr "mode" "OI")]) 7198 7199(define_expand "sse2_pshuflw" 7200 [(match_operand:V8HI 0 "register_operand") 7201 (match_operand:V8HI 1 "nonimmediate_operand") 7202 (match_operand:SI 2 "const_int_operand")] 7203 "TARGET_SSE2" 7204{ 7205 int mask = INTVAL (operands[2]); 7206 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1], 7207 GEN_INT ((mask >> 0) & 3), 7208 GEN_INT ((mask >> 2) & 3), 7209 GEN_INT ((mask >> 4) & 3), 7210 GEN_INT ((mask >> 6) & 3))); 7211 DONE; 7212}) 7213 7214(define_insn "sse2_pshuflw_1" 7215 [(set (match_operand:V8HI 0 "register_operand" "=x") 7216 (vec_select:V8HI 7217 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 7218 (parallel [(match_operand 2 "const_0_to_3_operand") 7219 (match_operand 3 "const_0_to_3_operand") 7220 (match_operand 4 "const_0_to_3_operand") 7221 (match_operand 5 "const_0_to_3_operand") 7222 (const_int 4) 7223 (const_int 5) 7224 (const_int 6) 7225 (const_int 7)])))] 7226 "TARGET_SSE2" 7227{ 7228 int mask = 0; 7229 mask |= INTVAL (operands[2]) << 0; 7230 mask |= INTVAL (operands[3]) << 2; 7231 mask |= INTVAL (operands[4]) << 4; 7232 mask |= INTVAL (operands[5]) << 6; 7233 operands[2] = GEN_INT (mask); 7234 7235 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; 7236} 7237 [(set_attr "type" "sselog") 7238 (set_attr "prefix_data16" "0") 7239 (set_attr "prefix_rep" "1") 7240 (set_attr "prefix" "maybe_vex") 7241 (set_attr "length_immediate" "1") 7242 (set_attr "mode" "TI")]) 7243 7244(define_expand "avx2_pshufhwv3" 7245 [(match_operand:V16HI 0 "register_operand") 7246 (match_operand:V16HI 1 "nonimmediate_operand") 7247 (match_operand:SI 2 "const_0_to_255_operand")] 7248 "TARGET_AVX2" 7249{ 7250 int mask = INTVAL (operands[2]); 7251 emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1], 7252 GEN_INT (((mask >> 0) & 3) + 4), 7253 GEN_INT (((mask >> 2) & 3) + 4), 7254 GEN_INT (((mask >> 4) & 3) + 4), 7255 GEN_INT (((mask >> 6) & 3) + 4), 7256 GEN_INT (((mask >> 0) & 3) + 12), 7257 GEN_INT (((mask >> 2) & 3) + 12), 7258 GEN_INT (((mask >> 4) & 3) + 12), 7259 GEN_INT (((mask >> 6) & 3) + 12))); 7260 DONE; 7261}) 7262 7263(define_insn "avx2_pshufhw_1" 7264 [(set (match_operand:V16HI 0 "register_operand" "=x") 7265 (vec_select:V16HI 7266 (match_operand:V16HI 1 "nonimmediate_operand" "xm") 7267 (parallel [(const_int 0) 7268 (const_int 1) 7269 (const_int 2) 7270 (const_int 3) 7271 (match_operand 2 "const_4_to_7_operand") 7272 (match_operand 3 "const_4_to_7_operand") 7273 (match_operand 4 "const_4_to_7_operand") 7274 (match_operand 5 "const_4_to_7_operand") 7275 (const_int 8) 7276 (const_int 9) 7277 (const_int 10) 7278 (const_int 11) 7279 (match_operand 6 "const_12_to_15_operand") 7280 (match_operand 7 "const_12_to_15_operand") 7281 (match_operand 8 "const_12_to_15_operand") 7282 (match_operand 9 "const_12_to_15_operand")])))] 7283 "TARGET_AVX2 7284 && INTVAL (operands[2]) + 8 == INTVAL (operands[6]) 7285 && INTVAL (operands[3]) + 8 == INTVAL (operands[7]) 7286 && INTVAL (operands[4]) + 8 == INTVAL (operands[8]) 7287 && INTVAL (operands[5]) + 8 == INTVAL (operands[9])" 7288{ 7289 int mask = 0; 7290 mask |= (INTVAL (operands[2]) - 4) << 0; 7291 mask |= (INTVAL (operands[3]) - 4) << 2; 7292 mask |= (INTVAL (operands[4]) - 4) << 4; 7293 mask |= (INTVAL (operands[5]) - 4) << 6; 7294 operands[2] = GEN_INT (mask); 7295 7296 return "vpshufhw\t{%2, %1, %0|%0, %1, %2}"; 7297} 7298 [(set_attr "type" "sselog") 7299 (set_attr "prefix" "vex") 7300 (set_attr "length_immediate" "1") 7301 (set_attr "mode" "OI")]) 7302 7303(define_expand "sse2_pshufhw" 7304 [(match_operand:V8HI 0 "register_operand") 7305 (match_operand:V8HI 1 "nonimmediate_operand") 7306 (match_operand:SI 2 "const_int_operand")] 7307 "TARGET_SSE2" 7308{ 7309 int mask = INTVAL (operands[2]); 7310 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1], 7311 GEN_INT (((mask >> 0) & 3) + 4), 7312 GEN_INT (((mask >> 2) & 3) + 4), 7313 GEN_INT (((mask >> 4) & 3) + 4), 7314 GEN_INT (((mask >> 6) & 3) + 4))); 7315 DONE; 7316}) 7317 7318(define_insn "sse2_pshufhw_1" 7319 [(set (match_operand:V8HI 0 "register_operand" "=x") 7320 (vec_select:V8HI 7321 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 7322 (parallel [(const_int 0) 7323 (const_int 1) 7324 (const_int 2) 7325 (const_int 3) 7326 (match_operand 2 "const_4_to_7_operand") 7327 (match_operand 3 "const_4_to_7_operand") 7328 (match_operand 4 "const_4_to_7_operand") 7329 (match_operand 5 "const_4_to_7_operand")])))] 7330 "TARGET_SSE2" 7331{ 7332 int mask = 0; 7333 mask |= (INTVAL (operands[2]) - 4) << 0; 7334 mask |= (INTVAL (operands[3]) - 4) << 2; 7335 mask |= (INTVAL (operands[4]) - 4) << 4; 7336 mask |= (INTVAL (operands[5]) - 4) << 6; 7337 operands[2] = GEN_INT (mask); 7338 7339 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}"; 7340} 7341 [(set_attr "type" "sselog") 7342 (set_attr "prefix_rep" "1") 7343 (set_attr "prefix_data16" "0") 7344 (set_attr "prefix" "maybe_vex") 7345 (set_attr "length_immediate" "1") 7346 (set_attr "mode" "TI")]) 7347 7348(define_expand "sse2_loadd" 7349 [(set (match_operand:V4SI 0 "register_operand") 7350 (vec_merge:V4SI 7351 (vec_duplicate:V4SI 7352 (match_operand:SI 1 "nonimmediate_operand")) 7353 (match_dup 2) 7354 (const_int 1)))] 7355 "TARGET_SSE" 7356 "operands[2] = CONST0_RTX (V4SImode);") 7357 7358(define_insn "sse2_loadld" 7359 [(set (match_operand:V4SI 0 "register_operand" "=x,Yi,x,x,x") 7360 (vec_merge:V4SI 7361 (vec_duplicate:V4SI 7362 (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x")) 7363 (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0,x") 7364 (const_int 1)))] 7365 "TARGET_SSE" 7366 "@ 7367 %vmovd\t{%2, %0|%0, %2} 7368 %vmovd\t{%2, %0|%0, %2} 7369 movss\t{%2, %0|%0, %2} 7370 movss\t{%2, %0|%0, %2} 7371 vmovss\t{%2, %1, %0|%0, %1, %2}" 7372 [(set_attr "isa" "sse2,sse2,noavx,noavx,avx") 7373 (set_attr "type" "ssemov") 7374 (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex") 7375 (set_attr "mode" "TI,TI,V4SF,SF,SF")]) 7376 7377(define_insn_and_split "sse2_stored" 7378 [(set (match_operand:SI 0 "nonimmediate_operand" "=xm,r") 7379 (vec_select:SI 7380 (match_operand:V4SI 1 "register_operand" "x,Yi") 7381 (parallel [(const_int 0)])))] 7382 "TARGET_SSE" 7383 "#" 7384 "&& reload_completed 7385 && (TARGET_INTER_UNIT_MOVES 7386 || MEM_P (operands [0]) 7387 || !GENERAL_REGNO_P (true_regnum (operands [0])))" 7388 [(set (match_dup 0) (match_dup 1))] 7389 "operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));") 7390 7391(define_insn_and_split "*vec_ext_v4si_mem" 7392 [(set (match_operand:SI 0 "register_operand" "=r") 7393 (vec_select:SI 7394 (match_operand:V4SI 1 "memory_operand" "o") 7395 (parallel [(match_operand 2 "const_0_to_3_operand")])))] 7396 "" 7397 "#" 7398 "reload_completed" 7399 [(const_int 0)] 7400{ 7401 int i = INTVAL (operands[2]); 7402 7403 emit_move_insn (operands[0], adjust_address (operands[1], SImode, i*4)); 7404 DONE; 7405}) 7406 7407(define_expand "sse_storeq" 7408 [(set (match_operand:DI 0 "nonimmediate_operand") 7409 (vec_select:DI 7410 (match_operand:V2DI 1 "register_operand") 7411 (parallel [(const_int 0)])))] 7412 "TARGET_SSE") 7413 7414(define_insn "*sse2_storeq_rex64" 7415 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm,*r,r") 7416 (vec_select:DI 7417 (match_operand:V2DI 1 "nonimmediate_operand" "x,Yi,o") 7418 (parallel [(const_int 0)])))] 7419 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 7420 "@ 7421 # 7422 # 7423 mov{q}\t{%1, %0|%0, %1}" 7424 [(set_attr "type" "*,*,imov") 7425 (set_attr "mode" "*,*,DI")]) 7426 7427(define_insn "*sse2_storeq" 7428 [(set (match_operand:DI 0 "nonimmediate_operand" "=xm") 7429 (vec_select:DI 7430 (match_operand:V2DI 1 "register_operand" "x") 7431 (parallel [(const_int 0)])))] 7432 "TARGET_SSE" 7433 "#") 7434 7435(define_split 7436 [(set (match_operand:DI 0 "nonimmediate_operand") 7437 (vec_select:DI 7438 (match_operand:V2DI 1 "register_operand") 7439 (parallel [(const_int 0)])))] 7440 "TARGET_SSE 7441 && reload_completed 7442 && (TARGET_INTER_UNIT_MOVES 7443 || MEM_P (operands [0]) 7444 || !GENERAL_REGNO_P (true_regnum (operands [0])))" 7445 [(set (match_dup 0) (match_dup 1))] 7446 "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));") 7447 7448(define_insn "*vec_extractv2di_1_rex64" 7449 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,r") 7450 (vec_select:DI 7451 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,o") 7452 (parallel [(const_int 1)])))] 7453 "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 7454 "@ 7455 %vmovhps\t{%1, %0|%0, %1} 7456 psrldq\t{$8, %0|%0, 8} 7457 vpsrldq\t{$8, %1, %0|%0, %1, 8} 7458 %vmovq\t{%H1, %0|%0, %H1} 7459 mov{q}\t{%H1, %0|%0, %H1}" 7460 [(set_attr "isa" "*,noavx,avx,*,*") 7461 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,imov") 7462 (set_attr "length_immediate" "*,1,1,*,*") 7463 (set_attr "memory" "*,none,none,*,*") 7464 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig") 7465 (set_attr "mode" "V2SF,TI,TI,TI,DI")]) 7466 7467(define_insn "*vec_extractv2di_1" 7468 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,x,x,x") 7469 (vec_select:DI 7470 (match_operand:V2DI 1 "nonimmediate_operand" " x,0,x,o,x,o") 7471 (parallel [(const_int 1)])))] 7472 "!TARGET_64BIT && TARGET_SSE 7473 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 7474 "@ 7475 %vmovhps\t{%1, %0|%0, %1} 7476 psrldq\t{$8, %0|%0, 8} 7477 vpsrldq\t{$8, %1, %0|%0, %1, 8} 7478 %vmovq\t{%H1, %0|%0, %H1} 7479 movhlps\t{%1, %0|%0, %1} 7480 movlps\t{%H1, %0|%0, %H1}" 7481 [(set_attr "isa" "*,sse2_noavx,avx,sse2,noavx,noavx") 7482 (set_attr "type" "ssemov,sseishft1,sseishft1,ssemov,ssemov,ssemov") 7483 (set_attr "length_immediate" "*,1,1,*,*,*") 7484 (set_attr "memory" "*,none,none,*,*,*") 7485 (set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig") 7486 (set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")]) 7487 7488(define_insn "*vec_dupv4si" 7489 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") 7490 (vec_duplicate:V4SI 7491 (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))] 7492 "TARGET_SSE" 7493 "@ 7494 %vpshufd\t{$0, %1, %0|%0, %1, 0} 7495 vbroadcastss\t{%1, %0|%0, %1} 7496 shufps\t{$0, %0, %0|%0, %0, 0}" 7497 [(set_attr "isa" "sse2,avx,noavx") 7498 (set_attr "type" "sselog1,ssemov,sselog1") 7499 (set_attr "length_immediate" "1,0,1") 7500 (set_attr "prefix_extra" "0,1,*") 7501 (set_attr "prefix" "maybe_vex,vex,orig") 7502 (set_attr "mode" "TI,V4SF,V4SF")]) 7503 7504(define_insn "*vec_dupv2di" 7505 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x") 7506 (vec_duplicate:V2DI 7507 (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))] 7508 "TARGET_SSE" 7509 "@ 7510 punpcklqdq\t%0, %0 7511 vpunpcklqdq\t{%d1, %0|%0, %d1} 7512 %vmovddup\t{%1, %0|%0, %1} 7513 movlhps\t%0, %0" 7514 [(set_attr "isa" "sse2_noavx,avx,sse3,noavx") 7515 (set_attr "type" "sselog1,sselog1,sselog1,ssemov") 7516 (set_attr "prefix" "orig,vex,maybe_vex,orig") 7517 (set_attr "mode" "TI,TI,DF,V4SF")]) 7518 7519(define_insn "*vec_concatv2si_sse4_1" 7520 [(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y") 7521 (vec_concat:V2SI 7522 (match_operand:SI 1 "nonimmediate_operand" " 0, x,0,x,rm, 0,rm") 7523 (match_operand:SI 2 "vector_move_operand" "rm,rm,x,x, C,*ym, C")))] 7524 "TARGET_SSE4_1" 7525 "@ 7526 pinsrd\t{$1, %2, %0|%0, %2, 1} 7527 vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1} 7528 punpckldq\t{%2, %0|%0, %2} 7529 vpunpckldq\t{%2, %1, %0|%0, %1, %2} 7530 %vmovd\t{%1, %0|%0, %1} 7531 punpckldq\t{%2, %0|%0, %2} 7532 movd\t{%1, %0|%0, %1}" 7533 [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*") 7534 (set_attr "type" "sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov") 7535 (set_attr "prefix_extra" "1,1,*,*,*,*,*") 7536 (set_attr "length_immediate" "1,1,*,*,*,*,*") 7537 (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig") 7538 (set_attr "mode" "TI,TI,TI,TI,TI,DI,DI")]) 7539 7540;; ??? In theory we can match memory for the MMX alternative, but allowing 7541;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE 7542;; alternatives pretty much forces the MMX alternative to be chosen. 7543(define_insn "*vec_concatv2si_sse2" 7544 [(set (match_operand:V2SI 0 "register_operand" "=x,x ,*y,*y") 7545 (vec_concat:V2SI 7546 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm") 7547 (match_operand:SI 2 "reg_or_0_operand" " x,C ,*y, C")))] 7548 "TARGET_SSE2" 7549 "@ 7550 punpckldq\t{%2, %0|%0, %2} 7551 movd\t{%1, %0|%0, %1} 7552 punpckldq\t{%2, %0|%0, %2} 7553 movd\t{%1, %0|%0, %1}" 7554 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") 7555 (set_attr "mode" "TI,TI,DI,DI")]) 7556 7557(define_insn "*vec_concatv2si_sse" 7558 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y") 7559 (vec_concat:V2SI 7560 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm") 7561 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))] 7562 "TARGET_SSE" 7563 "@ 7564 unpcklps\t{%2, %0|%0, %2} 7565 movss\t{%1, %0|%0, %1} 7566 punpckldq\t{%2, %0|%0, %2} 7567 movd\t{%1, %0|%0, %1}" 7568 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") 7569 (set_attr "mode" "V4SF,V4SF,DI,DI")]) 7570 7571(define_insn "*vec_concatv4si" 7572 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x,x") 7573 (vec_concat:V4SI 7574 (match_operand:V2SI 1 "register_operand" " 0,x,0,0,x") 7575 (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))] 7576 "TARGET_SSE" 7577 "@ 7578 punpcklqdq\t{%2, %0|%0, %2} 7579 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} 7580 movlhps\t{%2, %0|%0, %2} 7581 movhps\t{%2, %0|%0, %2} 7582 vmovhps\t{%2, %1, %0|%0, %1, %2}" 7583 [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx") 7584 (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov") 7585 (set_attr "prefix" "orig,vex,orig,orig,vex") 7586 (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")]) 7587 7588;; movd instead of movq is required to handle broken assemblers. 7589(define_insn "*vec_concatv2di_rex64" 7590 [(set (match_operand:V2DI 0 "register_operand" 7591 "=x,x ,x ,Yi,!x,x,x,x,x") 7592 (vec_concat:V2DI 7593 (match_operand:DI 1 "nonimmediate_operand" 7594 " 0,x ,xm,r ,*y,0,x,0,x") 7595 (match_operand:DI 2 "vector_move_operand" 7596 "rm,rm,C ,C ,C ,x,x,m,m")))] 7597 "TARGET_64BIT" 7598 "@ 7599 pinsrq\t{$1, %2, %0|%0, %2, 1} 7600 vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1} 7601 %vmovq\t{%1, %0|%0, %1} 7602 %vmovd\t{%1, %0|%0, %1} 7603 movq2dq\t{%1, %0|%0, %1} 7604 punpcklqdq\t{%2, %0|%0, %2} 7605 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} 7606 movhps\t{%2, %0|%0, %2} 7607 vmovhps\t{%2, %1, %0|%0, %1, %2}" 7608 [(set_attr "isa" "sse4_noavx,avx,*,*,*,noavx,avx,noavx,avx") 7609 (set (attr "type") 7610 (if_then_else 7611 (eq_attr "alternative" "0,1,5,6") 7612 (const_string "sselog") 7613 (const_string "ssemov"))) 7614 (set (attr "prefix_rex") 7615 (if_then_else 7616 (and (eq_attr "alternative" "0,3") 7617 (not (match_test "TARGET_AVX"))) 7618 (const_string "1") 7619 (const_string "*"))) 7620 (set_attr "prefix_extra" "1,1,*,*,*,*,*,*,*") 7621 (set_attr "length_immediate" "1,1,*,*,*,*,*,*,*") 7622 (set_attr "prefix" "orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,vex") 7623 (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,V2SF,V2SF")]) 7624 7625(define_insn "vec_concatv2di" 7626 [(set (match_operand:V2DI 0 "register_operand" "=x,?x,x,x,x,x,x") 7627 (vec_concat:V2DI 7628 (match_operand:DI 1 "nonimmediate_operand" "xm,*y,0,x,0,0,x") 7629 (match_operand:DI 2 "vector_move_operand" " C, C,x,x,x,m,m")))] 7630 "!TARGET_64BIT && TARGET_SSE" 7631 "@ 7632 %vmovq\t{%1, %0|%0, %1} 7633 movq2dq\t{%1, %0|%0, %1} 7634 punpcklqdq\t{%2, %0|%0, %2} 7635 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} 7636 movlhps\t{%2, %0|%0, %2} 7637 movhps\t{%2, %0|%0, %2} 7638 vmovhps\t{%2, %1, %0|%0, %1, %2}" 7639 [(set_attr "isa" "sse2,sse2,sse2_noavx,avx,noavx,noavx,avx") 7640 (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,ssemov,ssemov") 7641 (set_attr "prefix" "maybe_vex,orig,orig,vex,orig,orig,vex") 7642 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF,V2SF")]) 7643 7644(define_expand "vec_unpacks_lo_<mode>" 7645 [(match_operand:<sseunpackmode> 0 "register_operand") 7646 (match_operand:VI124_AVX2 1 "register_operand")] 7647 "TARGET_SSE2" 7648 "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;") 7649 7650(define_expand "vec_unpacks_hi_<mode>" 7651 [(match_operand:<sseunpackmode> 0 "register_operand") 7652 (match_operand:VI124_AVX2 1 "register_operand")] 7653 "TARGET_SSE2" 7654 "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;") 7655 7656(define_expand "vec_unpacku_lo_<mode>" 7657 [(match_operand:<sseunpackmode> 0 "register_operand") 7658 (match_operand:VI124_AVX2 1 "register_operand")] 7659 "TARGET_SSE2" 7660 "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;") 7661 7662(define_expand "vec_unpacku_hi_<mode>" 7663 [(match_operand:<sseunpackmode> 0 "register_operand") 7664 (match_operand:VI124_AVX2 1 "register_operand")] 7665 "TARGET_SSE2" 7666 "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;") 7667 7668;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7669;; 7670;; Miscellaneous 7671;; 7672;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7673 7674(define_expand "<sse2_avx2>_uavg<mode>3" 7675 [(set (match_operand:VI12_AVX2 0 "register_operand") 7676 (truncate:VI12_AVX2 7677 (lshiftrt:<ssedoublemode> 7678 (plus:<ssedoublemode> 7679 (plus:<ssedoublemode> 7680 (zero_extend:<ssedoublemode> 7681 (match_operand:VI12_AVX2 1 "nonimmediate_operand")) 7682 (zero_extend:<ssedoublemode> 7683 (match_operand:VI12_AVX2 2 "nonimmediate_operand"))) 7684 (match_dup 3)) 7685 (const_int 1))))] 7686 "TARGET_SSE2" 7687{ 7688 operands[3] = CONST1_RTX(<MODE>mode); 7689 ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands); 7690}) 7691 7692(define_insn "*<sse2_avx2>_uavg<mode>3" 7693 [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,x") 7694 (truncate:VI12_AVX2 7695 (lshiftrt:<ssedoublemode> 7696 (plus:<ssedoublemode> 7697 (plus:<ssedoublemode> 7698 (zero_extend:<ssedoublemode> 7699 (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,x")) 7700 (zero_extend:<ssedoublemode> 7701 (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,xm"))) 7702 (match_operand:VI12_AVX2 3 "const1_operand")) 7703 (const_int 1))))] 7704 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" 7705 "@ 7706 pavg<ssemodesuffix>\t{%2, %0|%0, %2} 7707 vpavg<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 7708 [(set_attr "isa" "noavx,avx") 7709 (set_attr "type" "sseiadd") 7710 (set_attr "prefix_data16" "1,*") 7711 (set_attr "prefix" "orig,vex") 7712 (set_attr "mode" "<sseinsnmode>")]) 7713 7714;; The correct representation for this is absolutely enormous, and 7715;; surely not generally useful. 7716(define_insn "<sse2_avx2>_psadbw" 7717 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x,x") 7718 (unspec:VI8_AVX2 7719 [(match_operand:<ssebytemode> 1 "register_operand" "0,x") 7720 (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,xm")] 7721 UNSPEC_PSADBW))] 7722 "TARGET_SSE2" 7723 "@ 7724 psadbw\t{%2, %0|%0, %2} 7725 vpsadbw\t{%2, %1, %0|%0, %1, %2}" 7726 [(set_attr "isa" "noavx,avx") 7727 (set_attr "type" "sseiadd") 7728 (set_attr "atom_unit" "simul") 7729 (set_attr "prefix_data16" "1,*") 7730 (set_attr "prefix" "orig,vex") 7731 (set_attr "mode" "<sseinsnmode>")]) 7732 7733(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>" 7734 [(set (match_operand:SI 0 "register_operand" "=r") 7735 (unspec:SI 7736 [(match_operand:VF 1 "register_operand" "x")] 7737 UNSPEC_MOVMSK))] 7738 "TARGET_SSE" 7739 "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}" 7740 [(set_attr "type" "ssemov") 7741 (set_attr "prefix" "maybe_vex") 7742 (set_attr "mode" "<MODE>")]) 7743 7744(define_insn "avx2_pmovmskb" 7745 [(set (match_operand:SI 0 "register_operand" "=r") 7746 (unspec:SI [(match_operand:V32QI 1 "register_operand" "x")] 7747 UNSPEC_MOVMSK))] 7748 "TARGET_AVX2" 7749 "vpmovmskb\t{%1, %0|%0, %1}" 7750 [(set_attr "type" "ssemov") 7751 (set_attr "prefix" "vex") 7752 (set_attr "mode" "DI")]) 7753 7754(define_insn "sse2_pmovmskb" 7755 [(set (match_operand:SI 0 "register_operand" "=r") 7756 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] 7757 UNSPEC_MOVMSK))] 7758 "TARGET_SSE2" 7759 "%vpmovmskb\t{%1, %0|%0, %1}" 7760 [(set_attr "type" "ssemov") 7761 (set_attr "prefix_data16" "1") 7762 (set_attr "prefix" "maybe_vex") 7763 (set_attr "mode" "SI")]) 7764 7765(define_expand "sse2_maskmovdqu" 7766 [(set (match_operand:V16QI 0 "memory_operand") 7767 (unspec:V16QI [(match_operand:V16QI 1 "register_operand") 7768 (match_operand:V16QI 2 "register_operand") 7769 (match_dup 0)] 7770 UNSPEC_MASKMOV))] 7771 "TARGET_SSE2") 7772 7773(define_insn "*sse2_maskmovdqu" 7774 [(set (mem:V16QI (match_operand:P 0 "register_operand" "D")) 7775 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") 7776 (match_operand:V16QI 2 "register_operand" "x") 7777 (mem:V16QI (match_dup 0))] 7778 UNSPEC_MASKMOV))] 7779 "TARGET_SSE2" 7780{ 7781 /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing 7782 that requires %v to be at the beginning of the opcode name. */ 7783 if (Pmode != word_mode) 7784 fputs ("\taddr32", asm_out_file); 7785 return "%vmaskmovdqu\t{%2, %1|%1, %2}"; 7786} 7787 [(set_attr "type" "ssemov") 7788 (set_attr "prefix_data16" "1") 7789 (set (attr "length_address") 7790 (symbol_ref ("Pmode != word_mode"))) 7791 ;; The implicit %rdi operand confuses default length_vex computation. 7792 (set (attr "length_vex") 7793 (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))"))) 7794 (set_attr "prefix" "maybe_vex") 7795 (set_attr "mode" "TI")]) 7796 7797(define_insn "sse_ldmxcsr" 7798 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 7799 UNSPECV_LDMXCSR)] 7800 "TARGET_SSE" 7801 "%vldmxcsr\t%0" 7802 [(set_attr "type" "sse") 7803 (set_attr "atom_sse_attr" "mxcsr") 7804 (set_attr "prefix" "maybe_vex") 7805 (set_attr "memory" "load")]) 7806 7807(define_insn "sse_stmxcsr" 7808 [(set (match_operand:SI 0 "memory_operand" "=m") 7809 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] 7810 "TARGET_SSE" 7811 "%vstmxcsr\t%0" 7812 [(set_attr "type" "sse") 7813 (set_attr "atom_sse_attr" "mxcsr") 7814 (set_attr "prefix" "maybe_vex") 7815 (set_attr "memory" "store")]) 7816 7817(define_insn "sse2_clflush" 7818 [(unspec_volatile [(match_operand 0 "address_operand" "p")] 7819 UNSPECV_CLFLUSH)] 7820 "TARGET_SSE2" 7821 "clflush\t%a0" 7822 [(set_attr "type" "sse") 7823 (set_attr "atom_sse_attr" "fence") 7824 (set_attr "memory" "unknown")]) 7825 7826;; As per AMD and Intel ISA manuals, the first operand is extensions 7827;; and it goes to %ecx. The second operand received is hints and it goes 7828;; to %eax. 7829(define_insn "sse3_mwait" 7830 [(unspec_volatile [(match_operand:SI 0 "register_operand" "c") 7831 (match_operand:SI 1 "register_operand" "a")] 7832 UNSPECV_MWAIT)] 7833 "TARGET_SSE3" 7834;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used. 7835;; Since 32bit register operands are implicitly zero extended to 64bit, 7836;; we only need to set up 32bit registers. 7837 "mwait" 7838 [(set_attr "length" "3")]) 7839 7840(define_insn "sse3_monitor_<mode>" 7841 [(unspec_volatile [(match_operand:P 0 "register_operand" "a") 7842 (match_operand:SI 1 "register_operand" "c") 7843 (match_operand:SI 2 "register_operand" "d")] 7844 UNSPECV_MONITOR)] 7845 "TARGET_SSE3" 7846;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in 7847;; RCX and RDX are used. Since 32bit register operands are implicitly 7848;; zero extended to 64bit, we only need to set up 32bit registers. 7849 "%^monitor" 7850 [(set (attr "length") 7851 (symbol_ref ("(Pmode != word_mode) + 3")))]) 7852 7853;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7854;; 7855;; SSSE3 instructions 7856;; 7857;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 7858 7859(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus]) 7860 7861(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3" 7862 [(set (match_operand:V16HI 0 "register_operand" "=x") 7863 (vec_concat:V16HI 7864 (vec_concat:V8HI 7865 (vec_concat:V4HI 7866 (vec_concat:V2HI 7867 (ssse3_plusminus:HI 7868 (vec_select:HI 7869 (match_operand:V16HI 1 "register_operand" "x") 7870 (parallel [(const_int 0)])) 7871 (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) 7872 (ssse3_plusminus:HI 7873 (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) 7874 (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) 7875 (vec_concat:V2HI 7876 (ssse3_plusminus:HI 7877 (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) 7878 (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) 7879 (ssse3_plusminus:HI 7880 (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) 7881 (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) 7882 (vec_concat:V4HI 7883 (vec_concat:V2HI 7884 (ssse3_plusminus:HI 7885 (vec_select:HI (match_dup 1) (parallel [(const_int 8)])) 7886 (vec_select:HI (match_dup 1) (parallel [(const_int 9)]))) 7887 (ssse3_plusminus:HI 7888 (vec_select:HI (match_dup 1) (parallel [(const_int 10)])) 7889 (vec_select:HI (match_dup 1) (parallel [(const_int 11)])))) 7890 (vec_concat:V2HI 7891 (ssse3_plusminus:HI 7892 (vec_select:HI (match_dup 1) (parallel [(const_int 12)])) 7893 (vec_select:HI (match_dup 1) (parallel [(const_int 13)]))) 7894 (ssse3_plusminus:HI 7895 (vec_select:HI (match_dup 1) (parallel [(const_int 14)])) 7896 (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))) 7897 (vec_concat:V8HI 7898 (vec_concat:V4HI 7899 (vec_concat:V2HI 7900 (ssse3_plusminus:HI 7901 (vec_select:HI 7902 (match_operand:V16HI 2 "nonimmediate_operand" "xm") 7903 (parallel [(const_int 0)])) 7904 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) 7905 (ssse3_plusminus:HI 7906 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 7907 (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) 7908 (vec_concat:V2HI 7909 (ssse3_plusminus:HI 7910 (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) 7911 (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) 7912 (ssse3_plusminus:HI 7913 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 7914 (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))) 7915 (vec_concat:V4HI 7916 (vec_concat:V2HI 7917 (ssse3_plusminus:HI 7918 (vec_select:HI (match_dup 2) (parallel [(const_int 8)])) 7919 (vec_select:HI (match_dup 2) (parallel [(const_int 9)]))) 7920 (ssse3_plusminus:HI 7921 (vec_select:HI (match_dup 2) (parallel [(const_int 10)])) 7922 (vec_select:HI (match_dup 2) (parallel [(const_int 11)])))) 7923 (vec_concat:V2HI 7924 (ssse3_plusminus:HI 7925 (vec_select:HI (match_dup 2) (parallel [(const_int 12)])) 7926 (vec_select:HI (match_dup 2) (parallel [(const_int 13)]))) 7927 (ssse3_plusminus:HI 7928 (vec_select:HI (match_dup 2) (parallel [(const_int 14)])) 7929 (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))] 7930 "TARGET_AVX2" 7931 "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}" 7932 [(set_attr "type" "sseiadd") 7933 (set_attr "prefix_extra" "1") 7934 (set_attr "prefix" "vex") 7935 (set_attr "mode" "OI")]) 7936 7937(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3" 7938 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 7939 (vec_concat:V8HI 7940 (vec_concat:V4HI 7941 (vec_concat:V2HI 7942 (ssse3_plusminus:HI 7943 (vec_select:HI 7944 (match_operand:V8HI 1 "register_operand" "0,x") 7945 (parallel [(const_int 0)])) 7946 (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) 7947 (ssse3_plusminus:HI 7948 (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) 7949 (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) 7950 (vec_concat:V2HI 7951 (ssse3_plusminus:HI 7952 (vec_select:HI (match_dup 1) (parallel [(const_int 4)])) 7953 (vec_select:HI (match_dup 1) (parallel [(const_int 5)]))) 7954 (ssse3_plusminus:HI 7955 (vec_select:HI (match_dup 1) (parallel [(const_int 6)])) 7956 (vec_select:HI (match_dup 1) (parallel [(const_int 7)]))))) 7957 (vec_concat:V4HI 7958 (vec_concat:V2HI 7959 (ssse3_plusminus:HI 7960 (vec_select:HI 7961 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") 7962 (parallel [(const_int 0)])) 7963 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) 7964 (ssse3_plusminus:HI 7965 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 7966 (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) 7967 (vec_concat:V2HI 7968 (ssse3_plusminus:HI 7969 (vec_select:HI (match_dup 2) (parallel [(const_int 4)])) 7970 (vec_select:HI (match_dup 2) (parallel [(const_int 5)]))) 7971 (ssse3_plusminus:HI 7972 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 7973 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 7974 "TARGET_SSSE3" 7975 "@ 7976 ph<plusminus_mnemonic>w\t{%2, %0|%0, %2} 7977 vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}" 7978 [(set_attr "isa" "noavx,avx") 7979 (set_attr "type" "sseiadd") 7980 (set_attr "atom_unit" "complex") 7981 (set_attr "prefix_data16" "1,*") 7982 (set_attr "prefix_extra" "1") 7983 (set_attr "prefix" "orig,vex") 7984 (set_attr "mode" "TI")]) 7985 7986(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3" 7987 [(set (match_operand:V4HI 0 "register_operand" "=y") 7988 (vec_concat:V4HI 7989 (vec_concat:V2HI 7990 (ssse3_plusminus:HI 7991 (vec_select:HI 7992 (match_operand:V4HI 1 "register_operand" "0") 7993 (parallel [(const_int 0)])) 7994 (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) 7995 (ssse3_plusminus:HI 7996 (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) 7997 (vec_select:HI (match_dup 1) (parallel [(const_int 3)])))) 7998 (vec_concat:V2HI 7999 (ssse3_plusminus:HI 8000 (vec_select:HI 8001 (match_operand:V4HI 2 "nonimmediate_operand" "ym") 8002 (parallel [(const_int 0)])) 8003 (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))) 8004 (ssse3_plusminus:HI 8005 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8006 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] 8007 "TARGET_SSSE3" 8008 "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}" 8009 [(set_attr "type" "sseiadd") 8010 (set_attr "atom_unit" "complex") 8011 (set_attr "prefix_extra" "1") 8012 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8013 (set_attr "mode" "DI")]) 8014 8015(define_insn "avx2_ph<plusminus_mnemonic>dv8si3" 8016 [(set (match_operand:V8SI 0 "register_operand" "=x") 8017 (vec_concat:V8SI 8018 (vec_concat:V4SI 8019 (vec_concat:V2SI 8020 (plusminus:SI 8021 (vec_select:SI 8022 (match_operand:V8SI 1 "register_operand" "x") 8023 (parallel [(const_int 0)])) 8024 (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) 8025 (plusminus:SI 8026 (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) 8027 (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) 8028 (vec_concat:V2SI 8029 (plusminus:SI 8030 (vec_select:SI (match_dup 1) (parallel [(const_int 4)])) 8031 (vec_select:SI (match_dup 1) (parallel [(const_int 5)]))) 8032 (plusminus:SI 8033 (vec_select:SI (match_dup 1) (parallel [(const_int 6)])) 8034 (vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))) 8035 (vec_concat:V4SI 8036 (vec_concat:V2SI 8037 (plusminus:SI 8038 (vec_select:SI 8039 (match_operand:V8SI 2 "nonimmediate_operand" "xm") 8040 (parallel [(const_int 0)])) 8041 (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) 8042 (plusminus:SI 8043 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) 8044 (vec_select:SI (match_dup 2) (parallel [(const_int 3)])))) 8045 (vec_concat:V2SI 8046 (plusminus:SI 8047 (vec_select:SI (match_dup 2) (parallel [(const_int 4)])) 8048 (vec_select:SI (match_dup 2) (parallel [(const_int 5)]))) 8049 (plusminus:SI 8050 (vec_select:SI (match_dup 2) (parallel [(const_int 6)])) 8051 (vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))] 8052 "TARGET_AVX2" 8053 "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}" 8054 [(set_attr "type" "sseiadd") 8055 (set_attr "prefix_extra" "1") 8056 (set_attr "prefix" "vex") 8057 (set_attr "mode" "OI")]) 8058 8059(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3" 8060 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 8061 (vec_concat:V4SI 8062 (vec_concat:V2SI 8063 (plusminus:SI 8064 (vec_select:SI 8065 (match_operand:V4SI 1 "register_operand" "0,x") 8066 (parallel [(const_int 0)])) 8067 (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) 8068 (plusminus:SI 8069 (vec_select:SI (match_dup 1) (parallel [(const_int 2)])) 8070 (vec_select:SI (match_dup 1) (parallel [(const_int 3)])))) 8071 (vec_concat:V2SI 8072 (plusminus:SI 8073 (vec_select:SI 8074 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm") 8075 (parallel [(const_int 0)])) 8076 (vec_select:SI (match_dup 2) (parallel [(const_int 1)]))) 8077 (plusminus:SI 8078 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) 8079 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] 8080 "TARGET_SSSE3" 8081 "@ 8082 ph<plusminus_mnemonic>d\t{%2, %0|%0, %2} 8083 vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}" 8084 [(set_attr "isa" "noavx,avx") 8085 (set_attr "type" "sseiadd") 8086 (set_attr "atom_unit" "complex") 8087 (set_attr "prefix_data16" "1,*") 8088 (set_attr "prefix_extra" "1") 8089 (set_attr "prefix" "orig,vex") 8090 (set_attr "mode" "TI")]) 8091 8092(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3" 8093 [(set (match_operand:V2SI 0 "register_operand" "=y") 8094 (vec_concat:V2SI 8095 (plusminus:SI 8096 (vec_select:SI 8097 (match_operand:V2SI 1 "register_operand" "0") 8098 (parallel [(const_int 0)])) 8099 (vec_select:SI (match_dup 1) (parallel [(const_int 1)]))) 8100 (plusminus:SI 8101 (vec_select:SI 8102 (match_operand:V2SI 2 "nonimmediate_operand" "ym") 8103 (parallel [(const_int 0)])) 8104 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] 8105 "TARGET_SSSE3" 8106 "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}" 8107 [(set_attr "type" "sseiadd") 8108 (set_attr "atom_unit" "complex") 8109 (set_attr "prefix_extra" "1") 8110 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8111 (set_attr "mode" "DI")]) 8112 8113(define_insn "avx2_pmaddubsw256" 8114 [(set (match_operand:V16HI 0 "register_operand" "=x") 8115 (ss_plus:V16HI 8116 (mult:V16HI 8117 (zero_extend:V16HI 8118 (vec_select:V16QI 8119 (match_operand:V32QI 1 "register_operand" "x") 8120 (parallel [(const_int 0) (const_int 2) 8121 (const_int 4) (const_int 6) 8122 (const_int 8) (const_int 10) 8123 (const_int 12) (const_int 14) 8124 (const_int 16) (const_int 18) 8125 (const_int 20) (const_int 22) 8126 (const_int 24) (const_int 26) 8127 (const_int 28) (const_int 30)]))) 8128 (sign_extend:V16HI 8129 (vec_select:V16QI 8130 (match_operand:V32QI 2 "nonimmediate_operand" "xm") 8131 (parallel [(const_int 0) (const_int 2) 8132 (const_int 4) (const_int 6) 8133 (const_int 8) (const_int 10) 8134 (const_int 12) (const_int 14) 8135 (const_int 16) (const_int 18) 8136 (const_int 20) (const_int 22) 8137 (const_int 24) (const_int 26) 8138 (const_int 28) (const_int 30)])))) 8139 (mult:V16HI 8140 (zero_extend:V16HI 8141 (vec_select:V16QI (match_dup 1) 8142 (parallel [(const_int 1) (const_int 3) 8143 (const_int 5) (const_int 7) 8144 (const_int 9) (const_int 11) 8145 (const_int 13) (const_int 15) 8146 (const_int 17) (const_int 19) 8147 (const_int 21) (const_int 23) 8148 (const_int 25) (const_int 27) 8149 (const_int 29) (const_int 31)]))) 8150 (sign_extend:V16HI 8151 (vec_select:V16QI (match_dup 2) 8152 (parallel [(const_int 1) (const_int 3) 8153 (const_int 5) (const_int 7) 8154 (const_int 9) (const_int 11) 8155 (const_int 13) (const_int 15) 8156 (const_int 17) (const_int 19) 8157 (const_int 21) (const_int 23) 8158 (const_int 25) (const_int 27) 8159 (const_int 29) (const_int 31)]))))))] 8160 "TARGET_AVX2" 8161 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" 8162 [(set_attr "type" "sseiadd") 8163 (set_attr "prefix_extra" "1") 8164 (set_attr "prefix" "vex") 8165 (set_attr "mode" "OI")]) 8166 8167(define_insn "ssse3_pmaddubsw128" 8168 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 8169 (ss_plus:V8HI 8170 (mult:V8HI 8171 (zero_extend:V8HI 8172 (vec_select:V8QI 8173 (match_operand:V16QI 1 "register_operand" "0,x") 8174 (parallel [(const_int 0) (const_int 2) 8175 (const_int 4) (const_int 6) 8176 (const_int 8) (const_int 10) 8177 (const_int 12) (const_int 14)]))) 8178 (sign_extend:V8HI 8179 (vec_select:V8QI 8180 (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm") 8181 (parallel [(const_int 0) (const_int 2) 8182 (const_int 4) (const_int 6) 8183 (const_int 8) (const_int 10) 8184 (const_int 12) (const_int 14)])))) 8185 (mult:V8HI 8186 (zero_extend:V8HI 8187 (vec_select:V8QI (match_dup 1) 8188 (parallel [(const_int 1) (const_int 3) 8189 (const_int 5) (const_int 7) 8190 (const_int 9) (const_int 11) 8191 (const_int 13) (const_int 15)]))) 8192 (sign_extend:V8HI 8193 (vec_select:V8QI (match_dup 2) 8194 (parallel [(const_int 1) (const_int 3) 8195 (const_int 5) (const_int 7) 8196 (const_int 9) (const_int 11) 8197 (const_int 13) (const_int 15)]))))))] 8198 "TARGET_SSSE3" 8199 "@ 8200 pmaddubsw\t{%2, %0|%0, %2} 8201 vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" 8202 [(set_attr "isa" "noavx,avx") 8203 (set_attr "type" "sseiadd") 8204 (set_attr "atom_unit" "simul") 8205 (set_attr "prefix_data16" "1,*") 8206 (set_attr "prefix_extra" "1") 8207 (set_attr "prefix" "orig,vex") 8208 (set_attr "mode" "TI")]) 8209 8210(define_insn "ssse3_pmaddubsw" 8211 [(set (match_operand:V4HI 0 "register_operand" "=y") 8212 (ss_plus:V4HI 8213 (mult:V4HI 8214 (zero_extend:V4HI 8215 (vec_select:V4QI 8216 (match_operand:V8QI 1 "register_operand" "0") 8217 (parallel [(const_int 0) (const_int 2) 8218 (const_int 4) (const_int 6)]))) 8219 (sign_extend:V4HI 8220 (vec_select:V4QI 8221 (match_operand:V8QI 2 "nonimmediate_operand" "ym") 8222 (parallel [(const_int 0) (const_int 2) 8223 (const_int 4) (const_int 6)])))) 8224 (mult:V4HI 8225 (zero_extend:V4HI 8226 (vec_select:V4QI (match_dup 1) 8227 (parallel [(const_int 1) (const_int 3) 8228 (const_int 5) (const_int 7)]))) 8229 (sign_extend:V4HI 8230 (vec_select:V4QI (match_dup 2) 8231 (parallel [(const_int 1) (const_int 3) 8232 (const_int 5) (const_int 7)]))))))] 8233 "TARGET_SSSE3" 8234 "pmaddubsw\t{%2, %0|%0, %2}" 8235 [(set_attr "type" "sseiadd") 8236 (set_attr "atom_unit" "simul") 8237 (set_attr "prefix_extra" "1") 8238 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8239 (set_attr "mode" "DI")]) 8240 8241(define_mode_iterator PMULHRSW 8242 [V4HI V8HI (V16HI "TARGET_AVX2")]) 8243 8244(define_expand "<ssse3_avx2>_pmulhrsw<mode>3" 8245 [(set (match_operand:PMULHRSW 0 "register_operand") 8246 (truncate:PMULHRSW 8247 (lshiftrt:<ssedoublemode> 8248 (plus:<ssedoublemode> 8249 (lshiftrt:<ssedoublemode> 8250 (mult:<ssedoublemode> 8251 (sign_extend:<ssedoublemode> 8252 (match_operand:PMULHRSW 1 "nonimmediate_operand")) 8253 (sign_extend:<ssedoublemode> 8254 (match_operand:PMULHRSW 2 "nonimmediate_operand"))) 8255 (const_int 14)) 8256 (match_dup 3)) 8257 (const_int 1))))] 8258 "TARGET_AVX2" 8259{ 8260 operands[3] = CONST1_RTX(<MODE>mode); 8261 ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands); 8262}) 8263 8264(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3" 8265 [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x") 8266 (truncate:VI2_AVX2 8267 (lshiftrt:<ssedoublemode> 8268 (plus:<ssedoublemode> 8269 (lshiftrt:<ssedoublemode> 8270 (mult:<ssedoublemode> 8271 (sign_extend:<ssedoublemode> 8272 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")) 8273 (sign_extend:<ssedoublemode> 8274 (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm"))) 8275 (const_int 14)) 8276 (match_operand:VI2_AVX2 3 "const1_operand")) 8277 (const_int 1))))] 8278 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" 8279 "@ 8280 pmulhrsw\t{%2, %0|%0, %2} 8281 vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" 8282 [(set_attr "isa" "noavx,avx") 8283 (set_attr "type" "sseimul") 8284 (set_attr "prefix_data16" "1,*") 8285 (set_attr "prefix_extra" "1") 8286 (set_attr "prefix" "orig,vex") 8287 (set_attr "mode" "<sseinsnmode>")]) 8288 8289(define_insn "*ssse3_pmulhrswv4hi3" 8290 [(set (match_operand:V4HI 0 "register_operand" "=y") 8291 (truncate:V4HI 8292 (lshiftrt:V4SI 8293 (plus:V4SI 8294 (lshiftrt:V4SI 8295 (mult:V4SI 8296 (sign_extend:V4SI 8297 (match_operand:V4HI 1 "nonimmediate_operand" "%0")) 8298 (sign_extend:V4SI 8299 (match_operand:V4HI 2 "nonimmediate_operand" "ym"))) 8300 (const_int 14)) 8301 (match_operand:V4HI 3 "const1_operand")) 8302 (const_int 1))))] 8303 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" 8304 "pmulhrsw\t{%2, %0|%0, %2}" 8305 [(set_attr "type" "sseimul") 8306 (set_attr "prefix_extra" "1") 8307 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8308 (set_attr "mode" "DI")]) 8309 8310(define_insn "<ssse3_avx2>_pshufb<mode>3" 8311 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 8312 (unspec:VI1_AVX2 8313 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") 8314 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm")] 8315 UNSPEC_PSHUFB))] 8316 "TARGET_SSSE3" 8317 "@ 8318 pshufb\t{%2, %0|%0, %2} 8319 vpshufb\t{%2, %1, %0|%0, %1, %2}" 8320 [(set_attr "isa" "noavx,avx") 8321 (set_attr "type" "sselog1") 8322 (set_attr "prefix_data16" "1,*") 8323 (set_attr "prefix_extra" "1") 8324 (set_attr "prefix" "orig,vex") 8325 (set_attr "btver2_decode" "vector,vector") 8326 (set_attr "mode" "<sseinsnmode>")]) 8327 8328(define_insn "ssse3_pshufbv8qi3" 8329 [(set (match_operand:V8QI 0 "register_operand" "=y") 8330 (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0") 8331 (match_operand:V8QI 2 "nonimmediate_operand" "ym")] 8332 UNSPEC_PSHUFB))] 8333 "TARGET_SSSE3" 8334 "pshufb\t{%2, %0|%0, %2}"; 8335 [(set_attr "type" "sselog1") 8336 (set_attr "prefix_extra" "1") 8337 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8338 (set_attr "mode" "DI")]) 8339 8340(define_insn "<ssse3_avx2>_psign<mode>3" 8341 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x") 8342 (unspec:VI124_AVX2 8343 [(match_operand:VI124_AVX2 1 "register_operand" "0,x") 8344 (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")] 8345 UNSPEC_PSIGN))] 8346 "TARGET_SSSE3" 8347 "@ 8348 psign<ssemodesuffix>\t{%2, %0|%0, %2} 8349 vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 8350 [(set_attr "isa" "noavx,avx") 8351 (set_attr "type" "sselog1") 8352 (set_attr "prefix_data16" "1,*") 8353 (set_attr "prefix_extra" "1") 8354 (set_attr "prefix" "orig,vex") 8355 (set_attr "mode" "<sseinsnmode>")]) 8356 8357(define_insn "ssse3_psign<mode>3" 8358 [(set (match_operand:MMXMODEI 0 "register_operand" "=y") 8359 (unspec:MMXMODEI 8360 [(match_operand:MMXMODEI 1 "register_operand" "0") 8361 (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")] 8362 UNSPEC_PSIGN))] 8363 "TARGET_SSSE3" 8364 "psign<mmxvecsize>\t{%2, %0|%0, %2}"; 8365 [(set_attr "type" "sselog1") 8366 (set_attr "prefix_extra" "1") 8367 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8368 (set_attr "mode" "DI")]) 8369 8370(define_insn "<ssse3_avx2>_palignr<mode>" 8371 [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x") 8372 (unspec:SSESCALARMODE 8373 [(match_operand:SSESCALARMODE 1 "register_operand" "0,x") 8374 (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,xm") 8375 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] 8376 UNSPEC_PALIGNR))] 8377 "TARGET_SSSE3" 8378{ 8379 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); 8380 8381 switch (which_alternative) 8382 { 8383 case 0: 8384 return "palignr\t{%3, %2, %0|%0, %2, %3}"; 8385 case 1: 8386 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 8387 default: 8388 gcc_unreachable (); 8389 } 8390} 8391 [(set_attr "isa" "noavx,avx") 8392 (set_attr "type" "sseishft") 8393 (set_attr "atom_unit" "sishuf") 8394 (set_attr "prefix_data16" "1,*") 8395 (set_attr "prefix_extra" "1") 8396 (set_attr "length_immediate" "1") 8397 (set_attr "prefix" "orig,vex") 8398 (set_attr "mode" "<sseinsnmode>")]) 8399 8400(define_insn "ssse3_palignrdi" 8401 [(set (match_operand:DI 0 "register_operand" "=y") 8402 (unspec:DI [(match_operand:DI 1 "register_operand" "0") 8403 (match_operand:DI 2 "nonimmediate_operand" "ym") 8404 (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")] 8405 UNSPEC_PALIGNR))] 8406 "TARGET_SSSE3" 8407{ 8408 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); 8409 return "palignr\t{%3, %2, %0|%0, %2, %3}"; 8410} 8411 [(set_attr "type" "sseishft") 8412 (set_attr "atom_unit" "sishuf") 8413 (set_attr "prefix_extra" "1") 8414 (set_attr "length_immediate" "1") 8415 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8416 (set_attr "mode" "DI")]) 8417 8418(define_insn "abs<mode>2" 8419 [(set (match_operand:VI124_AVX2 0 "register_operand" "=x") 8420 (abs:VI124_AVX2 8421 (match_operand:VI124_AVX2 1 "nonimmediate_operand" "xm")))] 8422 "TARGET_SSSE3" 8423 "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}" 8424 [(set_attr "type" "sselog1") 8425 (set_attr "prefix_data16" "1") 8426 (set_attr "prefix_extra" "1") 8427 (set_attr "prefix" "maybe_vex") 8428 (set_attr "mode" "<sseinsnmode>")]) 8429 8430(define_insn "abs<mode>2" 8431 [(set (match_operand:MMXMODEI 0 "register_operand" "=y") 8432 (abs:MMXMODEI 8433 (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] 8434 "TARGET_SSSE3" 8435 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; 8436 [(set_attr "type" "sselog1") 8437 (set_attr "prefix_rep" "0") 8438 (set_attr "prefix_extra" "1") 8439 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) 8440 (set_attr "mode" "DI")]) 8441 8442;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 8443;; 8444;; AMD SSE4A instructions 8445;; 8446;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 8447 8448(define_insn "sse4a_movnt<mode>" 8449 [(set (match_operand:MODEF 0 "memory_operand" "=m") 8450 (unspec:MODEF 8451 [(match_operand:MODEF 1 "register_operand" "x")] 8452 UNSPEC_MOVNT))] 8453 "TARGET_SSE4A" 8454 "movnt<ssemodesuffix>\t{%1, %0|%0, %1}" 8455 [(set_attr "type" "ssemov") 8456 (set_attr "mode" "<MODE>")]) 8457 8458(define_insn "sse4a_vmmovnt<mode>" 8459 [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m") 8460 (unspec:<ssescalarmode> 8461 [(vec_select:<ssescalarmode> 8462 (match_operand:VF_128 1 "register_operand" "x") 8463 (parallel [(const_int 0)]))] 8464 UNSPEC_MOVNT))] 8465 "TARGET_SSE4A" 8466 "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}" 8467 [(set_attr "type" "ssemov") 8468 (set_attr "mode" "<ssescalarmode>")]) 8469 8470(define_insn "sse4a_extrqi" 8471 [(set (match_operand:V2DI 0 "register_operand" "=x") 8472 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 8473 (match_operand 2 "const_0_to_255_operand") 8474 (match_operand 3 "const_0_to_255_operand")] 8475 UNSPEC_EXTRQI))] 8476 "TARGET_SSE4A" 8477 "extrq\t{%3, %2, %0|%0, %2, %3}" 8478 [(set_attr "type" "sse") 8479 (set_attr "prefix_data16" "1") 8480 (set_attr "length_immediate" "2") 8481 (set_attr "mode" "TI")]) 8482 8483(define_insn "sse4a_extrq" 8484 [(set (match_operand:V2DI 0 "register_operand" "=x") 8485 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 8486 (match_operand:V16QI 2 "register_operand" "x")] 8487 UNSPEC_EXTRQ))] 8488 "TARGET_SSE4A" 8489 "extrq\t{%2, %0|%0, %2}" 8490 [(set_attr "type" "sse") 8491 (set_attr "prefix_data16" "1") 8492 (set_attr "mode" "TI")]) 8493 8494(define_insn "sse4a_insertqi" 8495 [(set (match_operand:V2DI 0 "register_operand" "=x") 8496 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 8497 (match_operand:V2DI 2 "register_operand" "x") 8498 (match_operand 3 "const_0_to_255_operand") 8499 (match_operand 4 "const_0_to_255_operand")] 8500 UNSPEC_INSERTQI))] 8501 "TARGET_SSE4A" 8502 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" 8503 [(set_attr "type" "sseins") 8504 (set_attr "prefix_data16" "0") 8505 (set_attr "prefix_rep" "1") 8506 (set_attr "length_immediate" "2") 8507 (set_attr "mode" "TI")]) 8508 8509(define_insn "sse4a_insertq" 8510 [(set (match_operand:V2DI 0 "register_operand" "=x") 8511 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 8512 (match_operand:V2DI 2 "register_operand" "x")] 8513 UNSPEC_INSERTQ))] 8514 "TARGET_SSE4A" 8515 "insertq\t{%2, %0|%0, %2}" 8516 [(set_attr "type" "sseins") 8517 (set_attr "prefix_data16" "0") 8518 (set_attr "prefix_rep" "1") 8519 (set_attr "mode" "TI")]) 8520 8521;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 8522;; 8523;; Intel SSE4.1 instructions 8524;; 8525;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 8526 8527(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>" 8528 [(set (match_operand:VF 0 "register_operand" "=x,x") 8529 (vec_merge:VF 8530 (match_operand:VF 2 "nonimmediate_operand" "xm,xm") 8531 (match_operand:VF 1 "register_operand" "0,x") 8532 (match_operand:SI 3 "const_0_to_<blendbits>_operand")))] 8533 "TARGET_SSE4_1" 8534 "@ 8535 blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 8536 vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 8537 [(set_attr "isa" "noavx,avx") 8538 (set_attr "type" "ssemov") 8539 (set_attr "length_immediate" "1") 8540 (set_attr "prefix_data16" "1,*") 8541 (set_attr "prefix_extra" "1") 8542 (set_attr "prefix" "orig,vex") 8543 (set_attr "mode" "<MODE>")]) 8544 8545(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>" 8546 [(set (match_operand:VF 0 "register_operand" "=x,x") 8547 (unspec:VF 8548 [(match_operand:VF 1 "register_operand" "0,x") 8549 (match_operand:VF 2 "nonimmediate_operand" "xm,xm") 8550 (match_operand:VF 3 "register_operand" "Yz,x")] 8551 UNSPEC_BLENDV))] 8552 "TARGET_SSE4_1" 8553 "@ 8554 blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 8555 vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 8556 [(set_attr "isa" "noavx,avx") 8557 (set_attr "type" "ssemov") 8558 (set_attr "length_immediate" "1") 8559 (set_attr "prefix_data16" "1,*") 8560 (set_attr "prefix_extra" "1") 8561 (set_attr "prefix" "orig,vex") 8562 (set_attr "btver2_decode" "vector,vector") 8563 (set_attr "mode" "<MODE>")]) 8564 8565(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>" 8566 [(set (match_operand:VF 0 "register_operand" "=x,x") 8567 (unspec:VF 8568 [(match_operand:VF 1 "nonimmediate_operand" "%0,x") 8569 (match_operand:VF 2 "nonimmediate_operand" "xm,xm") 8570 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 8571 UNSPEC_DP))] 8572 "TARGET_SSE4_1" 8573 "@ 8574 dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} 8575 vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 8576 [(set_attr "isa" "noavx,avx") 8577 (set_attr "type" "ssemul") 8578 (set_attr "length_immediate" "1") 8579 (set_attr "prefix_data16" "1,*") 8580 (set_attr "prefix_extra" "1") 8581 (set_attr "prefix" "orig,vex") 8582 (set_attr "btver2_decode" "vector,vector") 8583 (set_attr "mode" "<MODE>")]) 8584 8585(define_insn "<sse4_1_avx2>_movntdqa" 8586 [(set (match_operand:VI8_AVX2 0 "register_operand" "=x") 8587 (unspec:VI8_AVX2 [(match_operand:VI8_AVX2 1 "memory_operand" "m")] 8588 UNSPEC_MOVNTDQA))] 8589 "TARGET_SSE4_1" 8590 "%vmovntdqa\t{%1, %0|%0, %1}" 8591 [(set_attr "type" "ssemov") 8592 (set_attr "prefix_extra" "1") 8593 (set_attr "prefix" "maybe_vex") 8594 (set_attr "mode" "<sseinsnmode>")]) 8595 8596(define_insn "<sse4_1_avx2>_mpsadbw" 8597 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 8598 (unspec:VI1_AVX2 8599 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") 8600 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") 8601 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 8602 UNSPEC_MPSADBW))] 8603 "TARGET_SSE4_1" 8604 "@ 8605 mpsadbw\t{%3, %2, %0|%0, %2, %3} 8606 vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" 8607 [(set_attr "isa" "noavx,avx") 8608 (set_attr "type" "sselog1") 8609 (set_attr "length_immediate" "1") 8610 (set_attr "prefix_extra" "1") 8611 (set_attr "prefix" "orig,vex") 8612 (set_attr "btver2_decode" "vector,vector") 8613 (set_attr "mode" "<sseinsnmode>")]) 8614 8615(define_insn "avx2_packusdw" 8616 [(set (match_operand:V16HI 0 "register_operand" "=x") 8617 (vec_concat:V16HI 8618 (us_truncate:V8HI 8619 (match_operand:V8SI 1 "register_operand" "x")) 8620 (us_truncate:V8HI 8621 (match_operand:V8SI 2 "nonimmediate_operand" "xm"))))] 8622 "TARGET_AVX2" 8623 "vpackusdw\t{%2, %1, %0|%0, %1, %2}" 8624 [(set_attr "type" "sselog") 8625 (set_attr "prefix_extra" "1") 8626 (set_attr "prefix" "vex") 8627 (set_attr "mode" "OI")]) 8628 8629(define_insn "sse4_1_packusdw" 8630 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 8631 (vec_concat:V8HI 8632 (us_truncate:V4HI 8633 (match_operand:V4SI 1 "register_operand" "0,x")) 8634 (us_truncate:V4HI 8635 (match_operand:V4SI 2 "nonimmediate_operand" "xm,xm"))))] 8636 "TARGET_SSE4_1" 8637 "@ 8638 packusdw\t{%2, %0|%0, %2} 8639 vpackusdw\t{%2, %1, %0|%0, %1, %2}" 8640 [(set_attr "isa" "noavx,avx") 8641 (set_attr "type" "sselog") 8642 (set_attr "prefix_extra" "1") 8643 (set_attr "prefix" "orig,vex") 8644 (set_attr "mode" "TI")]) 8645 8646(define_insn "<sse4_1_avx2>_pblendvb" 8647 [(set (match_operand:VI1_AVX2 0 "register_operand" "=x,x") 8648 (unspec:VI1_AVX2 8649 [(match_operand:VI1_AVX2 1 "register_operand" "0,x") 8650 (match_operand:VI1_AVX2 2 "nonimmediate_operand" "xm,xm") 8651 (match_operand:VI1_AVX2 3 "register_operand" "Yz,x")] 8652 UNSPEC_BLENDV))] 8653 "TARGET_SSE4_1" 8654 "@ 8655 pblendvb\t{%3, %2, %0|%0, %2, %3} 8656 vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" 8657 [(set_attr "isa" "noavx,avx") 8658 (set_attr "type" "ssemov") 8659 (set_attr "prefix_extra" "1") 8660 (set_attr "length_immediate" "*,1") 8661 (set_attr "prefix" "orig,vex") 8662 (set_attr "btver2_decode" "vector,vector") 8663 (set_attr "mode" "<sseinsnmode>")]) 8664 8665(define_insn "sse4_1_pblendw" 8666 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 8667 (vec_merge:V8HI 8668 (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm") 8669 (match_operand:V8HI 1 "register_operand" "0,x") 8670 (match_operand:SI 3 "const_0_to_255_operand" "n,n")))] 8671 "TARGET_SSE4_1" 8672 "@ 8673 pblendw\t{%3, %2, %0|%0, %2, %3} 8674 vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" 8675 [(set_attr "isa" "noavx,avx") 8676 (set_attr "type" "ssemov") 8677 (set_attr "prefix_extra" "1") 8678 (set_attr "length_immediate" "1") 8679 (set_attr "prefix" "orig,vex") 8680 (set_attr "mode" "TI")]) 8681 8682;; The builtin uses an 8-bit immediate. Expand that. 8683(define_expand "avx2_pblendw" 8684 [(set (match_operand:V16HI 0 "register_operand") 8685 (vec_merge:V16HI 8686 (match_operand:V16HI 2 "nonimmediate_operand") 8687 (match_operand:V16HI 1 "register_operand") 8688 (match_operand:SI 3 "const_0_to_255_operand")))] 8689 "TARGET_AVX2" 8690{ 8691 HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff; 8692 operands[3] = GEN_INT (val << 8 | val); 8693}) 8694 8695(define_insn "*avx2_pblendw" 8696 [(set (match_operand:V16HI 0 "register_operand" "=x") 8697 (vec_merge:V16HI 8698 (match_operand:V16HI 2 "nonimmediate_operand" "xm") 8699 (match_operand:V16HI 1 "register_operand" "x") 8700 (match_operand:SI 3 "avx2_pblendw_operand" "n")))] 8701 "TARGET_AVX2" 8702{ 8703 operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff); 8704 return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 8705} 8706 [(set_attr "type" "ssemov") 8707 (set_attr "prefix_extra" "1") 8708 (set_attr "length_immediate" "1") 8709 (set_attr "prefix" "vex") 8710 (set_attr "mode" "OI")]) 8711 8712(define_insn "avx2_pblendd<mode>" 8713 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x") 8714 (vec_merge:VI4_AVX2 8715 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm") 8716 (match_operand:VI4_AVX2 1 "register_operand" "x") 8717 (match_operand:SI 3 "const_0_to_255_operand" "n")))] 8718 "TARGET_AVX2" 8719 "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 8720 [(set_attr "type" "ssemov") 8721 (set_attr "prefix_extra" "1") 8722 (set_attr "length_immediate" "1") 8723 (set_attr "prefix" "vex") 8724 (set_attr "mode" "<sseinsnmode>")]) 8725 8726(define_insn "sse4_1_phminposuw" 8727 [(set (match_operand:V8HI 0 "register_operand" "=x") 8728 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] 8729 UNSPEC_PHMINPOSUW))] 8730 "TARGET_SSE4_1" 8731 "%vphminposuw\t{%1, %0|%0, %1}" 8732 [(set_attr "type" "sselog1") 8733 (set_attr "prefix_extra" "1") 8734 (set_attr "prefix" "maybe_vex") 8735 (set_attr "mode" "TI")]) 8736 8737(define_insn "avx2_<code>v16qiv16hi2" 8738 [(set (match_operand:V16HI 0 "register_operand" "=x") 8739 (any_extend:V16HI 8740 (match_operand:V16QI 1 "nonimmediate_operand" "xm")))] 8741 "TARGET_AVX2" 8742 "vpmov<extsuffix>bw\t{%1, %0|%0, %1}" 8743 [(set_attr "type" "ssemov") 8744 (set_attr "prefix_extra" "1") 8745 (set_attr "prefix" "vex") 8746 (set_attr "mode" "OI")]) 8747 8748(define_insn "sse4_1_<code>v8qiv8hi2" 8749 [(set (match_operand:V8HI 0 "register_operand" "=x") 8750 (any_extend:V8HI 8751 (vec_select:V8QI 8752 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 8753 (parallel [(const_int 0) (const_int 1) 8754 (const_int 2) (const_int 3) 8755 (const_int 4) (const_int 5) 8756 (const_int 6) (const_int 7)]))))] 8757 "TARGET_SSE4_1" 8758 "%vpmov<extsuffix>bw\t{%1, %0|%0, %q1}" 8759 [(set_attr "type" "ssemov") 8760 (set_attr "ssememalign" "64") 8761 (set_attr "prefix_extra" "1") 8762 (set_attr "prefix" "maybe_vex") 8763 (set_attr "mode" "TI")]) 8764 8765(define_insn "avx2_<code>v8qiv8si2" 8766 [(set (match_operand:V8SI 0 "register_operand" "=x") 8767 (any_extend:V8SI 8768 (vec_select:V8QI 8769 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 8770 (parallel [(const_int 0) (const_int 1) 8771 (const_int 2) (const_int 3) 8772 (const_int 4) (const_int 5) 8773 (const_int 6) (const_int 7)]))))] 8774 "TARGET_AVX2" 8775 "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}" 8776 [(set_attr "type" "ssemov") 8777 (set_attr "prefix_extra" "1") 8778 (set_attr "prefix" "vex") 8779 (set_attr "mode" "OI")]) 8780 8781(define_insn "sse4_1_<code>v4qiv4si2" 8782 [(set (match_operand:V4SI 0 "register_operand" "=x") 8783 (any_extend:V4SI 8784 (vec_select:V4QI 8785 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 8786 (parallel [(const_int 0) (const_int 1) 8787 (const_int 2) (const_int 3)]))))] 8788 "TARGET_SSE4_1" 8789 "%vpmov<extsuffix>bd\t{%1, %0|%0, %k1}" 8790 [(set_attr "type" "ssemov") 8791 (set_attr "ssememalign" "32") 8792 (set_attr "prefix_extra" "1") 8793 (set_attr "prefix" "maybe_vex") 8794 (set_attr "mode" "TI")]) 8795 8796(define_insn "avx2_<code>v8hiv8si2" 8797 [(set (match_operand:V8SI 0 "register_operand" "=x") 8798 (any_extend:V8SI 8799 (match_operand:V8HI 1 "nonimmediate_operand" "xm")))] 8800 "TARGET_AVX2" 8801 "vpmov<extsuffix>wd\t{%1, %0|%0, %1}" 8802 [(set_attr "type" "ssemov") 8803 (set_attr "prefix_extra" "1") 8804 (set_attr "prefix" "vex") 8805 (set_attr "mode" "OI")]) 8806 8807(define_insn "sse4_1_<code>v4hiv4si2" 8808 [(set (match_operand:V4SI 0 "register_operand" "=x") 8809 (any_extend:V4SI 8810 (vec_select:V4HI 8811 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 8812 (parallel [(const_int 0) (const_int 1) 8813 (const_int 2) (const_int 3)]))))] 8814 "TARGET_SSE4_1" 8815 "%vpmov<extsuffix>wd\t{%1, %0|%0, %q1}" 8816 [(set_attr "type" "ssemov") 8817 (set_attr "ssememalign" "64") 8818 (set_attr "prefix_extra" "1") 8819 (set_attr "prefix" "maybe_vex") 8820 (set_attr "mode" "TI")]) 8821 8822(define_insn "avx2_<code>v4qiv4di2" 8823 [(set (match_operand:V4DI 0 "register_operand" "=x") 8824 (any_extend:V4DI 8825 (vec_select:V4QI 8826 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 8827 (parallel [(const_int 0) (const_int 1) 8828 (const_int 2) (const_int 3)]))))] 8829 "TARGET_AVX2" 8830 "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}" 8831 [(set_attr "type" "ssemov") 8832 (set_attr "prefix_extra" "1") 8833 (set_attr "prefix" "vex") 8834 (set_attr "mode" "OI")]) 8835 8836(define_insn "sse4_1_<code>v2qiv2di2" 8837 [(set (match_operand:V2DI 0 "register_operand" "=x") 8838 (any_extend:V2DI 8839 (vec_select:V2QI 8840 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 8841 (parallel [(const_int 0) (const_int 1)]))))] 8842 "TARGET_SSE4_1" 8843 "%vpmov<extsuffix>bq\t{%1, %0|%0, %w1}" 8844 [(set_attr "type" "ssemov") 8845 (set_attr "ssememalign" "16") 8846 (set_attr "prefix_extra" "1") 8847 (set_attr "prefix" "maybe_vex") 8848 (set_attr "mode" "TI")]) 8849 8850(define_insn "avx2_<code>v4hiv4di2" 8851 [(set (match_operand:V4DI 0 "register_operand" "=x") 8852 (any_extend:V4DI 8853 (vec_select:V4HI 8854 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 8855 (parallel [(const_int 0) (const_int 1) 8856 (const_int 2) (const_int 3)]))))] 8857 "TARGET_AVX2" 8858 "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}" 8859 [(set_attr "type" "ssemov") 8860 (set_attr "prefix_extra" "1") 8861 (set_attr "prefix" "vex") 8862 (set_attr "mode" "OI")]) 8863 8864(define_insn "sse4_1_<code>v2hiv2di2" 8865 [(set (match_operand:V2DI 0 "register_operand" "=x") 8866 (any_extend:V2DI 8867 (vec_select:V2HI 8868 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 8869 (parallel [(const_int 0) (const_int 1)]))))] 8870 "TARGET_SSE4_1" 8871 "%vpmov<extsuffix>wq\t{%1, %0|%0, %k1}" 8872 [(set_attr "type" "ssemov") 8873 (set_attr "ssememalign" "32") 8874 (set_attr "prefix_extra" "1") 8875 (set_attr "prefix" "maybe_vex") 8876 (set_attr "mode" "TI")]) 8877 8878(define_insn "avx2_<code>v4siv4di2" 8879 [(set (match_operand:V4DI 0 "register_operand" "=x") 8880 (any_extend:V4DI 8881 (match_operand:V4SI 1 "nonimmediate_operand" "xm")))] 8882 "TARGET_AVX2" 8883 "vpmov<extsuffix>dq\t{%1, %0|%0, %1}" 8884 [(set_attr "type" "ssemov") 8885 (set_attr "prefix_extra" "1") 8886 (set_attr "mode" "OI")]) 8887 8888(define_insn "sse4_1_<code>v2siv2di2" 8889 [(set (match_operand:V2DI 0 "register_operand" "=x") 8890 (any_extend:V2DI 8891 (vec_select:V2SI 8892 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 8893 (parallel [(const_int 0) (const_int 1)]))))] 8894 "TARGET_SSE4_1" 8895 "%vpmov<extsuffix>dq\t{%1, %0|%0, %q1}" 8896 [(set_attr "type" "ssemov") 8897 (set_attr "ssememalign" "64") 8898 (set_attr "prefix_extra" "1") 8899 (set_attr "prefix" "maybe_vex") 8900 (set_attr "mode" "TI")]) 8901 8902;; ptestps/ptestpd are very similar to comiss and ucomiss when 8903;; setting FLAGS_REG. But it is not a really compare instruction. 8904(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>" 8905 [(set (reg:CC FLAGS_REG) 8906 (unspec:CC [(match_operand:VF 0 "register_operand" "x") 8907 (match_operand:VF 1 "nonimmediate_operand" "xm")] 8908 UNSPEC_VTESTP))] 8909 "TARGET_AVX" 8910 "vtest<ssemodesuffix>\t{%1, %0|%0, %1}" 8911 [(set_attr "type" "ssecomi") 8912 (set_attr "prefix_extra" "1") 8913 (set_attr "prefix" "vex") 8914 (set_attr "mode" "<MODE>")]) 8915 8916;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG. 8917;; But it is not a really compare instruction. 8918(define_insn "avx_ptest256" 8919 [(set (reg:CC FLAGS_REG) 8920 (unspec:CC [(match_operand:V4DI 0 "register_operand" "x") 8921 (match_operand:V4DI 1 "nonimmediate_operand" "xm")] 8922 UNSPEC_PTEST))] 8923 "TARGET_AVX" 8924 "vptest\t{%1, %0|%0, %1}" 8925 [(set_attr "type" "ssecomi") 8926 (set_attr "prefix_extra" "1") 8927 (set_attr "prefix" "vex") 8928 (set_attr "btver2_decode" "vector") 8929 (set_attr "mode" "OI")]) 8930 8931(define_insn "sse4_1_ptest" 8932 [(set (reg:CC FLAGS_REG) 8933 (unspec:CC [(match_operand:V2DI 0 "register_operand" "x") 8934 (match_operand:V2DI 1 "nonimmediate_operand" "xm")] 8935 UNSPEC_PTEST))] 8936 "TARGET_SSE4_1" 8937 "%vptest\t{%1, %0|%0, %1}" 8938 [(set_attr "type" "ssecomi") 8939 (set_attr "prefix_extra" "1") 8940 (set_attr "prefix" "maybe_vex") 8941 (set_attr "mode" "TI")]) 8942 8943(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>" 8944 [(set (match_operand:VF 0 "register_operand" "=x") 8945 (unspec:VF 8946 [(match_operand:VF 1 "nonimmediate_operand" "xm") 8947 (match_operand:SI 2 "const_0_to_15_operand" "n")] 8948 UNSPEC_ROUND))] 8949 "TARGET_ROUND" 8950 "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 8951 [(set_attr "type" "ssecvt") 8952 (set (attr "prefix_data16") 8953 (if_then_else 8954 (match_test "TARGET_AVX") 8955 (const_string "*") 8956 (const_string "1"))) 8957 (set_attr "prefix_extra" "1") 8958 (set_attr "length_immediate" "1") 8959 (set_attr "prefix" "maybe_vex") 8960 (set_attr "mode" "<MODE>")]) 8961 8962(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>" 8963 [(match_operand:<sseintvecmode> 0 "register_operand") 8964 (match_operand:VF1 1 "nonimmediate_operand") 8965 (match_operand:SI 2 "const_0_to_15_operand")] 8966 "TARGET_ROUND" 8967{ 8968 rtx tmp = gen_reg_rtx (<MODE>mode); 8969 8970 emit_insn 8971 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1], 8972 operands[2])); 8973 emit_insn 8974 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); 8975 DONE; 8976}) 8977 8978(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>" 8979 [(match_operand:<ssepackfltmode> 0 "register_operand") 8980 (match_operand:VF2 1 "nonimmediate_operand") 8981 (match_operand:VF2 2 "nonimmediate_operand") 8982 (match_operand:SI 3 "const_0_to_15_operand")] 8983 "TARGET_ROUND" 8984{ 8985 rtx tmp0, tmp1; 8986 8987 if (<MODE>mode == V2DFmode 8988 && TARGET_AVX && !TARGET_PREFER_AVX128) 8989 { 8990 rtx tmp2 = gen_reg_rtx (V4DFmode); 8991 8992 tmp0 = gen_reg_rtx (V4DFmode); 8993 tmp1 = force_reg (V2DFmode, operands[1]); 8994 8995 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 8996 emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3])); 8997 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2)); 8998 } 8999 else 9000 { 9001 tmp0 = gen_reg_rtx (<MODE>mode); 9002 tmp1 = gen_reg_rtx (<MODE>mode); 9003 9004 emit_insn 9005 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1], 9006 operands[3])); 9007 emit_insn 9008 (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2], 9009 operands[3])); 9010 emit_insn 9011 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); 9012 } 9013 DONE; 9014}) 9015 9016(define_insn "sse4_1_round<ssescalarmodesuffix>" 9017 [(set (match_operand:VF_128 0 "register_operand" "=x,x") 9018 (vec_merge:VF_128 9019 (unspec:VF_128 9020 [(match_operand:VF_128 2 "register_operand" "x,x") 9021 (match_operand:SI 3 "const_0_to_15_operand" "n,n")] 9022 UNSPEC_ROUND) 9023 (match_operand:VF_128 1 "register_operand" "0,x") 9024 (const_int 1)))] 9025 "TARGET_ROUND" 9026 "@ 9027 round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} 9028 vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9029 [(set_attr "isa" "noavx,avx") 9030 (set_attr "type" "ssecvt") 9031 (set_attr "length_immediate" "1") 9032 (set_attr "prefix_data16" "1,*") 9033 (set_attr "prefix_extra" "1") 9034 (set_attr "prefix" "orig,vex") 9035 (set_attr "mode" "<MODE>")]) 9036 9037(define_expand "round<mode>2" 9038 [(set (match_dup 4) 9039 (plus:VF 9040 (match_operand:VF 1 "register_operand") 9041 (match_dup 3))) 9042 (set (match_operand:VF 0 "register_operand") 9043 (unspec:VF 9044 [(match_dup 4) (match_dup 5)] 9045 UNSPEC_ROUND))] 9046 "TARGET_ROUND && !flag_trapping_math" 9047{ 9048 enum machine_mode scalar_mode; 9049 const struct real_format *fmt; 9050 REAL_VALUE_TYPE pred_half, half_minus_pred_half; 9051 rtx half, vec_half; 9052 9053 scalar_mode = GET_MODE_INNER (<MODE>mode); 9054 9055 /* load nextafter (0.5, 0.0) */ 9056 fmt = REAL_MODE_FORMAT (scalar_mode); 9057 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode); 9058 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); 9059 half = const_double_from_real_value (pred_half, scalar_mode); 9060 9061 vec_half = ix86_build_const_vector (<MODE>mode, true, half); 9062 vec_half = force_reg (<MODE>mode, vec_half); 9063 9064 operands[3] = gen_reg_rtx (<MODE>mode); 9065 emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1])); 9066 9067 operands[4] = gen_reg_rtx (<MODE>mode); 9068 operands[5] = GEN_INT (ROUND_TRUNC); 9069}) 9070 9071(define_expand "round<mode>2_sfix" 9072 [(match_operand:<sseintvecmode> 0 "register_operand") 9073 (match_operand:VF1 1 "register_operand")] 9074 "TARGET_ROUND && !flag_trapping_math" 9075{ 9076 rtx tmp = gen_reg_rtx (<MODE>mode); 9077 9078 emit_insn (gen_round<mode>2 (tmp, operands[1])); 9079 9080 emit_insn 9081 (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); 9082 DONE; 9083}) 9084 9085(define_expand "round<mode>2_vec_pack_sfix" 9086 [(match_operand:<ssepackfltmode> 0 "register_operand") 9087 (match_operand:VF2 1 "register_operand") 9088 (match_operand:VF2 2 "register_operand")] 9089 "TARGET_ROUND && !flag_trapping_math" 9090{ 9091 rtx tmp0, tmp1; 9092 9093 if (<MODE>mode == V2DFmode 9094 && TARGET_AVX && !TARGET_PREFER_AVX128) 9095 { 9096 rtx tmp2 = gen_reg_rtx (V4DFmode); 9097 9098 tmp0 = gen_reg_rtx (V4DFmode); 9099 tmp1 = force_reg (V2DFmode, operands[1]); 9100 9101 emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); 9102 emit_insn (gen_roundv4df2 (tmp2, tmp0)); 9103 emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2)); 9104 } 9105 else 9106 { 9107 tmp0 = gen_reg_rtx (<MODE>mode); 9108 tmp1 = gen_reg_rtx (<MODE>mode); 9109 9110 emit_insn (gen_round<mode>2 (tmp0, operands[1])); 9111 emit_insn (gen_round<mode>2 (tmp1, operands[2])); 9112 9113 emit_insn 9114 (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1)); 9115 } 9116 DONE; 9117}) 9118 9119;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9120;; 9121;; Intel SSE4.2 string/text processing instructions 9122;; 9123;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9124 9125(define_insn_and_split "sse4_2_pcmpestr" 9126 [(set (match_operand:SI 0 "register_operand" "=c,c") 9127 (unspec:SI 9128 [(match_operand:V16QI 2 "register_operand" "x,x") 9129 (match_operand:SI 3 "register_operand" "a,a") 9130 (match_operand:V16QI 4 "nonimmediate_operand" "x,m") 9131 (match_operand:SI 5 "register_operand" "d,d") 9132 (match_operand:SI 6 "const_0_to_255_operand" "n,n")] 9133 UNSPEC_PCMPESTR)) 9134 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz") 9135 (unspec:V16QI 9136 [(match_dup 2) 9137 (match_dup 3) 9138 (match_dup 4) 9139 (match_dup 5) 9140 (match_dup 6)] 9141 UNSPEC_PCMPESTR)) 9142 (set (reg:CC FLAGS_REG) 9143 (unspec:CC 9144 [(match_dup 2) 9145 (match_dup 3) 9146 (match_dup 4) 9147 (match_dup 5) 9148 (match_dup 6)] 9149 UNSPEC_PCMPESTR))] 9150 "TARGET_SSE4_2 9151 && can_create_pseudo_p ()" 9152 "#" 9153 "&& 1" 9154 [(const_int 0)] 9155{ 9156 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 9157 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); 9158 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); 9159 9160 if (ecx) 9161 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2], 9162 operands[3], operands[4], 9163 operands[5], operands[6])); 9164 if (xmm0) 9165 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2], 9166 operands[3], operands[4], 9167 operands[5], operands[6])); 9168 if (flags && !(ecx || xmm0)) 9169 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL, 9170 operands[2], operands[3], 9171 operands[4], operands[5], 9172 operands[6])); 9173 if (!(flags || ecx || xmm0)) 9174 emit_note (NOTE_INSN_DELETED); 9175 9176 DONE; 9177} 9178 [(set_attr "type" "sselog") 9179 (set_attr "prefix_data16" "1") 9180 (set_attr "prefix_extra" "1") 9181 (set_attr "ssememalign" "8") 9182 (set_attr "length_immediate" "1") 9183 (set_attr "memory" "none,load") 9184 (set_attr "mode" "TI")]) 9185 9186(define_insn_and_split "*sse4_2_pcmpestr_unaligned" 9187 [(set (match_operand:SI 0 "register_operand" "=c") 9188 (unspec:SI 9189 [(match_operand:V16QI 2 "register_operand" "x") 9190 (match_operand:SI 3 "register_operand" "a") 9191 (unspec:V16QI 9192 [(match_operand:V16QI 4 "memory_operand" "m")] 9193 UNSPEC_LOADU) 9194 (match_operand:SI 5 "register_operand" "d") 9195 (match_operand:SI 6 "const_0_to_255_operand" "n")] 9196 UNSPEC_PCMPESTR)) 9197 (set (match_operand:V16QI 1 "register_operand" "=Yz") 9198 (unspec:V16QI 9199 [(match_dup 2) 9200 (match_dup 3) 9201 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU) 9202 (match_dup 5) 9203 (match_dup 6)] 9204 UNSPEC_PCMPESTR)) 9205 (set (reg:CC FLAGS_REG) 9206 (unspec:CC 9207 [(match_dup 2) 9208 (match_dup 3) 9209 (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU) 9210 (match_dup 5) 9211 (match_dup 6)] 9212 UNSPEC_PCMPESTR))] 9213 "TARGET_SSE4_2 9214 && can_create_pseudo_p ()" 9215 "#" 9216 "&& 1" 9217 [(const_int 0)] 9218{ 9219 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 9220 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); 9221 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); 9222 9223 if (ecx) 9224 emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2], 9225 operands[3], operands[4], 9226 operands[5], operands[6])); 9227 if (xmm0) 9228 emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2], 9229 operands[3], operands[4], 9230 operands[5], operands[6])); 9231 if (flags && !(ecx || xmm0)) 9232 emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL, 9233 operands[2], operands[3], 9234 operands[4], operands[5], 9235 operands[6])); 9236 if (!(flags || ecx || xmm0)) 9237 emit_note (NOTE_INSN_DELETED); 9238 9239 DONE; 9240} 9241 [(set_attr "type" "sselog") 9242 (set_attr "prefix_data16" "1") 9243 (set_attr "prefix_extra" "1") 9244 (set_attr "ssememalign" "8") 9245 (set_attr "length_immediate" "1") 9246 (set_attr "memory" "load") 9247 (set_attr "mode" "TI")]) 9248 9249(define_insn "sse4_2_pcmpestri" 9250 [(set (match_operand:SI 0 "register_operand" "=c,c") 9251 (unspec:SI 9252 [(match_operand:V16QI 1 "register_operand" "x,x") 9253 (match_operand:SI 2 "register_operand" "a,a") 9254 (match_operand:V16QI 3 "nonimmediate_operand" "x,m") 9255 (match_operand:SI 4 "register_operand" "d,d") 9256 (match_operand:SI 5 "const_0_to_255_operand" "n,n")] 9257 UNSPEC_PCMPESTR)) 9258 (set (reg:CC FLAGS_REG) 9259 (unspec:CC 9260 [(match_dup 1) 9261 (match_dup 2) 9262 (match_dup 3) 9263 (match_dup 4) 9264 (match_dup 5)] 9265 UNSPEC_PCMPESTR))] 9266 "TARGET_SSE4_2" 9267 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" 9268 [(set_attr "type" "sselog") 9269 (set_attr "prefix_data16" "1") 9270 (set_attr "prefix_extra" "1") 9271 (set_attr "prefix" "maybe_vex") 9272 (set_attr "ssememalign" "8") 9273 (set_attr "length_immediate" "1") 9274 (set_attr "btver2_decode" "vector") 9275 (set_attr "memory" "none,load") 9276 (set_attr "mode" "TI")]) 9277 9278(define_insn "sse4_2_pcmpestrm" 9279 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") 9280 (unspec:V16QI 9281 [(match_operand:V16QI 1 "register_operand" "x,x") 9282 (match_operand:SI 2 "register_operand" "a,a") 9283 (match_operand:V16QI 3 "nonimmediate_operand" "x,m") 9284 (match_operand:SI 4 "register_operand" "d,d") 9285 (match_operand:SI 5 "const_0_to_255_operand" "n,n")] 9286 UNSPEC_PCMPESTR)) 9287 (set (reg:CC FLAGS_REG) 9288 (unspec:CC 9289 [(match_dup 1) 9290 (match_dup 2) 9291 (match_dup 3) 9292 (match_dup 4) 9293 (match_dup 5)] 9294 UNSPEC_PCMPESTR))] 9295 "TARGET_SSE4_2" 9296 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" 9297 [(set_attr "type" "sselog") 9298 (set_attr "prefix_data16" "1") 9299 (set_attr "prefix_extra" "1") 9300 (set_attr "ssememalign" "8") 9301 (set_attr "length_immediate" "1") 9302 (set_attr "prefix" "maybe_vex") 9303 (set_attr "btver2_decode" "vector") 9304 (set_attr "memory" "none,load") 9305 (set_attr "mode" "TI")]) 9306 9307(define_insn "sse4_2_pcmpestr_cconly" 9308 [(set (reg:CC FLAGS_REG) 9309 (unspec:CC 9310 [(match_operand:V16QI 2 "register_operand" "x,x,x,x") 9311 (match_operand:SI 3 "register_operand" "a,a,a,a") 9312 (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m") 9313 (match_operand:SI 5 "register_operand" "d,d,d,d") 9314 (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")] 9315 UNSPEC_PCMPESTR)) 9316 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X")) 9317 (clobber (match_scratch:SI 1 "= X, X,c,c"))] 9318 "TARGET_SSE4_2" 9319 "@ 9320 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6} 9321 %vpcmpestrm\t{%6, %4, %2|%2, %4, %6} 9322 %vpcmpestri\t{%6, %4, %2|%2, %4, %6} 9323 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" 9324 [(set_attr "type" "sselog") 9325 (set_attr "prefix_data16" "1") 9326 (set_attr "prefix_extra" "1") 9327 (set_attr "ssememalign" "8") 9328 (set_attr "length_immediate" "1") 9329 (set_attr "memory" "none,load,none,load") 9330 (set_attr "btver2_decode" "vector,vector,vector,vector") 9331 (set_attr "prefix" "maybe_vex") 9332 (set_attr "mode" "TI")]) 9333 9334(define_insn_and_split "sse4_2_pcmpistr" 9335 [(set (match_operand:SI 0 "register_operand" "=c,c") 9336 (unspec:SI 9337 [(match_operand:V16QI 2 "register_operand" "x,x") 9338 (match_operand:V16QI 3 "nonimmediate_operand" "x,m") 9339 (match_operand:SI 4 "const_0_to_255_operand" "n,n")] 9340 UNSPEC_PCMPISTR)) 9341 (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz") 9342 (unspec:V16QI 9343 [(match_dup 2) 9344 (match_dup 3) 9345 (match_dup 4)] 9346 UNSPEC_PCMPISTR)) 9347 (set (reg:CC FLAGS_REG) 9348 (unspec:CC 9349 [(match_dup 2) 9350 (match_dup 3) 9351 (match_dup 4)] 9352 UNSPEC_PCMPISTR))] 9353 "TARGET_SSE4_2 9354 && can_create_pseudo_p ()" 9355 "#" 9356 "&& 1" 9357 [(const_int 0)] 9358{ 9359 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 9360 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); 9361 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); 9362 9363 if (ecx) 9364 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2], 9365 operands[3], operands[4])); 9366 if (xmm0) 9367 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2], 9368 operands[3], operands[4])); 9369 if (flags && !(ecx || xmm0)) 9370 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL, 9371 operands[2], operands[3], 9372 operands[4])); 9373 if (!(flags || ecx || xmm0)) 9374 emit_note (NOTE_INSN_DELETED); 9375 9376 DONE; 9377} 9378 [(set_attr "type" "sselog") 9379 (set_attr "prefix_data16" "1") 9380 (set_attr "prefix_extra" "1") 9381 (set_attr "ssememalign" "8") 9382 (set_attr "length_immediate" "1") 9383 (set_attr "memory" "none,load") 9384 (set_attr "mode" "TI")]) 9385 9386(define_insn_and_split "*sse4_2_pcmpistr_unaligned" 9387 [(set (match_operand:SI 0 "register_operand" "=c") 9388 (unspec:SI 9389 [(match_operand:V16QI 2 "register_operand" "x") 9390 (unspec:V16QI 9391 [(match_operand:V16QI 3 "memory_operand" "m")] 9392 UNSPEC_LOADU) 9393 (match_operand:SI 4 "const_0_to_255_operand" "n")] 9394 UNSPEC_PCMPISTR)) 9395 (set (match_operand:V16QI 1 "register_operand" "=Yz") 9396 (unspec:V16QI 9397 [(match_dup 2) 9398 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU) 9399 (match_dup 4)] 9400 UNSPEC_PCMPISTR)) 9401 (set (reg:CC FLAGS_REG) 9402 (unspec:CC 9403 [(match_dup 2) 9404 (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU) 9405 (match_dup 4)] 9406 UNSPEC_PCMPISTR))] 9407 "TARGET_SSE4_2 9408 && can_create_pseudo_p ()" 9409 "#" 9410 "&& 1" 9411 [(const_int 0)] 9412{ 9413 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 9414 int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1])); 9415 int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG); 9416 9417 if (ecx) 9418 emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2], 9419 operands[3], operands[4])); 9420 if (xmm0) 9421 emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2], 9422 operands[3], operands[4])); 9423 if (flags && !(ecx || xmm0)) 9424 emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL, 9425 operands[2], operands[3], 9426 operands[4])); 9427 if (!(flags || ecx || xmm0)) 9428 emit_note (NOTE_INSN_DELETED); 9429 9430 DONE; 9431} 9432 [(set_attr "type" "sselog") 9433 (set_attr "prefix_data16" "1") 9434 (set_attr "prefix_extra" "1") 9435 (set_attr "ssememalign" "8") 9436 (set_attr "length_immediate" "1") 9437 (set_attr "memory" "load") 9438 (set_attr "mode" "TI")]) 9439 9440(define_insn "sse4_2_pcmpistri" 9441 [(set (match_operand:SI 0 "register_operand" "=c,c") 9442 (unspec:SI 9443 [(match_operand:V16QI 1 "register_operand" "x,x") 9444 (match_operand:V16QI 2 "nonimmediate_operand" "x,m") 9445 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 9446 UNSPEC_PCMPISTR)) 9447 (set (reg:CC FLAGS_REG) 9448 (unspec:CC 9449 [(match_dup 1) 9450 (match_dup 2) 9451 (match_dup 3)] 9452 UNSPEC_PCMPISTR))] 9453 "TARGET_SSE4_2" 9454 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" 9455 [(set_attr "type" "sselog") 9456 (set_attr "prefix_data16" "1") 9457 (set_attr "prefix_extra" "1") 9458 (set_attr "ssememalign" "8") 9459 (set_attr "length_immediate" "1") 9460 (set_attr "prefix" "maybe_vex") 9461 (set_attr "memory" "none,load") 9462 (set_attr "btver2_decode" "vector") 9463 (set_attr "mode" "TI")]) 9464 9465(define_insn "sse4_2_pcmpistrm" 9466 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") 9467 (unspec:V16QI 9468 [(match_operand:V16QI 1 "register_operand" "x,x") 9469 (match_operand:V16QI 2 "nonimmediate_operand" "x,m") 9470 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 9471 UNSPEC_PCMPISTR)) 9472 (set (reg:CC FLAGS_REG) 9473 (unspec:CC 9474 [(match_dup 1) 9475 (match_dup 2) 9476 (match_dup 3)] 9477 UNSPEC_PCMPISTR))] 9478 "TARGET_SSE4_2" 9479 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" 9480 [(set_attr "type" "sselog") 9481 (set_attr "prefix_data16" "1") 9482 (set_attr "prefix_extra" "1") 9483 (set_attr "ssememalign" "8") 9484 (set_attr "length_immediate" "1") 9485 (set_attr "prefix" "maybe_vex") 9486 (set_attr "memory" "none,load") 9487 (set_attr "btver2_decode" "vector") 9488 (set_attr "mode" "TI")]) 9489 9490(define_insn "sse4_2_pcmpistr_cconly" 9491 [(set (reg:CC FLAGS_REG) 9492 (unspec:CC 9493 [(match_operand:V16QI 2 "register_operand" "x,x,x,x") 9494 (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m") 9495 (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")] 9496 UNSPEC_PCMPISTR)) 9497 (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X")) 9498 (clobber (match_scratch:SI 1 "= X, X,c,c"))] 9499 "TARGET_SSE4_2" 9500 "@ 9501 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4} 9502 %vpcmpistrm\t{%4, %3, %2|%2, %3, %4} 9503 %vpcmpistri\t{%4, %3, %2|%2, %3, %4} 9504 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" 9505 [(set_attr "type" "sselog") 9506 (set_attr "prefix_data16" "1") 9507 (set_attr "prefix_extra" "1") 9508 (set_attr "ssememalign" "8") 9509 (set_attr "length_immediate" "1") 9510 (set_attr "memory" "none,load,none,load") 9511 (set_attr "prefix" "maybe_vex") 9512 (set_attr "btver2_decode" "vector,vector,vector,vector") 9513 (set_attr "mode" "TI")]) 9514 9515;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9516;; 9517;; XOP instructions 9518;; 9519;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9520 9521(define_code_iterator xop_plus [plus ss_plus]) 9522 9523(define_code_attr macs [(plus "macs") (ss_plus "macss")]) 9524(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")]) 9525 9526;; XOP parallel integer multiply/add instructions. 9527 9528(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>" 9529 [(set (match_operand:VI24_128 0 "register_operand" "=x") 9530 (xop_plus:VI24_128 9531 (mult:VI24_128 9532 (match_operand:VI24_128 1 "nonimmediate_operand" "%x") 9533 (match_operand:VI24_128 2 "nonimmediate_operand" "xm")) 9534 (match_operand:VI24_128 3 "register_operand" "x")))] 9535 "TARGET_XOP" 9536 "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9537 [(set_attr "type" "ssemuladd") 9538 (set_attr "mode" "TI")]) 9539 9540(define_insn "xop_p<macs>dql" 9541 [(set (match_operand:V2DI 0 "register_operand" "=x") 9542 (xop_plus:V2DI 9543 (mult:V2DI 9544 (sign_extend:V2DI 9545 (vec_select:V2SI 9546 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 9547 (parallel [(const_int 0) (const_int 2)]))) 9548 (sign_extend:V2DI 9549 (vec_select:V2SI 9550 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 9551 (parallel [(const_int 0) (const_int 2)])))) 9552 (match_operand:V2DI 3 "register_operand" "x")))] 9553 "TARGET_XOP" 9554 "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9555 [(set_attr "type" "ssemuladd") 9556 (set_attr "mode" "TI")]) 9557 9558(define_insn "xop_p<macs>dqh" 9559 [(set (match_operand:V2DI 0 "register_operand" "=x") 9560 (xop_plus:V2DI 9561 (mult:V2DI 9562 (sign_extend:V2DI 9563 (vec_select:V2SI 9564 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 9565 (parallel [(const_int 1) (const_int 3)]))) 9566 (sign_extend:V2DI 9567 (vec_select:V2SI 9568 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 9569 (parallel [(const_int 1) (const_int 3)])))) 9570 (match_operand:V2DI 3 "register_operand" "x")))] 9571 "TARGET_XOP" 9572 "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9573 [(set_attr "type" "ssemuladd") 9574 (set_attr "mode" "TI")]) 9575 9576;; XOP parallel integer multiply/add instructions for the intrinisics 9577(define_insn "xop_p<macs>wd" 9578 [(set (match_operand:V4SI 0 "register_operand" "=x") 9579 (xop_plus:V4SI 9580 (mult:V4SI 9581 (sign_extend:V4SI 9582 (vec_select:V4HI 9583 (match_operand:V8HI 1 "nonimmediate_operand" "%x") 9584 (parallel [(const_int 1) (const_int 3) 9585 (const_int 5) (const_int 7)]))) 9586 (sign_extend:V4SI 9587 (vec_select:V4HI 9588 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 9589 (parallel [(const_int 1) (const_int 3) 9590 (const_int 5) (const_int 7)])))) 9591 (match_operand:V4SI 3 "register_operand" "x")))] 9592 "TARGET_XOP" 9593 "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9594 [(set_attr "type" "ssemuladd") 9595 (set_attr "mode" "TI")]) 9596 9597(define_insn "xop_p<madcs>wd" 9598 [(set (match_operand:V4SI 0 "register_operand" "=x") 9599 (xop_plus:V4SI 9600 (plus:V4SI 9601 (mult:V4SI 9602 (sign_extend:V4SI 9603 (vec_select:V4HI 9604 (match_operand:V8HI 1 "nonimmediate_operand" "%x") 9605 (parallel [(const_int 0) (const_int 2) 9606 (const_int 4) (const_int 6)]))) 9607 (sign_extend:V4SI 9608 (vec_select:V4HI 9609 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 9610 (parallel [(const_int 0) (const_int 2) 9611 (const_int 4) (const_int 6)])))) 9612 (mult:V4SI 9613 (sign_extend:V4SI 9614 (vec_select:V4HI 9615 (match_dup 1) 9616 (parallel [(const_int 1) (const_int 3) 9617 (const_int 5) (const_int 7)]))) 9618 (sign_extend:V4SI 9619 (vec_select:V4HI 9620 (match_dup 2) 9621 (parallel [(const_int 1) (const_int 3) 9622 (const_int 5) (const_int 7)]))))) 9623 (match_operand:V4SI 3 "register_operand" "x")))] 9624 "TARGET_XOP" 9625 "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9626 [(set_attr "type" "ssemuladd") 9627 (set_attr "mode" "TI")]) 9628 9629;; XOP parallel XMM conditional moves 9630(define_insn "xop_pcmov_<mode><avxsizesuffix>" 9631 [(set (match_operand:V 0 "register_operand" "=x,x") 9632 (if_then_else:V 9633 (match_operand:V 3 "nonimmediate_operand" "x,m") 9634 (match_operand:V 1 "register_operand" "x,x") 9635 (match_operand:V 2 "nonimmediate_operand" "xm,x")))] 9636 "TARGET_XOP" 9637 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9638 [(set_attr "type" "sse4arg")]) 9639 9640;; XOP horizontal add/subtract instructions 9641(define_insn "xop_phadd<u>bw" 9642 [(set (match_operand:V8HI 0 "register_operand" "=x") 9643 (plus:V8HI 9644 (any_extend:V8HI 9645 (vec_select:V8QI 9646 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 9647 (parallel [(const_int 0) (const_int 2) 9648 (const_int 4) (const_int 6) 9649 (const_int 8) (const_int 10) 9650 (const_int 12) (const_int 14)]))) 9651 (any_extend:V8HI 9652 (vec_select:V8QI 9653 (match_dup 1) 9654 (parallel [(const_int 1) (const_int 3) 9655 (const_int 5) (const_int 7) 9656 (const_int 9) (const_int 11) 9657 (const_int 13) (const_int 15)])))))] 9658 "TARGET_XOP" 9659 "vphadd<u>bw\t{%1, %0|%0, %1}" 9660 [(set_attr "type" "sseiadd1")]) 9661 9662(define_insn "xop_phadd<u>bd" 9663 [(set (match_operand:V4SI 0 "register_operand" "=x") 9664 (plus:V4SI 9665 (plus:V4SI 9666 (any_extend:V4SI 9667 (vec_select:V4QI 9668 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 9669 (parallel [(const_int 0) (const_int 4) 9670 (const_int 8) (const_int 12)]))) 9671 (any_extend:V4SI 9672 (vec_select:V4QI 9673 (match_dup 1) 9674 (parallel [(const_int 1) (const_int 5) 9675 (const_int 9) (const_int 13)])))) 9676 (plus:V4SI 9677 (any_extend:V4SI 9678 (vec_select:V4QI 9679 (match_dup 1) 9680 (parallel [(const_int 2) (const_int 6) 9681 (const_int 10) (const_int 14)]))) 9682 (any_extend:V4SI 9683 (vec_select:V4QI 9684 (match_dup 1) 9685 (parallel [(const_int 3) (const_int 7) 9686 (const_int 11) (const_int 15)]))))))] 9687 "TARGET_XOP" 9688 "vphadd<u>bd\t{%1, %0|%0, %1}" 9689 [(set_attr "type" "sseiadd1")]) 9690 9691(define_insn "xop_phadd<u>bq" 9692 [(set (match_operand:V2DI 0 "register_operand" "=x") 9693 (plus:V2DI 9694 (plus:V2DI 9695 (plus:V2DI 9696 (any_extend:V2DI 9697 (vec_select:V2QI 9698 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 9699 (parallel [(const_int 0) (const_int 8)]))) 9700 (any_extend:V2DI 9701 (vec_select:V2QI 9702 (match_dup 1) 9703 (parallel [(const_int 1) (const_int 9)])))) 9704 (plus:V2DI 9705 (any_extend:V2DI 9706 (vec_select:V2QI 9707 (match_dup 1) 9708 (parallel [(const_int 2) (const_int 10)]))) 9709 (any_extend:V2DI 9710 (vec_select:V2QI 9711 (match_dup 1) 9712 (parallel [(const_int 3) (const_int 11)]))))) 9713 (plus:V2DI 9714 (plus:V2DI 9715 (any_extend:V2DI 9716 (vec_select:V2QI 9717 (match_dup 1) 9718 (parallel [(const_int 4) (const_int 12)]))) 9719 (any_extend:V2DI 9720 (vec_select:V2QI 9721 (match_dup 1) 9722 (parallel [(const_int 5) (const_int 13)])))) 9723 (plus:V2DI 9724 (any_extend:V2DI 9725 (vec_select:V2QI 9726 (match_dup 1) 9727 (parallel [(const_int 6) (const_int 14)]))) 9728 (any_extend:V2DI 9729 (vec_select:V2QI 9730 (match_dup 1) 9731 (parallel [(const_int 7) (const_int 15)])))))))] 9732 "TARGET_XOP" 9733 "vphadd<u>bq\t{%1, %0|%0, %1}" 9734 [(set_attr "type" "sseiadd1")]) 9735 9736(define_insn "xop_phadd<u>wd" 9737 [(set (match_operand:V4SI 0 "register_operand" "=x") 9738 (plus:V4SI 9739 (any_extend:V4SI 9740 (vec_select:V4HI 9741 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 9742 (parallel [(const_int 0) (const_int 2) 9743 (const_int 4) (const_int 6)]))) 9744 (any_extend:V4SI 9745 (vec_select:V4HI 9746 (match_dup 1) 9747 (parallel [(const_int 1) (const_int 3) 9748 (const_int 5) (const_int 7)])))))] 9749 "TARGET_XOP" 9750 "vphadd<u>wd\t{%1, %0|%0, %1}" 9751 [(set_attr "type" "sseiadd1")]) 9752 9753(define_insn "xop_phadd<u>wq" 9754 [(set (match_operand:V2DI 0 "register_operand" "=x") 9755 (plus:V2DI 9756 (plus:V2DI 9757 (any_extend:V2DI 9758 (vec_select:V2HI 9759 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 9760 (parallel [(const_int 0) (const_int 4)]))) 9761 (any_extend:V2DI 9762 (vec_select:V2HI 9763 (match_dup 1) 9764 (parallel [(const_int 1) (const_int 5)])))) 9765 (plus:V2DI 9766 (any_extend:V2DI 9767 (vec_select:V2HI 9768 (match_dup 1) 9769 (parallel [(const_int 2) (const_int 6)]))) 9770 (any_extend:V2DI 9771 (vec_select:V2HI 9772 (match_dup 1) 9773 (parallel [(const_int 3) (const_int 7)]))))))] 9774 "TARGET_XOP" 9775 "vphadd<u>wq\t{%1, %0|%0, %1}" 9776 [(set_attr "type" "sseiadd1")]) 9777 9778(define_insn "xop_phadd<u>dq" 9779 [(set (match_operand:V2DI 0 "register_operand" "=x") 9780 (plus:V2DI 9781 (any_extend:V2DI 9782 (vec_select:V2SI 9783 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 9784 (parallel [(const_int 0) (const_int 2)]))) 9785 (any_extend:V2DI 9786 (vec_select:V2SI 9787 (match_dup 1) 9788 (parallel [(const_int 1) (const_int 3)])))))] 9789 "TARGET_XOP" 9790 "vphadd<u>dq\t{%1, %0|%0, %1}" 9791 [(set_attr "type" "sseiadd1")]) 9792 9793(define_insn "xop_phsubbw" 9794 [(set (match_operand:V8HI 0 "register_operand" "=x") 9795 (minus:V8HI 9796 (sign_extend:V8HI 9797 (vec_select:V8QI 9798 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 9799 (parallel [(const_int 0) (const_int 2) 9800 (const_int 4) (const_int 6) 9801 (const_int 8) (const_int 10) 9802 (const_int 12) (const_int 14)]))) 9803 (sign_extend:V8HI 9804 (vec_select:V8QI 9805 (match_dup 1) 9806 (parallel [(const_int 1) (const_int 3) 9807 (const_int 5) (const_int 7) 9808 (const_int 9) (const_int 11) 9809 (const_int 13) (const_int 15)])))))] 9810 "TARGET_XOP" 9811 "vphsubbw\t{%1, %0|%0, %1}" 9812 [(set_attr "type" "sseiadd1")]) 9813 9814(define_insn "xop_phsubwd" 9815 [(set (match_operand:V4SI 0 "register_operand" "=x") 9816 (minus:V4SI 9817 (sign_extend:V4SI 9818 (vec_select:V4HI 9819 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 9820 (parallel [(const_int 0) (const_int 2) 9821 (const_int 4) (const_int 6)]))) 9822 (sign_extend:V4SI 9823 (vec_select:V4HI 9824 (match_dup 1) 9825 (parallel [(const_int 1) (const_int 3) 9826 (const_int 5) (const_int 7)])))))] 9827 "TARGET_XOP" 9828 "vphsubwd\t{%1, %0|%0, %1}" 9829 [(set_attr "type" "sseiadd1")]) 9830 9831(define_insn "xop_phsubdq" 9832 [(set (match_operand:V2DI 0 "register_operand" "=x") 9833 (minus:V2DI 9834 (sign_extend:V2DI 9835 (vec_select:V2SI 9836 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 9837 (parallel [(const_int 0) (const_int 2)]))) 9838 (sign_extend:V2DI 9839 (vec_select:V2SI 9840 (match_dup 1) 9841 (parallel [(const_int 1) (const_int 3)])))))] 9842 "TARGET_XOP" 9843 "vphsubdq\t{%1, %0|%0, %1}" 9844 [(set_attr "type" "sseiadd1")]) 9845 9846;; XOP permute instructions 9847(define_insn "xop_pperm" 9848 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 9849 (unspec:V16QI 9850 [(match_operand:V16QI 1 "register_operand" "x,x") 9851 (match_operand:V16QI 2 "nonimmediate_operand" "x,m") 9852 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] 9853 UNSPEC_XOP_PERMUTE))] 9854 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 9855 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9856 [(set_attr "type" "sse4arg") 9857 (set_attr "mode" "TI")]) 9858 9859;; XOP pack instructions that combine two vectors into a smaller vector 9860(define_insn "xop_pperm_pack_v2di_v4si" 9861 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 9862 (vec_concat:V4SI 9863 (truncate:V2SI 9864 (match_operand:V2DI 1 "register_operand" "x,x")) 9865 (truncate:V2SI 9866 (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) 9867 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 9868 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 9869 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9870 [(set_attr "type" "sse4arg") 9871 (set_attr "mode" "TI")]) 9872 9873(define_insn "xop_pperm_pack_v4si_v8hi" 9874 [(set (match_operand:V8HI 0 "register_operand" "=x,x") 9875 (vec_concat:V8HI 9876 (truncate:V4HI 9877 (match_operand:V4SI 1 "register_operand" "x,x")) 9878 (truncate:V4HI 9879 (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) 9880 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 9881 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 9882 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9883 [(set_attr "type" "sse4arg") 9884 (set_attr "mode" "TI")]) 9885 9886(define_insn "xop_pperm_pack_v8hi_v16qi" 9887 [(set (match_operand:V16QI 0 "register_operand" "=x,x") 9888 (vec_concat:V16QI 9889 (truncate:V8QI 9890 (match_operand:V8HI 1 "register_operand" "x,x")) 9891 (truncate:V8QI 9892 (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) 9893 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] 9894 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" 9895 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9896 [(set_attr "type" "sse4arg") 9897 (set_attr "mode" "TI")]) 9898 9899;; XOP packed rotate instructions 9900(define_expand "rotl<mode>3" 9901 [(set (match_operand:VI_128 0 "register_operand") 9902 (rotate:VI_128 9903 (match_operand:VI_128 1 "nonimmediate_operand") 9904 (match_operand:SI 2 "general_operand")))] 9905 "TARGET_XOP" 9906{ 9907 /* If we were given a scalar, convert it to parallel */ 9908 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) 9909 { 9910 rtvec vs = rtvec_alloc (<ssescalarnum>); 9911 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); 9912 rtx reg = gen_reg_rtx (<MODE>mode); 9913 rtx op2 = operands[2]; 9914 int i; 9915 9916 if (GET_MODE (op2) != <ssescalarmode>mode) 9917 { 9918 op2 = gen_reg_rtx (<ssescalarmode>mode); 9919 convert_move (op2, operands[2], false); 9920 } 9921 9922 for (i = 0; i < <ssescalarnum>; i++) 9923 RTVEC_ELT (vs, i) = op2; 9924 9925 emit_insn (gen_vec_init<mode> (reg, par)); 9926 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); 9927 DONE; 9928 } 9929}) 9930 9931(define_expand "rotr<mode>3" 9932 [(set (match_operand:VI_128 0 "register_operand") 9933 (rotatert:VI_128 9934 (match_operand:VI_128 1 "nonimmediate_operand") 9935 (match_operand:SI 2 "general_operand")))] 9936 "TARGET_XOP" 9937{ 9938 /* If we were given a scalar, convert it to parallel */ 9939 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) 9940 { 9941 rtvec vs = rtvec_alloc (<ssescalarnum>); 9942 rtx par = gen_rtx_PARALLEL (<MODE>mode, vs); 9943 rtx neg = gen_reg_rtx (<MODE>mode); 9944 rtx reg = gen_reg_rtx (<MODE>mode); 9945 rtx op2 = operands[2]; 9946 int i; 9947 9948 if (GET_MODE (op2) != <ssescalarmode>mode) 9949 { 9950 op2 = gen_reg_rtx (<ssescalarmode>mode); 9951 convert_move (op2, operands[2], false); 9952 } 9953 9954 for (i = 0; i < <ssescalarnum>; i++) 9955 RTVEC_ELT (vs, i) = op2; 9956 9957 emit_insn (gen_vec_init<mode> (reg, par)); 9958 emit_insn (gen_neg<mode>2 (neg, reg)); 9959 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); 9960 DONE; 9961 } 9962}) 9963 9964(define_insn "xop_rotl<mode>3" 9965 [(set (match_operand:VI_128 0 "register_operand" "=x") 9966 (rotate:VI_128 9967 (match_operand:VI_128 1 "nonimmediate_operand" "xm") 9968 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] 9969 "TARGET_XOP" 9970 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 9971 [(set_attr "type" "sseishft") 9972 (set_attr "length_immediate" "1") 9973 (set_attr "mode" "TI")]) 9974 9975(define_insn "xop_rotr<mode>3" 9976 [(set (match_operand:VI_128 0 "register_operand" "=x") 9977 (rotatert:VI_128 9978 (match_operand:VI_128 1 "nonimmediate_operand" "xm") 9979 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] 9980 "TARGET_XOP" 9981{ 9982 operands[3] 9983 = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2])); 9984 return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\"; 9985} 9986 [(set_attr "type" "sseishft") 9987 (set_attr "length_immediate" "1") 9988 (set_attr "mode" "TI")]) 9989 9990(define_expand "vrotr<mode>3" 9991 [(match_operand:VI_128 0 "register_operand") 9992 (match_operand:VI_128 1 "register_operand") 9993 (match_operand:VI_128 2 "register_operand")] 9994 "TARGET_XOP" 9995{ 9996 rtx reg = gen_reg_rtx (<MODE>mode); 9997 emit_insn (gen_neg<mode>2 (reg, operands[2])); 9998 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); 9999 DONE; 10000}) 10001 10002(define_expand "vrotl<mode>3" 10003 [(match_operand:VI_128 0 "register_operand") 10004 (match_operand:VI_128 1 "register_operand") 10005 (match_operand:VI_128 2 "register_operand")] 10006 "TARGET_XOP" 10007{ 10008 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); 10009 DONE; 10010}) 10011 10012(define_insn "xop_vrotl<mode>3" 10013 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 10014 (if_then_else:VI_128 10015 (ge:VI_128 10016 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 10017 (const_int 0)) 10018 (rotate:VI_128 10019 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") 10020 (match_dup 2)) 10021 (rotatert:VI_128 10022 (match_dup 1) 10023 (neg:VI_128 (match_dup 2)))))] 10024 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 10025 "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 10026 [(set_attr "type" "sseishft") 10027 (set_attr "prefix_data16" "0") 10028 (set_attr "prefix_extra" "2") 10029 (set_attr "mode" "TI")]) 10030 10031;; XOP packed shift instructions. 10032(define_expand "vlshr<mode>3" 10033 [(set (match_operand:VI12_128 0 "register_operand") 10034 (lshiftrt:VI12_128 10035 (match_operand:VI12_128 1 "register_operand") 10036 (match_operand:VI12_128 2 "nonimmediate_operand")))] 10037 "TARGET_XOP" 10038{ 10039 rtx neg = gen_reg_rtx (<MODE>mode); 10040 emit_insn (gen_neg<mode>2 (neg, operands[2])); 10041 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); 10042 DONE; 10043}) 10044 10045(define_expand "vlshr<mode>3" 10046 [(set (match_operand:VI48_128 0 "register_operand") 10047 (lshiftrt:VI48_128 10048 (match_operand:VI48_128 1 "register_operand") 10049 (match_operand:VI48_128 2 "nonimmediate_operand")))] 10050 "TARGET_AVX2 || TARGET_XOP" 10051{ 10052 if (!TARGET_AVX2) 10053 { 10054 rtx neg = gen_reg_rtx (<MODE>mode); 10055 emit_insn (gen_neg<mode>2 (neg, operands[2])); 10056 emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg)); 10057 DONE; 10058 } 10059}) 10060 10061(define_expand "vlshr<mode>3" 10062 [(set (match_operand:VI48_256 0 "register_operand") 10063 (lshiftrt:VI48_256 10064 (match_operand:VI48_256 1 "register_operand") 10065 (match_operand:VI48_256 2 "nonimmediate_operand")))] 10066 "TARGET_AVX2") 10067 10068(define_expand "vashr<mode>3" 10069 [(set (match_operand:VI128_128 0 "register_operand") 10070 (ashiftrt:VI128_128 10071 (match_operand:VI128_128 1 "register_operand") 10072 (match_operand:VI128_128 2 "nonimmediate_operand")))] 10073 "TARGET_XOP" 10074{ 10075 rtx neg = gen_reg_rtx (<MODE>mode); 10076 emit_insn (gen_neg<mode>2 (neg, operands[2])); 10077 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg)); 10078 DONE; 10079}) 10080 10081(define_expand "vashrv4si3" 10082 [(set (match_operand:V4SI 0 "register_operand") 10083 (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand") 10084 (match_operand:V4SI 2 "nonimmediate_operand")))] 10085 "TARGET_AVX2 || TARGET_XOP" 10086{ 10087 if (!TARGET_AVX2) 10088 { 10089 rtx neg = gen_reg_rtx (V4SImode); 10090 emit_insn (gen_negv4si2 (neg, operands[2])); 10091 emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg)); 10092 DONE; 10093 } 10094}) 10095 10096(define_expand "vashrv8si3" 10097 [(set (match_operand:V8SI 0 "register_operand") 10098 (ashiftrt:V8SI (match_operand:V8SI 1 "register_operand") 10099 (match_operand:V8SI 2 "nonimmediate_operand")))] 10100 "TARGET_AVX2") 10101 10102(define_expand "vashl<mode>3" 10103 [(set (match_operand:VI12_128 0 "register_operand") 10104 (ashift:VI12_128 10105 (match_operand:VI12_128 1 "register_operand") 10106 (match_operand:VI12_128 2 "nonimmediate_operand")))] 10107 "TARGET_XOP" 10108{ 10109 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); 10110 DONE; 10111}) 10112 10113(define_expand "vashl<mode>3" 10114 [(set (match_operand:VI48_128 0 "register_operand") 10115 (ashift:VI48_128 10116 (match_operand:VI48_128 1 "register_operand") 10117 (match_operand:VI48_128 2 "nonimmediate_operand")))] 10118 "TARGET_AVX2 || TARGET_XOP" 10119{ 10120 if (!TARGET_AVX2) 10121 { 10122 operands[2] = force_reg (<MODE>mode, operands[2]); 10123 emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2])); 10124 DONE; 10125 } 10126}) 10127 10128(define_expand "vashl<mode>3" 10129 [(set (match_operand:VI48_256 0 "register_operand") 10130 (ashift:VI48_256 10131 (match_operand:VI48_256 1 "register_operand") 10132 (match_operand:VI48_256 2 "nonimmediate_operand")))] 10133 "TARGET_AVX2") 10134 10135(define_insn "xop_sha<mode>3" 10136 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 10137 (if_then_else:VI_128 10138 (ge:VI_128 10139 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 10140 (const_int 0)) 10141 (ashift:VI_128 10142 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") 10143 (match_dup 2)) 10144 (ashiftrt:VI_128 10145 (match_dup 1) 10146 (neg:VI_128 (match_dup 2)))))] 10147 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 10148 "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 10149 [(set_attr "type" "sseishft") 10150 (set_attr "prefix_data16" "0") 10151 (set_attr "prefix_extra" "2") 10152 (set_attr "mode" "TI")]) 10153 10154(define_insn "xop_shl<mode>3" 10155 [(set (match_operand:VI_128 0 "register_operand" "=x,x") 10156 (if_then_else:VI_128 10157 (ge:VI_128 10158 (match_operand:VI_128 2 "nonimmediate_operand" "x,m") 10159 (const_int 0)) 10160 (ashift:VI_128 10161 (match_operand:VI_128 1 "nonimmediate_operand" "xm,x") 10162 (match_dup 2)) 10163 (lshiftrt:VI_128 10164 (match_dup 1) 10165 (neg:VI_128 (match_dup 2)))))] 10166 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 10167 "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 10168 [(set_attr "type" "sseishft") 10169 (set_attr "prefix_data16" "0") 10170 (set_attr "prefix_extra" "2") 10171 (set_attr "mode" "TI")]) 10172 10173(define_expand "<shift_insn><mode>3" 10174 [(set (match_operand:VI1_AVX2 0 "register_operand") 10175 (any_shift:VI1_AVX2 10176 (match_operand:VI1_AVX2 1 "register_operand") 10177 (match_operand:SI 2 "nonmemory_operand")))] 10178 "TARGET_SSE2" 10179{ 10180 if (TARGET_XOP && <MODE>mode == V16QImode) 10181 { 10182 bool negate = false; 10183 rtx (*gen) (rtx, rtx, rtx); 10184 rtx tmp, par; 10185 int i; 10186 10187 if (<CODE> != ASHIFT) 10188 { 10189 if (CONST_INT_P (operands[2])) 10190 operands[2] = GEN_INT (-INTVAL (operands[2])); 10191 else 10192 negate = true; 10193 } 10194 par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); 10195 for (i = 0; i < 16; i++) 10196 XVECEXP (par, 0, i) = operands[2]; 10197 10198 tmp = gen_reg_rtx (V16QImode); 10199 emit_insn (gen_vec_initv16qi (tmp, par)); 10200 10201 if (negate) 10202 emit_insn (gen_negv16qi2 (tmp, tmp)); 10203 10204 gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3); 10205 emit_insn (gen (operands[0], operands[1], tmp)); 10206 } 10207 else 10208 ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]); 10209 DONE; 10210}) 10211 10212(define_expand "ashrv2di3" 10213 [(set (match_operand:V2DI 0 "register_operand") 10214 (ashiftrt:V2DI 10215 (match_operand:V2DI 1 "register_operand") 10216 (match_operand:DI 2 "nonmemory_operand")))] 10217 "TARGET_XOP" 10218{ 10219 rtx reg = gen_reg_rtx (V2DImode); 10220 rtx par; 10221 bool negate = false; 10222 int i; 10223 10224 if (CONST_INT_P (operands[2])) 10225 operands[2] = GEN_INT (-INTVAL (operands[2])); 10226 else 10227 negate = true; 10228 10229 par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2)); 10230 for (i = 0; i < 2; i++) 10231 XVECEXP (par, 0, i) = operands[2]; 10232 10233 emit_insn (gen_vec_initv2di (reg, par)); 10234 10235 if (negate) 10236 emit_insn (gen_negv2di2 (reg, reg)); 10237 10238 emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg)); 10239 DONE; 10240}) 10241 10242;; XOP FRCZ support 10243(define_insn "xop_frcz<mode>2" 10244 [(set (match_operand:FMAMODE 0 "register_operand" "=x") 10245 (unspec:FMAMODE 10246 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")] 10247 UNSPEC_FRCZ))] 10248 "TARGET_XOP" 10249 "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}" 10250 [(set_attr "type" "ssecvt1") 10251 (set_attr "mode" "<MODE>")]) 10252 10253(define_expand "xop_vmfrcz<mode>2" 10254 [(set (match_operand:VF_128 0 "register_operand") 10255 (vec_merge:VF_128 10256 (unspec:VF_128 10257 [(match_operand:VF_128 1 "nonimmediate_operand")] 10258 UNSPEC_FRCZ) 10259 (match_dup 2) 10260 (const_int 1)))] 10261 "TARGET_XOP" 10262 "operands[2] = CONST0_RTX (<MODE>mode);") 10263 10264(define_insn "*xop_vmfrcz<mode>2" 10265 [(set (match_operand:VF_128 0 "register_operand" "=x") 10266 (vec_merge:VF_128 10267 (unspec:VF_128 10268 [(match_operand:VF_128 1 "nonimmediate_operand" "xm")] 10269 UNSPEC_FRCZ) 10270 (match_operand:VF_128 2 "const0_operand") 10271 (const_int 1)))] 10272 "TARGET_XOP" 10273 "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %1}" 10274 [(set_attr "type" "ssecvt1") 10275 (set_attr "mode" "<MODE>")]) 10276 10277(define_insn "xop_maskcmp<mode>3" 10278 [(set (match_operand:VI_128 0 "register_operand" "=x") 10279 (match_operator:VI_128 1 "ix86_comparison_int_operator" 10280 [(match_operand:VI_128 2 "register_operand" "x") 10281 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] 10282 "TARGET_XOP" 10283 "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 10284 [(set_attr "type" "sse4arg") 10285 (set_attr "prefix_data16" "0") 10286 (set_attr "prefix_rep" "0") 10287 (set_attr "prefix_extra" "2") 10288 (set_attr "length_immediate" "1") 10289 (set_attr "mode" "TI")]) 10290 10291(define_insn "xop_maskcmp_uns<mode>3" 10292 [(set (match_operand:VI_128 0 "register_operand" "=x") 10293 (match_operator:VI_128 1 "ix86_comparison_uns_operator" 10294 [(match_operand:VI_128 2 "register_operand" "x") 10295 (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))] 10296 "TARGET_XOP" 10297 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 10298 [(set_attr "type" "ssecmp") 10299 (set_attr "prefix_data16" "0") 10300 (set_attr "prefix_rep" "0") 10301 (set_attr "prefix_extra" "2") 10302 (set_attr "length_immediate" "1") 10303 (set_attr "mode" "TI")]) 10304 10305;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* 10306;; and pcomneu* not to be converted to the signed ones in case somebody needs 10307;; the exact instruction generated for the intrinsic. 10308(define_insn "xop_maskcmp_uns2<mode>3" 10309 [(set (match_operand:VI_128 0 "register_operand" "=x") 10310 (unspec:VI_128 10311 [(match_operator:VI_128 1 "ix86_comparison_uns_operator" 10312 [(match_operand:VI_128 2 "register_operand" "x") 10313 (match_operand:VI_128 3 "nonimmediate_operand" "xm")])] 10314 UNSPEC_XOP_UNSIGNED_CMP))] 10315 "TARGET_XOP" 10316 "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}" 10317 [(set_attr "type" "ssecmp") 10318 (set_attr "prefix_data16" "0") 10319 (set_attr "prefix_extra" "2") 10320 (set_attr "length_immediate" "1") 10321 (set_attr "mode" "TI")]) 10322 10323;; Pcomtrue and pcomfalse support. These are useless instructions, but are 10324;; being added here to be complete. 10325(define_insn "xop_pcom_tf<mode>3" 10326 [(set (match_operand:VI_128 0 "register_operand" "=x") 10327 (unspec:VI_128 10328 [(match_operand:VI_128 1 "register_operand" "x") 10329 (match_operand:VI_128 2 "nonimmediate_operand" "xm") 10330 (match_operand:SI 3 "const_int_operand" "n")] 10331 UNSPEC_XOP_TRUEFALSE))] 10332 "TARGET_XOP" 10333{ 10334 return ((INTVAL (operands[3]) != 0) 10335 ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 10336 : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"); 10337} 10338 [(set_attr "type" "ssecmp") 10339 (set_attr "prefix_data16" "0") 10340 (set_attr "prefix_extra" "2") 10341 (set_attr "length_immediate" "1") 10342 (set_attr "mode" "TI")]) 10343 10344(define_insn "xop_vpermil2<mode>3" 10345 [(set (match_operand:VF 0 "register_operand" "=x") 10346 (unspec:VF 10347 [(match_operand:VF 1 "register_operand" "x") 10348 (match_operand:VF 2 "nonimmediate_operand" "%x") 10349 (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm") 10350 (match_operand:SI 4 "const_0_to_3_operand" "n")] 10351 UNSPEC_VPERMIL2))] 10352 "TARGET_XOP" 10353 "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}" 10354 [(set_attr "type" "sse4arg") 10355 (set_attr "length_immediate" "1") 10356 (set_attr "mode" "<MODE>")]) 10357 10358;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10359 10360(define_insn "aesenc" 10361 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 10362 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 10363 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 10364 UNSPEC_AESENC))] 10365 "TARGET_AES" 10366 "@ 10367 aesenc\t{%2, %0|%0, %2} 10368 vaesenc\t{%2, %1, %0|%0, %1, %2}" 10369 [(set_attr "isa" "noavx,avx") 10370 (set_attr "type" "sselog1") 10371 (set_attr "prefix_extra" "1") 10372 (set_attr "prefix" "orig,vex") 10373 (set_attr "btver2_decode" "double,double") 10374 (set_attr "mode" "TI")]) 10375 10376(define_insn "aesenclast" 10377 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 10378 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 10379 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 10380 UNSPEC_AESENCLAST))] 10381 "TARGET_AES" 10382 "@ 10383 aesenclast\t{%2, %0|%0, %2} 10384 vaesenclast\t{%2, %1, %0|%0, %1, %2}" 10385 [(set_attr "isa" "noavx,avx") 10386 (set_attr "type" "sselog1") 10387 (set_attr "prefix_extra" "1") 10388 (set_attr "prefix" "orig,vex") 10389 (set_attr "btver2_decode" "double,double") 10390 (set_attr "mode" "TI")]) 10391 10392(define_insn "aesdec" 10393 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 10394 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 10395 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 10396 UNSPEC_AESDEC))] 10397 "TARGET_AES" 10398 "@ 10399 aesdec\t{%2, %0|%0, %2} 10400 vaesdec\t{%2, %1, %0|%0, %1, %2}" 10401 [(set_attr "isa" "noavx,avx") 10402 (set_attr "type" "sselog1") 10403 (set_attr "prefix_extra" "1") 10404 (set_attr "prefix" "orig,vex") 10405 (set_attr "btver2_decode" "double,double") 10406 (set_attr "mode" "TI")]) 10407 10408(define_insn "aesdeclast" 10409 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 10410 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 10411 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")] 10412 UNSPEC_AESDECLAST))] 10413 "TARGET_AES" 10414 "@ 10415 aesdeclast\t{%2, %0|%0, %2} 10416 vaesdeclast\t{%2, %1, %0|%0, %1, %2}" 10417 [(set_attr "isa" "noavx,avx") 10418 (set_attr "type" "sselog1") 10419 (set_attr "prefix_extra" "1") 10420 (set_attr "prefix" "orig,vex") 10421 (set_attr "btver2_decode" "double,double") 10422 (set_attr "mode" "TI")]) 10423 10424(define_insn "aesimc" 10425 [(set (match_operand:V2DI 0 "register_operand" "=x") 10426 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")] 10427 UNSPEC_AESIMC))] 10428 "TARGET_AES" 10429 "%vaesimc\t{%1, %0|%0, %1}" 10430 [(set_attr "type" "sselog1") 10431 (set_attr "prefix_extra" "1") 10432 (set_attr "prefix" "maybe_vex") 10433 (set_attr "mode" "TI")]) 10434 10435(define_insn "aeskeygenassist" 10436 [(set (match_operand:V2DI 0 "register_operand" "=x") 10437 (unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm") 10438 (match_operand:SI 2 "const_0_to_255_operand" "n")] 10439 UNSPEC_AESKEYGENASSIST))] 10440 "TARGET_AES" 10441 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" 10442 [(set_attr "type" "sselog1") 10443 (set_attr "prefix_extra" "1") 10444 (set_attr "length_immediate" "1") 10445 (set_attr "prefix" "maybe_vex") 10446 (set_attr "mode" "TI")]) 10447 10448(define_insn "pclmulqdq" 10449 [(set (match_operand:V2DI 0 "register_operand" "=x,x") 10450 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") 10451 (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm") 10452 (match_operand:SI 3 "const_0_to_255_operand" "n,n")] 10453 UNSPEC_PCLMUL))] 10454 "TARGET_PCLMUL" 10455 "@ 10456 pclmulqdq\t{%3, %2, %0|%0, %2, %3} 10457 vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10458 [(set_attr "isa" "noavx,avx") 10459 (set_attr "type" "sselog1") 10460 (set_attr "prefix_extra" "1") 10461 (set_attr "length_immediate" "1") 10462 (set_attr "prefix" "orig,vex") 10463 (set_attr "mode" "TI")]) 10464 10465(define_expand "avx_vzeroall" 10466 [(match_par_dup 0 [(const_int 0)])] 10467 "TARGET_AVX" 10468{ 10469 int nregs = TARGET_64BIT ? 16 : 8; 10470 int regno; 10471 10472 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1)); 10473 10474 XVECEXP (operands[0], 0, 0) 10475 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx), 10476 UNSPECV_VZEROALL); 10477 10478 for (regno = 0; regno < nregs; regno++) 10479 XVECEXP (operands[0], 0, regno + 1) 10480 = gen_rtx_SET (VOIDmode, 10481 gen_rtx_REG (V8SImode, SSE_REGNO (regno)), 10482 CONST0_RTX (V8SImode)); 10483}) 10484 10485(define_insn "*avx_vzeroall" 10486 [(match_parallel 0 "vzeroall_operation" 10487 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])] 10488 "TARGET_AVX" 10489 "vzeroall" 10490 [(set_attr "type" "sse") 10491 (set_attr "modrm" "0") 10492 (set_attr "memory" "none") 10493 (set_attr "prefix" "vex") 10494 (set_attr "btver2_decode" "vector") 10495 (set_attr "mode" "OI")]) 10496 10497;; Clear the upper 128bits of AVX registers, equivalent to a NOP 10498;; if the upper 128bits are unused. 10499(define_insn "avx_vzeroupper" 10500 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)] 10501 "TARGET_AVX" 10502 "vzeroupper" 10503 [(set_attr "type" "sse") 10504 (set_attr "modrm" "0") 10505 (set_attr "memory" "none") 10506 (set_attr "prefix" "vex") 10507 (set_attr "btver2_decode" "vector") 10508 (set_attr "mode" "OI")]) 10509 10510(define_mode_attr AVXTOSSEMODE 10511 [(V4DI "V2DI") (V2DI "V2DI") 10512 (V8SI "V4SI") (V4SI "V4SI") 10513 (V16HI "V8HI") (V8HI "V8HI") 10514 (V32QI "V16QI") (V16QI "V16QI")]) 10515 10516(define_insn "avx2_pbroadcast<mode>" 10517 [(set (match_operand:VI 0 "register_operand" "=x") 10518 (vec_duplicate:VI 10519 (vec_select:<ssescalarmode> 10520 (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm") 10521 (parallel [(const_int 0)]))))] 10522 "TARGET_AVX2" 10523 "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %1}" 10524 [(set_attr "type" "ssemov") 10525 (set_attr "prefix_extra" "1") 10526 (set_attr "prefix" "vex") 10527 (set_attr "mode" "<sseinsnmode>")]) 10528 10529(define_insn "avx2_pbroadcast<mode>_1" 10530 [(set (match_operand:VI_256 0 "register_operand" "=x") 10531 (vec_duplicate:VI_256 10532 (vec_select:<ssescalarmode> 10533 (match_operand:VI_256 1 "nonimmediate_operand" "xm") 10534 (parallel [(const_int 0)]))))] 10535 "TARGET_AVX2" 10536 "vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}" 10537 [(set_attr "type" "ssemov") 10538 (set_attr "prefix_extra" "1") 10539 (set_attr "prefix" "vex") 10540 (set_attr "mode" "<sseinsnmode>")]) 10541 10542(define_insn "avx2_permvar<mode>" 10543 [(set (match_operand:VI4F_256 0 "register_operand" "=x") 10544 (unspec:VI4F_256 10545 [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm") 10546 (match_operand:V8SI 2 "register_operand" "x")] 10547 UNSPEC_VPERMVAR))] 10548 "TARGET_AVX2" 10549 "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" 10550 [(set_attr "type" "sselog") 10551 (set_attr "prefix" "vex") 10552 (set_attr "mode" "OI")]) 10553 10554(define_expand "avx2_perm<mode>" 10555 [(match_operand:VI8F_256 0 "register_operand") 10556 (match_operand:VI8F_256 1 "nonimmediate_operand") 10557 (match_operand:SI 2 "const_0_to_255_operand")] 10558 "TARGET_AVX2" 10559{ 10560 int mask = INTVAL (operands[2]); 10561 emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1], 10562 GEN_INT ((mask >> 0) & 3), 10563 GEN_INT ((mask >> 2) & 3), 10564 GEN_INT ((mask >> 4) & 3), 10565 GEN_INT ((mask >> 6) & 3))); 10566 DONE; 10567}) 10568 10569(define_insn "avx2_perm<mode>_1" 10570 [(set (match_operand:VI8F_256 0 "register_operand" "=x") 10571 (vec_select:VI8F_256 10572 (match_operand:VI8F_256 1 "nonimmediate_operand" "xm") 10573 (parallel [(match_operand 2 "const_0_to_3_operand") 10574 (match_operand 3 "const_0_to_3_operand") 10575 (match_operand 4 "const_0_to_3_operand") 10576 (match_operand 5 "const_0_to_3_operand")])))] 10577 "TARGET_AVX2" 10578{ 10579 int mask = 0; 10580 mask |= INTVAL (operands[2]) << 0; 10581 mask |= INTVAL (operands[3]) << 2; 10582 mask |= INTVAL (operands[4]) << 4; 10583 mask |= INTVAL (operands[5]) << 6; 10584 operands[2] = GEN_INT (mask); 10585 return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"; 10586} 10587 [(set_attr "type" "sselog") 10588 (set_attr "prefix" "vex") 10589 (set_attr "mode" "<sseinsnmode>")]) 10590 10591(define_insn "avx2_permv2ti" 10592 [(set (match_operand:V4DI 0 "register_operand" "=x") 10593 (unspec:V4DI 10594 [(match_operand:V4DI 1 "register_operand" "x") 10595 (match_operand:V4DI 2 "nonimmediate_operand" "xm") 10596 (match_operand:SI 3 "const_0_to_255_operand" "n")] 10597 UNSPEC_VPERMTI))] 10598 "TARGET_AVX2" 10599 "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10600 [(set_attr "type" "sselog") 10601 (set_attr "prefix" "vex") 10602 (set_attr "mode" "OI")]) 10603 10604(define_insn "avx2_vec_dupv4df" 10605 [(set (match_operand:V4DF 0 "register_operand" "=x") 10606 (vec_duplicate:V4DF 10607 (vec_select:DF 10608 (match_operand:V2DF 1 "register_operand" "x") 10609 (parallel [(const_int 0)]))))] 10610 "TARGET_AVX2" 10611 "vbroadcastsd\t{%1, %0|%0, %1}" 10612 [(set_attr "type" "sselog1") 10613 (set_attr "prefix" "vex") 10614 (set_attr "mode" "V4DF")]) 10615 10616;; Modes handled by AVX vec_dup patterns. 10617(define_mode_iterator AVX_VEC_DUP_MODE 10618 [V8SI V8SF V4DI V4DF]) 10619 10620(define_insn "vec_dup<mode>" 10621 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x") 10622 (vec_duplicate:AVX_VEC_DUP_MODE 10623 (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,?x")))] 10624 "TARGET_AVX" 10625 "@ 10626 vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} 10627 vbroadcast<ssescalarmodesuffix>\t{%x1, %0|%0, %x1} 10628 #" 10629 [(set_attr "type" "ssemov") 10630 (set_attr "prefix_extra" "1") 10631 (set_attr "prefix" "vex") 10632 (set_attr "isa" "*,avx2,noavx2") 10633 (set_attr "mode" "V8SF")]) 10634 10635(define_insn "avx2_vbroadcasti128_<mode>" 10636 [(set (match_operand:VI_256 0 "register_operand" "=x") 10637 (vec_concat:VI_256 10638 (match_operand:<ssehalfvecmode> 1 "memory_operand" "m") 10639 (match_dup 1)))] 10640 "TARGET_AVX2" 10641 "vbroadcasti128\t{%1, %0|%0, %1}" 10642 [(set_attr "type" "ssemov") 10643 (set_attr "prefix_extra" "1") 10644 (set_attr "prefix" "vex") 10645 (set_attr "mode" "OI")]) 10646 10647(define_split 10648 [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand") 10649 (vec_duplicate:AVX_VEC_DUP_MODE 10650 (match_operand:<ssescalarmode> 1 "register_operand")))] 10651 "TARGET_AVX && !TARGET_AVX2 && reload_completed" 10652 [(set (match_dup 2) 10653 (vec_duplicate:<ssehalfvecmode> (match_dup 1))) 10654 (set (match_dup 0) 10655 (vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))] 10656 "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));") 10657 10658(define_insn "avx_vbroadcastf128_<mode>" 10659 [(set (match_operand:V_256 0 "register_operand" "=x,x,x") 10660 (vec_concat:V_256 10661 (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x") 10662 (match_dup 1)))] 10663 "TARGET_AVX" 10664 "@ 10665 vbroadcast<i128>\t{%1, %0|%0, %1} 10666 vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1} 10667 vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" 10668 [(set_attr "type" "ssemov,sselog1,sselog1") 10669 (set_attr "prefix_extra" "1") 10670 (set_attr "length_immediate" "0,1,1") 10671 (set_attr "prefix" "vex") 10672 (set_attr "mode" "<sseinsnmode>")]) 10673 10674;; Recognize broadcast as a vec_select as produced by builtin_vec_perm. 10675;; If it so happens that the input is in memory, use vbroadcast. 10676;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128). 10677(define_insn "*avx_vperm_broadcast_v4sf" 10678 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") 10679 (vec_select:V4SF 10680 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x") 10681 (match_parallel 2 "avx_vbroadcast_operand" 10682 [(match_operand 3 "const_int_operand" "C,n,n")])))] 10683 "TARGET_AVX" 10684{ 10685 int elt = INTVAL (operands[3]); 10686 switch (which_alternative) 10687 { 10688 case 0: 10689 case 1: 10690 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4); 10691 return "vbroadcastss\t{%1, %0|%0, %1}"; 10692 case 2: 10693 operands[2] = GEN_INT (elt * 0x55); 10694 return "vpermilps\t{%2, %1, %0|%0, %1, %2}"; 10695 default: 10696 gcc_unreachable (); 10697 } 10698} 10699 [(set_attr "type" "ssemov,ssemov,sselog1") 10700 (set_attr "prefix_extra" "1") 10701 (set_attr "length_immediate" "0,0,1") 10702 (set_attr "prefix" "vex") 10703 (set_attr "mode" "SF,SF,V4SF")]) 10704 10705(define_insn_and_split "*avx_vperm_broadcast_<mode>" 10706 [(set (match_operand:VF_256 0 "register_operand" "=x,x,x") 10707 (vec_select:VF_256 10708 (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x") 10709 (match_parallel 2 "avx_vbroadcast_operand" 10710 [(match_operand 3 "const_int_operand" "C,n,n")])))] 10711 "TARGET_AVX" 10712 "#" 10713 "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)" 10714 [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))] 10715{ 10716 rtx op0 = operands[0], op1 = operands[1]; 10717 int elt = INTVAL (operands[3]); 10718 10719 if (REG_P (op1)) 10720 { 10721 int mask; 10722 10723 if (TARGET_AVX2 && elt == 0) 10724 { 10725 emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode, 10726 op1))); 10727 DONE; 10728 } 10729 10730 /* Shuffle element we care about into all elements of the 128-bit lane. 10731 The other lane gets shuffled too, but we don't care. */ 10732 if (<MODE>mode == V4DFmode) 10733 mask = (elt & 1 ? 15 : 0); 10734 else 10735 mask = (elt & 3) * 0x55; 10736 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask))); 10737 10738 /* Shuffle the lane we care about into both lanes of the dest. */ 10739 mask = (elt / (<ssescalarnum> / 2)) * 0x11; 10740 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask))); 10741 DONE; 10742 } 10743 10744 operands[1] = adjust_address_nv (op1, <ssescalarmode>mode, 10745 elt * GET_MODE_SIZE (<ssescalarmode>mode)); 10746}) 10747 10748(define_expand "avx_vpermil<mode>" 10749 [(set (match_operand:VF2 0 "register_operand") 10750 (vec_select:VF2 10751 (match_operand:VF2 1 "nonimmediate_operand") 10752 (match_operand:SI 2 "const_0_to_255_operand")))] 10753 "TARGET_AVX" 10754{ 10755 int mask = INTVAL (operands[2]); 10756 rtx perm[<ssescalarnum>]; 10757 10758 perm[0] = GEN_INT (mask & 1); 10759 perm[1] = GEN_INT ((mask >> 1) & 1); 10760 if (<MODE>mode == V4DFmode) 10761 { 10762 perm[2] = GEN_INT (((mask >> 2) & 1) + 2); 10763 perm[3] = GEN_INT (((mask >> 3) & 1) + 2); 10764 } 10765 10766 operands[2] 10767 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); 10768}) 10769 10770(define_expand "avx_vpermil<mode>" 10771 [(set (match_operand:VF1 0 "register_operand") 10772 (vec_select:VF1 10773 (match_operand:VF1 1 "nonimmediate_operand") 10774 (match_operand:SI 2 "const_0_to_255_operand")))] 10775 "TARGET_AVX" 10776{ 10777 int mask = INTVAL (operands[2]); 10778 rtx perm[<ssescalarnum>]; 10779 10780 perm[0] = GEN_INT (mask & 3); 10781 perm[1] = GEN_INT ((mask >> 2) & 3); 10782 perm[2] = GEN_INT ((mask >> 4) & 3); 10783 perm[3] = GEN_INT ((mask >> 6) & 3); 10784 if (<MODE>mode == V8SFmode) 10785 { 10786 perm[4] = GEN_INT ((mask & 3) + 4); 10787 perm[5] = GEN_INT (((mask >> 2) & 3) + 4); 10788 perm[6] = GEN_INT (((mask >> 4) & 3) + 4); 10789 perm[7] = GEN_INT (((mask >> 6) & 3) + 4); 10790 } 10791 10792 operands[2] 10793 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); 10794}) 10795 10796(define_insn "*avx_vpermilp<mode>" 10797 [(set (match_operand:VF 0 "register_operand" "=x") 10798 (vec_select:VF 10799 (match_operand:VF 1 "nonimmediate_operand" "xm") 10800 (match_parallel 2 "" 10801 [(match_operand 3 "const_int_operand")])))] 10802 "TARGET_AVX 10803 && avx_vpermilp_parallel (operands[2], <MODE>mode)" 10804{ 10805 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1; 10806 operands[2] = GEN_INT (mask); 10807 return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"; 10808} 10809 [(set_attr "type" "sselog") 10810 (set_attr "prefix_extra" "1") 10811 (set_attr "length_immediate" "1") 10812 (set_attr "prefix" "vex") 10813 (set_attr "mode" "<MODE>")]) 10814 10815(define_insn "avx_vpermilvar<mode>3" 10816 [(set (match_operand:VF 0 "register_operand" "=x") 10817 (unspec:VF 10818 [(match_operand:VF 1 "register_operand" "x") 10819 (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "xm")] 10820 UNSPEC_VPERMIL))] 10821 "TARGET_AVX" 10822 "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 10823 [(set_attr "type" "sselog") 10824 (set_attr "prefix_extra" "1") 10825 (set_attr "prefix" "vex") 10826 (set_attr "btver2_decode" "vector") 10827 (set_attr "mode" "<MODE>")]) 10828 10829(define_expand "avx_vperm2f128<mode>3" 10830 [(set (match_operand:AVX256MODE2P 0 "register_operand") 10831 (unspec:AVX256MODE2P 10832 [(match_operand:AVX256MODE2P 1 "register_operand") 10833 (match_operand:AVX256MODE2P 2 "nonimmediate_operand") 10834 (match_operand:SI 3 "const_0_to_255_operand")] 10835 UNSPEC_VPERMIL2F128))] 10836 "TARGET_AVX" 10837{ 10838 int mask = INTVAL (operands[3]); 10839 if ((mask & 0x88) == 0) 10840 { 10841 rtx perm[<ssescalarnum>], t1, t2; 10842 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2; 10843 10844 base = (mask & 3) * nelt2; 10845 for (i = 0; i < nelt2; ++i) 10846 perm[i] = GEN_INT (base + i); 10847 10848 base = ((mask >> 4) & 3) * nelt2; 10849 for (i = 0; i < nelt2; ++i) 10850 perm[i + nelt2] = GEN_INT (base + i); 10851 10852 t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode, 10853 operands[1], operands[2]); 10854 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm)); 10855 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1); 10856 t2 = gen_rtx_SET (VOIDmode, operands[0], t2); 10857 emit_insn (t2); 10858 DONE; 10859 } 10860}) 10861 10862;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which 10863;; means that in order to represent this properly in rtl we'd have to 10864;; nest *another* vec_concat with a zero operand and do the select from 10865;; a 4x wide vector. That doesn't seem very nice. 10866(define_insn "*avx_vperm2f128<mode>_full" 10867 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") 10868 (unspec:AVX256MODE2P 10869 [(match_operand:AVX256MODE2P 1 "register_operand" "x") 10870 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm") 10871 (match_operand:SI 3 "const_0_to_255_operand" "n")] 10872 UNSPEC_VPERMIL2F128))] 10873 "TARGET_AVX" 10874 "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 10875 [(set_attr "type" "sselog") 10876 (set_attr "prefix_extra" "1") 10877 (set_attr "length_immediate" "1") 10878 (set_attr "prefix" "vex") 10879 (set_attr "mode" "<sseinsnmode>")]) 10880 10881(define_insn "*avx_vperm2f128<mode>_nozero" 10882 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") 10883 (vec_select:AVX256MODE2P 10884 (vec_concat:<ssedoublevecmode> 10885 (match_operand:AVX256MODE2P 1 "register_operand" "x") 10886 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")) 10887 (match_parallel 3 "" 10888 [(match_operand 4 "const_int_operand")])))] 10889 "TARGET_AVX 10890 && avx_vperm2f128_parallel (operands[3], <MODE>mode)" 10891{ 10892 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1; 10893 if (mask == 0x12) 10894 return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}"; 10895 if (mask == 0x20) 10896 return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}"; 10897 operands[3] = GEN_INT (mask); 10898 return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 10899} 10900 [(set_attr "type" "sselog") 10901 (set_attr "prefix_extra" "1") 10902 (set_attr "length_immediate" "1") 10903 (set_attr "prefix" "vex") 10904 (set_attr "mode" "<sseinsnmode>")]) 10905 10906(define_expand "avx_vinsertf128<mode>" 10907 [(match_operand:V_256 0 "register_operand") 10908 (match_operand:V_256 1 "register_operand") 10909 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand") 10910 (match_operand:SI 3 "const_0_to_1_operand")] 10911 "TARGET_AVX" 10912{ 10913 rtx (*insn)(rtx, rtx, rtx); 10914 10915 switch (INTVAL (operands[3])) 10916 { 10917 case 0: 10918 insn = gen_vec_set_lo_<mode>; 10919 break; 10920 case 1: 10921 insn = gen_vec_set_hi_<mode>; 10922 break; 10923 default: 10924 gcc_unreachable (); 10925 } 10926 10927 emit_insn (insn (operands[0], operands[1], operands[2])); 10928 DONE; 10929}) 10930 10931(define_insn "avx2_vec_set_lo_v4di" 10932 [(set (match_operand:V4DI 0 "register_operand" "=x") 10933 (vec_concat:V4DI 10934 (match_operand:V2DI 2 "nonimmediate_operand" "xm") 10935 (vec_select:V2DI 10936 (match_operand:V4DI 1 "register_operand" "x") 10937 (parallel [(const_int 2) (const_int 3)]))))] 10938 "TARGET_AVX2" 10939 "vinserti128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 10940 [(set_attr "type" "sselog") 10941 (set_attr "prefix_extra" "1") 10942 (set_attr "length_immediate" "1") 10943 (set_attr "prefix" "vex") 10944 (set_attr "mode" "OI")]) 10945 10946(define_insn "avx2_vec_set_hi_v4di" 10947 [(set (match_operand:V4DI 0 "register_operand" "=x") 10948 (vec_concat:V4DI 10949 (vec_select:V2DI 10950 (match_operand:V4DI 1 "register_operand" "x") 10951 (parallel [(const_int 0) (const_int 1)])) 10952 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] 10953 "TARGET_AVX2" 10954 "vinserti128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 10955 [(set_attr "type" "sselog") 10956 (set_attr "prefix_extra" "1") 10957 (set_attr "length_immediate" "1") 10958 (set_attr "prefix" "vex") 10959 (set_attr "mode" "OI")]) 10960 10961(define_insn "vec_set_lo_<mode>" 10962 [(set (match_operand:VI8F_256 0 "register_operand" "=x") 10963 (vec_concat:VI8F_256 10964 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm") 10965 (vec_select:<ssehalfvecmode> 10966 (match_operand:VI8F_256 1 "register_operand" "x") 10967 (parallel [(const_int 2) (const_int 3)]))))] 10968 "TARGET_AVX" 10969 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 10970 [(set_attr "type" "sselog") 10971 (set_attr "prefix_extra" "1") 10972 (set_attr "length_immediate" "1") 10973 (set_attr "prefix" "vex") 10974 (set_attr "mode" "<sseinsnmode>")]) 10975 10976(define_insn "vec_set_hi_<mode>" 10977 [(set (match_operand:VI8F_256 0 "register_operand" "=x") 10978 (vec_concat:VI8F_256 10979 (vec_select:<ssehalfvecmode> 10980 (match_operand:VI8F_256 1 "register_operand" "x") 10981 (parallel [(const_int 0) (const_int 1)])) 10982 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))] 10983 "TARGET_AVX" 10984 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 10985 [(set_attr "type" "sselog") 10986 (set_attr "prefix_extra" "1") 10987 (set_attr "length_immediate" "1") 10988 (set_attr "prefix" "vex") 10989 (set_attr "mode" "<sseinsnmode>")]) 10990 10991(define_insn "vec_set_lo_<mode>" 10992 [(set (match_operand:VI4F_256 0 "register_operand" "=x") 10993 (vec_concat:VI4F_256 10994 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm") 10995 (vec_select:<ssehalfvecmode> 10996 (match_operand:VI4F_256 1 "register_operand" "x") 10997 (parallel [(const_int 4) (const_int 5) 10998 (const_int 6) (const_int 7)]))))] 10999 "TARGET_AVX" 11000 "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 11001 [(set_attr "type" "sselog") 11002 (set_attr "prefix_extra" "1") 11003 (set_attr "length_immediate" "1") 11004 (set_attr "prefix" "vex") 11005 (set_attr "mode" "<sseinsnmode>")]) 11006 11007(define_insn "vec_set_hi_<mode>" 11008 [(set (match_operand:VI4F_256 0 "register_operand" "=x") 11009 (vec_concat:VI4F_256 11010 (vec_select:<ssehalfvecmode> 11011 (match_operand:VI4F_256 1 "register_operand" "x") 11012 (parallel [(const_int 0) (const_int 1) 11013 (const_int 2) (const_int 3)])) 11014 (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "xm")))] 11015 "TARGET_AVX" 11016 "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 11017 [(set_attr "type" "sselog") 11018 (set_attr "prefix_extra" "1") 11019 (set_attr "length_immediate" "1") 11020 (set_attr "prefix" "vex") 11021 (set_attr "mode" "<sseinsnmode>")]) 11022 11023(define_insn "vec_set_lo_v16hi" 11024 [(set (match_operand:V16HI 0 "register_operand" "=x") 11025 (vec_concat:V16HI 11026 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 11027 (vec_select:V8HI 11028 (match_operand:V16HI 1 "register_operand" "x") 11029 (parallel [(const_int 8) (const_int 9) 11030 (const_int 10) (const_int 11) 11031 (const_int 12) (const_int 13) 11032 (const_int 14) (const_int 15)]))))] 11033 "TARGET_AVX" 11034 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 11035 [(set_attr "type" "sselog") 11036 (set_attr "prefix_extra" "1") 11037 (set_attr "length_immediate" "1") 11038 (set_attr "prefix" "vex") 11039 (set_attr "mode" "OI")]) 11040 11041(define_insn "vec_set_hi_v16hi" 11042 [(set (match_operand:V16HI 0 "register_operand" "=x") 11043 (vec_concat:V16HI 11044 (vec_select:V8HI 11045 (match_operand:V16HI 1 "register_operand" "x") 11046 (parallel [(const_int 0) (const_int 1) 11047 (const_int 2) (const_int 3) 11048 (const_int 4) (const_int 5) 11049 (const_int 6) (const_int 7)])) 11050 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] 11051 "TARGET_AVX" 11052 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 11053 [(set_attr "type" "sselog") 11054 (set_attr "prefix_extra" "1") 11055 (set_attr "length_immediate" "1") 11056 (set_attr "prefix" "vex") 11057 (set_attr "mode" "OI")]) 11058 11059(define_insn "vec_set_lo_v32qi" 11060 [(set (match_operand:V32QI 0 "register_operand" "=x") 11061 (vec_concat:V32QI 11062 (match_operand:V16QI 2 "nonimmediate_operand" "xm") 11063 (vec_select:V16QI 11064 (match_operand:V32QI 1 "register_operand" "x") 11065 (parallel [(const_int 16) (const_int 17) 11066 (const_int 18) (const_int 19) 11067 (const_int 20) (const_int 21) 11068 (const_int 22) (const_int 23) 11069 (const_int 24) (const_int 25) 11070 (const_int 26) (const_int 27) 11071 (const_int 28) (const_int 29) 11072 (const_int 30) (const_int 31)]))))] 11073 "TARGET_AVX" 11074 "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 11075 [(set_attr "type" "sselog") 11076 (set_attr "prefix_extra" "1") 11077 (set_attr "length_immediate" "1") 11078 (set_attr "prefix" "vex") 11079 (set_attr "mode" "OI")]) 11080 11081(define_insn "vec_set_hi_v32qi" 11082 [(set (match_operand:V32QI 0 "register_operand" "=x") 11083 (vec_concat:V32QI 11084 (vec_select:V16QI 11085 (match_operand:V32QI 1 "register_operand" "x") 11086 (parallel [(const_int 0) (const_int 1) 11087 (const_int 2) (const_int 3) 11088 (const_int 4) (const_int 5) 11089 (const_int 6) (const_int 7) 11090 (const_int 8) (const_int 9) 11091 (const_int 10) (const_int 11) 11092 (const_int 12) (const_int 13) 11093 (const_int 14) (const_int 15)])) 11094 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] 11095 "TARGET_AVX" 11096 "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 11097 [(set_attr "type" "sselog") 11098 (set_attr "prefix_extra" "1") 11099 (set_attr "length_immediate" "1") 11100 (set_attr "prefix" "vex") 11101 (set_attr "mode" "OI")]) 11102 11103(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>" 11104 [(set (match_operand:V48_AVX2 0 "register_operand" "=x") 11105 (unspec:V48_AVX2 11106 [(match_operand:<sseintvecmode> 2 "register_operand" "x") 11107 (match_operand:V48_AVX2 1 "memory_operand" "m")] 11108 UNSPEC_MASKMOV))] 11109 "TARGET_AVX" 11110 "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" 11111 [(set_attr "type" "sselog1") 11112 (set_attr "prefix_extra" "1") 11113 (set_attr "prefix" "vex") 11114 (set_attr "btver2_decode" "vector") 11115 (set_attr "mode" "<sseinsnmode>")]) 11116 11117(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>" 11118 [(set (match_operand:V48_AVX2 0 "memory_operand" "+m") 11119 (unspec:V48_AVX2 11120 [(match_operand:<sseintvecmode> 1 "register_operand" "x") 11121 (match_operand:V48_AVX2 2 "register_operand" "x") 11122 (match_dup 0)] 11123 UNSPEC_MASKMOV))] 11124 "TARGET_AVX" 11125 "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11126 [(set_attr "type" "sselog1") 11127 (set_attr "prefix_extra" "1") 11128 (set_attr "prefix" "vex") 11129 (set_attr "btver2_decode" "vector") 11130 (set_attr "mode" "<sseinsnmode>")]) 11131 11132(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>" 11133 [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m") 11134 (unspec:AVX256MODE2P 11135 [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")] 11136 UNSPEC_CAST))] 11137 "TARGET_AVX" 11138 "#" 11139 "&& reload_completed" 11140 [(const_int 0)] 11141{ 11142 rtx op0 = operands[0]; 11143 rtx op1 = operands[1]; 11144 if (REG_P (op0)) 11145 op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0)); 11146 else 11147 op1 = gen_rtx_REG (<MODE>mode, REGNO (op1)); 11148 emit_move_insn (op0, op1); 11149 DONE; 11150}) 11151 11152(define_expand "vec_init<mode>" 11153 [(match_operand:V_256 0 "register_operand") 11154 (match_operand 1)] 11155 "TARGET_AVX" 11156{ 11157 ix86_expand_vector_init (false, operands[0], operands[1]); 11158 DONE; 11159}) 11160 11161(define_expand "avx2_extracti128" 11162 [(match_operand:V2DI 0 "nonimmediate_operand") 11163 (match_operand:V4DI 1 "register_operand") 11164 (match_operand:SI 2 "const_0_to_1_operand")] 11165 "TARGET_AVX2" 11166{ 11167 rtx (*insn)(rtx, rtx); 11168 11169 switch (INTVAL (operands[2])) 11170 { 11171 case 0: 11172 insn = gen_vec_extract_lo_v4di; 11173 break; 11174 case 1: 11175 insn = gen_vec_extract_hi_v4di; 11176 break; 11177 default: 11178 gcc_unreachable (); 11179 } 11180 11181 emit_insn (insn (operands[0], operands[1])); 11182 DONE; 11183}) 11184 11185(define_expand "avx2_inserti128" 11186 [(match_operand:V4DI 0 "register_operand") 11187 (match_operand:V4DI 1 "register_operand") 11188 (match_operand:V2DI 2 "nonimmediate_operand") 11189 (match_operand:SI 3 "const_0_to_1_operand")] 11190 "TARGET_AVX2" 11191{ 11192 rtx (*insn)(rtx, rtx, rtx); 11193 11194 switch (INTVAL (operands[3])) 11195 { 11196 case 0: 11197 insn = gen_avx2_vec_set_lo_v4di; 11198 break; 11199 case 1: 11200 insn = gen_avx2_vec_set_hi_v4di; 11201 break; 11202 default: 11203 gcc_unreachable (); 11204 } 11205 11206 emit_insn (insn (operands[0], operands[1], operands[2])); 11207 DONE; 11208}) 11209 11210(define_insn "avx2_ashrv<mode>" 11211 [(set (match_operand:VI4_AVX2 0 "register_operand" "=x") 11212 (ashiftrt:VI4_AVX2 11213 (match_operand:VI4_AVX2 1 "register_operand" "x") 11214 (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")))] 11215 "TARGET_AVX2" 11216 "vpsravd\t{%2, %1, %0|%0, %1, %2}" 11217 [(set_attr "type" "sseishft") 11218 (set_attr "prefix" "vex") 11219 (set_attr "mode" "<sseinsnmode>")]) 11220 11221(define_insn "avx2_<shift_insn>v<mode>" 11222 [(set (match_operand:VI48_AVX2 0 "register_operand" "=x") 11223 (any_lshift:VI48_AVX2 11224 (match_operand:VI48_AVX2 1 "register_operand" "x") 11225 (match_operand:VI48_AVX2 2 "nonimmediate_operand" "xm")))] 11226 "TARGET_AVX2" 11227 "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" 11228 [(set_attr "type" "sseishft") 11229 (set_attr "prefix" "vex") 11230 (set_attr "mode" "<sseinsnmode>")]) 11231 11232(define_insn "avx_vec_concat<mode>" 11233 [(set (match_operand:V_256 0 "register_operand" "=x,x") 11234 (vec_concat:V_256 11235 (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x") 11236 (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))] 11237 "TARGET_AVX" 11238{ 11239 switch (which_alternative) 11240 { 11241 case 0: 11242 return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}"; 11243 case 1: 11244 switch (get_attr_mode (insn)) 11245 { 11246 case MODE_V8SF: 11247 return "vmovaps\t{%1, %x0|%x0, %1}"; 11248 case MODE_V4DF: 11249 return "vmovapd\t{%1, %x0|%x0, %1}"; 11250 default: 11251 return "vmovdqa\t{%1, %x0|%x0, %1}"; 11252 } 11253 default: 11254 gcc_unreachable (); 11255 } 11256} 11257 [(set_attr "type" "sselog,ssemov") 11258 (set_attr "prefix_extra" "1,*") 11259 (set_attr "length_immediate" "1,*") 11260 (set_attr "prefix" "vex") 11261 (set_attr "mode" "<sseinsnmode>")]) 11262 11263(define_insn "vcvtph2ps" 11264 [(set (match_operand:V4SF 0 "register_operand" "=x") 11265 (vec_select:V4SF 11266 (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "x")] 11267 UNSPEC_VCVTPH2PS) 11268 (parallel [(const_int 0) (const_int 1) 11269 (const_int 2) (const_int 3)])))] 11270 "TARGET_F16C" 11271 "vcvtph2ps\t{%1, %0|%0, %1}" 11272 [(set_attr "type" "ssecvt") 11273 (set_attr "prefix" "vex") 11274 (set_attr "mode" "V4SF")]) 11275 11276(define_insn "*vcvtph2ps_load" 11277 [(set (match_operand:V4SF 0 "register_operand" "=x") 11278 (unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")] 11279 UNSPEC_VCVTPH2PS))] 11280 "TARGET_F16C" 11281 "vcvtph2ps\t{%1, %0|%0, %1}" 11282 [(set_attr "type" "ssecvt") 11283 (set_attr "prefix" "vex") 11284 (set_attr "mode" "V8SF")]) 11285 11286(define_insn "vcvtph2ps256" 11287 [(set (match_operand:V8SF 0 "register_operand" "=x") 11288 (unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] 11289 UNSPEC_VCVTPH2PS))] 11290 "TARGET_F16C" 11291 "vcvtph2ps\t{%1, %0|%0, %1}" 11292 [(set_attr "type" "ssecvt") 11293 (set_attr "prefix" "vex") 11294 (set_attr "btver2_decode" "double") 11295 (set_attr "mode" "V8SF")]) 11296 11297(define_expand "vcvtps2ph" 11298 [(set (match_operand:V8HI 0 "register_operand") 11299 (vec_concat:V8HI 11300 (unspec:V4HI [(match_operand:V4SF 1 "register_operand") 11301 (match_operand:SI 2 "const_0_to_255_operand")] 11302 UNSPEC_VCVTPS2PH) 11303 (match_dup 3)))] 11304 "TARGET_F16C" 11305 "operands[3] = CONST0_RTX (V4HImode);") 11306 11307(define_insn "*vcvtps2ph" 11308 [(set (match_operand:V8HI 0 "register_operand" "=x") 11309 (vec_concat:V8HI 11310 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x") 11311 (match_operand:SI 2 "const_0_to_255_operand" "N")] 11312 UNSPEC_VCVTPS2PH) 11313 (match_operand:V4HI 3 "const0_operand")))] 11314 "TARGET_F16C" 11315 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" 11316 [(set_attr "type" "ssecvt") 11317 (set_attr "prefix" "vex") 11318 (set_attr "mode" "V4SF")]) 11319 11320(define_insn "*vcvtps2ph_store" 11321 [(set (match_operand:V4HI 0 "memory_operand" "=m") 11322 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x") 11323 (match_operand:SI 2 "const_0_to_255_operand" "N")] 11324 UNSPEC_VCVTPS2PH))] 11325 "TARGET_F16C" 11326 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" 11327 [(set_attr "type" "ssecvt") 11328 (set_attr "prefix" "vex") 11329 (set_attr "mode" "V4SF")]) 11330 11331(define_insn "vcvtps2ph256" 11332 [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm") 11333 (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x") 11334 (match_operand:SI 2 "const_0_to_255_operand" "N")] 11335 UNSPEC_VCVTPS2PH))] 11336 "TARGET_F16C" 11337 "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" 11338 [(set_attr "type" "ssecvt") 11339 (set_attr "prefix" "vex") 11340 (set_attr "btver2_decode" "vector") 11341 (set_attr "mode" "V8SF")]) 11342 11343;; For gather* insn patterns 11344(define_mode_iterator VEC_GATHER_MODE 11345 [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF]) 11346(define_mode_attr VEC_GATHER_IDXSI 11347 [(V2DI "V4SI") (V2DF "V4SI") 11348 (V4DI "V4SI") (V4DF "V4SI") 11349 (V4SI "V4SI") (V4SF "V4SI") 11350 (V8SI "V8SI") (V8SF "V8SI")]) 11351(define_mode_attr VEC_GATHER_IDXDI 11352 [(V2DI "V2DI") (V2DF "V2DI") 11353 (V4DI "V4DI") (V4DF "V4DI") 11354 (V4SI "V2DI") (V4SF "V2DI") 11355 (V8SI "V4DI") (V8SF "V4DI")]) 11356(define_mode_attr VEC_GATHER_SRCDI 11357 [(V2DI "V2DI") (V2DF "V2DF") 11358 (V4DI "V4DI") (V4DF "V4DF") 11359 (V4SI "V4SI") (V4SF "V4SF") 11360 (V8SI "V4SI") (V8SF "V4SF")]) 11361 11362(define_expand "avx2_gathersi<mode>" 11363 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") 11364 (unspec:VEC_GATHER_MODE 11365 [(match_operand:VEC_GATHER_MODE 1 "register_operand") 11366 (mem:<ssescalarmode> 11367 (match_par_dup 7 11368 [(match_operand 2 "vsib_address_operand") 11369 (match_operand:<VEC_GATHER_IDXSI> 11370 3 "register_operand") 11371 (match_operand:SI 5 "const1248_operand ")])) 11372 (mem:BLK (scratch)) 11373 (match_operand:VEC_GATHER_MODE 4 "register_operand")] 11374 UNSPEC_GATHER)) 11375 (clobber (match_scratch:VEC_GATHER_MODE 6))])] 11376 "TARGET_AVX2" 11377{ 11378 operands[7] 11379 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], 11380 operands[5]), UNSPEC_VSIBADDR); 11381}) 11382 11383(define_insn "*avx2_gathersi<mode>" 11384 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") 11385 (unspec:VEC_GATHER_MODE 11386 [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0") 11387 (match_operator:<ssescalarmode> 7 "vsib_mem_operator" 11388 [(unspec:P 11389 [(match_operand:P 3 "vsib_address_operand" "p") 11390 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x") 11391 (match_operand:SI 6 "const1248_operand" "n")] 11392 UNSPEC_VSIBADDR)]) 11393 (mem:BLK (scratch)) 11394 (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")] 11395 UNSPEC_GATHER)) 11396 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] 11397 "TARGET_AVX2" 11398 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}" 11399 [(set_attr "type" "ssemov") 11400 (set_attr "prefix" "vex") 11401 (set_attr "mode" "<sseinsnmode>")]) 11402 11403(define_insn "*avx2_gathersi<mode>_2" 11404 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") 11405 (unspec:VEC_GATHER_MODE 11406 [(pc) 11407 (match_operator:<ssescalarmode> 6 "vsib_mem_operator" 11408 [(unspec:P 11409 [(match_operand:P 2 "vsib_address_operand" "p") 11410 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x") 11411 (match_operand:SI 5 "const1248_operand" "n")] 11412 UNSPEC_VSIBADDR)]) 11413 (mem:BLK (scratch)) 11414 (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")] 11415 UNSPEC_GATHER)) 11416 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] 11417 "TARGET_AVX2" 11418 "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}" 11419 [(set_attr "type" "ssemov") 11420 (set_attr "prefix" "vex") 11421 (set_attr "mode" "<sseinsnmode>")]) 11422 11423(define_expand "avx2_gatherdi<mode>" 11424 [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand") 11425 (unspec:VEC_GATHER_MODE 11426 [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand") 11427 (mem:<ssescalarmode> 11428 (match_par_dup 7 11429 [(match_operand 2 "vsib_address_operand") 11430 (match_operand:<VEC_GATHER_IDXDI> 11431 3 "register_operand") 11432 (match_operand:SI 5 "const1248_operand ")])) 11433 (mem:BLK (scratch)) 11434 (match_operand:<VEC_GATHER_SRCDI> 11435 4 "register_operand")] 11436 UNSPEC_GATHER)) 11437 (clobber (match_scratch:VEC_GATHER_MODE 6))])] 11438 "TARGET_AVX2" 11439{ 11440 operands[7] 11441 = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3], 11442 operands[5]), UNSPEC_VSIBADDR); 11443}) 11444 11445(define_insn "*avx2_gatherdi<mode>" 11446 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") 11447 (unspec:VEC_GATHER_MODE 11448 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0") 11449 (match_operator:<ssescalarmode> 7 "vsib_mem_operator" 11450 [(unspec:P 11451 [(match_operand:P 3 "vsib_address_operand" "p") 11452 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") 11453 (match_operand:SI 6 "const1248_operand" "n")] 11454 UNSPEC_VSIBADDR)]) 11455 (mem:BLK (scratch)) 11456 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] 11457 UNSPEC_GATHER)) 11458 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] 11459 "TARGET_AVX2" 11460 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}" 11461 [(set_attr "type" "ssemov") 11462 (set_attr "prefix" "vex") 11463 (set_attr "mode" "<sseinsnmode>")]) 11464 11465(define_insn "*avx2_gatherdi<mode>_2" 11466 [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x") 11467 (unspec:VEC_GATHER_MODE 11468 [(pc) 11469 (match_operator:<ssescalarmode> 6 "vsib_mem_operator" 11470 [(unspec:P 11471 [(match_operand:P 2 "vsib_address_operand" "p") 11472 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") 11473 (match_operand:SI 5 "const1248_operand" "n")] 11474 UNSPEC_VSIBADDR)]) 11475 (mem:BLK (scratch)) 11476 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] 11477 UNSPEC_GATHER)) 11478 (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))] 11479 "TARGET_AVX2" 11480{ 11481 if (<MODE>mode != <VEC_GATHER_SRCDI>mode) 11482 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}"; 11483 return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"; 11484} 11485 [(set_attr "type" "ssemov") 11486 (set_attr "prefix" "vex") 11487 (set_attr "mode" "<sseinsnmode>")]) 11488 11489(define_insn "*avx2_gatherdi<mode>_3" 11490 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x") 11491 (vec_select:<VEC_GATHER_SRCDI> 11492 (unspec:VI4F_256 11493 [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0") 11494 (match_operator:<ssescalarmode> 7 "vsib_mem_operator" 11495 [(unspec:P 11496 [(match_operand:P 3 "vsib_address_operand" "p") 11497 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x") 11498 (match_operand:SI 6 "const1248_operand" "n")] 11499 UNSPEC_VSIBADDR)]) 11500 (mem:BLK (scratch)) 11501 (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")] 11502 UNSPEC_GATHER) 11503 (parallel [(const_int 0) (const_int 1) 11504 (const_int 2) (const_int 3)]))) 11505 (clobber (match_scratch:VI4F_256 1 "=&x"))] 11506 "TARGET_AVX2" 11507 "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}" 11508 [(set_attr "type" "ssemov") 11509 (set_attr "prefix" "vex") 11510 (set_attr "mode" "<sseinsnmode>")]) 11511 11512(define_insn "*avx2_gatherdi<mode>_4" 11513 [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x") 11514 (vec_select:<VEC_GATHER_SRCDI> 11515 (unspec:VI4F_256 11516 [(pc) 11517 (match_operator:<ssescalarmode> 6 "vsib_mem_operator" 11518 [(unspec:P 11519 [(match_operand:P 2 "vsib_address_operand" "p") 11520 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x") 11521 (match_operand:SI 5 "const1248_operand" "n")] 11522 UNSPEC_VSIBADDR)]) 11523 (mem:BLK (scratch)) 11524 (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")] 11525 UNSPEC_GATHER) 11526 (parallel [(const_int 0) (const_int 1) 11527 (const_int 2) (const_int 3)]))) 11528 (clobber (match_scratch:VI4F_256 1 "=&x"))] 11529 "TARGET_AVX2" 11530 "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}" 11531 [(set_attr "type" "ssemov") 11532 (set_attr "prefix" "vex") 11533 (set_attr "mode" "<sseinsnmode>")]) 11534