; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2 --check-prefix=AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST --check-prefix=AVX512VL-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST --check-prefix=AVX512VL-FAST-PERLANE

; Naming convention used by the tests in this file: the suffix of each
; @shuffle_<type>_<mask> function spells the shufflevector mask, one character
; per result element. Digits 0-3 pick an element of the first operand, digits
; 4-7 pick an element of the second operand, 'u' is a poison (don't-care) mask
; element and 'z' is an element read from a zero constant.
;
; The assertion comments inside each function are regenerated by the script
; named on the first line; do not edit them by hand.

;
; <4 x double> shuffles
;

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0000:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0001:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0020:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0300:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_1000:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2200:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2222(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2222:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2222:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2222:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %shuffle
}

; Same mask as shuffle_v4f64_2222, but the operands arrive as <4 x i64> and are
; bitcast to <4 x double> before the shuffle.
define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4f64_2222_bc:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2222_bc:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2222_bc:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT:    retq
  %tmp0 = bitcast <4 x i64> %a to <4 x double>
  %tmp1 = bitcast <4 x i64> %b to <4 x double>
  %shuffle = shufflevector <4 x double> %tmp0, <4 x double> %tmp1, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2233(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2233:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0,0,3,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2233:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2233:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3330:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3210:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

; Same mask as shuffle_v4f64_0022, but the first operand is loaded from %ptr.
define <4 x double> @shuffle_v4f64mem_0022(ptr %ptr, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64mem_0022:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT:    retq
  %a = load <4 x double>, ptr %ptr
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0213(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0213:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0213:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0213:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_0423:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_0423:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_0423:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,2,3]
; AVX512VL-FAST-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_0462:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX1OR2-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0462:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,6,2]
; AVX512VL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0145:
; ALL:       # %bb.0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4501:
; ALL:       # %bb.0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL:       # %bb.0:
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_1054:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1054:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_1054:
; AVX512VL-FAST-ALL:       # %bb.0:
; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [1,0,5,4]
; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT:    retq
;
; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_1054:
; AVX512VL-FAST-PERLANE:       # %bb.0:
; AVX512VL-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-FAST-PERLANE-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_3254:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_3254:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_3254:
; AVX512VL-FAST-ALL:       # %bb.0:
; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [3,2,5,4]
; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT:    retq
;
; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_3254:
; AVX512VL-FAST-PERLANE:       # %bb.0:
; AVX512VL-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-FAST-PERLANE-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_3276:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_3276:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_3276:
; AVX512VL-FAST-ALL:       # %bb.0:
; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [3,2,7,6]
; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT:    retq
;
; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_3276:
; AVX512VL-FAST-PERLANE:       # %bb.0:
; AVX512VL-FAST-PERLANE-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-FAST-PERLANE-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_1076:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_1076:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-LABEL: shuffle_v4f64_1076:
; AVX512VL-FAST:       # %bb.0:
; AVX512VL-FAST-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [1,0,7,6]
; AVX512VL-FAST-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,4,1,5]
; AVX512VL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2741(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_2741:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[0,1]
; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1OR2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2741:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [2,7,4,1]
; AVX512VL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 7, i32 4, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 poison, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_15uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_15uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,1]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 poison, i32 poison>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_22uu:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_22uu:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 poison, i32 poison>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3333:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3333:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3333:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0456(<4 x double> %a, <4 x double> %b) {
; AVX1OR2-LABEL: shuffle_v4f64_0456:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1OR2-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; AVX1OR2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_0456:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-SLOW-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_0456:
; AVX512VL-FAST-ALL:       # %bb.0:
; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [4,0,1,2]
; AVX512VL-FAST-ALL-NEXT:    vpermi2pd %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-ALL-NEXT:    vmovapd %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT:    retq
;
; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_0456:
; AVX512VL-FAST-PERLANE:       # %bb.0:
; AVX512VL-FAST-PERLANE-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-FAST-PERLANE-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
; AVX512VL-FAST-PERLANE-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
  ret <4 x double> %shuffle
}

; PR59860
define <4 x double> @shuffle_v4f64_0437(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0437:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 3, i32 7>
  ret <4 x double> %shuffle
}

; PR91433
define <4 x double> @shuffle_v4f64_2303(<4 x double> %a) {
; AVX1-LABEL: shuffle_v4f64_2303:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3]
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2303:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2303:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
  ret <4 x double> %shuffle
}

; The 'z' lanes below read element 0 of a constant second operand whose first
; element is 0.0 (mask index 4); %b is unused.
define <4 x double> @shuffle_v4f64_0z3z(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0z3z:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 0, i32 4, i32 3, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1z2z(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1z2z:
; ALL:       # %bb.0:
; ALL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    retq
  %1 = shufflevector <4 x double> %a, <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
  ret <4 x double> %1
}

define <4 x double> @shuffle_v4f64_0044(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0044:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0044:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
; AVX2-NEXT:    retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4f64_0044:
; AVX512VL-SLOW:       # %bb.0:
; AVX512VL-SLOW-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
; AVX512VL-SLOW-NEXT:    retq
;
; AVX512VL-FAST-ALL-LABEL: shuffle_v4f64_0044:
; AVX512VL-FAST-ALL:       # %bb.0:
; AVX512VL-FAST-ALL-NEXT:    vpmovsxbq {{.*#+}} ymm2 = [0,0,4,4]
; AVX512VL-FAST-ALL-NEXT:    vpermt2pd %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT:    retq
;
; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4f64_0044:
; AVX512VL-FAST-PERLANE:       # %bb.0:
; AVX512VL-FAST-PERLANE-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-FAST-PERLANE-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
; AVX512VL-FAST-PERLANE-NEXT:    retq
  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  ret <4 x double> %1
}

; The _v2f64 variants shuffle each <2 x double> input on its own and then
; concatenate the two halves into the <4 x double> result.
define <4 x double> @shuffle_v4f64_0044_v2f64(<2 x double> %a, <2 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0044_v2f64:
; ALL:       # %bb.0:
; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> <i32 0, i32 0>
  %3 = shufflevector <2 x double> %1, <2 x double> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %3
}

define <4 x double> @shuffle_v4f64_1032_v2f64(<2 x double> %a, <2 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032_v2f64:
; ALL:       # %bb.0:
; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  %2 = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  %3 = shufflevector <2 x double> %1, <2 x double> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %3
}

; PR34359
define <4 x double> @shuffle_v4f64_2345_0567_select(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3) {
; ALL-LABEL: shuffle_v4f64_2345_0567_select:
; ALL:       # %bb.0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5,6,7]
; ALL-NEXT:    retq
  %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  %res = select <4 x i1> <i1 0, i1 1, i1 1, i1 1>, <4 x double> %shuf, <4 x double> %vec3
  ret <4 x double> %res
}

;
; <4 x i64> shuffles
;

define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0020:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[3],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0112:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0300:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1000:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_2200:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1:       # %bb.0:
;
AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1] 961; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 962; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[2] 963; AVX1-NEXT: retq 964; 965; AVX2-LABEL: shuffle_v4i64_3330: 966; AVX2: # %bb.0: 967; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0] 968; AVX2-NEXT: retq 969; 970; AVX512VL-LABEL: shuffle_v4i64_3330: 971; AVX512VL: # %bb.0: 972; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0] 973; AVX512VL-NEXT: retq 974 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0> 975 ret <4 x i64> %shuffle 976} 977 978define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) { 979; AVX1-LABEL: shuffle_v4i64_3210: 980; AVX1: # %bb.0: 981; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] 982; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 983; AVX1-NEXT: retq 984; 985; AVX2-LABEL: shuffle_v4i64_3210: 986; AVX2: # %bb.0: 987; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0] 988; AVX2-NEXT: retq 989; 990; AVX512VL-LABEL: shuffle_v4i64_3210: 991; AVX512VL: # %bb.0: 992; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0] 993; AVX512VL-NEXT: retq 994 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 995 ret <4 x i64> %shuffle 996} 997 998define <4 x i64> @shuffle_v4i64_0213(<4 x i64> %a, <4 x i64> %b) { 999; AVX1-LABEL: shuffle_v4i64_0213: 1000; AVX1: # %bb.0: 1001; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,2,3] 1002; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1003; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] 1004; AVX1-NEXT: retq 1005; 1006; AVX2-LABEL: shuffle_v4i64_0213: 1007; AVX2: # %bb.0: 1008; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 1009; AVX2-NEXT: retq 1010; 1011; AVX512VL-LABEL: shuffle_v4i64_0213: 1012; AVX512VL: # %bb.0: 1013; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3] 1014; AVX512VL-NEXT: retq 1015 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 
x i32> <i32 0, i32 2, i32 1, i32 3> 1016 ret <4 x i64> %shuffle 1017} 1018 1019define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) { 1020; AVX1-LABEL: shuffle_v4i64_0124: 1021; AVX1: # %bb.0: 1022; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 1023; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[2] 1024; AVX1-NEXT: retq 1025; 1026; AVX2-LABEL: shuffle_v4i64_0124: 1027; AVX2: # %bb.0: 1028; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 1029; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 1030; AVX2-NEXT: retq 1031; 1032; AVX512VL-SLOW-LABEL: shuffle_v4i64_0124: 1033; AVX512VL-SLOW: # %bb.0: 1034; AVX512VL-SLOW-NEXT: vbroadcastsd %xmm1, %ymm1 1035; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 1036; AVX512VL-SLOW-NEXT: retq 1037; 1038; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_0124: 1039; AVX512VL-FAST-ALL: # %bb.0: 1040; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,1,2,4] 1041; AVX512VL-FAST-ALL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1042; AVX512VL-FAST-ALL-NEXT: retq 1043; 1044; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_0124: 1045; AVX512VL-FAST-PERLANE: # %bb.0: 1046; AVX512VL-FAST-PERLANE-NEXT: vbroadcastsd %xmm1, %ymm1 1047; AVX512VL-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7] 1048; AVX512VL-FAST-PERLANE-NEXT: retq 1049 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4> 1050 ret <4 x i64> %shuffle 1051} 1052 1053define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) { 1054; AVX1-LABEL: shuffle_v4i64_0142: 1055; AVX1: # %bb.0: 1056; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 1057; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[2] 1058; AVX1-NEXT: retq 1059; 1060; AVX2-LABEL: shuffle_v4i64_0142: 1061; AVX2: # %bb.0: 1062; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 1063; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,2] 1064; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] 
1065; AVX2-NEXT: retq 1066; 1067; AVX512VL-LABEL: shuffle_v4i64_0142: 1068; AVX512VL: # %bb.0: 1069; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,1,4,2] 1070; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1071; AVX512VL-NEXT: retq 1072 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2> 1073 ret <4 x i64> %shuffle 1074} 1075 1076define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) { 1077; AVX1-LABEL: shuffle_v4i64_0412: 1078; AVX1: # %bb.0: 1079; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3] 1080; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 1081; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[2] 1082; AVX1-NEXT: retq 1083; 1084; AVX2-LABEL: shuffle_v4i64_0412: 1085; AVX2: # %bb.0: 1086; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 1087; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,2] 1088; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7] 1089; AVX2-NEXT: retq 1090; 1091; AVX512VL-LABEL: shuffle_v4i64_0412: 1092; AVX512VL: # %bb.0: 1093; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,4,1,2] 1094; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1095; AVX512VL-NEXT: retq 1096 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2> 1097 ret <4 x i64> %shuffle 1098} 1099 1100define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) { 1101; AVX1-LABEL: shuffle_v4i64_4012: 1102; AVX1: # %bb.0: 1103; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1104; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[2] 1105; AVX1-NEXT: retq 1106; 1107; AVX2-LABEL: shuffle_v4i64_4012: 1108; AVX2: # %bb.0: 1109; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2] 1110; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] 1111; AVX2-NEXT: retq 1112; 1113; AVX512VL-SLOW-LABEL: shuffle_v4i64_4012: 1114; AVX512VL-SLOW: # %bb.0: 1115; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2] 1116; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} 
ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] 1117; AVX512VL-SLOW-NEXT: retq 1118; 1119; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_4012: 1120; AVX512VL-FAST-ALL: # %bb.0: 1121; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [4,0,1,2] 1122; AVX512VL-FAST-ALL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1123; AVX512VL-FAST-ALL-NEXT: retq 1124; 1125; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_4012: 1126; AVX512VL-FAST-PERLANE: # %bb.0: 1127; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2] 1128; AVX512VL-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7] 1129; AVX512VL-FAST-PERLANE-NEXT: retq 1130 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2> 1131 ret <4 x i64> %shuffle 1132} 1133 1134define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) { 1135; ALL-LABEL: shuffle_v4i64_0145: 1136; ALL: # %bb.0: 1137; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1138; ALL-NEXT: retq 1139 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1140 ret <4 x i64> %shuffle 1141} 1142 1143define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) { 1144; AVX1-LABEL: shuffle_v4i64_0451: 1145; AVX1: # %bb.0: 1146; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1] 1147; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1148; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1149; AVX1-NEXT: retq 1150; 1151; AVX2-LABEL: shuffle_v4i64_0451: 1152; AVX2: # %bb.0: 1153; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,3] 1154; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1] 1155; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7] 1156; AVX2-NEXT: retq 1157; 1158; AVX512VL-LABEL: shuffle_v4i64_0451: 1159; AVX512VL: # %bb.0: 1160; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,4,5,1] 1161; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1162; AVX512VL-NEXT: retq 1163 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1> 1164 ret <4 x 
i64> %shuffle 1165} 1166 1167define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) { 1168; ALL-LABEL: shuffle_v4i64_4501: 1169; ALL: # %bb.0: 1170; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1171; ALL-NEXT: retq 1172 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> 1173 ret <4 x i64> %shuffle 1174} 1175 1176define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) { 1177; AVX1-LABEL: shuffle_v4i64_4015: 1178; AVX1: # %bb.0: 1179; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] 1180; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1181; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1182; AVX1-NEXT: retq 1183; 1184; AVX2-LABEL: shuffle_v4i64_4015: 1185; AVX2: # %bb.0: 1186; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,2,1] 1187; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3] 1188; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7] 1189; AVX2-NEXT: retq 1190; 1191; AVX512VL-LABEL: shuffle_v4i64_4015: 1192; AVX512VL: # %bb.0: 1193; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [4,0,1,5] 1194; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1195; AVX512VL-NEXT: retq 1196 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5> 1197 ret <4 x i64> %shuffle 1198} 1199 1200define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) { 1201; AVX1-LABEL: shuffle_v4i64_2u35: 1202; AVX1: # %bb.0: 1203; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1204; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 1205; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3] 1206; AVX1-NEXT: retq 1207; 1208; AVX2-LABEL: shuffle_v4i64_2u35: 1209; AVX2: # %bb.0: 1210; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1211; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1] 1212; AVX2-NEXT: retq 1213; 1214; AVX512VL-SLOW-LABEL: shuffle_v4i64_2u35: 1215; AVX512VL-SLOW: # %bb.0: 1216; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = 
ymm1[0,1,2,3],ymm0[4,5,6,7] 1217; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1] 1218; AVX512VL-SLOW-NEXT: retq 1219; 1220; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_2u35: 1221; AVX512VL-FAST-ALL: # %bb.0: 1222; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [2,5,3,5] 1223; AVX512VL-FAST-ALL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1224; AVX512VL-FAST-ALL-NEXT: retq 1225; 1226; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_2u35: 1227; AVX512VL-FAST-PERLANE: # %bb.0: 1228; AVX512VL-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1229; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,1] 1230; AVX512VL-FAST-PERLANE-NEXT: retq 1231 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 poison, i32 3, i32 5> 1232 ret <4 x i64> %shuffle 1233} 1234 1235define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) { 1236; AVX1-LABEL: shuffle_v4i64_1251: 1237; AVX1: # %bb.0: 1238; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1] 1239; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1240; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[3],ymm2[3] 1241; AVX1-NEXT: retq 1242; 1243; AVX2-LABEL: shuffle_v4i64_1251: 1244; AVX2: # %bb.0: 1245; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[1,1,1,1] 1246; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,2,1] 1247; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7] 1248; AVX2-NEXT: retq 1249; 1250; AVX512VL-LABEL: shuffle_v4i64_1251: 1251; AVX512VL: # %bb.0: 1252; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [1,2,5,1] 1253; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1254; AVX512VL-NEXT: retq 1255 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1> 1256 ret <4 x i64> %shuffle 1257} 1258 1259define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) { 1260; AVX1-LABEL: shuffle_v4i64_1054: 1261; AVX1: # %bb.0: 1262; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1263; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = 
ymm0[1,0,3,2] 1264; AVX1-NEXT: retq 1265; 1266; AVX2-LABEL: shuffle_v4i64_1054: 1267; AVX2: # %bb.0: 1268; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1269; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1270; AVX2-NEXT: retq 1271; 1272; AVX512VL-SLOW-LABEL: shuffle_v4i64_1054: 1273; AVX512VL-SLOW: # %bb.0: 1274; AVX512VL-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1275; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1276; AVX512VL-SLOW-NEXT: retq 1277; 1278; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_1054: 1279; AVX512VL-FAST-ALL: # %bb.0: 1280; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [1,0,5,4] 1281; AVX512VL-FAST-ALL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1282; AVX512VL-FAST-ALL-NEXT: retq 1283; 1284; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_1054: 1285; AVX512VL-FAST-PERLANE: # %bb.0: 1286; AVX512VL-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1287; AVX512VL-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1288; AVX512VL-FAST-PERLANE-NEXT: retq 1289 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4> 1290 ret <4 x i64> %shuffle 1291} 1292 1293define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) { 1294; AVX1-LABEL: shuffle_v4i64_3254: 1295; AVX1: # %bb.0: 1296; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1297; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1298; AVX1-NEXT: retq 1299; 1300; AVX2-LABEL: shuffle_v4i64_3254: 1301; AVX2: # %bb.0: 1302; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1303; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0] 1304; AVX2-NEXT: retq 1305; 1306; AVX512VL-SLOW-LABEL: shuffle_v4i64_3254: 1307; AVX512VL-SLOW: # %bb.0: 1308; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1309; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0] 1310; AVX512VL-SLOW-NEXT: retq 1311; 1312; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_3254: 1313; AVX512VL-FAST-ALL: # %bb.0: 1314; 
AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [3,2,5,4] 1315; AVX512VL-FAST-ALL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1316; AVX512VL-FAST-ALL-NEXT: retq 1317; 1318; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_3254: 1319; AVX512VL-FAST-PERLANE: # %bb.0: 1320; AVX512VL-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] 1321; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0] 1322; AVX512VL-FAST-PERLANE-NEXT: retq 1323 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4> 1324 ret <4 x i64> %shuffle 1325} 1326 1327define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) { 1328; AVX1-LABEL: shuffle_v4i64_3276: 1329; AVX1: # %bb.0: 1330; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1331; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1332; AVX1-NEXT: retq 1333; 1334; AVX2-LABEL: shuffle_v4i64_3276: 1335; AVX2: # %bb.0: 1336; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1337; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1338; AVX2-NEXT: retq 1339; 1340; AVX512VL-SLOW-LABEL: shuffle_v4i64_3276: 1341; AVX512VL-SLOW: # %bb.0: 1342; AVX512VL-SLOW-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1343; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1344; AVX512VL-SLOW-NEXT: retq 1345; 1346; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_3276: 1347; AVX512VL-FAST-ALL: # %bb.0: 1348; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [3,2,7,6] 1349; AVX512VL-FAST-ALL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1350; AVX512VL-FAST-ALL-NEXT: retq 1351; 1352; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_3276: 1353; AVX512VL-FAST-PERLANE: # %bb.0: 1354; AVX512VL-FAST-PERLANE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] 1355; AVX512VL-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1356; AVX512VL-FAST-PERLANE-NEXT: retq 1357 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6> 1358 ret <4 x i64> 
%shuffle 1359} 1360 1361define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) { 1362; AVX1-LABEL: shuffle_v4i64_1076: 1363; AVX1: # %bb.0: 1364; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3] 1365; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,0,3,2] 1366; AVX1-NEXT: retq 1367; 1368; AVX2-LABEL: shuffle_v4i64_1076: 1369; AVX2: # %bb.0: 1370; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1371; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1372; AVX2-NEXT: retq 1373; 1374; AVX512VL-SLOW-LABEL: shuffle_v4i64_1076: 1375; AVX512VL-SLOW: # %bb.0: 1376; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] 1377; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] 1378; AVX512VL-SLOW-NEXT: retq 1379; 1380; AVX512VL-FAST-LABEL: shuffle_v4i64_1076: 1381; AVX512VL-FAST: # %bb.0: 1382; AVX512VL-FAST-NEXT: vpmovsxbq {{.*#+}} ymm2 = [1,0,7,6] 1383; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1384; AVX512VL-FAST-NEXT: retq 1385 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6> 1386 ret <4 x i64> %shuffle 1387} 1388 1389define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) { 1390; AVX1-LABEL: shuffle_v4i64_0415: 1391; AVX1: # %bb.0: 1392; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] 1393; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1394; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1395; AVX1-NEXT: retq 1396; 1397; AVX2-LABEL: shuffle_v4i64_0415: 1398; AVX2: # %bb.0: 1399; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1] 1400; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3] 1401; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 1402; AVX2-NEXT: retq 1403; 1404; AVX512VL-LABEL: shuffle_v4i64_0415: 1405; AVX512VL: # %bb.0: 1406; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,4,1,5] 1407; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1408; AVX512VL-NEXT: retq 1409 %shuffle = shufflevector <4 x i64> %a, <4 x 
i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1410 ret <4 x i64> %shuffle 1411} 1412 1413define <4 x i64> @shuffle_v4i64_2741(<4 x i64> %a, <4 x i64> %b) { 1414; AVX1-LABEL: shuffle_v4i64_2741: 1415; AVX1: # %bb.0: 1416; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[0,1] 1417; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] 1418; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7] 1419; AVX1-NEXT: retq 1420; 1421; AVX2-LABEL: shuffle_v4i64_2741: 1422; AVX2: # %bb.0: 1423; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7] 1424; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] 1425; AVX2-NEXT: retq 1426; 1427; AVX512VL-SLOW-LABEL: shuffle_v4i64_2741: 1428; AVX512VL-SLOW: # %bb.0: 1429; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7] 1430; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] 1431; AVX512VL-SLOW-NEXT: retq 1432; 1433; AVX512VL-FAST-ALL-LABEL: shuffle_v4i64_2741: 1434; AVX512VL-FAST-ALL: # %bb.0: 1435; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [2,7,4,1] 1436; AVX512VL-FAST-ALL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1437; AVX512VL-FAST-ALL-NEXT: retq 1438; 1439; AVX512VL-FAST-PERLANE-LABEL: shuffle_v4i64_2741: 1440; AVX512VL-FAST-PERLANE: # %bb.0: 1441; AVX512VL-FAST-PERLANE-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7] 1442; AVX512VL-FAST-PERLANE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] 1443; AVX512VL-FAST-PERLANE-NEXT: retq 1444 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 7, i32 4, i32 1> 1445 ret <4 x i64> %shuffle 1446} 1447 1448; PR59860 1449define <4 x i64> @shuffle_v4i64_0437(<4 x i64> %a, <4 x i64> %b) { 1450; AVX1-LABEL: shuffle_v4i64_0437: 1451; AVX1: # %bb.0: 1452; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] 1453; AVX1-NEXT: retq 1454; 1455; AVX2-LABEL: shuffle_v4i64_0437: 1456; AVX2: # %bb.0: 1457; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,3] 1458; AVX2-NEXT: 
vpermpd {{.*#+}} ymm0 = ymm0[0,1,3,3] 1459; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7] 1460; AVX2-NEXT: retq 1461; 1462; AVX512VL-LABEL: shuffle_v4i64_0437: 1463; AVX512VL: # %bb.0: 1464; AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,4,3,7] 1465; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0 1466; AVX512VL-NEXT: retq 1467 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 3, i32 7> 1468 ret <4 x i64> %shuffle 1469} 1470 1471define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) { 1472; AVX1-LABEL: shuffle_v4i64_z4z6: 1473; AVX1: # %bb.0: 1474; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1475; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] 1476; AVX1-NEXT: retq 1477; 1478; AVX2-LABEL: shuffle_v4i64_z4z6: 1479; AVX2: # %bb.0: 1480; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23] 1481; AVX2-NEXT: retq 1482; 1483; AVX512VL-LABEL: shuffle_v4i64_z4z6: 1484; AVX512VL: # %bb.0: 1485; AVX512VL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23] 1486; AVX512VL-NEXT: retq 1487 %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6> 1488 ret <4 x i64> %shuffle 1489} 1490 1491define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) { 1492; AVX1-LABEL: shuffle_v4i64_5zuz: 1493; AVX1: # %bb.0: 1494; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1495; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1496; AVX1-NEXT: retq 1497; 1498; AVX2-LABEL: shuffle_v4i64_5zuz: 1499; AVX2: # %bb.0: 1500; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1501; AVX2-NEXT: retq 1502; 1503; AVX512VL-LABEL: shuffle_v4i64_5zuz: 
1504; AVX512VL: # %bb.0: 1505; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1506; AVX512VL-NEXT: retq 1507 %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 poison, i32 0> 1508 ret <4 x i64> %shuffle 1509} 1510 1511define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) { 1512; ALL-LABEL: shuffle_v4i64_40u2: 1513; ALL: # %bb.0: 1514; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] 1515; ALL-NEXT: retq 1516 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 poison, i32 2> 1517 ret <4 x i64> %shuffle 1518} 1519 1520define <4 x i64> @shuffle_v4i64_15uu(<4 x i64> %a, <4 x i64> %b) { 1521; ALL-LABEL: shuffle_v4i64_15uu: 1522; ALL: # %bb.0: 1523; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] 1524; ALL-NEXT: retq 1525 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison> 1526 ret <4 x i64> %shuffle 1527} 1528 1529define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) { 1530; ALL-LABEL: shuffle_v4i64_11uu: 1531; ALL: # %bb.0: 1532; ALL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3] 1533; ALL-NEXT: retq 1534 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 poison, i32 poison> 1535 ret <4 x i64> %shuffle 1536} 1537 1538define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) { 1539; AVX1-LABEL: shuffle_v4i64_22uu: 1540; AVX1: # %bb.0: 1541; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1542; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1] 1543; AVX1-NEXT: retq 1544; 1545; AVX2-LABEL: shuffle_v4i64_22uu: 1546; AVX2: # %bb.0: 1547; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] 1548; AVX2-NEXT: retq 1549; 1550; AVX512VL-LABEL: shuffle_v4i64_22uu: 1551; AVX512VL: # %bb.0: 1552; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] 1553; 
AVX512VL-NEXT: retq 1554 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 poison, i32 poison> 1555 ret <4 x i64> %shuffle 1556} 1557 1558define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) { 1559; AVX1-LABEL: shuffle_v4i64_3333: 1560; AVX1: # %bb.0: 1561; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] 1562; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1,1,3,3] 1563; AVX1-NEXT: retq 1564; 1565; AVX2-LABEL: shuffle_v4i64_3333: 1566; AVX2: # %bb.0: 1567; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3] 1568; AVX2-NEXT: retq 1569; 1570; AVX512VL-LABEL: shuffle_v4i64_3333: 1571; AVX512VL: # %bb.0: 1572; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3] 1573; AVX512VL-NEXT: retq 1574 %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1575 ret <4 x i64> %shuffle 1576} 1577 1578define <4 x i64> @shuffle_v4i64_1z3z(<4 x i64> %a, <4 x i64> %b) { 1579; AVX1-LABEL: shuffle_v4i64_1z3z: 1580; AVX1: # %bb.0: 1581; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 1582; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] 1583; AVX1-NEXT: retq 1584; 1585; AVX2-LABEL: shuffle_v4i64_1z3z: 1586; AVX2: # %bb.0: 1587; AVX2-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1588; AVX2-NEXT: retq 1589; 1590; AVX512VL-LABEL: shuffle_v4i64_1z3z: 1591; AVX512VL: # %bb.0: 1592; AVX512VL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero 1593; AVX512VL-NEXT: retq 1594 %shuffle = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i32> <i32 1, i32 4, i32 3, i32 4> 1595 ret <4 x i64> %shuffle 1596} 1597 1598define <4 x i64> @shuffle_v4i64_0044_v2i64(<2 x i64> %a, <2 x i64> %b) { 1599; AVX1-LABEL: shuffle_v4i64_0044_v2i64: 1600; AVX1: # 
%bb.0:
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0044_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0044_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
; AVX512VL-NEXT: retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> <i32 0, i32 0>
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %3
}

; Swap the two elements of each 128-bit argument, then concatenate the two
; swapped vectors into a single 256-bit result.
define <4 x i64> @shuffle_v4i64_1032_v2i64(<2 x i64> %a, <2 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_1032_v2i64:
; ALL: # %bb.0:
; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; ALL-NEXT: retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  %2 = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
  %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %3
}

; Chain of dependent shuffles of %b with poison mask elements. Only the
; function label and a retq are checked, not the instruction sequence.
define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: stress_test1:
; ALL: retq
  %c = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 2, i32 poison>
  %e = shufflevector <4 x i64> %b, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 1, i32 poison>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

; Place the scalar i64 argument in element 0 and take elements 1-3 from a
; zero vector.
define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; ALL-LABEL: insert_reg_and_zero_v4i64:
; ALL: # %bb.0:
; ALL-NEXT: vmovq %rdi, %xmm0
; ALL-NEXT: retq
  %v = insertelement <4 x i64> poison, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

; Place an i64 loaded from %ptr in element 0 and take elements 1-3 from a
; zero vector.
define <4 x i64> @insert_mem_and_zero_v4i64(ptr %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4i64:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
  %a = load i64, ptr %ptr
  %v = insertelement <4 x i64> poison, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

; Place the scalar double argument in element 0 and take elements 1-3 from a
; zero vector.
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # %bb.0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT: retq
  %v = insertelement <4 x double> poison, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

; Place a double loaded from %ptr in element 0 and take elements 1-3 from a
; zero vector.
define <4 x double> @insert_mem_and_zero_v4f64(ptr %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL: # %bb.0:
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: retq
  %a = load double, ptr %ptr
  %v = insertelement <4 x double> poison, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

; Splat a double loaded from %ptr across all four elements.
define <4 x double> @splat_mem_v4f64(ptr %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load double, ptr %ptr
  %v = insertelement <4 x double> poison, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

; Splat an i64 loaded from %ptr across all four elements.
define <4 x i64> @splat_mem_v4i64(ptr %ptr) {
; ALL-LABEL: splat_mem_v4i64:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load i64, ptr %ptr
  %v = insertelement <4 x i64> poison, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

; Splat a loaded double by inserting it into a <2 x double> and widening to
; <4 x double> with an all-zero shuffle mask.
define <4 x double> @splat_mem_v4f64_2(ptr %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %1 = load double, ptr %p
  %2 = insertelement <2 x double> poison, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> poison, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

; Splat element 0 of a <2 x double> register argument to <4 x double>.
define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: splat_v4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %1 = shufflevector <2 x double> %r, <2 x double> poison, <4 x i32> zeroinitializer
  ret <4 x double> %1
}

; Splat element 0 of a <2 x i64> loaded from %ptr across four elements.
define <4 x i64> @splat_mem_v4i64_from_v2i64(ptr %ptr) {
; ALL-LABEL: splat_mem_v4i64_from_v2i64:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %v = load <2 x i64>, ptr %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

; Splat element 0 of a <2 x double> loaded from %ptr across four elements.
define <4 x double> @splat_mem_v4f64_from_v2f64(ptr %ptr) {
; ALL-LABEL: splat_mem_v4f64_from_v2f64:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %v = load <2 x double>, ptr %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

; Repeat a loaded <2 x i64> in both 128-bit halves of the result (mask
; <0,1,0,1>).
define <4 x i64> @splat128_mem_v4i64_from_v2i64(ptr %ptr) {
; AVX1OR2-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; AVX512VL-NEXT: retq
  %v = load <2 x i64>, ptr %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

; Repeat a loaded <2 x double> in both 128-bit halves of the result (mask
; <0,1,0,1>).
define <4 x double> @splat128_mem_v4f64_from_v2f64(ptr %ptr) {
; ALL-LABEL: splat128_mem_v4f64_from_v2f64:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; ALL-NEXT: retq
  %v = load <2 x double>, ptr %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %shuffle
}

; Widen a <2 x i64> argument to <4 x i64>, bitcast it to <4 x double>, then
; splat element 0.
define <4 x double> @broadcast_v4f64_0000_from_v2i64(<2 x i64> %a0) {
; AVX1-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: broadcast_v4f64_0000_from_v2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT: retq
  %1 = shufflevector <2 x i64> %a0, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = bitcast <4 x i64> %1 to <4 x double>
  %3 = shufflevector <4 x double> %2, <4 x double> poison, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

; PR114959
; Interleave two align-1 <2 x double> loads, one at %src and one 32 bytes past
; it, using mask <0,2,1,3>.
define <4 x double> @concat_v4f64_0213_broadcasts(ptr %src) {
; ALL-LABEL: concat_v4f64_0213_broadcasts:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; ALL-NEXT: vbroadcastf128 {{.*#+}} ymm1 = mem[0,1,0,1]
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[3],ymm0[3]
; ALL-NEXT: retq
  %src.hi = getelementptr inbounds i8, ptr %src, i64 32
  %lo = load <2 x double>, ptr %src, align 1
  %hi = load <2 x double>, ptr %src.hi, align 1
  %shuffle = shufflevector <2 x double> %lo, <2 x double> %hi, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  ret <4 x double> %shuffle
}

; Three shuffles at 64-bit, 32-bit and 16-bit element granularity, joined by
; bitcasts. The checks expect the whole chain to become a single vunpcklpd.
define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: bitcast_v4f64_0426:
; ALL: # %bb.0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT: retq
  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
  ret <4 x double> %bitcast64
}

; Extract the low half of %a0 and the high half of %a1, then concatenate them.
define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
; ALL-LABEL: concat_v4i64_0167:
; ALL: # %bb.0:
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; ALL-NEXT: retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
  %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuffle64
}

; Extract the low halves of %a0 and %a1, bitcast both to <4 x i32>,
; concatenate them as <8 x i32>, and bitcast the result back to <4 x i64>.
define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
; ALL-LABEL: concat_v4i64_0145_bc:
; ALL: # %bb.0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
  %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
  %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
  %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
  ret <4 x i64> %shuffle64
}

; Splat an align-1 i64 load by inserting it into a <2 x i64> and widening to
; <4 x i64> with an all-zero shuffle mask.
define <4 x i64> @insert_dup_mem_v4i64(ptr %ptr) {
; ALL-LABEL: insert_dup_mem_v4i64:
; ALL: # %bb.0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %tmp = load i64, ptr %ptr, align 1
  %tmp1 = insertelement <2 x i64> poison, i64 %tmp, i32 0
  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <4 x i32> zeroinitializer
  ret <4 x i64> %tmp2
}

; Two-input shuffle: elements 1-3 of %a followed by element 0 of %b.
define <4 x i64> @shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1234:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1234:
; AVX2: # %bb.0:
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1234:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: valignq {{.*#+}} ymm0 = ymm0[1,2,3],ymm1[0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i64> %shuffle
}

; Single-input variant: elements 1-3 of %a followed by element 0 of %a.
define <4 x i64> @shuffle_v4i64_1230(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_1230:
; AVX1: # %bb.0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1230:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_1230:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,3,0]
; AVX512VL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

; Mask index 4 selects element 0 of the second operand, which is the constant
; 0, so the result is <0, a[0], 0, a[3]>. %b is unused.
define <4 x i64> @shuffle_v4i64_z0z3(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_z0z3:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v4i64_z0z3:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v4i64_z0z3:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4i64_z0z3:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_z0z3:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
; AVX512VL-FAST-NEXT: retq
  %1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
  ret <4 x i64> %1
}

; Mask index 4 selects element 0 of the second operand, which is the constant
; 0, so the result is <a[1], 0, a[2], 0>. %b is unused.
define <4 x i64> @shuffle_v4i64_1z2z(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1z2z:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: shuffle_v4i64_1z2z:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: shuffle_v4i64_1z2z:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-FAST-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v4i64_1z2z:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,2,0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_1z2z:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-FAST-NEXT: retq
  %1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i32> <i32 1, i32 4, i32 2, i32 4>
  ret <4 x i64> %1
}

; fadd of the even-indexed (<0,2,4,6>) and odd-indexed (<1,3,5,7>) elements of
; the %a/%b pair. Every expected sequence contains a vhaddpd.
define <4 x double> @add_v4f64_0246_1357(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: add_v4f64_0246_1357:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vhaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v4f64_0246_1357:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: add_v4f64_0246_1357:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-NEXT: retq
entry:
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %add = fadd <4 x double> %shuffle, %shuffle1
  ret <4 x double> %add
}

; fadd of even-indexed and odd-indexed elements with %b's elements placed
; ahead of %a's (masks <4,6,0,2> and <5,7,1,3>).
define <4 x double> @add_v4f64_4602_5713(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: add_v4f64_4602_5713:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vhaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v4f64_4602_5713:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,3,0,2]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: add_v4f64_4602_5713:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,3,0,2]
; AVX512VL-NEXT: retq
entry:
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 6, i32 0, i32 2>
  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
  %add = fadd <4 x double> %shuffle, %shuffle1
  ret <4 x double> %add
}

; Even/odd fadd whose last mask element is poison, followed by a shuffle of
; the sum with mask <poison,2,1,0>.
define <4 x double> @add_v4f64_024u_135u_reverse(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: add_v4f64_024u_135u_reverse:
; AVX1: # %bb.0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v4f64_024u_135u_reverse:
; AVX2: # %bb.0:
; AVX2-NEXT: vhaddpd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,1]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: add_v4f64_024u_135u_reverse:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vhaddpd %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,1]
; AVX512VL-NEXT: retq
  %shuffle0 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 poison>
  %shuffle1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 poison>
  %add = fadd <4 x double> %shuffle0, %shuffle1
  %shuffle = shufflevector <4 x double> %add, <4 x double> poison, <4 x i32> <i32 poison, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

; Integer add of the even-indexed (<0,2,4,6>) and odd-indexed (<1,3,5,7>)
; elements of the %a/%b pair. The expected sequences use explicit shuffles
; followed by vpaddq.
define <4 x i64> @add_v4i64_0246_1357(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: add_v4i64_0246_1357:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v4i64_0246_1357:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: add_v4i64_0246_1357:
; AVX512VL-SLOW: # %bb.0: # %entry
; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-ALL-LABEL: add_v4i64_0246_1357:
; AVX512VL-FAST-ALL: # %bb.0: # %entry
; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,2,4,6]
; AVX512VL-FAST-ALL-NEXT: vpermi2q %ymm1, %ymm0, %ymm2
; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,5,7]
; AVX512VL-FAST-ALL-NEXT: vpermi2q %ymm1, %ymm0, %ymm3
; AVX512VL-FAST-ALL-NEXT: vpaddq %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT: retq
;
; AVX512VL-FAST-PERLANE-LABEL: add_v4i64_0246_1357:
; AVX512VL-FAST-PERLANE: # %bb.0: # %entry
; AVX512VL-FAST-PERLANE-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-FAST-PERLANE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-FAST-PERLANE-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-FAST-PERLANE-NEXT: retq
entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %shuffle1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %add = add <4 x i64> %shuffle, %shuffle1
  ret <4 x i64> %add
}

; Integer add of even-indexed and odd-indexed elements with %b's elements
; placed ahead of %a's (masks <4,6,0,2> and <5,7,1,3>).
define <4 x i64> @add_v4i64_4602_5713(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: add_v4i64_4602_5713:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3],ymm0[2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm0[0],ymm2[0],ymm0[2],ymm2[2]
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[3],ymm2[3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: add_v4i64_4602_5713:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: add_v4i64_4602_5713:
; AVX512VL-SLOW: # %bb.0: # %entry
; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-SLOW-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-ALL-LABEL: add_v4i64_4602_5713:
; AVX512VL-FAST-ALL: # %bb.0: # %entry
; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm2 = [0,2,4,6]
; AVX512VL-FAST-ALL-NEXT: vpermi2q %ymm0, %ymm1, %ymm2
; AVX512VL-FAST-ALL-NEXT: vpmovsxbq {{.*#+}} ymm3 = [1,3,5,7]
; AVX512VL-FAST-ALL-NEXT: vpermi2q %ymm0, %ymm1, %ymm3
; AVX512VL-FAST-ALL-NEXT: vpaddq %ymm3, %ymm2, %ymm0
; AVX512VL-FAST-ALL-NEXT: retq
;
; AVX512VL-FAST-PERLANE-LABEL: add_v4i64_4602_5713:
; AVX512VL-FAST-PERLANE: # %bb.0: # %entry
; AVX512VL-FAST-PERLANE-NEXT: vpunpcklqdq {{.*#+}} ymm2 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
; AVX512VL-FAST-PERLANE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX512VL-FAST-PERLANE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-FAST-PERLANE-NEXT: vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-FAST-PERLANE-NEXT: retq
entry:
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 6, i32 0, i32 2>
  %shuffle1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
  %add = add <4 x i64> %shuffle, %shuffle1
  ret <4 x i64> %add
}

; Keep element 0 of %a and zero elements 1-3, in a function marked optsize.
define <4 x double> @shuffle_v4f64_0zzz_optsize(<4 x double> %a) optsize {
; ALL-LABEL: shuffle_v4f64_0zzz_optsize:
; ALL: # %bb.0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT: retq
  %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %b
}

; Integer form: keep element 0 of %a and zero elements 1-3, marked optsize.
define <4 x i64> @shuffle_v4i64_0zzz_optsize(<4 x i64> %a) optsize {
; ALL-LABEL: shuffle_v4i64_0zzz_optsize:
; ALL: # %bb.0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT: retq
  %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %b
}

; Keep element 0 of an <8 x float> and zero elements 1-7, marked optsize.
define <8 x float> @shuffle_v8f32_0zzzzzzz_optsize(<8 x float> %a) optsize {
; ALL-LABEL: shuffle_v8f32_0zzzzzzz_optsize:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT: retq
  %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %b
}

; Keep element 0 of an <8 x i32> and zero elements 1-7, marked optsize.
define <8 x i32> @shuffle_v8i32_0zzzzzzz_optsize(<8 x i32> %a) optsize {
; ALL-LABEL: shuffle_v8i32_0zzzzzzz_optsize:
; ALL: # %bb.0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; ALL-NEXT: retq
  %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %b
}

; Keep element 0 of %a and zero elements 1-3. Instead of optsize, the function
; carries !prof !14, a function_entry_count of 0.
define <4 x double> @shuffle_v4f64_0zzz_pgso(<4 x double> %a) !prof !14 {
; ALL-LABEL: shuffle_v4f64_0zzz_pgso:
; ALL: # %bb.0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT: retq
  %b = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %b
}

; Integer form: keep element 0 of %a and zero elements 1-3, with !prof !14
; (function_entry_count of 0).
define <4 x i64> @shuffle_v4i64_0zzz_pgso(<4 x i64> %a) !prof !14 {
; ALL-LABEL: shuffle_v4i64_0zzz_pgso:
; ALL: # %bb.0:
; ALL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; ALL-NEXT: retq
  %b = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %b
}

; Keep element 0 of an <8 x float> and zero elements 1-7, with !prof !14.
; The AVX1/AVX2 runs expect vblendps where the AVX512VL runs expect vmovss.
define <8 x float> @shuffle_v8f32_0zzzzzzz_pgso(<8 x float> %a) !prof !14 {
; AVX1OR2-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8f32_0zzzzzzz_pgso:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512VL-NEXT: retq
  %b = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x float> %b
}

; Keep element 0 of an <8 x i32> and zero elements 1-7, with !prof !14.
; The AVX1/AVX2 runs expect vblendps where the AVX512VL runs expect vmovss.
define <8 x i32> @shuffle_v8i32_0zzzzzzz_pgso(<8 x i32> %a) !prof !14 {
; AVX1OR2-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1OR2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_0zzzzzzz_pgso:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512VL-NEXT: retq
  %b = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i32> %b
}

; Result elements 0 and 1 are x[1] and y[3]; the upper two mask elements are
; poison.
define <4 x i64> @unpckh_v4i64(<4 x i64> %x, <4 x i64> %y) {
; ALL-LABEL: unpckh_v4i64:
; ALL: # %bb.0:
; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: retq
  %unpckh = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32> <i32 1, i32 7, i32 poison, i32 poison>
  ret <4 x i64> %unpckh
}

; Floating-point form: result elements 0 and 1 are x[1] and y[3]; the upper
; two mask elements are poison.
define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
; ALL-LABEL: unpckh_v4f64:
; ALL: # %bb.0:
; ALL-NEXT: vextractf128 $1, %ymm1, %xmm1
; ALL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; ALL-NEXT: retq
  %unpckh = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 7, i32 poison, i32 poison>
  ret <4 x double> %unpckh
}

; Module-level ProfileSummary metadata (!0 through !13). !14 is the
; function_entry_count of 0 referenced by the *_pgso functions via !prof.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}