; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefixes=CHECK,KNL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck --check-prefixes=CHECK,SKX %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq,+avx512vbmi | FileCheck --check-prefixes=CHECK,SKX %s

; Inserts a float loaded from %br into lane 1 and the scalar %y into lane 14 of a <16 x float>.
define <16 x float> @test1(<16 x float> %x, ptr %br, float %y) nounwind {
; KNL-LABEL: test1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    movw $16384, %ax ## imm = 0x4000
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vbroadcastss %xmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vinsertps {{.*#+}} xmm2 = xmm0[0],mem[0],xmm0[2,3]
; SKX-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    movw $16384, %ax ## imm = 0x4000
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vbroadcastss %xmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
  %rrr = load float, ptr %br
  %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
  %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
  ret <16 x float> %rrr3
}

; Inserts a double loaded from %br into lane 1 and the scalar %y into lane 6 of an <8 x double>.
define <8 x double> @test2(<8 x double> %x, ptr %br, double %y) nounwind {
; KNL-LABEL: test2:
; KNL:       ## %bb.0:
; KNL-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; KNL-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; KNL-NEXT:    movb $64, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vbroadcastsd %xmm1, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: test2:
; SKX:       ## %bb.0:
; SKX-NEXT:    vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
; SKX-NEXT:    vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
; SKX-NEXT:    movb $64, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vbroadcastsd %xmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
  %rrr = load double, ptr %br
  %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
  %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
  ret <8 x double> %rrr3
}

; Copies lane 4 of a <16 x float> into lane 1 of the same vector.
define <16 x float> @test3(<16 x float> %x) nounwind {
; CHECK-LABEL: test3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %eee = extractelement <16 x float> %x, i32 4
  %rrr2 = insertelement <16 x float> %x, float %eee, i32 1
  ret <16 x float> %rrr2
}

; Copies lane 4 of an <8 x i64> into lane 1 of the same vector.
define <8 x i64> @test4(<8 x i64> %x) nounwind {
; CHECK-LABEL: test4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
; CHECK-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; CHECK-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %eee = extractelement <8 x i64> %x, i32 4
  %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
  ret <8 x i64> %rrr2
}

; Extracts lane 3 of a <4 x float> and returns its bits as an i32.
define i32 @test5(<4 x float> %x) nounwind {
; CHECK-LABEL: test5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractps $3, %xmm0, %eax
; CHECK-NEXT:    retq
  %ef = extractelement <4 x float> %x, i32 3
  %ei = bitcast float %ef to i32
  ret i32 %ei
}

; Extracts lane 3 of a <4 x float> and stores it to %out.
define void @test6(<4 x float> %x, ptr %out) nounwind {
; CHECK-LABEL: test6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractps $3, %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %ef = extractelement <4 x float> %x, i32 3
  store float %ef, ptr %out, align 4
  ret void
}

; Extracts a lane of a <16 x float> selected by the run-time index %ind.
define float @test7(<16 x float> %x, i32 %ind) nounwind {
; CHECK-LABEL: test7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $15, %edi
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %e = extractelement <16 x float> %x, i32 %ind
  ret float %e
}

; Extracts a lane of an <8 x double> selected by the run-time index %ind.
define double @test8(<8 x double> %x, i32 %ind) nounwind {
; CHECK-LABEL: test8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $7, %edi
; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %e = extractelement <8 x double> %x, i32 %ind
  ret double %e
}

; Extracts a lane of an <8 x float> selected by the run-time index %ind.
define float @test9(<8 x float> %x, i32 %ind) nounwind {
; CHECK-LABEL: test9:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $64, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
; CHECK-NEXT:    andl $7, %edi
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %e = extractelement <8 x float> %x, i32 %ind
  ret float %e
}

; Extracts a lane of a <16 x i32> selected by the run-time index %ind.
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
; CHECK-LABEL: test10:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $15, %edi
; CHECK-NEXT:    movl (%rsp,%rdi,4), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %e = extractelement <16 x i32> %x, i32 %ind
  ret i32 %e
}

; Branches on lane 4 of an unsigned-less-than compare of two <16 x i32> vectors.
define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) nounwind {
; KNL-LABEL: test11:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb $16, %al
; KNL-NEXT:    je LBB10_2
; KNL-NEXT:  ## %bb.1: ## %A
; KNL-NEXT:    vmovdqa64 %zmm1, %zmm0
; KNL-NEXT:    retq
; KNL-NEXT:  LBB10_2: ## %B
; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    testb $16, %al
; SKX-NEXT:    je LBB10_2
; SKX-NEXT:  ## %bb.1: ## %A
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0
; SKX-NEXT:    retq
; SKX-NEXT:  LBB10_2: ## %B
; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %cmp_res = icmp ult <16 x i32> %a, %b
  %ia = extractelement <16 x i1> %cmp_res, i32 4
  br i1 %ia, label %A, label %B
A:
  ret <16 x i32>%b
B:
  %c = add <16 x i32>%b, %a
  ret <16 x i32>%c
}

; Selects between %a1 and %b1 using lane 0 of a signed-less-than compare of two <16 x i64> vectors.
define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) nounwind {
; KNL-LABEL: test12:
; KNL:       ## %bb.0:
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    testb $1, %cl
; KNL-NEXT:    cmoveq %rsi, %rax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test12:
; SKX:       ## %bb.0:
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm2, %k0
; SKX-NEXT:    kmovd %k0, %ecx
; SKX-NEXT:    testb $1, %cl
; SKX-NEXT:    cmoveq %rsi, %rax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %cmpvector_func.i = icmp slt <16 x i64> %a, %b
  %extract24vector_func.i = extractelement <16 x i1> %cmpvector_func.i, i32 0
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Inserts a scalar compare result into lane 0 of a constant <16 x i1> and returns the mask as an i16.
define i16 @test13(i32 %a, i32 %b) nounwind {
; KNL-LABEL: test13:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    setb %al
; KNL-NEXT:    movw $-4, %cx
; KNL-NEXT:    kmovw %ecx, %k0
; KNL-NEXT:    kshiftrw $1, %k0, %k0
; KNL-NEXT:    kshiftlw $1, %k0, %k0
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test13:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    setb %al
; SKX-NEXT:    movw $-4, %cx
; SKX-NEXT:    kmovd %ecx, %k0
; SKX-NEXT:    kshiftrw $1, %k0, %k0
; SKX-NEXT:    kshiftlw $1, %k0, %k0
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
; SKX-NEXT:    retq
  %cmp_res = icmp ult i32 %a, %b
  %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
  %res = bitcast <16 x i1> %maskv to i16
  ret i16 %res
}

; Selects between %a1 and %b1 using lane 4 of a signed-less-than compare of two <8 x i64> vectors.
define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) nounwind {
; KNL-LABEL: test14:
; KNL:       ## %bb.0:
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    testb $16, %cl
; KNL-NEXT:    cmoveq %rsi, %rax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test14:
; SKX:       ## %bb.0:
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT:    kmovd %k0, %ecx
; SKX-NEXT:    testb $16, %cl
; SKX-NEXT:    cmoveq %rsi, %rax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
  ret i64 %res
}

; Inserts an i1 loaded from %addr into lane 10 of an undef <16 x i1> and returns the mask as an i16.
define i16 @test15(ptr%addr) nounwind {
; CHECK-LABEL: test15:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movzbl (%rdi), %eax
; CHECK-NEXT:    negl %eax
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %x = load i1 , ptr %addr, align 1
  %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Inserts an i1 loaded from %addr into lane 10 of the <16 x i1> mask formed from %a.
define i16 @test16(ptr%addr, i16 %a) nounwind {
; KNL-LABEL: test16:
; KNL:       ## %bb.0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    movw $-1025, %cx ## imm = 0xFBFF
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $5, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    kmovd %esi, %k1
; SKX-NEXT:    movw $-1025, %ax ## imm = 0xFBFF
; SKX-NEXT:    kmovd %eax, %k2
; SKX-NEXT:    kandw %k2, %k1, %k1
; SKX-NEXT:    kshiftlw $15, %k0, %k0
; SKX-NEXT:    kshiftrw $5, %k0, %k0
; SKX-NEXT:    korw %k0, %k1, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
; SKX-NEXT:    retq
  %x = load i1 , ptr %addr, align 128
  %a1 = bitcast i16 %a to <16 x i1>
  %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
  %x2 = bitcast <16 x i1>%x1 to i16
  ret i16 %x2
}

; Inserts an i1 loaded from %addr into lane 4 of the <8 x i1> mask formed from %a.
define i8 @test17(ptr%addr, i8 %a) nounwind {
; KNL-LABEL: test17:
; KNL:       ## %bb.0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    movw $-17, %cx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $11, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    kmovd %esi, %k1
; SKX-NEXT:    movb $-17, %al
; SKX-NEXT:    kmovd %eax, %k2
; SKX-NEXT:    kandb %k2, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $3, %k0, %k0
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
  %x = load i1 , ptr %addr, align 128
  %a1 = bitcast i8 %a to <8 x i1>
  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
  %x2 = bitcast <8 x i1>%x1 to i8
  ret i8 %x2
}

; Returns lane 1 of an <8 x i64> and stores lane 3 to %dst.
define i64 @extract_v8i64(<8 x i64> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v8i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrq $1, %xmm0, %rax
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <8 x i64> %x, i32 1
  %r2 = extractelement <8 x i64> %x, i32 3
  store i64 %r2, ptr %dst, align 1
  ret i64 %r1
}

; Returns lane 1 of a <4 x i64> and stores lane 3 to %dst.
define i64 @extract_v4i64(<4 x i64> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v4i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrq $1, %xmm0, %rax
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <4 x i64> %x, i32 1
  %r2 = extractelement <4 x i64> %x, i32 3
  store i64 %r2, ptr %dst, align 1
  ret i64 %r1
}

; Returns lane 0 of a <2 x i64> and stores lane 1 to %dst.
define i64 @extract_v2i64(<2 x i64> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v2i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovq %xmm0, %rax
; CHECK-NEXT:    vpextrq $1, %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, ptr %dst, align 1
  ret i64 %r1
}

; Returns lane 1 of a <16 x i32> and stores lane 5 to %dst.
define i32 @extract_v16i32(<16 x i32> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v16i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractps $1, %xmm0, %eax
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vextractps $1, %xmm0, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <16 x i32> %x, i32 1
  %r2 = extractelement <16 x i32> %x, i32 5
  store i32 %r2, ptr %dst, align 1
  ret i32 %r1
}

; Returns lane 1 of an <8 x i32> and stores lane 5 to %dst.
define i32 @extract_v8i32(<8 x i32> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v8i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractps $1, %xmm0, %eax
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vextractps $1, %xmm0, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <8 x i32> %x, i32 1
  %r2 = extractelement <8 x i32> %x, i32 5
  store i32 %r2, ptr %dst, align 1
  ret i32 %r1
}

; Returns lane 1 of a <4 x i32> and stores lane 3 to %dst.
define i32 @extract_v4i32(<4 x i32> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v4i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextractps $1, %xmm0, %eax
; CHECK-NEXT:    vextractps $3, %xmm0, (%rdi)
; CHECK-NEXT:    retq
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, ptr %dst, align 1
  ret i32 %r1
}

; Returns lane 1 of a <32 x i16> and stores lane 9 to %dst.
define i16 @extract_v32i16(<32 x i16> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v32i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrw $1, %xmm0, %eax
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpextrw $1, %xmm0, (%rdi)
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <32 x i16> %x, i32 1
  %r2 = extractelement <32 x i16> %x, i32 9
  store i16 %r2, ptr %dst, align 1
  ret i16 %r1
}

; Returns lane 1 of a <16 x i16> and stores lane 9 to %dst.
define i16 @extract_v16i16(<16 x i16> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v16i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrw $1, %xmm0, %eax
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpextrw $1, %xmm0, (%rdi)
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <16 x i16> %x, i32 1
  %r2 = extractelement <16 x i16> %x, i32 9
  store i16 %r2, ptr %dst, align 1
  ret i16 %r1
}

; Returns lane 1 of an <8 x i16> and stores lane 3 to %dst.
define i16 @extract_v8i16(<8 x i16> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v8i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrw $1, %xmm0, %eax
; CHECK-NEXT:    vpextrw $3, %xmm0, (%rdi)
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, ptr %dst, align 1
  ret i16 %r1
}

; Returns lane 1 of a <64 x i8> and stores lane 17 to %dst.
define i8 @extract_v64i8(<64 x i8> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v64i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrb $1, %xmm0, %eax
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpextrb $1, %xmm0, (%rdi)
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <64 x i8> %x, i32 1
  %r2 = extractelement <64 x i8> %x, i32 17
  store i8 %r2, ptr %dst, align 1
  ret i8 %r1
}

; Returns lane 1 of a <32 x i8> and stores lane 17 to %dst.
define i8 @extract_v32i8(<32 x i8> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v32i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrb $1, %xmm0, %eax
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpextrb $1, %xmm0, (%rdi)
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %r1 = extractelement <32 x i8> %x, i32 1
  %r2 = extractelement <32 x i8> %x, i32 17
  store i8 %r2, ptr %dst, align 1
  ret i8 %r1
}

; Returns lane 1 of a <16 x i8> and stores lane 3 to %dst.
define i8 @extract_v16i8(<16 x i8> %x, ptr %dst) nounwind {
; CHECK-LABEL: extract_v16i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpextrb $1, %xmm0, %eax
; CHECK-NEXT:    vpextrb $3, %xmm0, (%rdi)
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, ptr %dst, align 1
  ret i8 %r1
}

; Inserts an i64 loaded from %ptr into lane 1 and the scalar %y into lane 3 of an <8 x i64>.
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , ptr %ptr) nounwind {
; KNL-LABEL: insert_v8i64:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    movb $8, %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i64:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    movb $8, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
; SKX-NEXT:    retq
  %val = load i64, ptr %ptr
  %r1 = insertelement <8 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
  ret <8 x i64> %r2
}

; Inserts an i64 loaded from %ptr into lane 1 and the scalar %y into lane 3 of a <4 x i64>.
define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , ptr %ptr) nounwind {
; KNL-LABEL: insert_v4i64:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vmovq %rdi, %xmm1
; KNL-NEXT:    vpbroadcastq %xmm1, %ymm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v4i64:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vpbroadcastq %rdi, %ymm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; SKX-NEXT:    retq
  %val = load i64, ptr %ptr
  %r1 = insertelement <4 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %y, i32 3
  ret <4 x i64> %r2
}

; Inserts an i64 loaded from %ptr into lane 1 and the scalar %y into lane 0 of a <2 x i64>.
define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y , ptr %ptr) nounwind {
; CHECK-LABEL: insert_v2i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    vmovq %rdi, %xmm1
; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    retq
  %val = load i64, ptr %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

; Inserts an i32 loaded from %ptr into lane 1 and the scalar %y into lane 5 of a <16 x i32>.
define <16 x i32> @insert_v16i32(<16 x i32> %x, i32 %y, ptr %ptr) nounwind {
; KNL-LABEL: insert_v16i32:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; KNL-NEXT:    movw $32, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i32:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    movw $32, %ax
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
; SKX-NEXT:    retq
  %val = load i32, ptr %ptr
  %r1 = insertelement <16 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <16 x i32> %r1, i32 %y, i32 5
  ret <16 x i32> %r2
}

; Inserts an i32 loaded from %ptr into lane 1 and the scalar %y into lane 5 of an <8 x i32>.
define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, ptr %ptr) nounwind {
; KNL-LABEL: insert_v8i32:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpbroadcastd %xmm1, %ymm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7]
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v8i32:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vpbroadcastd %edi, %ymm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6,7]
; SKX-NEXT:    retq
  %val = load i32, ptr %ptr
  %r1 = insertelement <8 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <8 x i32> %r1, i32 %y, i32 5
  ret <8 x i32> %r2
}

; Inserts an i32 loaded from %ptr into lane 1 and the scalar %y into lane 3 of a <4 x i32>.
define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, ptr %ptr) nounwind {
; CHECK-LABEL: insert_v4i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %val = load i32, ptr %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

; Inserts an i16 loaded from %ptr into lane 1 and the scalar %y into lane 9 of a <32 x i16>.
define <32 x i16> @insert_v32i16(<32 x i16> %x, i16 %y, ptr %ptr) nounwind {
; KNL-LABEL: insert_v32i16:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm1
; KNL-NEXT:    vmovd %edi, %xmm0
; KNL-NEXT:    vpbroadcastw %xmm0, %ymm0
; KNL-NEXT:    vpternlogq {{.*#+}} zmm0 = zmm0 ^ (mem & (zmm0 ^ zmm1))
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v32i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; SKX-NEXT:    movl $512, %eax ## imm = 0x200
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
; SKX-NEXT:    retq
  %val = load i16, ptr %ptr
  %r1 = insertelement <32 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <32 x i16> %r1, i16 %y, i32 9
  ret <32 x i16> %r2
}

; Inserts an i16 loaded from %ptr into lane 1 and the scalar %y into lane 9 of a <16 x i16>.
define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, ptr %ptr) nounwind {
; KNL-LABEL: insert_v16i16:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpbroadcastw %xmm1, %ymm1
; KNL-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; KNL-NEXT:    retq
;
; SKX-LABEL: insert_v16i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT:    vpbroadcastw %edi, %ymm1
; SKX-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3,4,5,6,7,8],ymm1[9],ymm0[10,11,12,13,14,15]
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; SKX-NEXT:    retq
  %val = load i16, ptr %ptr
  %r1 = insertelement <16 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <16 x i16> %r1, i16 %y, i32 9
  ret <16 x i16> %r2
}

; Inserts an i16 loaded from %ptr into lane 1 and the scalar %y into lane 5 of an <8 x i16>.
define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, ptr %ptr) nounwind {
; CHECK-LABEL: insert_v8i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpinsrw $1, (%rsi), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %val = load i16, ptr %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

; Inserts an i8 loaded from %ptr into lane 1 and the scalar %y into lane 50 of a <64 x i8>.
define <64 x i8> @insert_v64i8(<64 x i8> %x, i8 %y, ptr %ptr) nounwind {
; CHECK-LABEL: insert_v64i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; CHECK-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm1
; CHECK-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
; CHECK-NEXT:    vpinsrb $2, %edi, %xmm0, %xmm0
; CHECK-NEXT:    vinserti32x4 $3, %xmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %val = load i8, ptr %ptr
  %r1 = insertelement <64 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <64 x i8> %r1, i8 %y, i32 50
  ret <64 x i8> %r2
}

; Inserts an i8 loaded from %ptr into lane 1 and the scalar %y into lane 17 of a <32 x i8>.
define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, ptr %ptr) nounwind {
; CHECK-LABEL: insert_v32i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpinsrb $1, (%rsi), %xmm0, %xmm1
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0
; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %val = load i8, ptr %ptr
  %r1 = insertelement <32 x i8> %x, i8 %val, i32 1
  %r2 = insertelement <32 x i8> %r1, i8 %y, i32 17
  ret <32 x i8> %r2
}

; Inserts an i8 loaded from %ptr into lane 3 and the scalar %y into lane 10 of a <16 x i8>.
define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, ptr %ptr) nounwind {
; CHECK-LABEL: insert_v16i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpinsrb $3, (%rsi), %xmm0, %xmm0
; CHECK-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %val = load i8, ptr %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

; Inserts the scalar %y into lane 1 of an <8 x i64>.
define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) nounwind {
; CHECK-LABEL: test_insert_128_v8i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; CHECK-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %r = insertelement <8 x i64> %x, i64 %y, i32 1
  ret <8 x i64> %r
}

; Inserts the scalar %y into lane 1 of a <16 x i32>.
define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) nounwind {
; CHECK-LABEL: test_insert_128_v16i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm1
; CHECK-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %r = insertelement <16 x i32> %x, i32 %y, i32 1
  ret <16 x i32> %r
}

; Inserts the scalar %y into lane 1 of an <8 x double>.
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) nounwind {
; CHECK-LABEL: test_insert_128_v8f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0]
; CHECK-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %r = insertelement <8 x double> %x, double %y, i32 1
  ret <8 x double> %r
}

; Inserts the scalar %y into lane 1 of a <16 x float>.
define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) nounwind {
; CHECK-LABEL: test_insert_128_v16f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %r = insertelement <16 x float> %x, float %y, i32 1
  ret <16 x float> %r
}

; Inserts the scalar %y into lane 10 of a <16 x i16>.
define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) nounwind {
; KNL-LABEL: test_insert_128_v16i16:
; KNL:       ## %bb.0:
; KNL-NEXT:    vmovd %edi, %xmm1
; KNL-NEXT:    vpbroadcastw %xmm1, %ymm1
; KNL-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
; KNL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpbroadcastw %edi, %ymm1
; SKX-NEXT:    vpblendw {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3,4,5,6,7,8,9],ymm1[10],ymm0[11,12,13,14,15]
; SKX-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; SKX-NEXT:    retq
  %r = insertelement <16 x i16> %x, i16 %y, i32 10
  ret <16 x i16> %r
}

; Inserts the scalar %y into lane 20 of a <32 x i8>.
define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) nounwind {
; CHECK-LABEL: test_insert_128_v32i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vpinsrb $4, %edi, %xmm1, %xmm1
; CHECK-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %r = insertelement <32 x i8> %x, i8 %y, i32 20
  ret <32 x i8> %r
}

; Inserts a scalar compare result into lane 4 of a <32 x i1> vector compare and returns the mask as an i32.
define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> %y) nounwind {
; KNL-LABEL: test_insertelement_v32i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    setb %al
; KNL-NEXT:    vpcmpltud %zmm3, %zmm1, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    shll $16, %ecx
; KNL-NEXT:    movw $-17, %dx
; KNL-NEXT:    kmovw %edx, %k1
; KNL-NEXT:    vpcmpltud %zmm2, %zmm0, %k0 {%k1}
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $11, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    orl %ecx, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insertelement_v32i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    setb %al
; SKX-NEXT:    vpcmpltud %zmm2, %zmm0, %k0
; SKX-NEXT:    vpcmpltud %zmm3, %zmm1, %k1
; SKX-NEXT:    kunpckwd %k0, %k1, %k0
; SKX-NEXT:    movl $-17, %ecx
; SKX-NEXT:    kmovd %ecx, %k1
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $27, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %cmp_res_i1 = icmp ult i32 %a, %b
  %cmp_cmp_vec = icmp ult <32 x i32> %x, %y
  %maskv = insertelement <32 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 4
  %res = bitcast <32 x i1> %maskv to i32
  ret i32 %res
}

; Inserts a scalar compare result into lane 2 of a <4 x i1> vector compare, widens to <8 x i1> and returns it as an i8.
define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) nounwind {
; KNL-LABEL: test_iinsertelement_v4i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    setb %al
; KNL-NEXT:    movw $-5, %cx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0 {%k1}
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $13, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_iinsertelement_v4i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    setb %al
; SKX-NEXT:    movb $-5, %cl
; SKX-NEXT:    kmovd %ecx, %k1
; SKX-NEXT:    vpcmpltud %xmm1, %xmm0, %k0 {%k1}
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
  %cmp_res_i1 = icmp ult i32 %a, %b
  %cmp_cmp_vec = icmp ult <4 x i32> %x, %y
  %maskv = insertelement <4 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 2
  %res0 = shufflevector <4 x i1> %maskv, <4 x i1> undef , <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
  %res = bitcast <8 x i1> %res0 to i8
  ret i8 %res
}

; Inserts a scalar compare result into lane 1 of a <2 x i1> vector compare, widens to <8 x i1> and returns it as an i8.
define i8 @test_iinsertelement_v2i1(i32 %a, i32 %b, <2 x i64> %x , <2 x i64> %y) nounwind {
; KNL-LABEL: test_iinsertelement_v2i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    setb %al
; KNL-NEXT:    vpcmpltuq %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $1, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_iinsertelement_v2i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    setb %al
; SKX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k0
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $7, %k0, %k0
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    kshiftlb $1, %k1, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
  %cmp_res_i1 = icmp ult i32 %a, %b
  %cmp_cmp_vec = icmp ult <2 x i64> %x, %y
  %maskv = insertelement <2 x i1> %cmp_cmp_vec, i1 %cmp_res_i1, i32 1
  %res0 = shufflevector <2 x i1> %maskv, <2 x i1> undef , <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  %res = bitcast <8 x i1> %res0 to i8
  ret i8 %res
}

; Returns 3 or 4 depending on lane 0 of an unsigned-greater-than compare of two <2 x i64> vectors.
define zeroext i8 @test_extractelement_v2i1(<2 x i64> %a, <2 x i64> %b) nounwind {
; KNL-LABEL: test_extractelement_v2i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    andl $1, %ecx
; KNL-NEXT:    movl $4, %eax
; KNL-NEXT:    subl %ecx, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_v2i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT:    kmovd %k0, %ecx
; SKX-NEXT:    andl $1, %ecx
; SKX-NEXT:    movl $4, %eax
; SKX-NEXT:    subl %ecx, %eax
; SKX-NEXT:    retq
  %t1 = icmp ugt <2 x i64> %a, %b
  %t2 = extractelement <2 x i1> %t1, i32 0
  %res = select i1 %t2, i8 3, i8 4
  ret i8 %res
}

; Sign-extends lane 0 of an unsigned-greater-than compare of two <2 x i64> vectors and adds 4.
define zeroext i8 @extractelement_v2i1_alt(<2 x i64> %a, <2 x i64> %b) nounwind {
; KNL-LABEL: extractelement_v2i1_alt:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andb $1, %al
; KNL-NEXT:    movb $4, %cl
; KNL-NEXT:    subb %al, %cl
; KNL-NEXT:    movzbl %cl, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: extractelement_v2i1_alt:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andb $1, %al
; SKX-NEXT:    movb $4, %cl
; SKX-NEXT:    subb %al, %cl
; SKX-NEXT:    movzbl %cl, %eax
; SKX-NEXT:    retq
  %t1 = icmp ugt <2 x i64> %a, %b
  %t2 = extractelement <2 x i1> %t1, i32 0
  %sext = sext i1 %t2 to i8
  %res = add i8 %sext, 4
  ret i8 %res
}

; Zero-extends lane 3 of an unsigned-greater-than compare of two <4 x i32> vectors.
define zeroext i8 @test_extractelement_v4i1(<4 x i32> %a, <4 x i32> %b) nounwind {
; KNL-LABEL: test_extractelement_v4i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftrw $3, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_v4i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
; SKX-NEXT:    kshiftrb $3, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    retq
  %t1 = icmp ugt <4 x i32> %a, %b
  %t2 = extractelement <4 x i1> %t1, i32 3
  %res = zext i1 %t2 to i8
  ret i8 %res
}

define zeroext i8 @test_extractelement_v32i1(<32 x i8> %a, <32 x i8> %b) nounwind {
; KNL-LABEL: test_extractelement_v32i1:
; 
; KNL:       ## %bb.0:
; KNL-NEXT:    vpminub %xmm1, %xmm0, %xmm1
; KNL-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; KNL-NEXT:    vpextrb $2, %xmm0, %eax
; KNL-NEXT:    notb %al
; KNL-NEXT:    movzbl %al, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_v32i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT:    kshiftrd $2, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <32 x i8> %a, %b
  %t2 = extractelement <32 x i1> %t1, i32 2
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Constant-index extract of the last lane (63) of a <64 x i1> compare; the bit
; selects between the constants 3 and 4.
define zeroext i8 @test_extractelement_v64i1(<64 x i8> %a, <64 x i8> %b) nounwind {
; KNL-LABEL: test_extractelement_v64i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT:    vpminub %ymm1, %ymm0, %ymm1
; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $15, %xmm0, %eax
; KNL-NEXT:    notb %al
; KNL-NEXT:    movzbl %al, %ecx
; KNL-NEXT:    andl $1, %ecx
; KNL-NEXT:    movl $4, %eax
; KNL-NEXT:    subl %ecx, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_v64i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrq $63, %k0, %k0
; SKX-NEXT:    kmovd %k0, %ecx
; SKX-NEXT:    andl $1, %ecx
; SKX-NEXT:    movl $4, %eax
; SKX-NEXT:    subl %ecx, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <64 x i8> %a, %b
  %t2 = extractelement <64 x i1> %t1, i32 63
  %res = select i1 %t2, i8 3, i8 4
  ret i8 %res
}

; Same lane-63 extract as above, but the result is built as sext(bit) + 4
; rather than with a select.
define zeroext i8 @extractelement_v64i1_alt(<64 x i8> %a, <64 x i8> %b) nounwind {
; KNL-LABEL: extractelement_v64i1_alt:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT:    vpminub %ymm1, %ymm0, %ymm1
; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpextrb $15, %xmm0, %eax
; KNL-NEXT:    notb %al
; KNL-NEXT:    addb $4, %al
; KNL-NEXT:    movzbl %al, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: extractelement_v64i1_alt:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrq $63, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andb $1, %al
; SKX-NEXT:    movb $4, %cl
; SKX-NEXT:    subb %al, %cl
; SKX-NEXT:    movzbl %cl, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <64 x i8> %a, %b
  %t2 = extractelement <64 x i1> %t1, i32 63
  %sext = sext i1 %t2 to i8
  %res = add i8 %sext, 4
  ret i8 %res
}

; Variable-index extractelement from <2 x i64>.
define i64 @test_extractelement_variable_v2i64(<2 x i64> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v2i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movq -24(%rsp,%rdi,8), %rax
; CHECK-NEXT:    retq
  %t2 = extractelement <2 x i64> %t1, i32 %index
  ret i64 %t2
}

; Variable-index extractelement from <4 x i64>.
define i64 @test_extractelement_variable_v4i64(<4 x i64> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v4i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $64, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
; CHECK-NEXT:    andl $3, %edi
; CHECK-NEXT:    movq (%rsp,%rdi,8), %rax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <4 x i64> %t1, i32 %index
  ret i64 %t2
}

; Variable-index extractelement from <8 x i64>.
define i64 @test_extractelement_variable_v8i64(<8 x i64> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v8i64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $7, %edi
; CHECK-NEXT:    movq (%rsp,%rdi,8), %rax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <8 x i64> %t1, i32 %index
  ret i64 %t2
}

; Variable-index extractelement from <2 x double>.
define double @test_extractelement_variable_v2f64(<2 x double> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v2f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    retq
  %t2 = extractelement <2 x double> %t1, i32 %index
  ret double %t2
}

; Variable-index extractelement from <4 x double>.
define double @test_extractelement_variable_v4f64(<4 x double> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v4f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $64, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
; CHECK-NEXT:    andl $3, %edi
; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <4 x double> %t1, i32 %index
  ret double %t2
}

; Variable-index extractelement from <8 x double>.
define double @test_extractelement_variable_v8f64(<8 x double> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v8f64:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $7, %edi
; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <8 x double> %t1, i32 %index
  ret double %t2
}

; Variable-index extractelement from <4 x i32>.
define i32 @test_extractelement_variable_v4i32(<4 x i32> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v4i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    andl $3, %edi
; CHECK-NEXT:    movl -24(%rsp,%rdi,4), %eax
; CHECK-NEXT:    retq
  %t2 = extractelement <4 x i32> %t1, i32 %index
  ret i32 %t2
}

; Variable-index extractelement from <8 x i32>.
define i32 @test_extractelement_variable_v8i32(<8 x i32> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v8i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $64, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
; CHECK-NEXT:    andl $7, %edi
; CHECK-NEXT:    movl (%rsp,%rdi,4), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <8 x i32> %t1, i32 %index
  ret i32 %t2
}

; Variable-index extractelement from <16 x i32>.
define i32 @test_extractelement_variable_v16i32(<16 x i32> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v16i32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $15, %edi
; CHECK-NEXT:    movl (%rsp,%rdi,4), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <16 x i32> %t1, i32 %index
  ret i32 %t2
}

; Variable-index extractelement from <4 x float>.
define float @test_extractelement_variable_v4f32(<4 x float> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v4f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    andl $3, %edi
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    retq
  %t2 = extractelement <4 x float> %t1, i32 %index
  ret float %t2
}

; Variable-index extractelement from <8 x float>.
define float @test_extractelement_variable_v8f32(<8 x float> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v8f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $64, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
; CHECK-NEXT:    andl $7, %edi
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <8 x float> %t1, i32 %index
  ret float %t2
}

; Variable-index extractelement from <16 x float>.
define float @test_extractelement_variable_v16f32(<16 x float> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v16f32:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $15, %edi
; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <16 x float> %t1, i32 %index
  ret float %t2
}

; Variable-index extractelement from <8 x i16>.
define i16 @test_extractelement_variable_v8i16(<8 x i16> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v8i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    andl $7, %edi
; CHECK-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
; CHECK-NEXT:    retq
  %t2 = extractelement <8 x i16> %t1, i32 %index
  ret i16 %t2
}

; Variable-index extractelement from <16 x i16>.
define i16 @test_extractelement_variable_v16i16(<16 x i16> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v16i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $64, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
; CHECK-NEXT:    andl $15, %edi
; CHECK-NEXT:    movzwl (%rsp,%rdi,2), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <16 x i16> %t1, i32 %index
  ret i16 %t2
}

; Variable-index extractelement from <32 x i16>.
define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v32i16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $31, %edi
; CHECK-NEXT:    movzwl (%rsp,%rdi,2), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %t2 = extractelement <32 x i16> %t1, i32 %index
  ret i16 %t2
}

; Variable-index extractelement from <16 x i8>.
define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v16i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    andl $15, %edi
; CHECK-NEXT:    movzbl -24(%rsp,%rdi), %eax
; CHECK-NEXT:    retq
  %t2 = extractelement <16 x i8> %t1, i32 %index
  ret i8 %t2
}

; Variable-index extractelement from <32 x i8>.
define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v32i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $64, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
; CHECK-NEXT:    andl $31, %edi
; CHECK-NEXT:    movzbl (%rsp,%rdi), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq

  %t2 = extractelement <32 x i8> %t1, i32 %index
  ret i8 %t2
}

; Variable-index extractelement from <64 x i8>.
define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v64i8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    andl $63, %edi
; CHECK-NEXT:    movzbl (%rsp,%rdi), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq

  %t2 = extractelement <64 x i8> %t1, i32 %index
  ret i8 %t2
}

; Variable-index extractelement from <64 x i8> where the index is an i8 value
; computed in the function (%index + %index) instead of a plain i32 argument.
define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index) nounwind {
; CHECK-LABEL: test_extractelement_variable_v64i8_indexi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    andq $-64, %rsp
; CHECK-NEXT:    subq $128, %rsp
; CHECK-NEXT:    addb %dil, %dil
; CHECK-NEXT:    vmovaps %zmm0, (%rsp)
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    andl $63, %eax
; CHECK-NEXT:    movzbl (%rsp,%rax), %eax
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq

  %i = add i8 %index, %index
  %t2 = extractelement <64 x i8> %t1, i8 %i
  ret i8 %t2
}

; Variable-index extract from the <2 x i1> result of an unsigned-greater-than
; compare, zero-extended to i8.
define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) nounwind {
; KNL-LABEL: test_extractelement_varible_v2i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $edi killed $edi def $rdi
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
; KNL-NEXT:    vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_varible_v2i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $edi killed $edi def $rdi
; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT:    andl $1, %edi
; SKX-NEXT:    movzbl -24(%rsp,%rdi,8), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    retq
  %t1 = icmp ugt <2 x i64> %a, %b
  %t2 = extractelement <2 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Variable-index extract from the <4 x i1> result of an unsigned-greater-than
; compare, zero-extended to i8.
define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) nounwind {
; KNL-LABEL: test_extractelement_varible_v4i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $edi killed $edi def $rdi
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1
; KNL-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT:    andl $3, %edi
; KNL-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_varible_v4i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $edi killed $edi def $rdi
; SKX-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT:    andl $3, %edi
; SKX-NEXT:    movzbl -24(%rsp,%rdi,4), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    retq
  %t1 = icmp ugt <4 x i32> %a, %b
  %t2 = extractelement <4 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Variable-index extract from the <8 x i1> result of an unsigned-greater-than
; compare, zero-extended to i8.
define zeroext i8 @test_extractelement_varible_v8i1(<8 x i32> %a, <8 x i32> %b, i32 %index) nounwind {
; KNL-LABEL: test_extractelement_varible_v8i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $edi killed $edi def $rdi
; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1
; KNL-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT:    andl $7, %edi
; KNL-NEXT:    movzbl -24(%rsp,%rdi,2), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_varible_v8i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $edi killed $edi def $rdi
; SKX-NEXT:    vpcmpnleud %ymm1, %ymm0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT:    andl $7, %edi
; SKX-NEXT:    movzbl -24(%rsp,%rdi,2), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <8 x i32> %a, %b
  %t2 = extractelement <8 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Variable-index extract from the <16 x i1> result of an unsigned-greater-than
; compare, zero-extended to i8.
define zeroext i8 @test_extractelement_varible_v16i1(<16 x i32> %a, <16 x i32> %b, i32 %index) nounwind {
; KNL-LABEL: test_extractelement_varible_v16i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $edi killed $edi def $rdi
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1
; KNL-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; KNL-NEXT:    vpmovdb %zmm0, -{{[0-9]+}}(%rsp)
; KNL-NEXT:    andl $15, %edi
; KNL-NEXT:    movzbl -24(%rsp,%rdi), %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_varible_v16i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    ## kill: def $edi killed $edi def $rdi
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; SKX-NEXT:    andl $15, %edi
; SKX-NEXT:    movzbl -24(%rsp,%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <16 x i32> %a, %b
  %t2 = extractelement <16 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Variable-index extract from the <32 x i1> result of an unsigned-greater-than
; compare on bytes, zero-extended to i8.
define zeroext i8 @test_extractelement_varible_v32i1(<32 x i8> %a, <32 x i8> %b, i32 %index) nounwind {
; KNL-LABEL: test_extractelement_varible_v32i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpminub %ymm1, %ymm0, %ymm1
; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovmskb %ymm0, %ecx
; KNL-NEXT:    xorl %eax, %eax
; KNL-NEXT:    btl %edi, %ecx
; KNL-NEXT:    setae %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_varible_v32i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rbp
; SKX-NEXT:    movq %rsp, %rbp
; SKX-NEXT:    andq $-32, %rsp
; SKX-NEXT:    subq $64, %rsp
; SKX-NEXT:    ## kill: def $edi killed $edi def $rdi
; SKX-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %ymm0
; SKX-NEXT:    vmovdqa %ymm0, (%rsp)
; SKX-NEXT:    andl $31, %edi
; SKX-NEXT:    movzbl (%rsp,%rdi), %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    movq %rbp, %rsp
; SKX-NEXT:    popq %rbp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <32 x i8> %a, %b
  %t2 = extractelement <32 x i1> %t1, i32 %index
  %res = zext i1 %t2 to i8
  ret i8 %res
}

; Builds <8 x i64> = [0, 0, 0, 0, a0, a1, 0, 0] from a <2 x i64> through a
; chain of three shuffles.
define <8 x i64> @insert_double_zero(<2 x i64> %a) nounwind {
; CHECK-LABEL: insert_double_zero:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf32x4 $2, %xmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %b = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; Widen to 8 lanes. Lanes 4-7 come from the second operand and are never read
  ; by %e below (its mask only uses indices 0-3 of %d), so the padding operand
  ; is poison rather than the discouraged undef.
  %d = shufflevector <4 x i64> %b, <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %e = shufflevector <8 x i64> %d, <8 x i64> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i64> %e
}

; Variable-index insertelement into a <32 x i1> mask (bytes of %a compared
; unsigned-greater-than zero); the inserted bit is (%b u> 0) and the mask is
; returned bitcast to i32.
define i32 @test_insertelement_variable_v32i1(<32 x i8> %a, i8 %b, i32 %index) nounwind {
; KNL-LABEL: test_insertelement_variable_v32i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $64, %rsp
; KNL-NEXT:    ## kill: def $esi killed $esi def $rsi
; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
; KNL-NEXT:    andl $31, %esi
; KNL-NEXT:    testb %dil, %dil
; KNL-NEXT:    vmovdqa %ymm0, (%rsp)
; KNL-NEXT:    setne (%rsp,%rsi)
; KNL-NEXT:    vpmovsxbd (%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    shll $16, %eax
; KNL-NEXT:    orl %ecx, %eax
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insertelement_variable_v32i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestmb %ymm0, %ymm0, %k0
; SKX-NEXT:    testb %dil, %dil
; SKX-NEXT:    setne %al
; SKX-NEXT:    vpbroadcastb %esi, %ymm0
; SKX-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %k1
; SKX-NEXT:    vpmovm2b %k0, %ymm0
; SKX-NEXT:    vpbroadcastb %eax, %ymm0 {%k1}
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <32 x i8> %a, zeroinitializer
  %t2 = icmp ugt i8 %b, 0
  %t3 = insertelement <32 x i1> %t1, i1 %t2, i32 %index
  %t4 = bitcast <32 x i1> %t3 to i32
  ret i32 %t4
}

; Variable-index insertelement into a <64 x i1> mask (bytes of %a compared
; unsigned-greater-than zero); the inserted bit is (%b u> 0) and the mask is
; returned bitcast to i64.
define i64 @test_insertelement_variable_v64i1(<64 x i8> %a, i8 %b, i32 %index) nounwind {
; KNL-LABEL: test_insertelement_variable_v64i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:    andq $-64, %rsp
; KNL-NEXT:    subq $128, %rsp
; KNL-NEXT:    ## kill: def $esi killed $esi def $rsi
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; KNL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; KNL-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
; KNL-NEXT:    andl $63, %esi
; KNL-NEXT:    testb %dil, %dil
; KNL-NEXT:    vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT:    setne (%rsp,%rsi)
; KNL-NEXT:    vpmovsxbd (%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    shll $16, %ecx
; KNL-NEXT:    orl %eax, %ecx
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %edx
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    shll $16, %eax
; KNL-NEXT:    orl %edx, %eax
; KNL-NEXT:    shlq $32, %rax
; KNL-NEXT:    orq %rcx, %rax
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insertelement_variable_v64i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestmb %zmm0, %zmm0, %k0
; SKX-NEXT:    testb %dil, %dil
; SKX-NEXT:    setne %al
; SKX-NEXT:    vpbroadcastb %esi, %zmm0
; SKX-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k1
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    vpbroadcastb %eax, %zmm0 {%k1}
; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k0
; SKX-NEXT:    kmovq %k0, %rax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <64 x i8> %a, zeroinitializer
  %t2 = icmp ugt i8 %b, 0
  %t3 = insertelement <64 x i1> %t1, i1 %t2, i32 %index
  %t4 = bitcast <64 x i1> %t3 to i64
  ret i64 %t4
}

; Variable-index insertelement into a mask derived from a <96 x i8> argument,
; i.e. a vector width that is not a power of two.
define i96 @test_insertelement_variable_v96i1(<96 x i8> %a, i8 %b, i32 %index) nounwind {
; KNL-LABEL: test_insertelement_variable_v96i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:    andq $-64, %rsp
; KNL-NEXT:    subq $192, %rsp
; KNL-NEXT:    movl 744(%rbp), %eax
; KNL-NEXT:    andl $127, %eax
; KNL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL-NEXT:    vpinsrb $1, 232(%rbp), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $2, 240(%rbp), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $3, 248(%rbp), %xmm0, %xmm0
; KNL-NEXT:    vpinsrb $4, 256(%rbp), %xmm0, %xmm0
KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0 1766; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0 1767; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0 1768; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0 1769; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0 1770; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0 1771; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0 1772; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0 1773; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0 1774; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0 1775; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0 1776; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1777; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1 1778; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1 1779; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1 1780; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1 1781; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1 1782; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1 1783; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1 1784; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1 1785; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1 1786; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1 1787; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1 1788; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1 1789; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1 1790; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1 1791; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1 1792; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 1793; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 1794; KNL-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm1 1795; KNL-NEXT: vmovd %edi, %xmm2 1796; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 1797; KNL-NEXT: vpinsrb $2, %edx, %xmm2, %xmm2 1798; KNL-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2 1799; KNL-NEXT: vpinsrb $4, %r8d, %xmm2, %xmm2 1800; KNL-NEXT: vpinsrb $5, %r9d, %xmm2, %xmm2 1801; KNL-NEXT: vpinsrb $6, 16(%rbp), %xmm2, %xmm2 1802; KNL-NEXT: vpinsrb $7, 24(%rbp), %xmm2, %xmm2 1803; KNL-NEXT: vpinsrb $8, 32(%rbp), %xmm2, %xmm2 1804; KNL-NEXT: 
vpinsrb $9, 40(%rbp), %xmm2, %xmm2 1805; KNL-NEXT: vpinsrb $10, 48(%rbp), %xmm2, %xmm2 1806; KNL-NEXT: vpinsrb $11, 56(%rbp), %xmm2, %xmm2 1807; KNL-NEXT: vpinsrb $12, 64(%rbp), %xmm2, %xmm2 1808; KNL-NEXT: vpinsrb $13, 72(%rbp), %xmm2, %xmm2 1809; KNL-NEXT: vpinsrb $14, 80(%rbp), %xmm2, %xmm2 1810; KNL-NEXT: vpinsrb $15, 88(%rbp), %xmm2, %xmm2 1811; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1812; KNL-NEXT: vpinsrb $1, 104(%rbp), %xmm3, %xmm3 1813; KNL-NEXT: vpinsrb $2, 112(%rbp), %xmm3, %xmm3 1814; KNL-NEXT: vpinsrb $3, 120(%rbp), %xmm3, %xmm3 1815; KNL-NEXT: vpinsrb $4, 128(%rbp), %xmm3, %xmm3 1816; KNL-NEXT: vpinsrb $5, 136(%rbp), %xmm3, %xmm3 1817; KNL-NEXT: vpinsrb $6, 144(%rbp), %xmm3, %xmm3 1818; KNL-NEXT: vpinsrb $7, 152(%rbp), %xmm3, %xmm3 1819; KNL-NEXT: vpinsrb $8, 160(%rbp), %xmm3, %xmm3 1820; KNL-NEXT: vpinsrb $9, 168(%rbp), %xmm3, %xmm3 1821; KNL-NEXT: vpinsrb $10, 176(%rbp), %xmm3, %xmm3 1822; KNL-NEXT: vpinsrb $11, 184(%rbp), %xmm3, %xmm3 1823; KNL-NEXT: vpinsrb $12, 192(%rbp), %xmm3, %xmm3 1824; KNL-NEXT: vpinsrb $13, 200(%rbp), %xmm3, %xmm3 1825; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3 1826; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3 1827; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 1828; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm2 1829; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1 1830; KNL-NEXT: vpternlogq {{.*#+}} zmm1 = ~zmm1 1831; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1832; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm2, %xmm2 1833; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm2, %xmm2 1834; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm2, %xmm2 1835; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm2, %xmm2 1836; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm2, %xmm2 1837; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm2, %xmm2 1838; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm2, %xmm2 1839; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm2, %xmm2 1840; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm2, %xmm2 1841; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm2, %xmm2 1842; KNL-NEXT: vpinsrb 
$11, 568(%rbp), %xmm2, %xmm2 1843; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm2, %xmm2 1844; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm2, %xmm2 1845; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm2, %xmm2 1846; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm2, %xmm2 1847; KNL-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero 1848; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm3, %xmm3 1849; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm3, %xmm3 1850; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm3, %xmm3 1851; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm3, %xmm3 1852; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm3, %xmm3 1853; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm3, %xmm3 1854; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm3, %xmm3 1855; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm3, %xmm3 1856; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm3, %xmm3 1857; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm3, %xmm3 1858; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm3, %xmm3 1859; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm3, %xmm3 1860; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm3, %xmm3 1861; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm3, %xmm3 1862; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm3, %xmm3 1863; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 1864; KNL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm2 1865; KNL-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm2 1866; KNL-NEXT: cmpb $0, 736(%rbp) 1867; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) 1868; KNL-NEXT: vmovdqa %ymm2, {{[0-9]+}}(%rsp) 1869; KNL-NEXT: vmovdqa64 %zmm1, (%rsp) 1870; KNL-NEXT: setne (%rsp,%rax) 1871; KNL-NEXT: vpmovsxbd (%rsp), %zmm0 1872; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1873; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1874; KNL-NEXT: kmovw %k0, %eax 1875; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1876; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1877; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1878; KNL-NEXT: kmovw %k0, %ecx 1879; KNL-NEXT: shll $16, %ecx 1880; KNL-NEXT: orl %eax, %ecx 1881; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1882; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1883; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1884; KNL-NEXT: kmovw %k0, %edx 1885; 
KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1886; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1887; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1888; KNL-NEXT: kmovw %k0, %eax 1889; KNL-NEXT: shll $16, %eax 1890; KNL-NEXT: orl %edx, %eax 1891; KNL-NEXT: shlq $32, %rax 1892; KNL-NEXT: orq %rcx, %rax 1893; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1894; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1895; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1896; KNL-NEXT: kmovw %k0, %ecx 1897; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1898; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1899; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1900; KNL-NEXT: kmovw %k0, %esi 1901; KNL-NEXT: shll $16, %esi 1902; KNL-NEXT: orl %ecx, %esi 1903; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1904; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1905; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1906; KNL-NEXT: kmovw %k0, %ecx 1907; KNL-NEXT: vpmovsxbd {{[0-9]+}}(%rsp), %zmm0 1908; KNL-NEXT: vpslld $31, %zmm0, %zmm0 1909; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 1910; KNL-NEXT: kmovw %k0, %edx 1911; KNL-NEXT: shll $16, %edx 1912; KNL-NEXT: orl %ecx, %edx 1913; KNL-NEXT: shlq $32, %rdx 1914; KNL-NEXT: orq %rsi, %rdx 1915; KNL-NEXT: movq %rbp, %rsp 1916; KNL-NEXT: popq %rbp 1917; KNL-NEXT: vzeroupper 1918; KNL-NEXT: retq 1919; 1920; SKX-LABEL: test_insertelement_variable_v96i1: 1921; SKX: ## %bb.0: 1922; SKX-NEXT: pushq %rbp 1923; SKX-NEXT: movq %rsp, %rbp 1924; SKX-NEXT: andq $-64, %rsp 1925; SKX-NEXT: subq $192, %rsp 1926; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1927; SKX-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0 1928; SKX-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0 1929; SKX-NEXT: vpinsrb $3, 248(%rbp), %xmm0, %xmm0 1930; SKX-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0 1931; SKX-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0 1932; SKX-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0 1933; SKX-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0 1934; SKX-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0 1935; SKX-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0 1936; SKX-NEXT: vpinsrb $10, 
304(%rbp), %xmm0, %xmm0 1937; SKX-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0 1938; SKX-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0 1939; SKX-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0 1940; SKX-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0 1941; SKX-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0 1942; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1943; SKX-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1 1944; SKX-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1 1945; SKX-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1 1946; SKX-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1 1947; SKX-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1 1948; SKX-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1 1949; SKX-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1 1950; SKX-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1 1951; SKX-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1 1952; SKX-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1 1953; SKX-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1 1954; SKX-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1 1955; SKX-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1 1956; SKX-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1 1957; SKX-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1 1958; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 1959; SKX-NEXT: vmovd %edi, %xmm1 1960; SKX-NEXT: vpinsrb $1, %esi, %xmm1, %xmm1 1961; SKX-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 1962; SKX-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 1963; SKX-NEXT: vpinsrb $4, %r8d, %xmm1, %xmm1 1964; SKX-NEXT: vpinsrb $5, %r9d, %xmm1, %xmm1 1965; SKX-NEXT: vpinsrb $6, 16(%rbp), %xmm1, %xmm1 1966; SKX-NEXT: vpinsrb $7, 24(%rbp), %xmm1, %xmm1 1967; SKX-NEXT: vpinsrb $8, 32(%rbp), %xmm1, %xmm1 1968; SKX-NEXT: vpinsrb $9, 40(%rbp), %xmm1, %xmm1 1969; SKX-NEXT: vpinsrb $10, 48(%rbp), %xmm1, %xmm1 1970; SKX-NEXT: vpinsrb $11, 56(%rbp), %xmm1, %xmm1 1971; SKX-NEXT: vpinsrb $12, 64(%rbp), %xmm1, %xmm1 1972; SKX-NEXT: vpinsrb $13, 72(%rbp), %xmm1, %xmm1 1973; SKX-NEXT: vpinsrb $14, 80(%rbp), %xmm1, %xmm1 1974; SKX-NEXT: vpinsrb $15, 88(%rbp), %xmm1, %xmm1 1975; SKX-NEXT: vmovd 
{{.*#+}} xmm2 = mem[0],zero,zero,zero 1976; SKX-NEXT: vpinsrb $1, 104(%rbp), %xmm2, %xmm2 1977; SKX-NEXT: vpinsrb $2, 112(%rbp), %xmm2, %xmm2 1978; SKX-NEXT: vpinsrb $3, 120(%rbp), %xmm2, %xmm2 1979; SKX-NEXT: vpinsrb $4, 128(%rbp), %xmm2, %xmm2 1980; SKX-NEXT: vpinsrb $5, 136(%rbp), %xmm2, %xmm2 1981; SKX-NEXT: vpinsrb $6, 144(%rbp), %xmm2, %xmm2 1982; SKX-NEXT: vpinsrb $7, 152(%rbp), %xmm2, %xmm2 1983; SKX-NEXT: vpinsrb $8, 160(%rbp), %xmm2, %xmm2 1984; SKX-NEXT: vpinsrb $9, 168(%rbp), %xmm2, %xmm2 1985; SKX-NEXT: vpinsrb $10, 176(%rbp), %xmm2, %xmm2 1986; SKX-NEXT: vpinsrb $11, 184(%rbp), %xmm2, %xmm2 1987; SKX-NEXT: vpinsrb $12, 192(%rbp), %xmm2, %xmm2 1988; SKX-NEXT: vpinsrb $13, 200(%rbp), %xmm2, %xmm2 1989; SKX-NEXT: vpinsrb $14, 208(%rbp), %xmm2, %xmm2 1990; SKX-NEXT: vpinsrb $15, 216(%rbp), %xmm2, %xmm2 1991; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 1992; SKX-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 1993; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1994; SKX-NEXT: vpinsrb $1, 488(%rbp), %xmm1, %xmm1 1995; SKX-NEXT: vpinsrb $2, 496(%rbp), %xmm1, %xmm1 1996; SKX-NEXT: vpinsrb $3, 504(%rbp), %xmm1, %xmm1 1997; SKX-NEXT: vpinsrb $4, 512(%rbp), %xmm1, %xmm1 1998; SKX-NEXT: vpinsrb $5, 520(%rbp), %xmm1, %xmm1 1999; SKX-NEXT: vpinsrb $6, 528(%rbp), %xmm1, %xmm1 2000; SKX-NEXT: vpinsrb $7, 536(%rbp), %xmm1, %xmm1 2001; SKX-NEXT: vpinsrb $8, 544(%rbp), %xmm1, %xmm1 2002; SKX-NEXT: vpinsrb $9, 552(%rbp), %xmm1, %xmm1 2003; SKX-NEXT: vpinsrb $10, 560(%rbp), %xmm1, %xmm1 2004; SKX-NEXT: vpinsrb $11, 568(%rbp), %xmm1, %xmm1 2005; SKX-NEXT: vpinsrb $12, 576(%rbp), %xmm1, %xmm1 2006; SKX-NEXT: vpinsrb $13, 584(%rbp), %xmm1, %xmm1 2007; SKX-NEXT: vpinsrb $14, 592(%rbp), %xmm1, %xmm1 2008; SKX-NEXT: vpinsrb $15, 600(%rbp), %xmm1, %xmm1 2009; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero 2010; SKX-NEXT: vpinsrb $1, 616(%rbp), %xmm2, %xmm2 2011; SKX-NEXT: vpinsrb $2, 624(%rbp), %xmm2, %xmm2 2012; SKX-NEXT: vpinsrb $3, 632(%rbp), %xmm2, %xmm2 2013; 
SKX-NEXT:    vpinsrb $4, 640(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $5, 648(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $6, 656(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $7, 664(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $8, 672(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $9, 680(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $10, 688(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $11, 696(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $12, 704(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $13, 712(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $14, 720(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vpinsrb $15, 728(%rbp), %xmm2, %xmm2
; SKX-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; SKX-NEXT:    movl 744(%rbp), %eax
; SKX-NEXT:    andl $127, %eax
; SKX-NEXT:    vptestmb %zmm0, %zmm0, %k0
; SKX-NEXT:    vptestmb %zmm1, %zmm1, %k1
; SKX-NEXT:    cmpb $0, 736(%rbp)
; SKX-NEXT:    vpmovm2b %k1, %zmm0
; SKX-NEXT:    vmovdqa64 %zmm0, {{[0-9]+}}(%rsp)
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    vmovdqa64 %zmm0, (%rsp)
; SKX-NEXT:    setne (%rsp,%rax)
; SKX-NEXT:    vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k0
; SKX-NEXT:    vpsllw $7, (%rsp), %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k1
; SKX-NEXT:    kmovq %k1, %rax
; SKX-NEXT:    kmovq %k0, %rdx
; SKX-NEXT:    movq %rbp, %rsp
; SKX-NEXT:    popq %rbp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  %t1 = icmp ugt <96 x i8> %a, zeroinitializer
  %t2 = icmp ugt i8 %b, 0
  %t3 = insertelement <96 x i1> %t1, i1 %t2, i32 %index
  %t4 = bitcast <96 x i1> %t3 to i96
  ret i96 %t4
}

; Variable-index insertelement into a <128 x i1> mask, returned as an i128.
; The mask is the per-byte "non-zero" test of %a (unsigned compare against
; zero); the element selected by the run-time %index is overwritten with
; (%b != 0) before the mask is bitcast to an integer.
; In the expected output of both run configurations the mask is written to a
; 64-byte-aligned stack area as one byte per element, the index is masked with
; 127, the new element is stored with a single `setne` into that area, and the
; mask is then rebuilt from memory. The avx512f-only output rebuilds it in
; 16-element pieces (vpmovsxbd/vptestmd/kmovw) that are merged with shifts and
; ors; the output with avx512bw uses vpmovm2b/vpmovb2m and kmovq instead.
define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index) nounwind {
; KNL-LABEL: test_insertelement_variable_v128i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:    andq $-64, %rsp
; KNL-NEXT:    subq $192, %rsp
; KNL-NEXT:    ## kill: def $esi killed $esi def $rsi
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; KNL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; KNL-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
; KNL-NEXT:    vpcmpeqb %ymm3, %ymm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; KNL-NEXT:    vpternlogq {{.*#+}} zmm0 = ~zmm0
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; KNL-NEXT:    vpcmpeqb %ymm3, %ymm2, %ymm2
; KNL-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT:    vpternlogq {{.*#+}} zmm1 = ~zmm1
; KNL-NEXT:    andl $127, %esi
; KNL-NEXT:    testb %dil, %dil
; KNL-NEXT:    vmovdqa64 %zmm1, {{[0-9]+}}(%rsp)
; KNL-NEXT:    vmovdqa64 %zmm0, (%rsp)
; KNL-NEXT:    setne (%rsp,%rsi)
; KNL-NEXT:    vpmovsxbd (%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    shll $16, %ecx
; KNL-NEXT:    orl %eax, %ecx
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %edx
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    shll $16, %eax
; KNL-NEXT:    orl %edx, %eax
; KNL-NEXT:    shlq $32, %rax
; KNL-NEXT:    orq %rcx, %rax
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %esi
; KNL-NEXT:    shll $16, %esi
; KNL-NEXT:    orl %ecx, %esi
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %ecx
; KNL-NEXT:    vpmovsxbd {{[0-9]+}}(%rsp), %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %edx
; KNL-NEXT:    shll $16, %edx
; KNL-NEXT:    orl %ecx, %edx
; KNL-NEXT:    shlq $32, %rdx
; KNL-NEXT:    orq %rsi, %rdx
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_insertelement_variable_v128i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rbp
; SKX-NEXT:    movq %rsp, %rbp
; SKX-NEXT:    andq $-64, %rsp
; SKX-NEXT:    subq $192, %rsp
; SKX-NEXT:    ## kill: def $esi killed $esi def $rsi
; SKX-NEXT:    vptestmb %zmm0, %zmm0, %k0
; SKX-NEXT:    vptestmb %zmm1, %zmm1, %k1
; SKX-NEXT:    andl $127, %esi
; SKX-NEXT:    testb %dil, %dil
; SKX-NEXT:    vpmovm2b %k1, %zmm0
; SKX-NEXT:    vmovdqa64 %zmm0, {{[0-9]+}}(%rsp)
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    vmovdqa64 %zmm0, (%rsp)
; SKX-NEXT:    setne (%rsp,%rsi)
; SKX-NEXT:    vpsllw $7, {{[0-9]+}}(%rsp), %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k0
; SKX-NEXT:    vpsllw $7, (%rsp), %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k1
; SKX-NEXT:    kmovq %k1, %rax
; SKX-NEXT:    kmovq %k0, %rdx
; SKX-NEXT:    movq %rbp, %rsp
; SKX-NEXT:    popq %rbp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  ; Lane i of the mask is true when byte i of %a is non-zero.
  %t1 = icmp ugt <128 x i8> %a, zeroinitializer
  ; The bit to insert: true when %b is non-zero.
  %t2 = icmp ugt i8 %b, 0
  ; %index is a function argument, so the insert position is not a constant.
  %t3 = insertelement <128 x i1> %t1, i1 %t2, i32 %index
  ; Return the 128 mask bits as a single integer.
  %t4 = bitcast <128 x i1> %t3 to i128
  ret i128 %t4
}

; Builds a <2 x i1> mask from two fast-math compares of a loaded <2 x half>
; (less than 0xH4600, which the expected constant load shows as 6.0, and
; greater than zero), ands them, and uses the result to select between a
; second loaded <2 x half> and zero. The selected vector is stored to %arg2.
; The avx512f-only output compares the two elements one at a time with
; vucomiss and assembles the mask with kshiftlw/korw; the other configuration
; uses two vector vcmpltps compares and a zero-masked vmovdqu16.
define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
; KNL-LABEL: test_concat_v2i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; KNL-NEXT:    vcvtph2ps %xmm0, %xmm1
; KNL-NEXT:    vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
; KNL-NEXT:    vucomiss %xmm2, %xmm1
; KNL-NEXT:    setb %al
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0
; KNL-NEXT:    vucomiss %xmm2, %xmm0
; KNL-NEXT:    setb %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $1, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; KNL-NEXT:    vucomiss %xmm2, %xmm1
; KNL-NEXT:    seta %al
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vucomiss %xmm2, %xmm0
; KNL-NEXT:    seta %al
; KNL-NEXT:    kmovw %eax, %k2
; KNL-NEXT:    kshiftlw $1, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kandw %k1, %k0, %k1
; KNL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; KNL-NEXT:    vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
; KNL-NEXT:    vpmovdw %zmm1, %ymm1
; KNL-NEXT:    vpand %xmm0, %xmm1, %xmm0
; KNL-NEXT:    vmovd %xmm0, (%rdx)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_concat_v2i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; SKX-NEXT:    vcvtph2ps %xmm0, %ymm0
; SKX-NEXT:    vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; SKX-NEXT:    kandw %k1, %k0, %k1
; SKX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; SKX-NEXT:    vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; SKX-NEXT:    vmovd %xmm0, (%rdx)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
  ; Values that drive the mask.
  %tmp = load <2 x half>, ptr %arg, align 8
  ; Upper bound: element < 0xH4600.
  %tmp3 = fcmp fast olt <2 x half> %tmp, <half 0xH4600, half 0xH4600>
  ; Lower bound: element > 0.
  %tmp4 = fcmp fast ogt <2 x half> %tmp, zeroinitializer
  ; A lane is selected only when both bounds hold.
  %tmp5 = and <2 x i1> %tmp3, %tmp4
  ; Values kept in the selected lanes; the other lanes become zero.
  %tmp6 = load <2 x half>, ptr %arg1, align 8
  %tmp7 = select <2 x i1> %tmp5, <2 x half> %tmp6, <2 x half> zeroinitializer
  store <2 x half> %tmp7, ptr %arg2, align 8
  ret void
}

; Check that an extracted bool element comparison is correctly extended when
; the bool vector is bitcasted instead.
; The function has no stores or return value, so both expected bodies are just
; a return (preceded by vzeroupper in the second configuration).
define void @test_extractelement_v64i1_cmp0(<64 x i1> %0) nounwind {
; KNL-LABEL: test_extractelement_v64i1_cmp0:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    retq
;
; SKX-LABEL: test_extractelement_v64i1_cmp0:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
entry:
  ; Branch directly on element 0 of the incoming mask.
  %1 = extractelement <64 x i1> %0, i64 0
  br i1 %1, label %common.ret, label %2

common.ret:                                       ; preds = %2, %entry
  ret void

2:                                                ; preds = %entry
  ; Result is unused; this only extracts element 0 of an all-zero constant.
  %3 = extractelement <64 x i1> zeroinitializer, i64 0
  br label %common.ret
}
