; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=skylake -mtriple=i386-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X86 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skx -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-avx512f | FileCheck --check-prefix=X64 %s
; RUN: llc < %s -mcpu=skylake -mtriple=x86_64-unknown-linux-gnu -mattr=-avx2 | FileCheck --check-prefix=NOGATHER %s

declare <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x i32> %passthro)

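; A <2 x i32> gather. The AVX2 gather instructions select lanes by the sign
; bit of each mask element, so the i1 mask is first shifted into the high bit
; (vpslld $31 / vpsllq $63). Without gathers (NOGATHER) the operation is
; scalarized: vmovmskpd extracts the mask bits and each lane is conditionally
; loaded and inserted.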
define <2 x i32> @masked_gather_v2i32(ptr %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB0_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB0_3
; NOGATHER-NEXT:  .LBB0_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB0_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB0_4
; NOGATHER-NEXT:  .LBB0_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x ptr>, ptr %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  ret <2 x i32> %res
}

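; Same gather, but the result is widened to <4 x i32> by a shufflevector
; concat with undef; the concat should fold away so codegen matches the plain
; v2i32 case above.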
define <4 x i32> @masked_gather_v2i32_concat(ptr %ptr, <2 x i1> %masks, <2 x i32> %passthro) {
; X86-LABEL: masked_gather_v2i32_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i32_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpgatherqd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i32_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB1_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB1_3
; NOGATHER-NEXT:  .LBB1_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB1_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB1_4
; NOGATHER-NEXT:  .LBB1_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x ptr>, ptr %ptr
  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i32> %passthro)
  %res2 = shufflevector <2 x i32> %res, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %res2
}

declare <2 x float> @llvm.masked.gather.v2float(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x float> %passthro)

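; Floating-point <2 x float> variants (this test and the concat variant
; below): these select vgatherdps/vgatherqps, and the scalarized path builds
; the result with vmovss/vinsertps/vblendps instead of vpinsrd.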
define <2 x float> @masked_gather_v2float(ptr %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB2_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB2_3
; NOGATHER-NEXT:  .LBB2_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB2_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB2_4
; NOGATHER-NEXT:  .LBB2_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x ptr>, ptr %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  ret <2 x float> %res
}

define <4 x float> @masked_gather_v2float_concat(ptr %ptr, <2 x i1> %masks, <2 x float> %passthro) {
; X86-LABEL: masked_gather_v2float_concat:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdps %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2float_concat:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps (%rdi), %xmm2
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vgatherqps %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2float_concat:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB3_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB3_3
; NOGATHER-NEXT:  .LBB3_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB3_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB3_4
; NOGATHER-NEXT:  .LBB3_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x ptr>, ptr %ptr
  %res = call <2 x float> @llvm.masked.gather.v2float(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x float> %passthro)
  %res2 = shufflevector <2 x float> %res, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %res2
}


declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x i32> %passthro)

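; <4 x i32> with the pointer vector passed directly in registers rather than
; loaded from memory: X86 gathers through four 32-bit pointers in %xmm0, while
; X64 needs vpgatherqd with four 64-bit pointers in %ymm0 (hence the trailing
; vzeroupper).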
define <4 x i32> @masked_gather_v4i32(<4 x ptr> %ptrs, <4 x i1> %masks, <4 x i32> %passthro) {
; X86-LABEL: masked_gather_v4i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vpgatherdd %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovdqa %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vpgatherqd %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovdqa %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB4_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB4_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:  .LBB4_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB4_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB4_7
; NOGATHER-NEXT:  .LBB4_8: # %else8
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB4_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm2, %xmm2
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB4_8
; NOGATHER-NEXT:  .LBB4_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vpinsrd $3, (%rax), %xmm2, %xmm2
; NOGATHER-NEXT:    vmovdqa %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 0, <4 x i1> %masks, <4 x i32> %passthro)
  ret <4 x i32> %res
}

declare <4 x float> @llvm.masked.gather.v4float(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x float> %passthro)

define <4 x float> @masked_gather_v4float(<4 x ptr> %ptrs, <4 x i1> %masks, <4 x float> %passthro) {
; X86-LABEL: masked_gather_v4float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm1, %xmm1
; X86-NEXT:    vgatherdps %xmm1, (,%xmm0), %xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm1, %xmm1
; X64-NEXT:    vgatherqps %xmm1, (,%ymm0), %xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vpslld $31, %xmm1, %xmm1
; NOGATHER-NEXT:    vmovmskps %xmm1, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB5_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; NOGATHER-NEXT:  .LBB5_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB5_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; NOGATHER-NEXT:  .LBB5_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB5_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB5_7
; NOGATHER-NEXT:  .LBB5_8: # %else8
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB5_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB5_8
; NOGATHER-NEXT:  .LBB5_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; NOGATHER-NEXT:    vmovaps %xmm2, %xmm0
; NOGATHER-NEXT:    vzeroupper
; NOGATHER-NEXT:    retq
entry:
  %res = call <4 x float> @llvm.masked.gather.v4float(<4 x ptr> %ptrs, i32 0, <4 x i1> %masks, <4 x float> %passthro)
  ret <4 x float> %res
}

declare <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ptrs, i32 %align, <8 x i1> %masks, <8 x i32> %passthro)

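; Eight 64-bit pointers do not fit in one ymm register, so on X64 the gather
; is split: the pointer vector is loaded as two ymm halves, each half is
; gathered with its own vpgatherqd, and the results are recombined with
; vinserti128.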
define <8 x i32> @masked_gather_v8i32(ptr %ptr, <8 x i1> %masks, <8 x i32> %passthro) {
; X86-LABEL: masked_gather_v8i32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %ymm2
; X86-NEXT:    vpgatherdd %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8i32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vmovdqa 32(%rdi), %ymm3
; X64-NEXT:    vextracti128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vpgatherqd %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vpgatherqd %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinserti128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8i32:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB6_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB6_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB6_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $2, (%rcx), %xmm1, %xmm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB6_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vpinsrd $3, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB6_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB6_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4],ymm1[5,6,7]
; NOGATHER-NEXT:  .LBB6_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB6_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6,7]
; NOGATHER-NEXT:  .LBB6_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB6_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB6_15
; NOGATHER-NEXT:  .LBB6_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB6_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6],ymm1[7]
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB6_16
; NOGATHER-NEXT:  .LBB6_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastss (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm0[7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x ptr>, ptr %ptr
  %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ld, i32 0, <8 x i1> %masks, <8 x i32> %passthro)
  ret <8 x i32> %res
}

declare <8 x float> @llvm.masked.gather.v8float(<8 x ptr> %ptrs, i32 %align, <8 x i1> %masks, <8 x float> %passthro)

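; Float flavor of the split-gather pattern above: one vgatherqps per pointer
; half, recombined with vinsertf128.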
define <8 x float> @masked_gather_v8float(ptr %ptr, <8 x i1> %masks, <8 x float> %passthro) {
; X86-LABEL: masked_gather_v8float:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpslld $31, %ymm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps (%eax), %ymm2
; X86-NEXT:    vgatherdps %ymm0, (,%ymm2), %ymm1
; X86-NEXT:    vmovaps %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v8float:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpslld $31, %ymm0, %ymm0
; X64-NEXT:    vmovaps (%rdi), %ymm2
; X64-NEXT:    vmovaps 32(%rdi), %ymm3
; X64-NEXT:    vextractf128 $1, %ymm1, %xmm4
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm5
; X64-NEXT:    vgatherqps %xmm5, (,%ymm3), %xmm4
; X64-NEXT:    vgatherqps %xmm0, (,%ymm2), %xmm1
; X64-NEXT:    vinsertf128 $1, %xmm4, %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v8float:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpsllw $15, %xmm0, %xmm0
; NOGATHER-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; NOGATHER-NEXT:    vpmovmskb %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB7_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB7_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB7_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],mem[0],xmm1[2,3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    je .LBB7_6
; NOGATHER-NEXT:  # %bb.5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm2 = xmm1[0,1],mem[0],xmm1[3]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB7_8
; NOGATHER-NEXT:  # %bb.7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],mem[0]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB7_8: # %else8
; NOGATHER-NEXT:    vmovdqa 32(%rdi), %ymm0
; NOGATHER-NEXT:    testb $16, %al
; NOGATHER-NEXT:    je .LBB7_10
; NOGATHER-NEXT:  # %bb.9: # %cond.load10
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4],ymm1[5,6,7]
; NOGATHER-NEXT:  .LBB7_10: # %else11
; NOGATHER-NEXT:    testb $32, %al
; NOGATHER-NEXT:    je .LBB7_12
; NOGATHER-NEXT:  # %bb.11: # %cond.load13
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6,7]
; NOGATHER-NEXT:  .LBB7_12: # %else14
; NOGATHER-NEXT:    vextractf128 $1, %ymm0, %xmm0
; NOGATHER-NEXT:    testb $64, %al
; NOGATHER-NEXT:    jne .LBB7_13
; NOGATHER-NEXT:  # %bb.14: # %else17
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    jne .LBB7_15
; NOGATHER-NEXT:  .LBB7_16: # %else20
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB7_13: # %cond.load16
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastss (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6],ymm1[7]
; NOGATHER-NEXT:    testb $-128, %al
; NOGATHER-NEXT:    je .LBB7_16
; NOGATHER-NEXT:  .LBB7_15: # %cond.load19
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastss (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5,6],ymm0[7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <8 x ptr>, ptr %ptr
  %res = call <8 x float> @llvm.masked.gather.v8float(<8 x ptr> %ld, i32 0, <8 x i1> %masks, <8 x float> %passthro)
  ret <8 x float> %res
}

declare <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x i64> %passthro)

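; 64-bit elements with a <4 x i1> mask: the mask is widened with vpslld $31
; followed by vpmovsxdq so the sign bit lands in bit 63 of each lane, as
; vpgatherdq/vpgatherqq require.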
define <4 x i64> @masked_gather_v4i64(ptr %ptr, <4 x i1> %masks, <4 x i64> %passthro) {
; X86-LABEL: masked_gather_v4i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovdqa (%eax), %xmm2
; X86-NEXT:    vpgatherdq %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovdqa (%rdi), %ymm2
; X64-NEXT:    vpgatherqq %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB8_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB8_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $1, (%rcx), %xmm1, %xmm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB8_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB8_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB8_7
; NOGATHER-NEXT:  .LBB8_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB8_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastsd (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB8_8
; NOGATHER-NEXT:  .LBB8_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastsd (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x ptr>, ptr %ptr
  %res = call <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x i64> %passthro)
  ret <4 x i64> %res
}

declare <4 x double> @llvm.masked.gather.v4double(<4 x ptr> %ptrs, i32 %align, <4 x i1> %masks, <4 x double> %passthro)

define <4 x double> @masked_gather_v4double(ptr %ptr, <4 x i1> %masks, <4 x double> %passthro) {
; X86-LABEL: masked_gather_v4double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovsxdq %xmm0, %ymm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovapd (%eax), %xmm2
; X86-NEXT:    vgatherdpd %ymm0, (,%xmm2), %ymm1
; X86-NEXT:    vmovapd %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v4double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpslld $31, %xmm0, %xmm0
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovapd (%rdi), %ymm2
; X64-NEXT:    vgatherqpd %ymm0, (,%ymm2), %ymm1
; X64-NEXT:    vmovapd %ymm1, %ymm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v4double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %ymm2
; NOGATHER-NEXT:    vpslld $31, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskps %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    je .LBB9_2
; NOGATHER-NEXT:  # %bb.1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; NOGATHER-NEXT:  .LBB9_2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB9_4
; NOGATHER-NEXT:  # %bb.3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rcx
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm0 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; NOGATHER-NEXT:  .LBB9_4: # %else2
; NOGATHER-NEXT:    vextractf128 $1, %ymm2, %xmm0
; NOGATHER-NEXT:    testb $4, %al
; NOGATHER-NEXT:    jne .LBB9_5
; NOGATHER-NEXT:  # %bb.6: # %else5
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    jne .LBB9_7
; NOGATHER-NEXT:  .LBB9_8: # %else8
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB9_5: # %cond.load4
; NOGATHER-NEXT:    vmovq %xmm0, %rcx
; NOGATHER-NEXT:    vbroadcastsd (%rcx), %ymm2
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5],ymm1[6,7]
; NOGATHER-NEXT:    testb $8, %al
; NOGATHER-NEXT:    je .LBB9_8
; NOGATHER-NEXT:  .LBB9_7: # %cond.load7
; NOGATHER-NEXT:    vpextrq $1, %xmm0, %rax
; NOGATHER-NEXT:    vbroadcastsd (%rax), %ymm0
; NOGATHER-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm0[6,7]
; NOGATHER-NEXT:    vmovaps %ymm1, %ymm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <4 x ptr>, ptr %ptr
  %res = call <4 x double> @llvm.masked.gather.v4double(<4 x ptr> %ld, i32 0, <4 x i1> %masks, <4 x double> %passthro)
  ret <4 x double> %res
}

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x i64> %passthro)

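; <2 x i64>/<2 x double>: the incoming mask already occupies 64-bit lanes, so
; a single vpsllq $63 suffices before the gather.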
define <2 x i64> @masked_gather_v2i64(ptr %ptr, <2 x i1> %masks, <2 x i64> %passthro) {
; X86-LABEL: masked_gather_v2i64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovq {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vpgatherdq %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2i64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovdqa (%rdi), %xmm2
; X64-NEXT:    vpgatherqq %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2i64:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB10_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB10_3
; NOGATHER-NEXT:  .LBB10_4: # %else2
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB10_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vpinsrq $0, (%rcx), %xmm1, %xmm1
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB10_4
; NOGATHER-NEXT:  .LBB10_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vpinsrq $1, (%rax), %xmm1, %xmm1
; NOGATHER-NEXT:    vmovdqa %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x ptr>, ptr %ptr
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x i64> %passthro)
  ret <2 x i64> %res
}

declare <2 x double> @llvm.masked.gather.v2double(<2 x ptr> %ptrs, i32 %align, <2 x i1> %masks, <2 x double> %passthro)

define <2 x double> @masked_gather_v2double(ptr %ptr, <2 x i1> %masks, <2 x double> %passthro) {
; X86-LABEL: masked_gather_v2double:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; X86-NEXT:    vgatherdpd %xmm0, (,%xmm2), %xmm1
; X86-NEXT:    vmovapd %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_v2double:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vpsllq $63, %xmm0, %xmm0
; X64-NEXT:    vmovapd (%rdi), %xmm2
; X64-NEXT:    vgatherqpd %xmm0, (,%xmm2), %xmm1
; X64-NEXT:    vmovapd %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_v2double:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovdqa (%rdi), %xmm2
; NOGATHER-NEXT:    vpsllq $63, %xmm0, %xmm0
; NOGATHER-NEXT:    vmovmskpd %xmm0, %eax
; NOGATHER-NEXT:    testb $1, %al
; NOGATHER-NEXT:    jne .LBB11_1
; NOGATHER-NEXT:  # %bb.2: # %else
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    jne .LBB11_3
; NOGATHER-NEXT:  .LBB11_4: # %else2
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
; NOGATHER-NEXT:  .LBB11_1: # %cond.load
; NOGATHER-NEXT:    vmovq %xmm2, %rcx
; NOGATHER-NEXT:    vmovlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
; NOGATHER-NEXT:    testb $2, %al
; NOGATHER-NEXT:    je .LBB11_4
; NOGATHER-NEXT:  .LBB11_3: # %cond.load1
; NOGATHER-NEXT:    vpextrq $1, %xmm2, %rax
; NOGATHER-NEXT:    vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x ptr>, ptr %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x ptr> %ld, i32 0, <2 x i1> %masks, <2 x double> %passthro)
  ret <2 x double> %res
}


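; A known all-zero mask should fold the gather away completely, leaving only a
; move of the passthru value into the return register.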
define <2 x double> @masked_gather_zeromask(ptr %ptr, <2 x double> %dummy, <2 x double> %passthru) {
; X86-LABEL: masked_gather_zeromask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vmovaps %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: masked_gather_zeromask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovaps %xmm1, %xmm0
; X64-NEXT:    retq
;
; NOGATHER-LABEL: masked_gather_zeromask:
; NOGATHER:       # %bb.0: # %entry
; NOGATHER-NEXT:    vmovaps %xmm1, %xmm0
; NOGATHER-NEXT:    retq
entry:
  %ld = load <2 x ptr>, ptr %ptr
  %res = call <2 x double> @llvm.masked.gather.v2double(<2 x ptr> %ld, i32 0, <2 x i1> zeroinitializer, <2 x double> %passthru)
  ret <2 x double> %res
}