; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s

; Speculative load hardening of the AVX2 gather intrinsics (functions tagged
; with attribute group #0, defined at the bottom of the file). In each expected
; sequence an all-zeros/all-ones value is derived from %rsp (sarq $63), OR-ed
; into the base pointer (%rdi) and, after a broadcast, into the index vector
; before the gather, and finally merged back into %rsp (shlq $47 / orq) ahead
; of the return.

declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr, <4 x i32>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_d_ps(ptr %b, <4 x i32> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr, <2 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps(ptr %b, <2 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, ptr %b, <2 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr, <4 x i32>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_d_pd(ptr %b, <4 x i32> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, ptr %b, <4 x i32> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr, <2 x i64>, <2 x double>, i8)

define <2 x double> @test_llvm_x86_avx2_gather_q_pd(ptr %b, <2 x i64> %iv, <2 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherqpd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, ptr %b, <2 x i64> %iv, <2 x double> %mask, i8 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr, <8 x i32>, <8 x float>, i8)

define <8 x float> @test_llvm_x86_avx2_gather_d_ps_256(ptr %b, <8 x i32> %iv, <8 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherdps %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, ptr %b, <8 x i32> %iv, <8 x float> %mask, i8 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr, <4 x i64>, <4 x float>, i8)

define <4 x float> @test_llvm_x86_avx2_gather_q_ps_256(ptr %b, <4 x i64> %iv, <4 x float> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_ps_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqps %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x float> %mask, i8 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr, <4 x i32>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_d_pd_256(ptr %b, <4 x i32> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vgatherdpd %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr, <4 x i64>, <4 x double>, i8)

define <4 x double> @test_llvm_x86_avx2_gather_q_pd_256(ptr %b, <4 x i64> %iv, <4 x double> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_pd_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vgatherqpd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x double> %mask, i8 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr, <4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_d_d(ptr %b, <4 x i32> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr, <2 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d(ptr %b, <2 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> zeroinitializer, ptr %b, <2 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr, <4 x i32>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_d_q(ptr %b, <4 x i32> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr, <2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_llvm_x86_avx2_gather_q_q(ptr %b, <2 x i64> %iv, <2 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherqq %xmm1, (%rdi,%xmm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> zeroinitializer, ptr %b, <2 x i64> %iv, <2 x i64> %mask, i8 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr, <8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_llvm_x86_avx2_gather_d_d_256(ptr %b, <8 x i32> %iv, <8 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherdd %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> zeroinitializer, ptr %b, <8 x i32> %iv, <8 x i32> %mask, i8 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr, <4 x i64>, <4 x i32>, i8)

define <4 x i32> @test_llvm_x86_avx2_gather_q_d_256(ptr %b, <4 x i64> %iv, <4 x i32> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_d_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqd %xmm1, (%rdi,%ymm0), %xmm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x i32> %mask, i8 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr, <4 x i32>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_d_q_256(ptr %b, <4 x i32> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_d_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %xmm3
; CHECK-NEXT:    vpor %xmm0, %xmm3, %xmm0
; CHECK-NEXT:    vpgatherdq %ymm1, (%rdi,%xmm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

; Last AVX2 case (attribute group #0): 256-bit qword gather with qword indices.
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr, <4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_llvm_x86_avx2_gather_q_q_256(ptr %b, <4 x i64> %iv, <4 x i64> %mask) #0 {
; CHECK-LABEL: test_llvm_x86_avx2_gather_q_q_256:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm3
; CHECK-NEXT:    vpbroadcastq %xmm3, %ymm3
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    vpgatherqq %ymm1, (%rdi,%ymm0), %ymm2
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, ptr %b, <4 x i64> %iv, <4 x i64> %mask, i8 1)
  ret <4 x i64> %v
}

; AVX-512 gathers producing or indexing with 512-bit vectors (attribute group
; #1, +avx512f). The intrinsics take an integer lane mask; every call passes -1
; and the expected assembly materializes the mask register with kxnorw. As in
; the AVX2 cases, the value derived from %rsp is OR-ed into both the base
; pointer and the index vector before the gather.

declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, ptr, <16 x i32>, i16, i32)

define <16 x float> @test_llvm_x86_avx512_gather_dps_512(ptr %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> zeroinitializer, ptr %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, ptr, <8 x i32>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_dpd_512(ptr %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdpd (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, ptr, <8 x i64>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather_qps_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qps_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, ptr, <8 x i64>, i8, i32)

define <8 x double> @test_llvm_x86_avx512_gather_qpd_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpd_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x double> %v
}

declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, ptr, <16 x i32>, i16, i32)

define <16 x i32> @test_llvm_x86_avx512_gather_dpi_512(ptr %b, <16 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> zeroinitializer, ptr %b, <16 x i32> %iv, i16 -1, i32 1)
  ret <16 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, ptr, <8 x i32>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_dpq_512(ptr %b, <8 x i32> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_dpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vmovq %rax, %xmm2
; CHECK-NEXT:    vpbroadcastq %xmm2, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdq (%rdi,%ymm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}


declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, ptr, <8 x i64>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather_qpi_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpi_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%zmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, ptr, <8 x i64>, i8, i32)

define <8 x i64> @test_llvm_x86_avx512_gather_qpq_512(ptr %b, <8 x i64> %iv) #1 {
; CHECK-LABEL: test_llvm_x86_avx512_gather_qpq_512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %zmm2
; CHECK-NEXT:    vporq %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%zmm0), %zmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> zeroinitializer, ptr %b, <8 x i64> %iv, i8 -1, i32 1)
  ret <8 x i64> %v
}

; AVX-512 gather3* intrinsics on 128-bit and 256-bit vectors (attribute group
; #2, +avx512vl). Same expected hardening pattern as the 512-bit cases, with
; the broadcast and OR performed at the width of the index vector.

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, ptr, <4 x i32>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, ptr, <2 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div4_sf(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqps (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, ptr, <4 x i32>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3siv2_df(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32)

define <2 x double> @test_llvm_x86_avx512_gather3div2_df(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x double> %v
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, ptr, <8 x i32>, i8, i32)

define <8 x float> @test_llvm_x86_avx512_gather3siv8_sf(ptr %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherdps (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x float> %v
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, ptr, <4 x i64>, i8, i32)

define <4 x float> @test_llvm_x86_avx512_gather3div8_sf(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_sf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqps (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x float> %v
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, ptr, <4 x i32>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3siv4_df(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, ptr, <4 x i64>, i8, i32)

define <4 x double> @test_llvm_x86_avx512_gather3div4_df(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_df:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x double> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, ptr, <4 x i32>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3siv4_si(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, ptr, <2 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div4_si(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, ptr, <4 x i32>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3siv2_di(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, ptr, <2 x i64>, i8, i32)

define <2 x i64> @test_llvm_x86_avx512_gather3div2_di(ptr %b, <2 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div2_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> zeroinitializer, ptr %b, <2 x i64> %iv, i8 -1, i32 1)
  ret <2 x i64> %v
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, ptr, <8 x i32>, i8, i32)

define <8 x i32> @test_llvm_x86_avx512_gather3siv8_si(ptr %b, <8 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> zeroinitializer, ptr %b, <8 x i32> %iv, i8 -1, i32 1)
  ret <8 x i32> %v
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, ptr, <4 x i64>, i8, i32)

define <4 x i32> @test_llvm_x86_avx512_gather3div8_si(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div8_si:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm0), %xmm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i32> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, ptr, <4 x i32>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3siv4_di(ptr %b, <4 x i32> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3siv4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %xmm2
; CHECK-NEXT:    vpor %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> zeroinitializer, ptr %b, <4 x i32> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, ptr, <4 x i64>, i8, i32)

define <4 x i64> @test_llvm_x86_avx512_gather3div4_di(ptr %b, <4 x i64> %iv) #2 {
; CHECK-LABEL: test_llvm_x86_avx512_gather3div4_di:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rsp, %rax
; CHECK-NEXT:    movq $-1, %rcx
; CHECK-NEXT:    sarq $63, %rax
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    vpbroadcastq %rax, %ymm2
; CHECK-NEXT:    vpor %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    shlq $47, %rax
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    orq %rax, %rsp
; CHECK-NEXT:    retq
entry:
  %v = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> zeroinitializer, ptr %b, <4 x i64> %iv, i8 -1, i32 1)
  ret <4 x i64> %v
}

; Attribute groups: every test enables speculative_load_hardening and differs
; only in the target feature set it is compiled with.
attributes #0 = { nounwind speculative_load_hardening "target-features"="+avx2" }
attributes #1 = { nounwind speculative_load_hardening "target-features"="+avx512f" }
attributes #2 = { nounwind speculative_load_hardening "target-features"="+avx512vl" }