; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s

declare <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float>, ptr, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dps.512(ptr, i16, <16 x i32>, <16 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double>, ptr, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpd.512(ptr, i8, <8 x i32>, <8 x double>, i32)

declare <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float>, ptr, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qps.512(ptr, i8, <8 x i64>, <8 x float>, i32)
declare <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double>, ptr, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpd.512(ptr, i8, <8 x i64>, <8 x double>, i32)

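;;
;; Masked Floating-Point Gather/Scatter
;;
; Each test below gathers through a variable mask and then scatters the
; gathered values through the same mask. Gathers and scatters update their
; mask register as a completion mask, so the incoming mask is copied from
; %k1 to %k2: the gather consumes %k2 and the scatter consumes %k1.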
define void @gather_mask_dps(<16 x i32> %ind, <16 x float> %src, i16 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_dps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterdps %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, ptr %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dps.512(ptr %stbuf, i16 %mask, <16 x i32> %ind2, <16 x float> %x, i32 4)
  ret void
}

define void @gather_mask_dpd(<8 x i32> %ind, <8 x double> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_dpd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vscatterdpd %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, ptr %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpd.512(ptr %stbuf, i8 %mask, <8 x i32> %ind2, <8 x double> %x, i32 4)
  ret void
}

define void @gather_mask_qps(<8 x i64> %ind, <8 x float> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_qps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, ptr %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(ptr %stbuf, i8 %mask, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

define void @gather_mask_qpd(<8 x i64> %ind, <8 x double> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_qpd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqpd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, ptr %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpd.512(ptr %stbuf, i8 %mask, <8 x i64> %ind2, <8 x double> %x, i32 4)
  ret void
}
;;
;; Integer Gather/Scatter
;;
declare <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32>, ptr, <16 x i32>, i16, i32)
declare void @llvm.x86.avx512.scatter.dpi.512(ptr, i16, <16 x i32>, <16 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64>, ptr, <8 x i32>, i8, i32)
declare void @llvm.x86.avx512.scatter.dpq.512(ptr, i8, <8 x i32>, <8 x i64>, i32)

declare <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32>, ptr, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpi.512(ptr, i8, <8 x i64>, <8 x i32>, i32)
declare <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64>, ptr, <8 x i64>, i8, i32)
declare void @llvm.x86.avx512.scatter.qpq.512(ptr, i8, <8 x i64>, <8 x i64>, i32)

define void @gather_mask_dd(<16 x i32> %ind, <16 x i32> %src, i16 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_dd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterdd %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <16 x i32> @llvm.x86.avx512.gather.dpi.512(<16 x i32> %src, ptr %base, <16 x i32> %ind, i16 %mask, i32 4)
  %ind2 = add <16 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpi.512(ptr %stbuf, i16 %mask, <16 x i32> %ind2, <16 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qd(<8 x i64> %ind, <8 x i32> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_qd:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rsi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqd %ymm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x i32> @llvm.x86.avx512.gather.qpi.512(<8 x i32> %src, ptr %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpi.512(ptr %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i32> %x, i32 4)
  ret void
}

define void @gather_mask_qq(<8 x i64> %ind, <8 x i64> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_qq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqq (%rsi,%zmm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vpscatterqq %zmm1, (%rdx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.qpq.512(<8 x i64> %src, ptr %base, <8 x i64> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qpq.512(ptr %stbuf, i8 %mask, <8 x i64> %ind2, <8 x i64> %x, i32 4)
  ret void
}

define void @gather_mask_dq(<8 x i32> %ind, <8 x i64> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_dq:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdq (%rsi,%ymm0,4), %zmm1 {%k2}
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    vpscatterdq %zmm1, (%rdx,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x i64> @llvm.x86.avx512.gather.dpq.512(<8 x i64> %src, ptr %base, <8 x i32> %ind, i8 %mask, i32 4)
  %ind2 = add <8 x i32> %ind, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  call void @llvm.x86.avx512.scatter.dpq.512(ptr %stbuf, i8 %mask, <8 x i32> %ind2, <8 x i64> %x, i32 4)
  ret void
}

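; The *_execdomain tests check that masked FP gathers and scatters keep the
; surrounding register copies, loads, and stores in the floating-point
; domain (vmovaps/vmovapd rather than the integer-domain vmovdqa forms).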
define void @gather_mask_dpd_execdomain(<8 x i32> %ind, <8 x double> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_dpd_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgatherdpd (%rsi,%ymm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.dpd.512(<8 x double> %src, ptr %base, <8 x i32> %ind, i8 %mask, i32 4)
  store <8 x double> %x, ptr %stbuf
  ret void
}

define void @gather_mask_qpd_execdomain(<8 x i64> %ind, <8 x double> %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_mask_qpd_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgatherqpd (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovapd %zmm1, (%rdx)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x double> @llvm.x86.avx512.gather.qpd.512(<8 x double> %src, ptr %base, <8 x i64> %ind, i8 %mask, i32 4)
  store <8 x double> %x, ptr %stbuf
  ret void
}

define <16 x float> @gather_mask_dps_execdomain(<16 x i32> %ind, <16 x float> %src, i16 %mask, ptr %base) {
; CHECK-LABEL: gather_mask_dps_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgatherdps (%rsi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, ptr %base, <16 x i32> %ind, i16 %mask, i32 4)
  ret <16 x float> %res
}

define <8 x float> @gather_mask_qps_execdomain(<8 x i64> %ind, <8 x float> %src, i8 %mask, ptr %base) {
; CHECK-LABEL: gather_mask_qps_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vgatherqps (%rsi,%zmm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, ptr %base, <8 x i64> %ind, i8 %mask, i32 4)
  ret <8 x float> %res
}

define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, ptr %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: scatter_mask_dpd_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterdpd %zmm1, (%rcx,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <8 x double>, ptr %src, align 64
  call void @llvm.x86.avx512.scatter.dpd.512(ptr %stbuf, i8 %mask, <8 x i32> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, ptr %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: scatter_mask_qpd_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovapd (%rdi), %zmm1
; CHECK-NEXT:    vscatterqpd %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <8 x double>, ptr %src, align 64
  call void @llvm.x86.avx512.scatter.qpd.512(ptr %stbuf, i8 %mask, <8 x i64> %ind, <8 x double> %x, i32 4)
  ret void
}

define void @scatter_mask_dps_execdomain(<16 x i32> %ind, ptr %src, i16 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: scatter_mask_dps_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %zmm1
; CHECK-NEXT:    vscatterdps %zmm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <16 x float>, ptr %src, align 64
  call void @llvm.x86.avx512.scatter.dps.512(ptr %stbuf, i16 %mask, <16 x i32> %ind, <16 x float> %x, i32 4)
  ret void
}

define void @scatter_mask_qps_execdomain(<8 x i64> %ind, ptr %src, i8 %mask, ptr %base, ptr %stbuf) {
; CHECK-LABEL: scatter_mask_qps_execdomain:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovaps (%rdi), %ymm1
; CHECK-NEXT:    vscatterqps %ymm1, (%rcx,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <8 x float>, ptr %src, align 32
  call void @llvm.x86.avx512.scatter.qps.512(ptr %stbuf, i8 %mask, <8 x i64> %ind, <8 x float> %x, i32 4)
  ret void
}

define void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf) {
; CHECK-LABEL: gather_qps:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k2}
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    vscatterqps %ymm1, (%rsi,%zmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = call <8 x float> @llvm.x86.avx512.gather.qps.512(<8 x float> %src, ptr %base, <8 x i64> %ind, i8 -1, i32 4)
  %ind2 = add <8 x i64> %ind, <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>
  call void @llvm.x86.avx512.scatter.qps.512(ptr %stbuf, i8 -1, <8 x i64> %ind2, <8 x float> %x, i32 4)
  ret void
}

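; The gather3* intrinsics are the 128-bit and 256-bit (AVX512VL) gathers:
; the gather3siv* forms take dword index vectors and the gather3div* forms
; take qword index vectors (see the declares below). Most tests issue one
; gather under a variable mask and a second under an all-ones mask
; (materialized with kxnorw) at a different scale, then combine the results.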
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64>, ptr, <2 x i64>, i8, i32)

define <2 x i64> @test_int_x86_avx512_gather3div2_di(<2 x i64> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div2_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, ptr %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <2 x i64> @llvm.x86.avx512.gather3div2.di(<2 x i64> %x0, ptr %x1, <2 x i64> %x2, i8 %x3, i32 8)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double>, ptr, <4 x i64>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3div4_df(<4 x double> %x0, ptr %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, ptr %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3div4.df(<4 x double> %x0, ptr %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64>, ptr, <4 x i64>, i8, i32)

define <4 x i64> @test_int_x86_avx512_gather3div4_di(<4 x i64> %x0, ptr %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpgatherqq (%rdi,%ymm1,8), %ymm2 {%k1}
; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, ptr %x1, <4 x i64> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i64> @llvm.x86.avx512.gather3div4.di(<4 x i64> %x0, ptr %x1, <4 x i64> %x2, i8 -1, i32 8)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float>, ptr, <2 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div4_sf(<4 x float> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherqps (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, ptr %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div4.sf(<4 x float> %x0, ptr %x1, <2 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32>, ptr, <2 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div4_si(<4 x i32> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div4_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, ptr %x1, <2 x i64> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div4.si(<4 x i32> %x0, ptr %x1, <2 x i64> %x2, i8 %x3, i32 4)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float>, ptr, <4 x i64>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3div8_sf(<4 x float> %x0, ptr %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,4), %xmm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherqps (%rdi,%ymm1,2), %xmm2 {%k1}
; CHECK-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, ptr %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3div8.sf(<4 x float> %x0, ptr %x1, <4 x i64> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32>, ptr, <4 x i64>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3div8_si(<4 x i32> %x0, ptr %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3div8_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovdqa %xmm0, %xmm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherqd (%rdi,%ymm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, ptr %x1, <4 x i64> %x2, i8 %x3, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3div8.si(<4 x i32> %x0, ptr %x1, <4 x i64> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

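; The gather3siv* tests below mirror the gather3div* tests above, using
; dword instead of qword index vectors.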
declare <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double>, ptr, <4 x i32>, i8, i32)

define <2 x double> @test_int_x86_avx512_gather3siv2_df(<2 x double> %x0, ptr %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.gather3siv2.df(<2 x double> %x0, ptr %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64>, ptr, <4 x i32>, i8, i32)

define <2 x i64> @test_int_x86_avx512_gather3siv2_di(<2 x i64> %x0, ptr %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv2_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; CHECK-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <2 x i64> @llvm.x86.avx512.gather3siv2.di(<2 x i64> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double>, ptr, <4 x i32>, i8, i32)

define <4 x double> @test_int_x86_avx512_gather3siv4_df(<4 x double> %x0, ptr %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,4), %ymm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherdpd (%rdi,%xmm1,2), %ymm2 {%k1}
; CHECK-NEXT:    vaddpd %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x double> @llvm.x86.avx512.gather3siv4.df(<4 x double> %x0, ptr %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64>, ptr, <4 x i32>, i8, i32)

define <4 x i64> @test_int_x86_avx512_gather3siv4_di(<4 x i64> %x0, ptr %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpgatherdq (%rdi,%xmm1,8), %ymm0 {%k1}
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res1 = call <4 x i64> @llvm.x86.avx512.gather3siv4.di(<4 x i64> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 8)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, ptr, <4 x i32>, i8, i32)

define <4 x float> @test_int_x86_avx512_gather3siv4_sf(<4 x float> %x0, ptr %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,4), %xmm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherdps (%rdi,%xmm1,2), %xmm2 {%k1}
; CHECK-NEXT:    vaddps %xmm2, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float> %x0, ptr %x1, <4 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32>, ptr, <4 x i32>, i8, i32)

define <4 x i32> @test_int_x86_avx512_gather3siv4_si(<4 x i32> %x0, ptr %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv4_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,4), %xmm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%xmm1,2), %xmm0 {%k1}
; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, ptr %x1, <4 x i32> %x2, i8 -1, i32 4)
  %res1 = call <4 x i32> @llvm.x86.avx512.gather3siv4.si(<4 x i32> %x0, ptr %x1, <4 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float>, ptr, <8 x i32>, i8, i32)

define <8 x float> @test_int_x86_avx512_gather3siv8_sf(<8 x float> %x0, ptr %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherdps (%rdi,%ymm1,2), %ymm2 {%k1}
; CHECK-NEXT:    vaddps %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, ptr %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x float> @llvm.x86.avx512.gather3siv8.sf(<8 x float> %x0, ptr %x1, <8 x i32> %x2, i8 -1, i32 2)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32>, ptr, <8 x i32>, i8, i32)

define <8 x i32> @test_int_x86_avx512_gather3siv8_si(<8 x i32> %x0, ptr %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_gather3siv8_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vmovdqa %ymm0, %ymm2
; CHECK-NEXT:    kmovq %k1, %k2
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,4), %ymm2 {%k2}
; CHECK-NEXT:    vpgatherdd (%rdi,%ymm1,2), %ymm0 {%k1}
; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, ptr %x1, <8 x i32> %x2, i8 %x3, i32 4)
  %res1 = call <8 x i32> @llvm.x86.avx512.gather3siv8.si(<8 x i32> %x0, ptr %x1, <8 x i32> %x2, i8 %x3, i32 2)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

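; The scatterdiv* (qword-index) and scattersiv* (dword-index) intrinsics are
; the 128-bit and 256-bit (AVX512VL) scatters. Each test issues one scatter
; under a variable mask and one under an all-ones mask at a different scale.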
declare void @llvm.x86.avx512.scatterdiv2.df(ptr, i8, <2 x i64>, <2 x double>, i32)

define void @test_int_x86_avx512_scatterdiv2_df(ptr %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterqpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.df(ptr %x0, i8 -1, <2 x i64> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.df(ptr %x0, i8 %x1, <2 x i64> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv2.di(ptr, i8, <2 x i64>, <2 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv2_di(ptr %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv2_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterqq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv2.di(ptr %x0, i8 %x1, <2 x i64> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv2.di(ptr %x0, i8 -1, <2 x i64> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.df(ptr, i8, <4 x i64>, <4 x double>, i32)

define void @test_int_x86_avx512_scatterdiv4_df(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.df(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.df(ptr %x0, i8 -1, <4 x i64> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.di(ptr, i8, <4 x i64>, <4 x i64>, i32)

define void @test_int_x86_avx512_scatterdiv4_di(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterqq %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.di(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.di(ptr %x0, i8 -1, <4 x i64> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.sf(ptr, i8, <2 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv4_sf(ptr %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.sf(ptr %x0, i8 %x1, <2 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.sf(ptr %x0, i8 -1, <2 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv4.si(ptr, i8, <2 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv4_si(ptr %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv4_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv4.si(ptr %x0, i8 -1, <2 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv4.si(ptr %x0, i8 %x1, <2 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.sf(ptr, i8, <4 x i64>, <4 x float>, i32)

define void @test_int_x86_avx512_scatterdiv8_sf(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterqps %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.sf(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.sf(ptr %x0, i8 -1, <4 x i64> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scatterdiv8.si(ptr, i8, <4 x i64>, <4 x i32>, i32)

define void @test_int_x86_avx512_scatterdiv8_si(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scatterdiv8_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterqd %xmm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scatterdiv8.si(ptr %x0, i8 %x1, <4 x i64> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scatterdiv8.si(ptr %x0, i8 -1, <4 x i64> %x2, <4 x i32> %x3, i32 4)
  ret void
}

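; The scattersiv* tests below mirror the scatterdiv* tests above, using
; dword instead of qword index vectors.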
declare void @llvm.x86.avx512.scattersiv2.df(ptr, i8, <4 x i32>, <2 x double>, i32)

define void @test_int_x86_avx512_scattersiv2_df(ptr %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vscatterdpd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.df(ptr %x0, i8 -1, <4 x i32> %x2, <2 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.df(ptr %x0, i8 %x1, <4 x i32> %x2, <2 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv2.di(ptr, i8, <4 x i32>, <2 x i64>, i32)

define void @test_int_x86_avx512_scattersiv2_di(ptr %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv2_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv2.di(ptr %x0, i8 -1, <4 x i32> %x2, <2 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv2.di(ptr %x0, i8 %x1, <4 x i32> %x2, <2 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.df(ptr, i8, <4 x i32>, <4 x double>, i32)

define void @test_int_x86_avx512_scattersiv4_df(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_df:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterdpd %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.df(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x double> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.df(ptr %x0, i8 -1, <4 x i32> %x2, <4 x double> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.di(ptr, i8, <4 x i32>, <4 x i64>, i32)

define void @test_int_x86_avx512_scattersiv4_di(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_di:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    kxnorw %k0, %k0, %k2
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,2) {%k2}
; CHECK-NEXT:    vpscatterdq %ymm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.di(ptr %x0, i8 -1, <4 x i32> %x2, <4 x i64> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.di(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x i64> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.sf(ptr, i8, <4 x i32>, <4 x float>, i32)

define void @test_int_x86_avx512_scattersiv4_sf(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterdps %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.sf(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.sf(ptr %x0, i8 -1, <4 x i32> %x2, <4 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv4.si(ptr, i8, <4 x i32>, <4 x i32>, i32)

define void @test_int_x86_avx512_scattersiv4_si(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv4_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv4.si(ptr %x0, i8 %x1, <4 x i32> %x2, <4 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv4.si(ptr %x0, i8 -1, <4 x i32> %x2, <4 x i32> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.sf(ptr, i8, <8 x i32>, <8 x float>, i32)

define void @test_int_x86_avx512_scattersiv8_sf(ptr %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_sf:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vscatterdps %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.sf(ptr %x0, i8 %x1, <8 x i32> %x2, <8 x float> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.sf(ptr %x0, i8 -1, <8 x i32> %x2, <8 x float> %x3, i32 4)
  ret void
}

declare void @llvm.x86.avx512.scattersiv8.si(ptr, i8, <8 x i32>, <8 x i32>, i32)

define void @test_int_x86_avx512_scattersiv8_si(ptr %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: test_int_x86_avx512_scattersiv8_si:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.si(ptr %x0, i8 %x1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(ptr %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}

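; Constant masks: an all-ones mask is materialized with kxnorw, a known
; all-zeroes mask still emits the memory operation under a zeroed %k1
; (kxorw), and other immediate masks are loaded via movb/movw and kmovd.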
define void @scatter_mask_test(ptr %x0, <8 x i32> %x2, <8 x i32> %x3) {
; CHECK-LABEL: scatter_mask_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    kxorw %k0, %k0, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    kmovd %eax, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,2) {%k1}
; CHECK-NEXT:    movb $96, %al
; CHECK-NEXT:    kmovd %eax, %k1
; CHECK-NEXT:    vpscatterdd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.x86.avx512.scattersiv8.si(ptr %x0, i8 -1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(ptr %x0, i8 0, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  call void @llvm.x86.avx512.scattersiv8.si(ptr %x0, i8 1, <8 x i32> %x2, <8 x i32> %x3, i32 2)
  call void @llvm.x86.avx512.scattersiv8.si(ptr %x0, i8 96, <8 x i32> %x2, <8 x i32> %x3, i32 4)
  ret void
}

define <16 x float> @gather_mask_test(<16 x i32> %ind, <16 x float> %src, ptr %base) {
; CHECK-LABEL: gather_mask_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kxnorw %k0, %k0, %k1
; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
; CHECK-NEXT:    kxorw %k0, %k0, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm3
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
; CHECK-NEXT:    vaddps %zmm3, %zmm2, %zmm2
; CHECK-NEXT:    movw $1, %ax
; CHECK-NEXT:    kmovd %eax, %k1
; CHECK-NEXT:    vmovaps %zmm1, %zmm3
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm3 {%k1}
; CHECK-NEXT:    movw $220, %ax
; CHECK-NEXT:    kmovd %eax, %k1
; CHECK-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; CHECK-NEXT:    vaddps %zmm3, %zmm1, %zmm0
; CHECK-NEXT:    vaddps %zmm2, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, ptr %base, <16 x i32> %ind, i16 -1, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, ptr %base, <16 x i32> %ind, i16 0, i32 4)
  %res2 = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, ptr %base, <16 x i32> %ind, i16 1, i32 4)
  %res3 = call <16 x float> @llvm.x86.avx512.gather.dps.512(<16 x float> %src, ptr %base, <16 x i32> %ind, i16 220, i32 4)

  %res4 = fadd <16 x float> %res, %res1
  %res5 = fadd <16 x float> %res3, %res2
  %res6 = fadd <16 x float> %res5, %res4
  ret <16 x float> %res6
}