; xref: /llvm-project/llvm/test/CodeGen/X86/gather-scatter-opaque-ptr.ll (revision 027c728f29889ea6502030ec3623774d830c2ac3)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux -mcpu=skylake-avx512 < %s | FileCheck %s

; Element stride 512 is not a legal x86 addressing scale (1/2/4/8), so the
; index vector is pre-multiplied (vpsllq $9 == *512) and scale 1 is used.
define void @scatter_scale_512(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $9, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [512 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

; Stride 16 is also not a legal scale; index pre-multiplied (vpsllq $4 == *16).
define void @scatter_scale_16(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $4, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [16 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

; Stride 8 is a legal scale: folded directly into the address as (%rdi,%ymm0,8).
define void @scatter_scale_8(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,8) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [8 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

; Stride 4 is a legal scale: folded directly into the address as (%rdi,%ymm0,4).
define void @scatter_scale_4(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [4 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

; Non-power-of-two stride 3: index multiplied as idx*2 + idx via two vpaddq,
; then the scatter uses scale 1.
define void @scatter_scale_3(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [3 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

; Stride 1: index used as-is with implicit scale 1.
define void @scatter_scale_1(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [1 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

; Gather counterpart of scatter_scale_512: stride 512 pre-multiplied
; (vpsllq $9), gather emitted with scale 1.
define <4 x double> @gather_scale_512(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $9, %ymm0, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [512 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

; Stride 16 pre-multiplied (vpsllq $4), gather emitted with scale 1.
define <4 x double> @gather_scale_16(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $4, %ymm0, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [16 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

; Stride 8 is a legal scale: folded into the address as (%rdi,%ymm0,8).
define <4 x double> @gather_scale_8(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0,8), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [8 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

; Stride 4 is a legal scale: folded into the address as (%rdi,%ymm0,4).
define <4 x double> @gather_scale_4(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [4 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

; Non-power-of-two stride 3: index multiplied as idx*2 + idx via two vpaddq,
; then the gather uses scale 1.
define <4 x double> @gather_scale_3(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [3 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

; Stride 1: index used as-is with implicit scale 1.
define <4 x double> @gather_scale_1(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [1 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

; Masked scatter/gather intrinsics used above; the i32 immarg is the alignment.
declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>)