xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/powi.ll (revision 3be72f402925b99adbec4a2ee5bacdf76ba6c8d1)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64 -passes=slp-vectorizer -S | FileCheck %s
3; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v2 -passes=slp-vectorizer -S | FileCheck %s
4; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v3 -passes=slp-vectorizer -S | FileCheck %s
5; RUN: opt < %s -mtriple=x86_64-linux-gnu -mcpu=x86-64-v4 -passes=slp-vectorizer -S | FileCheck %s
6
; Two-lane double case with a constant exponent of 6.
; The scalar input extracts both lanes of %a, applies llvm.powi.f64 to each
; with the same exponent, and rebuilds a <2 x double> with insertelement.
; The assertions below expect this to collapse into a single
; <2 x double> powi call on %a with exponent 6, followed by the return.
7define <2 x double> @buildvector_powi_2f64_6(<2 x double> %a) {
8; CHECK-LABEL: @buildvector_powi_2f64_6(
9; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> [[A:%.*]], i32 6)
10; CHECK-NEXT:    ret <2 x double> [[TMP1]]
11;
12  %a0 = extractelement <2 x double> %a, i32 0
13  %a1 = extractelement <2 x double> %a, i32 1
14  %c0  = call double @llvm.powi.f64(double %a0 , i32 6)
15  %c1  = call double @llvm.powi.f64(double %a1 , i32 6)
16  %r0 = insertelement <2 x double> poison, double %c0, i32 0
17  %r1 = insertelement <2 x double> %r0,   double %c1, i32 1
18  ret <2 x double> %r1
19}
20
; Two-lane double case where the exponent is the run-time argument %b,
; shared by both lanes, rather than a constant.
; The assertions below expect the output to keep the same scalar shape as the
; input: two extracts, two scalar powi calls, two inserts, and the return.
; NOTE(review): presumably the vectorizer declines this because the exponent
; is not a constant -- confirm against the SLP cost model.
21define <2 x double> @buildvector_powi_2f64_var(<2 x double> %a, i32 %b) {
22; CHECK-LABEL: @buildvector_powi_2f64_var(
23; CHECK-NEXT:    [[A0:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
24; CHECK-NEXT:    [[A1:%.*]] = extractelement <2 x double> [[A]], i32 1
25; CHECK-NEXT:    [[C0:%.*]] = call double @llvm.powi.f64.i32(double [[A0]], i32 [[B:%.*]])
26; CHECK-NEXT:    [[C1:%.*]] = call double @llvm.powi.f64.i32(double [[A1]], i32 [[B]])
27; CHECK-NEXT:    [[R0:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
28; CHECK-NEXT:    [[R1:%.*]] = insertelement <2 x double> [[R0]], double [[C1]], i32 1
29; CHECK-NEXT:    ret <2 x double> [[R1]]
30;
31  %a0 = extractelement <2 x double> %a, i32 0
32  %a1 = extractelement <2 x double> %a, i32 1
33  %c0  = call double @llvm.powi.f64(double %a0 , i32 %b)
34  %c1  = call double @llvm.powi.f64(double %a1 , i32 %b)
35  %r0 = insertelement <2 x double> poison, double %c0, i32 0
36  %r1 = insertelement <2 x double> %r0,   double %c1, i32 1
37  ret <2 x double> %r1
38}
39
; Four-lane float case with a constant exponent of 3.
; The scalar input extracts all four lanes of %a, applies llvm.powi.f32 to
; each with the same exponent, and rebuilds a <4 x float> lane by lane.
; The assertions below expect a single <4 x float> powi call on %a with
; exponent 3, followed by the return.
40define <4 x float> @buildvector_powi_4f32_3(<4 x float> %a) {
41; CHECK-LABEL: @buildvector_powi_4f32_3(
42; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[A:%.*]], i32 3)
43; CHECK-NEXT:    ret <4 x float> [[TMP1]]
44;
45  %a0 = extractelement <4 x float> %a, i32 0
46  %a1 = extractelement <4 x float> %a, i32 1
47  %a2 = extractelement <4 x float> %a, i32 2
48  %a3 = extractelement <4 x float> %a, i32 3
49  %c0  = call float @llvm.powi.f32(float %a0 , i32 3)
50  %c1  = call float @llvm.powi.f32(float %a1 , i32 3)
51  %c2  = call float @llvm.powi.f32(float %a2 , i32 3)
52  %c3  = call float @llvm.powi.f32(float %a3 , i32 3)
53  %r0 = insertelement <4 x float> poison, float %c0, i32 0
54  %r1 = insertelement <4 x float> %r0,   float %c1, i32 1
55  %r2 = insertelement <4 x float> %r1,   float %c2, i32 2
56  %r3 = insertelement <4 x float> %r2,   float %c3, i32 3
57  ret <4 x float> %r3
58}
59
60;
61; 256-bit Vectors
62;
63
; Four-lane double case with a constant exponent of 16.
; The scalar input extracts all four lanes of %a, applies llvm.powi.f64 to
; each with the same exponent, and rebuilds a <4 x double> lane by lane.
; The assertions below expect a single <4 x double> powi call on %a with
; exponent 16, followed by the return.
64define <4 x double> @buildvector_powi_4f64_16(<4 x double> %a) {
65; CHECK-LABEL: @buildvector_powi_4f64_16(
66; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> [[A:%.*]], i32 16)
67; CHECK-NEXT:    ret <4 x double> [[TMP1]]
68;
69  %a0 = extractelement <4 x double> %a, i32 0
70  %a1 = extractelement <4 x double> %a, i32 1
71  %a2 = extractelement <4 x double> %a, i32 2
72  %a3 = extractelement <4 x double> %a, i32 3
73  %c0  = call double @llvm.powi.f64(double %a0 , i32 16)
74  %c1  = call double @llvm.powi.f64(double %a1 , i32 16)
75  %c2  = call double @llvm.powi.f64(double %a2 , i32 16)
76  %c3  = call double @llvm.powi.f64(double %a3 , i32 16)
77  %r0 = insertelement <4 x double> poison, double %c0, i32 0
78  %r1 = insertelement <4 x double> %r0,   double %c1, i32 1
79  %r2 = insertelement <4 x double> %r1,   double %c2, i32 2
80  %r3 = insertelement <4 x double> %r2,   double %c3, i32 3
81  ret <4 x double> %r3
82}
83
; Eight-lane float case with a constant exponent of 4.
; The scalar input extracts all eight lanes of %a, applies llvm.powi.f32 to
; each with the same exponent, and rebuilds an <8 x float> lane by lane.
; The assertions below expect a single <8 x float> powi call on %a with
; exponent 4, followed by the return.
84define <8 x float> @buildvector_powi_8f32_4(<8 x float> %a) {
85; CHECK-LABEL: @buildvector_powi_8f32_4(
86; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> [[A:%.*]], i32 4)
87; CHECK-NEXT:    ret <8 x float> [[TMP1]]
88;
89  %a0 = extractelement <8 x float> %a, i32 0
90  %a1 = extractelement <8 x float> %a, i32 1
91  %a2 = extractelement <8 x float> %a, i32 2
92  %a3 = extractelement <8 x float> %a, i32 3
93  %a4 = extractelement <8 x float> %a, i32 4
94  %a5 = extractelement <8 x float> %a, i32 5
95  %a6 = extractelement <8 x float> %a, i32 6
96  %a7 = extractelement <8 x float> %a, i32 7
97  %c0  = call float @llvm.powi.f32(float %a0 , i32 4)
98  %c1  = call float @llvm.powi.f32(float %a1 , i32 4)
99  %c2  = call float @llvm.powi.f32(float %a2 , i32 4)
100  %c3  = call float @llvm.powi.f32(float %a3 , i32 4)
101  %c4  = call float @llvm.powi.f32(float %a4 , i32 4)
102  %c5  = call float @llvm.powi.f32(float %a5 , i32 4)
103  %c6  = call float @llvm.powi.f32(float %a6 , i32 4)
104  %c7  = call float @llvm.powi.f32(float %a7 , i32 4)
105  %r0 = insertelement <8 x float> poison, float %c0, i32 0
106  %r1 = insertelement <8 x float> %r0,   float %c1, i32 1
107  %r2 = insertelement <8 x float> %r1,   float %c2, i32 2
108  %r3 = insertelement <8 x float> %r2,   float %c3, i32 3
109  %r4 = insertelement <8 x float> %r3,   float %c4, i32 4
110  %r5 = insertelement <8 x float> %r4,   float %c5, i32 5
111  %r6 = insertelement <8 x float> %r5,   float %c6, i32 6
112  %r7 = insertelement <8 x float> %r6,   float %c7, i32 7
113  ret <8 x float> %r7
114}
115
116;
117; 512-bit Vectors
118;
119
; Eight-lane double case with a constant exponent of 5.
; The scalar input extracts all eight lanes of %a, applies llvm.powi.f64 to
; each with the same exponent, and rebuilds an <8 x double> lane by lane.
; The assertions below expect a single <8 x double> powi call on %a with
; exponent 5, followed by the return, under every -mcpu level listed in the
; run lines at the top of the file (they all share one assertion prefix).
120define <8 x double> @buildvector_powi_8f64_5(<8 x double> %a) {
121; CHECK-LABEL: @buildvector_powi_8f64_5(
122; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x double> @llvm.powi.v8f64.i32(<8 x double> [[A:%.*]], i32 5)
123; CHECK-NEXT:    ret <8 x double> [[TMP1]]
124;
125  %a0 = extractelement <8 x double> %a, i32 0
126  %a1 = extractelement <8 x double> %a, i32 1
127  %a2 = extractelement <8 x double> %a, i32 2
128  %a3 = extractelement <8 x double> %a, i32 3
129  %a4 = extractelement <8 x double> %a, i32 4
130  %a5 = extractelement <8 x double> %a, i32 5
131  %a6 = extractelement <8 x double> %a, i32 6
132  %a7 = extractelement <8 x double> %a, i32 7
133  %c0  = call double @llvm.powi.f64(double %a0 , i32 5)
134  %c1  = call double @llvm.powi.f64(double %a1 , i32 5)
135  %c2  = call double @llvm.powi.f64(double %a2 , i32 5)
136  %c3  = call double @llvm.powi.f64(double %a3 , i32 5)
137  %c4  = call double @llvm.powi.f64(double %a4 , i32 5)
138  %c5  = call double @llvm.powi.f64(double %a5 , i32 5)
139  %c6  = call double @llvm.powi.f64(double %a6 , i32 5)
140  %c7  = call double @llvm.powi.f64(double %a7 , i32 5)
141  %r0 = insertelement <8 x double> poison, double %c0, i32 0
142  %r1 = insertelement <8 x double> %r0,   double %c1, i32 1
143  %r2 = insertelement <8 x double> %r1,   double %c2, i32 2
144  %r3 = insertelement <8 x double> %r2,   double %c3, i32 3
145  %r4 = insertelement <8 x double> %r3,   double %c4, i32 4
146  %r5 = insertelement <8 x double> %r4,   double %c5, i32 5
147  %r6 = insertelement <8 x double> %r5,   double %c6, i32 6
148  %r7 = insertelement <8 x double> %r6,   double %c7, i32 7
149  ret <8 x double> %r7
150}
151
; Negative test: eight-lane double case where every lane uses a different
; constant exponent (1, 2, 3, 4, 5, 6, 7 and -8).
; The assertions below expect the output to keep the same scalar shape as the
; input: eight extracts, eight scalar powi calls with their individual
; exponents, eight inserts, and the return.
; NOTE(review): presumably this stays scalar because the vector powi form
; takes one scalar exponent for all lanes -- confirm against the LangRef.
152define <8 x double> @buildvector_powi_8f64_mismatch(<8 x double> %a) {
153; CHECK-LABEL: @buildvector_powi_8f64_mismatch(
154; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x double> [[A:%.*]], i32 0
155; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x double> [[A]], i32 1
156; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x double> [[A]], i32 2
157; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x double> [[A]], i32 3
158; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x double> [[A]], i32 4
159; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x double> [[A]], i32 5
160; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x double> [[A]], i32 6
161; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x double> [[A]], i32 7
162; CHECK-NEXT:    [[C0:%.*]] = call double @llvm.powi.f64.i32(double [[A0]], i32 1)
163; CHECK-NEXT:    [[C1:%.*]] = call double @llvm.powi.f64.i32(double [[A1]], i32 2)
164; CHECK-NEXT:    [[C2:%.*]] = call double @llvm.powi.f64.i32(double [[A2]], i32 3)
165; CHECK-NEXT:    [[C3:%.*]] = call double @llvm.powi.f64.i32(double [[A3]], i32 4)
166; CHECK-NEXT:    [[C4:%.*]] = call double @llvm.powi.f64.i32(double [[A4]], i32 5)
167; CHECK-NEXT:    [[C5:%.*]] = call double @llvm.powi.f64.i32(double [[A5]], i32 6)
168; CHECK-NEXT:    [[C6:%.*]] = call double @llvm.powi.f64.i32(double [[A6]], i32 7)
169; CHECK-NEXT:    [[C7:%.*]] = call double @llvm.powi.f64.i32(double [[A7]], i32 -8)
170; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> poison, double [[C0]], i32 0
171; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1
172; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2
173; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3
174; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4
175; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5
176; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6
177; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7
178; CHECK-NEXT:    ret <8 x double> [[R7]]
179;
180  %a0 = extractelement <8 x double> %a, i32 0
181  %a1 = extractelement <8 x double> %a, i32 1
182  %a2 = extractelement <8 x double> %a, i32 2
183  %a3 = extractelement <8 x double> %a, i32 3
184  %a4 = extractelement <8 x double> %a, i32 4
185  %a5 = extractelement <8 x double> %a, i32 5
186  %a6 = extractelement <8 x double> %a, i32 6
187  %a7 = extractelement <8 x double> %a, i32 7
; Per-lane exponents deliberately differ; the last one is also negative.
188  %c0  = call double @llvm.powi.f64(double %a0 , i32 1)
189  %c1  = call double @llvm.powi.f64(double %a1 , i32 2)
190  %c2  = call double @llvm.powi.f64(double %a2 , i32 3)
191  %c3  = call double @llvm.powi.f64(double %a3 , i32 4)
192  %c4  = call double @llvm.powi.f64(double %a4 , i32 5)
193  %c5  = call double @llvm.powi.f64(double %a5 , i32 6)
194  %c6  = call double @llvm.powi.f64(double %a6 , i32 7)
195  %c7  = call double @llvm.powi.f64(double %a7 , i32 -8)
196  %r0 = insertelement <8 x double> poison, double %c0, i32 0
197  %r1 = insertelement <8 x double> %r0,   double %c1, i32 1
198  %r2 = insertelement <8 x double> %r1,   double %c2, i32 2
199  %r3 = insertelement <8 x double> %r2,   double %c3, i32 3
200  %r4 = insertelement <8 x double> %r3,   double %c4, i32 4
201  %r5 = insertelement <8 x double> %r4,   double %c5, i32 5
202  %r6 = insertelement <8 x double> %r5,   double %c6, i32 6
203  %r7 = insertelement <8 x double> %r6,   double %c7, i32 7
204  ret <8 x double> %r7
205}
206
; Sixteen-lane float case with a constant negative exponent of -13.
; The scalar input extracts all sixteen lanes of %a, applies llvm.powi.f32 to
; each with the same exponent, and rebuilds a <16 x float> lane by lane.
; The assertions below expect a single <16 x float> powi call on %a with
; exponent -13, followed by the return, so a negative exponent that is the
; same on every lane is still expected to vectorize.
207define <16 x float> @buildvector_powi_16f32_n13(<16 x float> %a) {
208; CHECK-LABEL: @buildvector_powi_16f32_n13(
209; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> [[A:%.*]], i32 -13)
210; CHECK-NEXT:    ret <16 x float> [[TMP1]]
211;
212  %a0  = extractelement <16 x float> %a, i32 0
213  %a1  = extractelement <16 x float> %a, i32 1
214  %a2  = extractelement <16 x float> %a, i32 2
215  %a3  = extractelement <16 x float> %a, i32 3
216  %a4  = extractelement <16 x float> %a, i32 4
217  %a5  = extractelement <16 x float> %a, i32 5
218  %a6  = extractelement <16 x float> %a, i32 6
219  %a7  = extractelement <16 x float> %a, i32 7
220  %a8  = extractelement <16 x float> %a, i32 8
221  %a9  = extractelement <16 x float> %a, i32 9
222  %a10 = extractelement <16 x float> %a, i32 10
223  %a11 = extractelement <16 x float> %a, i32 11
224  %a12 = extractelement <16 x float> %a, i32 12
225  %a13 = extractelement <16 x float> %a, i32 13
226  %a14 = extractelement <16 x float> %a, i32 14
227  %a15 = extractelement <16 x float> %a, i32 15
228  %c0  = call float @llvm.powi.f32(float %a0 , i32 -13)
229  %c1  = call float @llvm.powi.f32(float %a1 , i32 -13)
230  %c2  = call float @llvm.powi.f32(float %a2 , i32 -13)
231  %c3  = call float @llvm.powi.f32(float %a3 , i32 -13)
232  %c4  = call float @llvm.powi.f32(float %a4 , i32 -13)
233  %c5  = call float @llvm.powi.f32(float %a5 , i32 -13)
234  %c6  = call float @llvm.powi.f32(float %a6 , i32 -13)
235  %c7  = call float @llvm.powi.f32(float %a7 , i32 -13)
236  %c8  = call float @llvm.powi.f32(float %a8 , i32 -13)
237  %c9  = call float @llvm.powi.f32(float %a9 , i32 -13)
238  %c10 = call float @llvm.powi.f32(float %a10 , i32 -13)
239  %c11 = call float @llvm.powi.f32(float %a11 , i32 -13)
240  %c12 = call float @llvm.powi.f32(float %a12 , i32 -13)
241  %c13 = call float @llvm.powi.f32(float %a13 , i32 -13)
242  %c14 = call float @llvm.powi.f32(float %a14 , i32 -13)
243  %c15 = call float @llvm.powi.f32(float %a15 , i32 -13)
244  %r0  = insertelement <16 x float> poison, float %c0 , i32 0
245  %r1  = insertelement <16 x float> %r0 ,  float %c1 , i32 1
246  %r2  = insertelement <16 x float> %r1 ,  float %c2 , i32 2
247  %r3  = insertelement <16 x float> %r2 ,  float %c3 , i32 3
248  %r4  = insertelement <16 x float> %r3 ,  float %c4 , i32 4
249  %r5  = insertelement <16 x float> %r4 ,  float %c5 , i32 5
250  %r6  = insertelement <16 x float> %r5 ,  float %c6 , i32 6
251  %r7  = insertelement <16 x float> %r6 ,  float %c7 , i32 7
252  %r8  = insertelement <16 x float> %r7 ,  float %c8 , i32 8
253  %r9  = insertelement <16 x float> %r8 ,  float %c9 , i32 9
254  %r10 = insertelement <16 x float> %r9 ,  float %c10, i32 10
255  %r11 = insertelement <16 x float> %r10,  float %c11, i32 11
256  %r12 = insertelement <16 x float> %r11,  float %c12, i32 12
257  %r13 = insertelement <16 x float> %r12,  float %c13, i32 13
258  %r14 = insertelement <16 x float> %r13,  float %c14, i32 14
259  %r15 = insertelement <16 x float> %r14,  float %c15, i32 15
260  ret <16 x float> %r15
261}
262
; Scalar powi intrinsic declarations used by every function in this file:
; a float/double base raised to an i32 exponent.
; NOTE(review): these use the names llvm.powi.f32 / llvm.powi.f64, while the
; expected output in this file spells the calls with an extra ".i32" suffix
; (e.g. llvm.powi.f64.i32) -- presumably the older name is auto-upgraded when
; the module is parsed; confirm before renaming either side.
263declare float @llvm.powi.f32(float, i32)
264declare double @llvm.powi.f64(double, i32)
265