xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll (revision d70963a762850e74b79e178e006dc7861c2c94e5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
3; RUN:     -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
4; RUN:     | FileCheck %s
5; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
6; RUN:     | FileCheck %s --check-prefix=DEFAULT
7
8declare float @fabsf(float) readonly nounwind willreturn
9
; Lane-wise fabs through the external function @fabsf: each of the four lanes
; of the <4 x float> loaded from %a is extracted, passed to a `fast` scalar
; call, and re-inserted into the result vector. Both RUN configurations
; (prefixes CHECK and DEFAULT) expect one @llvm.fabs.v4f32 call on the loaded
; vector instead of the four scalar calls.
10define <4 x float> @fabs_4x(ptr %a) {
11; CHECK-LABEL: define <4 x float> @fabs_4x
12; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
13; CHECK-NEXT:  entry:
14; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
15; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
16; CHECK-NEXT:    ret <4 x float> [[TMP1]]
17;
18; DEFAULT-LABEL: define <4 x float> @fabs_4x
19; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
20; DEFAULT-NEXT:  entry:
21; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
22; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
23; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
24;
; Test input: extractelement -> scalar call -> insertelement for lanes 0..3,
; with the insert chain seeded from undef.
25entry:
26  %0 = load <4 x float>, ptr %a, align 16
27  %vecext = extractelement <4 x float> %0, i32 0
28  %1 = tail call fast float @fabsf(float %vecext)
29  %vecins = insertelement <4 x float> undef, float %1, i32 0
30  %vecext.1 = extractelement <4 x float> %0, i32 1
31  %2 = tail call fast float @fabsf(float %vecext.1)
32  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
33  %vecext.2 = extractelement <4 x float> %0, i32 2
34  %3 = tail call fast float @fabsf(float %vecext.2)
35  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
36  %vecext.3 = extractelement <4 x float> %0, i32 3
37  %4 = tail call fast float @fabsf(float %vecext.3)
38  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
39  ret <4 x float> %vecins.3
40}
41
42declare float @llvm.fabs.f32(float)
43
; Lane-wise fabs through the scalar intrinsic @llvm.fabs.f32: each of the four
; lanes of the <4 x float> loaded from %a is extracted, passed to a `fast`
; scalar call, and re-inserted into the result vector. Both RUN configurations
; (prefixes CHECK and DEFAULT) expect one @llvm.fabs.v4f32 call on the loaded
; vector instead of the four scalar calls.
44define <4 x float> @int_fabs_4x(ptr %a) {
45; CHECK-LABEL: define <4 x float> @int_fabs_4x
46; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
47; CHECK-NEXT:  entry:
48; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
49; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
50; CHECK-NEXT:    ret <4 x float> [[TMP1]]
51;
52; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
53; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
54; DEFAULT-NEXT:  entry:
55; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
56; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
57; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
58;
; Test input: extractelement -> scalar intrinsic call -> insertelement for
; lanes 0..3, with the insert chain seeded from undef.
59entry:
60  %0 = load <4 x float>, ptr %a, align 16
61  %vecext = extractelement <4 x float> %0, i32 0
62  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
63  %vecins = insertelement <4 x float> undef, float %1, i32 0
64  %vecext.1 = extractelement <4 x float> %0, i32 1
65  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
66  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
67  %vecext.2 = extractelement <4 x float> %0, i32 2
68  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
69  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
70  %vecext.3 = extractelement <4 x float> %0, i32 3
71  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
72  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
73  ret <4 x float> %vecins.3
74}
75
76declare float @sqrtf(float) readonly nounwind willreturn
77
; Lane-wise square root through the external function @sqrtf: each of the
; four lanes of the <4 x float> loaded from %a is extracted, passed to a
; `fast` scalar call, and re-inserted into the result vector. Both RUN
; configurations (prefixes CHECK and DEFAULT) expect one @llvm.sqrt.v4f32
; call on the loaded vector instead of the four scalar calls.
78define <4 x float> @sqrt_4x(ptr %a) {
79; CHECK-LABEL: define <4 x float> @sqrt_4x
80; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
81; CHECK-NEXT:  entry:
82; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
83; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
84; CHECK-NEXT:    ret <4 x float> [[TMP1]]
85;
86; DEFAULT-LABEL: define <4 x float> @sqrt_4x
87; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
88; DEFAULT-NEXT:  entry:
89; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
90; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
91; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
92;
; Test input: extractelement -> scalar call -> insertelement for lanes 0..3,
; with the insert chain seeded from undef.
93entry:
94  %0 = load <4 x float>, ptr %a, align 16
95  %vecext = extractelement <4 x float> %0, i32 0
96  %1 = tail call fast float @sqrtf(float %vecext)
97  %vecins = insertelement <4 x float> undef, float %1, i32 0
98  %vecext.1 = extractelement <4 x float> %0, i32 1
99  %2 = tail call fast float @sqrtf(float %vecext.1)
100  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
101  %vecext.2 = extractelement <4 x float> %0, i32 2
102  %3 = tail call fast float @sqrtf(float %vecext.2)
103  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
104  %vecext.3 = extractelement <4 x float> %0, i32 3
105  %4 = tail call fast float @sqrtf(float %vecext.3)
106  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
107  ret <4 x float> %vecins.3
108}
109
110declare float @llvm.sqrt.f32(float)
111
; Lane-wise square root through the scalar intrinsic @llvm.sqrt.f32: each of
; the four lanes of the <4 x float> loaded from %a is extracted, passed to a
; `fast` scalar call, and re-inserted into the result vector. Both RUN
; configurations (prefixes CHECK and DEFAULT) expect one @llvm.sqrt.v4f32
; call on the loaded vector instead of the four scalar calls.
112define <4 x float> @int_sqrt_4x(ptr %a) {
113; CHECK-LABEL: define <4 x float> @int_sqrt_4x
114; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
115; CHECK-NEXT:  entry:
116; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
117; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
118; CHECK-NEXT:    ret <4 x float> [[TMP1]]
119;
120; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
121; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
122; DEFAULT-NEXT:  entry:
123; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
124; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
125; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
126;
; Test input: extractelement -> scalar intrinsic call -> insertelement for
; lanes 0..3, with the insert chain seeded from undef.
127entry:
128  %0 = load <4 x float>, ptr %a, align 16
129  %vecext = extractelement <4 x float> %0, i32 0
130  %1 = tail call fast float @llvm.sqrt.f32(float %vecext)
131  %vecins = insertelement <4 x float> undef, float %1, i32 0
132  %vecext.1 = extractelement <4 x float> %0, i32 1
133  %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
134  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
135  %vecext.2 = extractelement <4 x float> %0, i32 2
136  %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
137  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
138  %vecext.3 = extractelement <4 x float> %0, i32 3
139  %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
140  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
141  ret <4 x float> %vecins.3
142}
143
144declare float @expf(float) readonly nounwind willreturn
145
146; exp cannot be vectorized here because RISC-V has no such instruction.
; Lane-wise exp through the external function @expf, using the same
; extract/call/insert shape over the four lanes of the vector loaded from %a.
; Negative test: both RUN configurations (prefixes CHECK and DEFAULT) expect
; the four scalar @expf calls and the extract/insert chain to remain as
; written.
147define <4 x float> @exp_4x(ptr %a) {
148; CHECK-LABEL: define <4 x float> @exp_4x
149; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
150; CHECK-NEXT:  entry:
151; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
152; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
153; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
154; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
155; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
156; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
157; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
158; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
159; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
160; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
161; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
162; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
163; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
164; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
165;
166; DEFAULT-LABEL: define <4 x float> @exp_4x
167; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
168; DEFAULT-NEXT:  entry:
169; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
170; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
171; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
172; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
173; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
174; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
175; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
176; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
177; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
178; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
179; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
180; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
181; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
182; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
183;
; Test input: extractelement -> scalar call -> insertelement for lanes 0..3,
; with the insert chain seeded from undef.
184entry:
185  %0 = load <4 x float>, ptr %a, align 16
186  %vecext = extractelement <4 x float> %0, i32 0
187  %1 = tail call fast float @expf(float %vecext)
188  %vecins = insertelement <4 x float> undef, float %1, i32 0
189  %vecext.1 = extractelement <4 x float> %0, i32 1
190  %2 = tail call fast float @expf(float %vecext.1)
191  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
192  %vecext.2 = extractelement <4 x float> %0, i32 2
193  %3 = tail call fast float @expf(float %vecext.2)
194  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
195  %vecext.3 = extractelement <4 x float> %0, i32 3
196  %4 = tail call fast float @expf(float %vecext.3)
197  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
198  ret <4 x float> %vecins.3
199}
200
201declare float @llvm.exp.f32(float)
202
203; exp cannot be vectorized here because RISC-V has no such instruction.
; Lane-wise exp through the scalar intrinsic @llvm.exp.f32, using the same
; extract/call/insert shape over the four lanes of the vector loaded from %a.
; Negative test: both RUN configurations (prefixes CHECK and DEFAULT) expect
; the four scalar @llvm.exp.f32 calls and the extract/insert chain to remain
; as written.
204define <4 x float> @int_exp_4x(ptr %a) {
205; CHECK-LABEL: define <4 x float> @int_exp_4x
206; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
207; CHECK-NEXT:  entry:
208; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
209; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
210; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
211; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
212; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
213; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
214; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
215; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
216; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
217; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
218; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
219; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
220; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
221; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
222;
223; DEFAULT-LABEL: define <4 x float> @int_exp_4x
224; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
225; DEFAULT-NEXT:  entry:
226; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
227; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
228; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
229; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
230; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
231; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
232; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
233; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
234; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
235; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
236; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
237; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
238; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
239; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
240;
; Test input: extractelement -> scalar intrinsic call -> insertelement for
; lanes 0..3, with the insert chain seeded from undef.
241entry:
242  %0 = load <4 x float>, ptr %a, align 16
243  %vecext = extractelement <4 x float> %0, i32 0
244  %1 = tail call fast float @llvm.exp.f32(float %vecext)
245  %vecins = insertelement <4 x float> undef, float %1, i32 0
246  %vecext.1 = extractelement <4 x float> %0, i32 1
247  %2 = tail call fast float @llvm.exp.f32(float %vecext.1)
248  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
249  %vecext.2 = extractelement <4 x float> %0, i32 2
250  %3 = tail call fast float @llvm.exp.f32(float %vecext.2)
251  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
252  %vecext.3 = extractelement <4 x float> %0, i32 3
253  %4 = tail call fast float @llvm.exp.f32(float %vecext.3)
254  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
255  ret <4 x float> %vecins.3
256}
257
258declare float @logf(float) readonly nounwind willreturn
259
260; log cannot be vectorized here because RISC-V has no such instruction.
; Lane-wise log through the external function @logf, using the same
; extract/call/insert shape over the four lanes of the vector loaded from %a.
; Negative test: both RUN configurations (prefixes CHECK and DEFAULT) expect
; the four scalar @logf calls and the extract/insert chain to remain as
; written.
261define <4 x float> @log_4x(ptr %a) {
262; CHECK-LABEL: define <4 x float> @log_4x
263; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
264; CHECK-NEXT:  entry:
265; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
266; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
267; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
268; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
269; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
270; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
271; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
272; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
273; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
274; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
275; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
276; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
277; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
278; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
279;
280; DEFAULT-LABEL: define <4 x float> @log_4x
281; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
282; DEFAULT-NEXT:  entry:
283; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
284; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
285; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
286; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
287; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
288; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
289; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
290; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
291; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
292; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
293; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
294; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
295; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
296; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
297;
; Test input: extractelement -> scalar call -> insertelement for lanes 0..3,
; with the insert chain seeded from undef.
298entry:
299  %0 = load <4 x float>, ptr %a, align 16
300  %vecext = extractelement <4 x float> %0, i32 0
301  %1 = tail call fast float @logf(float %vecext)
302  %vecins = insertelement <4 x float> undef, float %1, i32 0
303  %vecext.1 = extractelement <4 x float> %0, i32 1
304  %2 = tail call fast float @logf(float %vecext.1)
305  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
306  %vecext.2 = extractelement <4 x float> %0, i32 2
307  %3 = tail call fast float @logf(float %vecext.2)
308  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
309  %vecext.3 = extractelement <4 x float> %0, i32 3
310  %4 = tail call fast float @logf(float %vecext.3)
311  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
312  ret <4 x float> %vecins.3
313}
314
315declare float @llvm.log.f32(float)
316
317; log cannot be vectorized here because RISC-V has no such instruction.
; Lane-wise log through the scalar intrinsic @llvm.log.f32, using the same
; extract/call/insert shape over the four lanes of the vector loaded from %a.
; Negative test: both RUN configurations (prefixes CHECK and DEFAULT) expect
; the four scalar @llvm.log.f32 calls and the extract/insert chain to remain
; as written.
318define <4 x float> @int_log_4x(ptr %a) {
319; CHECK-LABEL: define <4 x float> @int_log_4x
320; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
321; CHECK-NEXT:  entry:
322; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
323; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
324; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
325; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
326; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
327; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
328; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
329; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
330; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
331; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
332; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
333; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
334; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
335; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
336;
337; DEFAULT-LABEL: define <4 x float> @int_log_4x
338; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
339; DEFAULT-NEXT:  entry:
340; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
341; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
342; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
343; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
344; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
345; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
346; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
347; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
348; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
349; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
350; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
351; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
352; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
353; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
354;
; Test input: extractelement -> scalar intrinsic call -> insertelement for
; lanes 0..3, with the insert chain seeded from undef.
355entry:
356  %0 = load <4 x float>, ptr %a, align 16
357  %vecext = extractelement <4 x float> %0, i32 0
358  %1 = tail call fast float @llvm.log.f32(float %vecext)
359  %vecins = insertelement <4 x float> undef, float %1, i32 0
360  %vecext.1 = extractelement <4 x float> %0, i32 1
361  %2 = tail call fast float @llvm.log.f32(float %vecext.1)
362  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
363  %vecext.2 = extractelement <4 x float> %0, i32 2
364  %3 = tail call fast float @llvm.log.f32(float %vecext.2)
365  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
366  %vecext.3 = extractelement <4 x float> %0, i32 3
367  %4 = tail call fast float @llvm.log.f32(float %vecext.3)
368  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
369  ret <4 x float> %vecins.3
370}
371
372declare float @sinf(float) readonly nounwind willreturn
373
374; sin cannot be vectorized here because RISC-V has no such instruction.
; Lane-wise sin through the external function @sinf, using the same
; extract/call/insert shape over the four lanes of the vector loaded from %a.
; Negative test: both RUN configurations (prefixes CHECK and DEFAULT) expect
; the four scalar @sinf calls and the extract/insert chain to remain as
; written.
375define <4 x float> @sin_4x(ptr %a) {
376; CHECK-LABEL: define <4 x float> @sin_4x
377; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
378; CHECK-NEXT:  entry:
379; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
380; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
381; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
382; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
383; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
384; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
385; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
386; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
387; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
388; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
389; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
390; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
391; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
392; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
393;
394; DEFAULT-LABEL: define <4 x float> @sin_4x
395; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
396; DEFAULT-NEXT:  entry:
397; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
398; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
399; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
400; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
401; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
402; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
403; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
404; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
405; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
406; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
407; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
408; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
409; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
410; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
411;
; Test input: extractelement -> scalar call -> insertelement for lanes 0..3,
; with the insert chain seeded from undef.
412entry:
413  %0 = load <4 x float>, ptr %a, align 16
414  %vecext = extractelement <4 x float> %0, i32 0
415  %1 = tail call fast float @sinf(float %vecext)
416  %vecins = insertelement <4 x float> undef, float %1, i32 0
417  %vecext.1 = extractelement <4 x float> %0, i32 1
418  %2 = tail call fast float @sinf(float %vecext.1)
419  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
420  %vecext.2 = extractelement <4 x float> %0, i32 2
421  %3 = tail call fast float @sinf(float %vecext.2)
422  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
423  %vecext.3 = extractelement <4 x float> %0, i32 3
424  %4 = tail call fast float @sinf(float %vecext.3)
425  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
426  ret <4 x float> %vecins.3
427}
428
429declare float @llvm.sin.f32(float)
430
431; sin cannot be vectorized here because RISC-V has no such instruction.
; Lane-wise sin through the scalar intrinsic @llvm.sin.f32, using the same
; extract/call/insert shape over the four lanes of the vector loaded from %a.
; Negative test: both RUN configurations (prefixes CHECK and DEFAULT) expect
; the four scalar @llvm.sin.f32 calls and the extract/insert chain to remain
; as written.
432define <4 x float> @int_sin_4x(ptr %a) {
433; CHECK-LABEL: define <4 x float> @int_sin_4x
434; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
435; CHECK-NEXT:  entry:
436; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
437; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
438; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
439; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
440; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
441; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
442; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
443; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
444; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
445; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
446; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
447; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
448; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
449; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
450;
451; DEFAULT-LABEL: define <4 x float> @int_sin_4x
452; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
453; DEFAULT-NEXT:  entry:
454; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
455; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
456; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
457; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
458; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
459; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
460; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
461; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
462; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
463; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
464; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
465; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
466; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
467; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
468;
; Test input: extractelement -> scalar intrinsic call -> insertelement for
; lanes 0..3, with the insert chain seeded from undef.
469entry:
470  %0 = load <4 x float>, ptr %a, align 16
471  %vecext = extractelement <4 x float> %0, i32 0
472  %1 = tail call fast float @llvm.sin.f32(float %vecext)
473  %vecins = insertelement <4 x float> undef, float %1, i32 0
474  %vecext.1 = extractelement <4 x float> %0, i32 1
475  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
476  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
477  %vecext.2 = extractelement <4 x float> %0, i32 2
478  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
479  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
480  %vecext.3 = extractelement <4 x float> %0, i32 3
481  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
482  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
483  ret <4 x float> %vecins.3
484}
485
486declare float @asinf(float) readonly nounwind willreturn
487
488; asin cannot be vectorized here because RISC-V has no such instruction.
; Lane-wise asin through the external function @asinf, using the same
; extract/call/insert shape over the four lanes of the vector loaded from %a.
; Negative test: both RUN configurations (prefixes CHECK and DEFAULT) expect
; the four scalar @asinf calls and the extract/insert chain to remain as
; written.
489define <4 x float> @asin_4x(ptr %a) {
490; CHECK-LABEL: define <4 x float> @asin_4x
491; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
492; CHECK-NEXT:  entry:
493; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
494; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
495; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
496; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
497; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
498; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
499; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
500; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
501; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
502; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
503; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
504; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
505; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
506; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
507;
508; DEFAULT-LABEL: define <4 x float> @asin_4x
509; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
510; DEFAULT-NEXT:  entry:
511; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
512; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
513; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
514; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
515; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
516; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
517; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
518; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
519; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
520; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
521; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
522; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
523; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
524; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
525;
; Test input: extractelement -> scalar call -> insertelement for lanes 0..3,
; with the insert chain seeded from undef.
526entry:
527  %0 = load <4 x float>, ptr %a, align 16
528  %vecext = extractelement <4 x float> %0, i32 0
529  %1 = tail call fast float @asinf(float %vecext)
530  %vecins = insertelement <4 x float> undef, float %1, i32 0
531  %vecext.1 = extractelement <4 x float> %0, i32 1
532  %2 = tail call fast float @asinf(float %vecext.1)
533  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
534  %vecext.2 = extractelement <4 x float> %0, i32 2
535  %3 = tail call fast float @asinf(float %vecext.2)
536  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
537  %vecext.3 = extractelement <4 x float> %0, i32 3
538  %4 = tail call fast float @asinf(float %vecext.3)
539  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
540  ret <4 x float> %vecins.3
541}
542
543declare float @llvm.asin.f32(float)
544
545; We cannot vectorize the @llvm.asin.f32 calls since RISC-V has no such instruction; both RUN configurations (CHECK and DEFAULT prefixes) expect the four scalar calls to be left unvectorized.
546define <4 x float> @int_asin_4x(ptr %a) {
547; CHECK-LABEL: define <4 x float> @int_asin_4x
548; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
549; CHECK-NEXT:  entry:
550; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
551; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
552; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
553; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
554; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
555; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
556; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
557; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
558; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
559; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
560; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
561; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
562; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
563; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
564;
565; DEFAULT-LABEL: define <4 x float> @int_asin_4x
566; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
567; DEFAULT-NEXT:  entry:
568; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
569; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
570; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
571; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
572; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
573; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
574; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
575; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
576; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
577; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
578; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
579; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
580; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
581; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
582;
583entry:
584  %0 = load <4 x float>, ptr %a, align 16
585  %vecext = extractelement <4 x float> %0, i32 0
586  %1 = tail call fast float @llvm.asin.f32(float %vecext)
587  %vecins = insertelement <4 x float> undef, float %1, i32 0
588  %vecext.1 = extractelement <4 x float> %0, i32 1
589  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
590  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
591  %vecext.2 = extractelement <4 x float> %0, i32 2
592  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
593  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
594  %vecext.3 = extractelement <4 x float> %0, i32 3
595  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
596  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
597  ret <4 x float> %vecins.3
598}
599
600declare float @coshf(float) readonly nounwind willreturn
601
602; We cannot vectorize the @coshf calls since RISC-V has no such instruction; both RUN configurations (CHECK and DEFAULT prefixes) expect the four scalar calls to be left unvectorized.
603define <4 x float> @cosh_4x(ptr %a) {
604; CHECK-LABEL: define <4 x float> @cosh_4x
605; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
606; CHECK-NEXT:  entry:
607; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
608; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
609; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
610; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
611; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
612; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
613; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
614; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
615; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
616; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
617; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
618; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
619; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
620; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
621;
622; DEFAULT-LABEL: define <4 x float> @cosh_4x
623; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
624; DEFAULT-NEXT:  entry:
625; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
626; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
627; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
628; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
629; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
630; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
631; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
632; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
633; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
634; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
635; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
636; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
637; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
638; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
639;
640entry:
641  %0 = load <4 x float>, ptr %a, align 16
642  %vecext = extractelement <4 x float> %0, i32 0
643  %1 = tail call fast float @coshf(float %vecext)
644  %vecins = insertelement <4 x float> undef, float %1, i32 0
645  %vecext.1 = extractelement <4 x float> %0, i32 1
646  %2 = tail call fast float @coshf(float %vecext.1)
647  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
648  %vecext.2 = extractelement <4 x float> %0, i32 2
649  %3 = tail call fast float @coshf(float %vecext.2)
650  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
651  %vecext.3 = extractelement <4 x float> %0, i32 3
652  %4 = tail call fast float @coshf(float %vecext.3)
653  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
654  ret <4 x float> %vecins.3
655}
656
657declare float @llvm.cosh.f32(float)
658
659; We cannot vectorize the @llvm.cosh.f32 calls since RISC-V has no such instruction; both RUN configurations (CHECK and DEFAULT prefixes) expect the four scalar calls to be left unvectorized.
660define <4 x float> @int_cosh_4x(ptr %a) {
661; CHECK-LABEL: define <4 x float> @int_cosh_4x
662; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
663; CHECK-NEXT:  entry:
664; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
665; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
666; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
667; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
668; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
669; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
670; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
671; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
672; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
673; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
674; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
675; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
676; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
677; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
678;
679; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
680; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
681; DEFAULT-NEXT:  entry:
682; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
683; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
684; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
685; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
686; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
687; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
688; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
689; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
690; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
691; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
692; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
693; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
694; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
695; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
696;
697entry:
698  %0 = load <4 x float>, ptr %a, align 16
699  %vecext = extractelement <4 x float> %0, i32 0
700  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
701  %vecins = insertelement <4 x float> undef, float %1, i32 0
702  %vecext.1 = extractelement <4 x float> %0, i32 1
703  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
704  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
705  %vecext.2 = extractelement <4 x float> %0, i32 2
706  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
707  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
708  %vecext.3 = extractelement <4 x float> %0, i32 3
709  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
710  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
711  ret <4 x float> %vecins.3
712}
713
714declare float @atanhf(float) readonly nounwind willreturn
715
716; We cannot vectorize the @atanhf calls since RISC-V has no such instruction; both RUN configurations (CHECK and DEFAULT prefixes) expect the four scalar calls to be left unvectorized.
717define <4 x float> @atanh_4x(ptr %a) {
718; CHECK-LABEL: define <4 x float> @atanh_4x
719; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
720; CHECK-NEXT:  entry:
721; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
722; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
723; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
724; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
725; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
726; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
727; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
728; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
729; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
730; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
731; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
732; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
733; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
734; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
735;
736; DEFAULT-LABEL: define <4 x float> @atanh_4x
737; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
738; DEFAULT-NEXT:  entry:
739; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
740; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
741; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
742; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
743; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
744; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
745; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
746; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
747; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
748; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
749; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
750; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
751; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
752; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
753;
754entry:
755  %0 = load <4 x float>, ptr %a, align 16
756  %vecext = extractelement <4 x float> %0, i32 0
757  %1 = tail call fast float @atanhf(float %vecext)
758  %vecins = insertelement <4 x float> undef, float %1, i32 0
759  %vecext.1 = extractelement <4 x float> %0, i32 1
760  %2 = tail call fast float @atanhf(float %vecext.1)
761  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
762  %vecext.2 = extractelement <4 x float> %0, i32 2
763  %3 = tail call fast float @atanhf(float %vecext.2)
764  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
765  %vecext.3 = extractelement <4 x float> %0, i32 3
766  %4 = tail call fast float @atanhf(float %vecext.3)
767  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
768  ret <4 x float> %vecins.3
769}
770
771declare float @llvm.atanh.f32(float)
772
773; We cannot vectorize the @llvm.atanh.f32 calls since RISC-V has no such instruction; both RUN configurations (CHECK and DEFAULT prefixes) expect the four scalar calls to be left unvectorized.
774define <4 x float> @int_atanh_4x(ptr %a) {
775; CHECK-LABEL: define <4 x float> @int_atanh_4x
776; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
777; CHECK-NEXT:  entry:
778; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
779; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
780; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
781; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
782; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
783; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
784; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
785; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
786; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
787; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
788; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
789; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
790; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
791; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
792;
793; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
794; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
795; DEFAULT-NEXT:  entry:
796; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
797; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
798; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
799; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
800; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
801; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
802; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
803; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
804; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
805; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
806; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
807; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
808; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
809; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
810;
811entry:
812  %0 = load <4 x float>, ptr %a, align 16
813  %vecext = extractelement <4 x float> %0, i32 0
814  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
815  %vecins = insertelement <4 x float> undef, float %1, i32 0
816  %vecext.1 = extractelement <4 x float> %0, i32 1
817  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
818  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
819  %vecext.2 = extractelement <4 x float> %0, i32 2
820  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
821  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
822  %vecext.3 = extractelement <4 x float> %0, i32 3
823  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
824  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
825  ret <4 x float> %vecins.3
826}
827