xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll (revision 8f548610a61a449316cf2a8c212cb6b64dd9acc3)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
3; RUN:     -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
4; RUN:     | FileCheck %s
5; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
6; RUN:     | FileCheck %s --check-prefix=DEFAULT
7
; libm fabsf, declared readonly nounwind willreturn.
8declare float @fabsf(float) readonly nounwind willreturn
9
; Applies the fabsf libcall to each of the four lanes of a <4 x float> loaded
; from %a and rebuilds the vector with insertelement. Both opt invocations are
; expected to emit one <4 x float> @llvm.fabs.v4f32 call on the loaded vector.
10define <4 x float> @fabs_4x(ptr %a) {
11; CHECK-LABEL: define <4 x float> @fabs_4x
12; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
13; CHECK-NEXT:  entry:
14; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
15; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
16; CHECK-NEXT:    ret <4 x float> [[TMP1]]
17;
18; DEFAULT-LABEL: define <4 x float> @fabs_4x
19; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
20; DEFAULT-NEXT:  entry:
21; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
22; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
23; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
24;
25entry:
26  %0 = load <4 x float>, ptr %a, align 16
27  %vecext = extractelement <4 x float> %0, i32 0
28  %1 = tail call fast float @fabsf(float %vecext)
29  %vecins = insertelement <4 x float> undef, float %1, i32 0
30  %vecext.1 = extractelement <4 x float> %0, i32 1
31  %2 = tail call fast float @fabsf(float %vecext.1)
32  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
33  %vecext.2 = extractelement <4 x float> %0, i32 2
34  %3 = tail call fast float @fabsf(float %vecext.2)
35  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
36  %vecext.3 = extractelement <4 x float> %0, i32 3
37  %4 = tail call fast float @fabsf(float %vecext.3)
38  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
39  ret <4 x float> %vecins.3
40}
41
; Scalar fabs intrinsic used by the test below.
42declare float @llvm.fabs.f32(float)
43
; Same shape as the fabsf libcall test, but the four per-lane calls use the
; scalar @llvm.fabs.f32 intrinsic. Both opt invocations are expected to emit
; one <4 x float> @llvm.fabs.v4f32 call on the loaded vector.
44define <4 x float> @int_fabs_4x(ptr %a) {
45; CHECK-LABEL: define <4 x float> @int_fabs_4x
46; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
47; CHECK-NEXT:  entry:
48; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
49; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
50; CHECK-NEXT:    ret <4 x float> [[TMP1]]
51;
52; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
53; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
54; DEFAULT-NEXT:  entry:
55; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
56; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
57; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
58;
59entry:
60  %0 = load <4 x float>, ptr %a, align 16
61  %vecext = extractelement <4 x float> %0, i32 0
62  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
63  %vecins = insertelement <4 x float> undef, float %1, i32 0
64  %vecext.1 = extractelement <4 x float> %0, i32 1
65  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
66  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
67  %vecext.2 = extractelement <4 x float> %0, i32 2
68  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
69  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
70  %vecext.3 = extractelement <4 x float> %0, i32 3
71  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
72  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
73  ret <4 x float> %vecins.3
74}
75
; libm sqrtf, declared readonly nounwind willreturn.
76declare float @sqrtf(float) readonly nounwind willreturn
77
; Applies the sqrtf libcall to each of the four lanes of a <4 x float> loaded
; from %a and rebuilds the vector with insertelement. Both opt invocations are
; expected to emit one <4 x float> @llvm.sqrt.v4f32 call on the loaded vector.
78define <4 x float> @sqrt_4x(ptr %a) {
79; CHECK-LABEL: define <4 x float> @sqrt_4x
80; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
81; CHECK-NEXT:  entry:
82; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
83; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
84; CHECK-NEXT:    ret <4 x float> [[TMP1]]
85;
86; DEFAULT-LABEL: define <4 x float> @sqrt_4x
87; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
88; DEFAULT-NEXT:  entry:
89; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
90; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
91; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
92;
93entry:
94  %0 = load <4 x float>, ptr %a, align 16
95  %vecext = extractelement <4 x float> %0, i32 0
96  %1 = tail call fast float @sqrtf(float %vecext)
97  %vecins = insertelement <4 x float> undef, float %1, i32 0
98  %vecext.1 = extractelement <4 x float> %0, i32 1
99  %2 = tail call fast float @sqrtf(float %vecext.1)
100  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
101  %vecext.2 = extractelement <4 x float> %0, i32 2
102  %3 = tail call fast float @sqrtf(float %vecext.2)
103  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
104  %vecext.3 = extractelement <4 x float> %0, i32 3
105  %4 = tail call fast float @sqrtf(float %vecext.3)
106  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
107  ret <4 x float> %vecins.3
108}
109
; Scalar sqrt intrinsic used by the test below.
110declare float @llvm.sqrt.f32(float)
111
; Same shape as the sqrtf libcall test, but the four per-lane calls use the
; scalar @llvm.sqrt.f32 intrinsic. Both opt invocations are expected to emit
; one <4 x float> @llvm.sqrt.v4f32 call on the loaded vector.
112define <4 x float> @int_sqrt_4x(ptr %a) {
113; CHECK-LABEL: define <4 x float> @int_sqrt_4x
114; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
115; CHECK-NEXT:  entry:
116; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
117; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
118; CHECK-NEXT:    ret <4 x float> [[TMP1]]
119;
120; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
121; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
122; DEFAULT-NEXT:  entry:
123; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
124; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
125; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
126;
127entry:
128  %0 = load <4 x float>, ptr %a, align 16
129  %vecext = extractelement <4 x float> %0, i32 0
130  %1 = tail call fast float @llvm.sqrt.f32(float %vecext)
131  %vecins = insertelement <4 x float> undef, float %1, i32 0
132  %vecext.1 = extractelement <4 x float> %0, i32 1
133  %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
134  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
135  %vecext.2 = extractelement <4 x float> %0, i32 2
136  %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
137  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
138  %vecext.3 = extractelement <4 x float> %0, i32 3
139  %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
140  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
141  ret <4 x float> %vecins.3
142}
143
; libm expf, declared readonly nounwind willreturn.
144declare float @expf(float) readonly nounwind willreturn
145
146; exp is not vectorized at the full <4 x float> width since RISC-V has no such instruction.
; The expected output for both opt invocations keeps lanes 0 and 1 as scalar
; @expf calls and combines lanes 2 and 3 into one <2 x float>
; @llvm.exp.v2f32 call whose result is shuffled back into the <4 x float>.
147define <4 x float> @exp_4x(ptr %a) {
148; CHECK-LABEL: define <4 x float> @exp_4x
149; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
150; CHECK-NEXT:  entry:
151; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
152; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
153; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
154; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
155; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
156; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
157; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
158; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
159; CHECK-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
160; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
161; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
162; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
163;
164; DEFAULT-LABEL: define <4 x float> @exp_4x
165; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
166; DEFAULT-NEXT:  entry:
167; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
168; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
169; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
170; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
171; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
172; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
173; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
174; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
175; DEFAULT-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
176; DEFAULT-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
177; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
178; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
179;
180entry:
181  %0 = load <4 x float>, ptr %a, align 16
182  %vecext = extractelement <4 x float> %0, i32 0
183  %1 = tail call fast float @expf(float %vecext)
184  %vecins = insertelement <4 x float> undef, float %1, i32 0
185  %vecext.1 = extractelement <4 x float> %0, i32 1
186  %2 = tail call fast float @expf(float %vecext.1)
187  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
188  %vecext.2 = extractelement <4 x float> %0, i32 2
189  %3 = tail call fast float @expf(float %vecext.2)
190  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
191  %vecext.3 = extractelement <4 x float> %0, i32 3
192  %4 = tail call fast float @expf(float %vecext.3)
193  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
194  ret <4 x float> %vecins.3
195}
196
; Scalar exp intrinsic used by the test below.
197declare float @llvm.exp.f32(float)
198
199; exp is not vectorized at the full <4 x float> width since RISC-V has no such instruction.
; The expected output for both opt invocations keeps lanes 0 and 1 as scalar
; @llvm.exp.f32 calls and combines lanes 2 and 3 into one <2 x float>
; @llvm.exp.v2f32 call whose result is shuffled back into the <4 x float>.
200define <4 x float> @int_exp_4x(ptr %a) {
201; CHECK-LABEL: define <4 x float> @int_exp_4x
202; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
203; CHECK-NEXT:  entry:
204; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
205; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
206; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
207; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
208; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
209; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
210; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
211; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
212; CHECK-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
213; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
214; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
215; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
216;
217; DEFAULT-LABEL: define <4 x float> @int_exp_4x
218; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
219; DEFAULT-NEXT:  entry:
220; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
221; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
222; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
223; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
224; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
225; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
226; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
227; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
228; DEFAULT-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]])
229; DEFAULT-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
230; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
231; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
232;
233entry:
234  %0 = load <4 x float>, ptr %a, align 16
235  %vecext = extractelement <4 x float> %0, i32 0
236  %1 = tail call fast float @llvm.exp.f32(float %vecext)
237  %vecins = insertelement <4 x float> undef, float %1, i32 0
238  %vecext.1 = extractelement <4 x float> %0, i32 1
239  %2 = tail call fast float @llvm.exp.f32(float %vecext.1)
240  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
241  %vecext.2 = extractelement <4 x float> %0, i32 2
242  %3 = tail call fast float @llvm.exp.f32(float %vecext.2)
243  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
244  %vecext.3 = extractelement <4 x float> %0, i32 3
245  %4 = tail call fast float @llvm.exp.f32(float %vecext.3)
246  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
247  ret <4 x float> %vecins.3
248}
249
; libm logf, declared readonly nounwind willreturn.
250declare float @logf(float) readonly nounwind willreturn
251
252; log is not vectorized at the full <4 x float> width since RISC-V has no such instruction.
; The expected output for both opt invocations keeps lanes 0 and 1 as scalar
; @logf calls and combines lanes 2 and 3 into one <2 x float>
; @llvm.log.v2f32 call whose result is shuffled back into the <4 x float>.
253define <4 x float> @log_4x(ptr %a) {
254; CHECK-LABEL: define <4 x float> @log_4x
255; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
256; CHECK-NEXT:  entry:
257; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
258; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
259; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
260; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
261; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
262; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
263; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
264; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
265; CHECK-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
266; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
267; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
268; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
269;
270; DEFAULT-LABEL: define <4 x float> @log_4x
271; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
272; DEFAULT-NEXT:  entry:
273; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
274; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
275; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
276; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
277; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
278; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
279; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
280; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
281; DEFAULT-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
282; DEFAULT-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
283; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
284; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
285;
286entry:
287  %0 = load <4 x float>, ptr %a, align 16
288  %vecext = extractelement <4 x float> %0, i32 0
289  %1 = tail call fast float @logf(float %vecext)
290  %vecins = insertelement <4 x float> undef, float %1, i32 0
291  %vecext.1 = extractelement <4 x float> %0, i32 1
292  %2 = tail call fast float @logf(float %vecext.1)
293  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
294  %vecext.2 = extractelement <4 x float> %0, i32 2
295  %3 = tail call fast float @logf(float %vecext.2)
296  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
297  %vecext.3 = extractelement <4 x float> %0, i32 3
298  %4 = tail call fast float @logf(float %vecext.3)
299  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
300  ret <4 x float> %vecins.3
301}
302
; Scalar log intrinsic used by the test below.
303declare float @llvm.log.f32(float)
304
305; log is not vectorized at the full <4 x float> width since RISC-V has no such instruction.
; The expected output for both opt invocations keeps lanes 0 and 1 as scalar
; @llvm.log.f32 calls and combines lanes 2 and 3 into one <2 x float>
; @llvm.log.v2f32 call whose result is shuffled back into the <4 x float>.
306define <4 x float> @int_log_4x(ptr %a) {
307; CHECK-LABEL: define <4 x float> @int_log_4x
308; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
309; CHECK-NEXT:  entry:
310; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
311; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
312; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
313; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
314; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
315; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
316; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
317; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
318; CHECK-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
319; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
320; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
321; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
322;
323; DEFAULT-LABEL: define <4 x float> @int_log_4x
324; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
325; DEFAULT-NEXT:  entry:
326; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
327; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
328; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
329; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
330; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
331; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
332; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
333; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
334; DEFAULT-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]])
335; DEFAULT-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
336; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
337; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
338;
339entry:
340  %0 = load <4 x float>, ptr %a, align 16
341  %vecext = extractelement <4 x float> %0, i32 0
342  %1 = tail call fast float @llvm.log.f32(float %vecext)
343  %vecins = insertelement <4 x float> undef, float %1, i32 0
344  %vecext.1 = extractelement <4 x float> %0, i32 1
345  %2 = tail call fast float @llvm.log.f32(float %vecext.1)
346  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
347  %vecext.2 = extractelement <4 x float> %0, i32 2
348  %3 = tail call fast float @llvm.log.f32(float %vecext.2)
349  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
350  %vecext.3 = extractelement <4 x float> %0, i32 3
351  %4 = tail call fast float @llvm.log.f32(float %vecext.3)
352  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
353  ret <4 x float> %vecins.3
354}
355
; libm sinf, declared readonly nounwind willreturn.
356declare float @sinf(float) readonly nounwind willreturn
357
358; sin is not vectorized at the full <4 x float> width since RISC-V has no such instruction.
; The expected output for both opt invocations keeps lanes 0 and 1 as scalar
; @sinf calls and combines lanes 2 and 3 into one <2 x float>
; @llvm.sin.v2f32 call whose result is shuffled back into the <4 x float>.
359define <4 x float> @sin_4x(ptr %a) {
360; CHECK-LABEL: define <4 x float> @sin_4x
361; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
362; CHECK-NEXT:  entry:
363; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
364; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
365; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
366; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
367; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
368; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
369; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
370; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
371; CHECK-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
372; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
373; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
374; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
375;
376; DEFAULT-LABEL: define <4 x float> @sin_4x
377; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
378; DEFAULT-NEXT:  entry:
379; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
380; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
381; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
382; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
383; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
384; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
385; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
386; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
387; DEFAULT-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
388; DEFAULT-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
389; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
390; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
391;
392entry:
393  %0 = load <4 x float>, ptr %a, align 16
394  %vecext = extractelement <4 x float> %0, i32 0
395  %1 = tail call fast float @sinf(float %vecext)
396  %vecins = insertelement <4 x float> undef, float %1, i32 0
397  %vecext.1 = extractelement <4 x float> %0, i32 1
398  %2 = tail call fast float @sinf(float %vecext.1)
399  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
400  %vecext.2 = extractelement <4 x float> %0, i32 2
401  %3 = tail call fast float @sinf(float %vecext.2)
402  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
403  %vecext.3 = extractelement <4 x float> %0, i32 3
404  %4 = tail call fast float @sinf(float %vecext.3)
405  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
406  ret <4 x float> %vecins.3
407}
408
; Scalar sin intrinsic used by the test below.
409declare float @llvm.sin.f32(float)
410
411; sin is not vectorized at the full <4 x float> width since RISC-V has no such instruction.
; The expected output for both opt invocations keeps lanes 0 and 1 as scalar
; @llvm.sin.f32 calls and combines lanes 2 and 3 into one <2 x float>
; @llvm.sin.v2f32 call whose result is shuffled back into the <4 x float>.
412define <4 x float> @int_sin_4x(ptr %a) {
413; CHECK-LABEL: define <4 x float> @int_sin_4x
414; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
415; CHECK-NEXT:  entry:
416; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
417; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
418; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
419; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
420; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
421; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
422; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
423; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
424; CHECK-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
425; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
426; CHECK-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
427; CHECK-NEXT:    ret <4 x float> [[VECINS_31]]
428;
429; DEFAULT-LABEL: define <4 x float> @int_sin_4x
430; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
431; DEFAULT-NEXT:  entry:
432; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
433; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
434; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
435; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
436; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
437; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
438; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
439; DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
440; DEFAULT-NEXT:    [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]])
441; DEFAULT-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
442; DEFAULT-NEXT:    [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
443; DEFAULT-NEXT:    ret <4 x float> [[VECINS_31]]
444;
445entry:
446  %0 = load <4 x float>, ptr %a, align 16
447  %vecext = extractelement <4 x float> %0, i32 0
448  %1 = tail call fast float @llvm.sin.f32(float %vecext)
449  %vecins = insertelement <4 x float> undef, float %1, i32 0
450  %vecext.1 = extractelement <4 x float> %0, i32 1
451  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
452  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
453  %vecext.2 = extractelement <4 x float> %0, i32 2
454  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
455  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
456  %vecext.3 = extractelement <4 x float> %0, i32 3
457  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
458  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
459  ret <4 x float> %vecins.3
460}
461
; libm asinf, declared readonly nounwind willreturn.
462declare float @asinf(float) readonly nounwind willreturn
463
464; We cannot vectorize asin since RISC-V has no such instruction.
; The expected output for both opt invocations keeps all four scalar @asinf
; calls and the extractelement/insertelement chain unchanged.
465define <4 x float> @asin_4x(ptr %a) {
466; CHECK-LABEL: define <4 x float> @asin_4x
467; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
468; CHECK-NEXT:  entry:
469; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
470; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
471; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
472; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
473; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
474; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
475; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
476; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
477; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
478; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
479; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
480; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
481; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
482; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
483;
484; DEFAULT-LABEL: define <4 x float> @asin_4x
485; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
486; DEFAULT-NEXT:  entry:
487; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
488; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
489; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
490; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
491; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
492; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
493; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
494; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
495; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
496; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
497; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
498; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
499; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
500; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
501;
502entry:
503  %0 = load <4 x float>, ptr %a, align 16
504  %vecext = extractelement <4 x float> %0, i32 0
505  %1 = tail call fast float @asinf(float %vecext)
506  %vecins = insertelement <4 x float> undef, float %1, i32 0
507  %vecext.1 = extractelement <4 x float> %0, i32 1
508  %2 = tail call fast float @asinf(float %vecext.1)
509  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
510  %vecext.2 = extractelement <4 x float> %0, i32 2
511  %3 = tail call fast float @asinf(float %vecext.2)
512  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
513  %vecext.3 = extractelement <4 x float> %0, i32 3
514  %4 = tail call fast float @asinf(float %vecext.3)
515  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
516  ret <4 x float> %vecins.3
517}
518
519declare float @llvm.asin.f32(float)
520
521; We cannot vectorize asin since RISC-V has no such instruction.
522define <4 x float> @int_asin_4x(ptr %a) {
523; CHECK-LABEL: define <4 x float> @int_asin_4x
524; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
525; CHECK-NEXT:  entry:
526; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
527; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
528; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
529; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
530; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
531; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
532; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
533; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
534; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
535; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
536; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
537; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
538; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
539; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
540;
541; DEFAULT-LABEL: define <4 x float> @int_asin_4x
542; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
543; DEFAULT-NEXT:  entry:
544; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
545; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
546; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
547; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
548; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
549; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
550; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
551; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
552; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
553; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
554; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
555; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
556; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
557; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
558;
559entry:
560  %0 = load <4 x float>, ptr %a, align 16
561  %vecext = extractelement <4 x float> %0, i32 0
562  %1 = tail call fast float @llvm.asin.f32(float %vecext)
563  %vecins = insertelement <4 x float> undef, float %1, i32 0
564  %vecext.1 = extractelement <4 x float> %0, i32 1
565  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
566  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
567  %vecext.2 = extractelement <4 x float> %0, i32 2
568  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
569  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
570  %vecext.3 = extractelement <4 x float> %0, i32 3
571  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
572  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
573  ret <4 x float> %vecins.3
574}
575
576declare float @coshf(float) readonly nounwind willreturn
577
578; We cannot vectorize the coshf libcalls since RISC-V has no such instruction, so both RUN configurations expect the four scalar calls to remain.
579define <4 x float> @cosh_4x(ptr %a) {
580; CHECK-LABEL: define <4 x float> @cosh_4x
581; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
582; CHECK-NEXT:  entry:
583; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
584; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
585; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
586; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
587; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
588; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
589; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
590; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
591; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
592; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
593; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
594; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
595; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
596; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
597;
598; DEFAULT-LABEL: define <4 x float> @cosh_4x
599; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
600; DEFAULT-NEXT:  entry:
601; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
602; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
603; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
604; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
605; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
606; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
607; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
608; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
609; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
610; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
611; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
612; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
613; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
614; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
615;
616entry:
617  %0 = load <4 x float>, ptr %a, align 16
618  %vecext = extractelement <4 x float> %0, i32 0
619  %1 = tail call fast float @coshf(float %vecext)
620  %vecins = insertelement <4 x float> undef, float %1, i32 0
621  %vecext.1 = extractelement <4 x float> %0, i32 1
622  %2 = tail call fast float @coshf(float %vecext.1)
623  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
624  %vecext.2 = extractelement <4 x float> %0, i32 2
625  %3 = tail call fast float @coshf(float %vecext.2)
626  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
627  %vecext.3 = extractelement <4 x float> %0, i32 3
628  %4 = tail call fast float @coshf(float %vecext.3)
629  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
630  ret <4 x float> %vecins.3
631}
632
633declare float @llvm.cosh.f32(float)
634
635; We cannot vectorize the llvm.cosh.f32 intrinsic calls since RISC-V has no such instruction, so both RUN configurations expect the four scalar calls to remain.
636define <4 x float> @int_cosh_4x(ptr %a) {
637; CHECK-LABEL: define <4 x float> @int_cosh_4x
638; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
639; CHECK-NEXT:  entry:
640; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
641; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
642; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
643; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
644; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
645; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
646; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
647; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
648; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
649; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
650; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
651; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
652; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
653; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
654;
655; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
656; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
657; DEFAULT-NEXT:  entry:
658; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
659; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
660; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
661; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
662; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
663; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
664; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
665; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
666; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
667; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
668; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
669; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
670; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
671; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
672;
673entry:
674  %0 = load <4 x float>, ptr %a, align 16
675  %vecext = extractelement <4 x float> %0, i32 0
676  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
677  %vecins = insertelement <4 x float> undef, float %1, i32 0
678  %vecext.1 = extractelement <4 x float> %0, i32 1
679  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
680  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
681  %vecext.2 = extractelement <4 x float> %0, i32 2
682  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
683  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
684  %vecext.3 = extractelement <4 x float> %0, i32 3
685  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
686  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
687  ret <4 x float> %vecins.3
688}
689
690declare float @atanhf(float) readonly nounwind willreturn
691
692; We cannot vectorize the atanhf libcalls since RISC-V has no such instruction, so both RUN configurations expect the four scalar calls to remain.
693define <4 x float> @atanh_4x(ptr %a) {
694; CHECK-LABEL: define <4 x float> @atanh_4x
695; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
696; CHECK-NEXT:  entry:
697; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
698; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
699; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
700; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
701; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
702; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
703; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
704; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
705; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
706; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
707; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
708; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
709; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
710; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
711;
712; DEFAULT-LABEL: define <4 x float> @atanh_4x
713; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
714; DEFAULT-NEXT:  entry:
715; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
716; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
717; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
718; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
719; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
720; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
721; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
722; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
723; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
724; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
725; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
726; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
727; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
728; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
729;
730entry:
731  %0 = load <4 x float>, ptr %a, align 16
732  %vecext = extractelement <4 x float> %0, i32 0
733  %1 = tail call fast float @atanhf(float %vecext)
734  %vecins = insertelement <4 x float> undef, float %1, i32 0
735  %vecext.1 = extractelement <4 x float> %0, i32 1
736  %2 = tail call fast float @atanhf(float %vecext.1)
737  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
738  %vecext.2 = extractelement <4 x float> %0, i32 2
739  %3 = tail call fast float @atanhf(float %vecext.2)
740  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
741  %vecext.3 = extractelement <4 x float> %0, i32 3
742  %4 = tail call fast float @atanhf(float %vecext.3)
743  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
744  ret <4 x float> %vecins.3
745}
746
747declare float @llvm.atanh.f32(float)
748
749; We cannot vectorize the llvm.atanh.f32 intrinsic calls since RISC-V has no such instruction, so both RUN configurations expect the four scalar calls to remain.
750define <4 x float> @int_atanh_4x(ptr %a) {
751; CHECK-LABEL: define <4 x float> @int_atanh_4x
752; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
753; CHECK-NEXT:  entry:
754; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
755; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
756; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
757; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
758; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
759; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
760; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
761; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
762; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
763; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
764; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
765; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
766; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
767; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
768;
769; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
770; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
771; DEFAULT-NEXT:  entry:
772; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
773; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
774; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
775; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
776; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
777; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
778; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
779; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
780; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
781; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
782; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
783; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
784; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
785; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
786;
787entry:
788  %0 = load <4 x float>, ptr %a, align 16
789  %vecext = extractelement <4 x float> %0, i32 0
790  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
791  %vecins = insertelement <4 x float> undef, float %1, i32 0
792  %vecext.1 = extractelement <4 x float> %0, i32 1
793  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
794  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
795  %vecext.2 = extractelement <4 x float> %0, i32 2
796  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
797  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
798  %vecext.3 = extractelement <4 x float> %0, i32 3
799  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
800  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
801  ret <4 x float> %vecins.3
802}
803