xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll (revision ceb613a8bed218e2c98cd4fad3fd2a4a3217bd77)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
3; RUN:     -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
4; RUN:     | FileCheck %s
5; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
6; RUN:     | FileCheck %s --check-prefix=DEFAULT
7
8declare float @fabsf(float) readonly nounwind willreturn
9
; Applies the fabsf library call (with fast-math flags) to each of the four
; lanes of a loaded <4 x float> and rebuilds the vector with insertelement.
; The assertions for both RUN lines expect the four scalar calls to be replaced
; by a single @llvm.fabs.v4f32 call on the loaded vector.
10define <4 x float> @fabs_4x(ptr %a) {
11; CHECK-LABEL: define <4 x float> @fabs_4x
12; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
13; CHECK-NEXT:  entry:
14; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
15; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
16; CHECK-NEXT:    ret <4 x float> [[TMP1]]
17;
18; DEFAULT-LABEL: define <4 x float> @fabs_4x
19; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
20; DEFAULT-NEXT:  entry:
21; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
22; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
23; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
24;
; Input pattern: for each lane i, extract lane i, call fabsf on it, and insert
; the result at lane i of the output vector.
25entry:
26  %0 = load <4 x float>, ptr %a, align 16
27  %vecext = extractelement <4 x float> %0, i32 0
28  %1 = tail call fast float @fabsf(float %vecext)
29  %vecins = insertelement <4 x float> undef, float %1, i32 0
30  %vecext.1 = extractelement <4 x float> %0, i32 1
31  %2 = tail call fast float @fabsf(float %vecext.1)
32  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
33  %vecext.2 = extractelement <4 x float> %0, i32 2
34  %3 = tail call fast float @fabsf(float %vecext.2)
35  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
36  %vecext.3 = extractelement <4 x float> %0, i32 3
37  %4 = tail call fast float @fabsf(float %vecext.3)
38  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
39  ret <4 x float> %vecins.3
40}
41
42declare float @llvm.fabs.f32(float)
43
; Same shape as @fabs_4x, but the scalar calls already use the @llvm.fabs.f32
; intrinsic instead of the fabsf library function. The assertions for both RUN
; lines expect a single @llvm.fabs.v4f32 call on the loaded vector.
44define <4 x float> @int_fabs_4x(ptr %a) {
45; CHECK-LABEL: define <4 x float> @int_fabs_4x
46; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
47; CHECK-NEXT:  entry:
48; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
49; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
50; CHECK-NEXT:    ret <4 x float> [[TMP1]]
51;
52; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
53; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
54; DEFAULT-NEXT:  entry:
55; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
56; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
57; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
58;
; Input pattern: for each lane i, extract lane i, call @llvm.fabs.f32 on it,
; and insert the result at lane i of the output vector.
59entry:
60  %0 = load <4 x float>, ptr %a, align 16
61  %vecext = extractelement <4 x float> %0, i32 0
62  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
63  %vecins = insertelement <4 x float> undef, float %1, i32 0
64  %vecext.1 = extractelement <4 x float> %0, i32 1
65  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
66  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
67  %vecext.2 = extractelement <4 x float> %0, i32 2
68  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
69  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
70  %vecext.3 = extractelement <4 x float> %0, i32 3
71  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
72  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
73  ret <4 x float> %vecins.3
74}
75
76declare float @sqrtf(float) readonly nounwind willreturn
77
; Applies the sqrtf library call (with fast-math flags) to each of the four
; lanes of a loaded <4 x float> and rebuilds the vector with insertelement.
; The assertions for both RUN lines expect the four scalar calls to be replaced
; by a single @llvm.sqrt.v4f32 call on the loaded vector.
78define <4 x float> @sqrt_4x(ptr %a) {
79; CHECK-LABEL: define <4 x float> @sqrt_4x
80; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
81; CHECK-NEXT:  entry:
82; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
83; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
84; CHECK-NEXT:    ret <4 x float> [[TMP1]]
85;
86; DEFAULT-LABEL: define <4 x float> @sqrt_4x
87; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
88; DEFAULT-NEXT:  entry:
89; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
90; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
91; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
92;
; Input pattern: for each lane i, extract lane i, call sqrtf on it, and insert
; the result at lane i of the output vector.
93entry:
94  %0 = load <4 x float>, ptr %a, align 16
95  %vecext = extractelement <4 x float> %0, i32 0
96  %1 = tail call fast float @sqrtf(float %vecext)
97  %vecins = insertelement <4 x float> undef, float %1, i32 0
98  %vecext.1 = extractelement <4 x float> %0, i32 1
99  %2 = tail call fast float @sqrtf(float %vecext.1)
100  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
101  %vecext.2 = extractelement <4 x float> %0, i32 2
102  %3 = tail call fast float @sqrtf(float %vecext.2)
103  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
104  %vecext.3 = extractelement <4 x float> %0, i32 3
105  %4 = tail call fast float @sqrtf(float %vecext.3)
106  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
107  ret <4 x float> %vecins.3
108}
109
110declare float @llvm.sqrt.f32(float)
111
; Same shape as @sqrt_4x, but the scalar calls already use the @llvm.sqrt.f32
; intrinsic instead of the sqrtf library function. The assertions for both RUN
; lines expect a single @llvm.sqrt.v4f32 call on the loaded vector.
112define <4 x float> @int_sqrt_4x(ptr %a) {
113; CHECK-LABEL: define <4 x float> @int_sqrt_4x
114; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
115; CHECK-NEXT:  entry:
116; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
117; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
118; CHECK-NEXT:    ret <4 x float> [[TMP1]]
119;
120; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
121; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
122; DEFAULT-NEXT:  entry:
123; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
124; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
125; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
126;
; Input pattern: for each lane i, extract lane i, call @llvm.sqrt.f32 on it,
; and insert the result at lane i of the output vector.
127entry:
128  %0 = load <4 x float>, ptr %a, align 16
129  %vecext = extractelement <4 x float> %0, i32 0
130  %1 = tail call fast float @llvm.sqrt.f32(float %vecext)
131  %vecins = insertelement <4 x float> undef, float %1, i32 0
132  %vecext.1 = extractelement <4 x float> %0, i32 1
133  %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
134  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
135  %vecext.2 = extractelement <4 x float> %0, i32 2
136  %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
137  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
138  %vecext.3 = extractelement <4 x float> %0, i32 3
139  %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
140  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
141  ret <4 x float> %vecins.3
142}
143
144declare float @expf(float) readonly nounwind willreturn
145
146; We cannot vectorize exp since RISC-V has no such instruction.
; The assertions for both RUN lines therefore expect the four scalar expf calls
; and the surrounding extractelement/insertelement chain to be left unchanged.
147define <4 x float> @exp_4x(ptr %a) {
148; CHECK-LABEL: define <4 x float> @exp_4x
149; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
150; CHECK-NEXT:  entry:
151; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
152; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
153; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
154; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
155; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
156; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
157; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
158; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
159; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
160; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
161; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
162; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
163; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
164; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
165;
166; DEFAULT-LABEL: define <4 x float> @exp_4x
167; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
168; DEFAULT-NEXT:  entry:
169; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
170; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
171; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
172; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
173; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
174; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
175; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
176; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
177; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
178; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
179; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
180; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
181; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
182; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
183;
; Input pattern: for each lane i, extract lane i, call expf on it, and insert
; the result at lane i of the output vector.
184entry:
185  %0 = load <4 x float>, ptr %a, align 16
186  %vecext = extractelement <4 x float> %0, i32 0
187  %1 = tail call fast float @expf(float %vecext)
188  %vecins = insertelement <4 x float> undef, float %1, i32 0
189  %vecext.1 = extractelement <4 x float> %0, i32 1
190  %2 = tail call fast float @expf(float %vecext.1)
191  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
192  %vecext.2 = extractelement <4 x float> %0, i32 2
193  %3 = tail call fast float @expf(float %vecext.2)
194  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
195  %vecext.3 = extractelement <4 x float> %0, i32 3
196  %4 = tail call fast float @expf(float %vecext.3)
197  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
198  ret <4 x float> %vecins.3
199}
200
201declare float @llvm.exp.f32(float)
202
203; We cannot vectorize exp since RISC-V has no such instruction.
; The assertions for both RUN lines therefore expect the four scalar
; @llvm.exp.f32 calls and the surrounding extractelement/insertelement chain to
; be left unchanged.
204define <4 x float> @int_exp_4x(ptr %a) {
205; CHECK-LABEL: define <4 x float> @int_exp_4x
206; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
207; CHECK-NEXT:  entry:
208; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
209; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
210; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
211; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
212; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
213; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
214; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
215; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
216; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
217; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
218; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
219; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
220; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
221; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
222;
223; DEFAULT-LABEL: define <4 x float> @int_exp_4x
224; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
225; DEFAULT-NEXT:  entry:
226; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
227; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
228; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
229; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
230; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
231; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
232; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
233; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
234; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
235; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
236; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
237; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
238; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
239; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
240;
; Input pattern: for each lane i, extract lane i, call @llvm.exp.f32 on it,
; and insert the result at lane i of the output vector.
241entry:
242  %0 = load <4 x float>, ptr %a, align 16
243  %vecext = extractelement <4 x float> %0, i32 0
244  %1 = tail call fast float @llvm.exp.f32(float %vecext)
245  %vecins = insertelement <4 x float> undef, float %1, i32 0
246  %vecext.1 = extractelement <4 x float> %0, i32 1
247  %2 = tail call fast float @llvm.exp.f32(float %vecext.1)
248  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
249  %vecext.2 = extractelement <4 x float> %0, i32 2
250  %3 = tail call fast float @llvm.exp.f32(float %vecext.2)
251  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
252  %vecext.3 = extractelement <4 x float> %0, i32 3
253  %4 = tail call fast float @llvm.exp.f32(float %vecext.3)
254  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
255  ret <4 x float> %vecins.3
256}
257
258declare float @logf(float) readonly nounwind willreturn
259
260; We cannot vectorize log since RISC-V has no such instruction.
; The assertions for both RUN lines therefore expect the four scalar logf calls
; and the surrounding extractelement/insertelement chain to be left unchanged.
261define <4 x float> @log_4x(ptr %a) {
262; CHECK-LABEL: define <4 x float> @log_4x
263; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
264; CHECK-NEXT:  entry:
265; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
266; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
267; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
268; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
269; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
270; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
271; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
272; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
273; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
274; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
275; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
276; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
277; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
278; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
279;
280; DEFAULT-LABEL: define <4 x float> @log_4x
281; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
282; DEFAULT-NEXT:  entry:
283; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
284; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
285; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
286; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
287; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
288; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
289; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
290; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
291; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
292; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
293; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
294; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
295; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
296; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
297;
; Input pattern: for each lane i, extract lane i, call logf on it, and insert
; the result at lane i of the output vector.
298entry:
299  %0 = load <4 x float>, ptr %a, align 16
300  %vecext = extractelement <4 x float> %0, i32 0
301  %1 = tail call fast float @logf(float %vecext)
302  %vecins = insertelement <4 x float> undef, float %1, i32 0
303  %vecext.1 = extractelement <4 x float> %0, i32 1
304  %2 = tail call fast float @logf(float %vecext.1)
305  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
306  %vecext.2 = extractelement <4 x float> %0, i32 2
307  %3 = tail call fast float @logf(float %vecext.2)
308  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
309  %vecext.3 = extractelement <4 x float> %0, i32 3
310  %4 = tail call fast float @logf(float %vecext.3)
311  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
312  ret <4 x float> %vecins.3
313}
314
315declare float @llvm.log.f32(float)
316
317; We cannot vectorize log since RISC-V has no such instruction.
; The assertions for both RUN lines therefore expect the four scalar
; @llvm.log.f32 calls and the surrounding extractelement/insertelement chain to
; be left unchanged.
318define <4 x float> @int_log_4x(ptr %a) {
319; CHECK-LABEL: define <4 x float> @int_log_4x
320; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
321; CHECK-NEXT:  entry:
322; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
323; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
324; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
325; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
326; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
327; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
328; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
329; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
330; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
331; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
332; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
333; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
334; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
335; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
336;
337; DEFAULT-LABEL: define <4 x float> @int_log_4x
338; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
339; DEFAULT-NEXT:  entry:
340; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
341; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
342; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
343; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
344; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
345; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
346; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
347; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
348; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
349; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
350; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
351; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
352; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
353; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
354;
; Input pattern: for each lane i, extract lane i, call @llvm.log.f32 on it,
; and insert the result at lane i of the output vector.
355entry:
356  %0 = load <4 x float>, ptr %a, align 16
357  %vecext = extractelement <4 x float> %0, i32 0
358  %1 = tail call fast float @llvm.log.f32(float %vecext)
359  %vecins = insertelement <4 x float> undef, float %1, i32 0
360  %vecext.1 = extractelement <4 x float> %0, i32 1
361  %2 = tail call fast float @llvm.log.f32(float %vecext.1)
362  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
363  %vecext.2 = extractelement <4 x float> %0, i32 2
364  %3 = tail call fast float @llvm.log.f32(float %vecext.2)
365  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
366  %vecext.3 = extractelement <4 x float> %0, i32 3
367  %4 = tail call fast float @llvm.log.f32(float %vecext.3)
368  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
369  ret <4 x float> %vecins.3
370}
371
372declare float @sinf(float) readonly nounwind willreturn
373
374; We cannot vectorize sin since RISC-V has no such instruction.
; The assertions for both RUN lines therefore expect the four scalar sinf calls
; and the surrounding extractelement/insertelement chain to be left unchanged.
375define <4 x float> @sin_4x(ptr %a) {
376; CHECK-LABEL: define <4 x float> @sin_4x
377; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
378; CHECK-NEXT:  entry:
379; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
380; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
381; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
382; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
383; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
384; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
385; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
386; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
387; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
388; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
389; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
390; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
391; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
392; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
393;
394; DEFAULT-LABEL: define <4 x float> @sin_4x
395; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
396; DEFAULT-NEXT:  entry:
397; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
398; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
399; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
400; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
401; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
402; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
403; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
404; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
405; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
406; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
407; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
408; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
409; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
410; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
411;
; Input pattern: for each lane i, extract lane i, call sinf on it, and insert
; the result at lane i of the output vector.
412entry:
413  %0 = load <4 x float>, ptr %a, align 16
414  %vecext = extractelement <4 x float> %0, i32 0
415  %1 = tail call fast float @sinf(float %vecext)
416  %vecins = insertelement <4 x float> undef, float %1, i32 0
417  %vecext.1 = extractelement <4 x float> %0, i32 1
418  %2 = tail call fast float @sinf(float %vecext.1)
419  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
420  %vecext.2 = extractelement <4 x float> %0, i32 2
421  %3 = tail call fast float @sinf(float %vecext.2)
422  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
423  %vecext.3 = extractelement <4 x float> %0, i32 3
424  %4 = tail call fast float @sinf(float %vecext.3)
425  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
426  ret <4 x float> %vecins.3
427}
428
429declare float @llvm.sin.f32(float)
430
431; We cannot vectorize sin since RISC-V has no such instruction.
; The assertions for both RUN lines therefore expect the four scalar
; @llvm.sin.f32 calls and the surrounding extractelement/insertelement chain to
; be left unchanged.
432define <4 x float> @int_sin_4x(ptr %a) {
433; CHECK-LABEL: define <4 x float> @int_sin_4x
434; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
435; CHECK-NEXT:  entry:
436; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
437; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
438; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
439; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
440; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
441; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
442; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
443; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
444; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
445; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
446; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
447; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
448; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
449; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
450;
451; DEFAULT-LABEL: define <4 x float> @int_sin_4x
452; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
453; DEFAULT-NEXT:  entry:
454; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
455; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
456; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
457; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
458; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
459; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
460; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
461; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
462; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
463; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
464; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
465; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
466; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
467; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
468;
; Input pattern: for each lane i, extract lane i, call @llvm.sin.f32 on it,
; and insert the result at lane i of the output vector.
469entry:
470  %0 = load <4 x float>, ptr %a, align 16
471  %vecext = extractelement <4 x float> %0, i32 0
472  %1 = tail call fast float @llvm.sin.f32(float %vecext)
473  %vecins = insertelement <4 x float> undef, float %1, i32 0
474  %vecext.1 = extractelement <4 x float> %0, i32 1
475  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
476  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
477  %vecext.2 = extractelement <4 x float> %0, i32 2
478  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
479  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
480  %vecext.3 = extractelement <4 x float> %0, i32 3
481  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
482  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
483  ret <4 x float> %vecins.3
484}
485
486declare float @asinf(float) readonly nounwind willreturn
487
488; We cannot vectorize asin since RISC-V has no such instruction.
; The assertions for both RUN lines therefore expect the four scalar asinf
; calls and the surrounding extractelement/insertelement chain to be left
; unchanged.
489define <4 x float> @asin_4x(ptr %a) {
490; CHECK-LABEL: define <4 x float> @asin_4x
491; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
492; CHECK-NEXT:  entry:
493; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
494; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
495; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
496; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
497; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
498; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
499; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
500; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
501; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
502; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
503; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
504; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
505; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
506; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
507;
508; DEFAULT-LABEL: define <4 x float> @asin_4x
509; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
510; DEFAULT-NEXT:  entry:
511; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
512; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
513; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
514; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
515; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
516; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
517; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
518; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
519; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
520; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
521; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
522; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
523; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
524; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
525;
; Input pattern: for each lane i, extract lane i, call asinf on it, and insert
; the result at lane i of the output vector.
526entry:
527  %0 = load <4 x float>, ptr %a, align 16
528  %vecext = extractelement <4 x float> %0, i32 0
529  %1 = tail call fast float @asinf(float %vecext)
530  %vecins = insertelement <4 x float> undef, float %1, i32 0
531  %vecext.1 = extractelement <4 x float> %0, i32 1
532  %2 = tail call fast float @asinf(float %vecext.1)
533  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
534  %vecext.2 = extractelement <4 x float> %0, i32 2
535  %3 = tail call fast float @asinf(float %vecext.2)
536  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
537  %vecext.3 = extractelement <4 x float> %0, i32 3
538  %4 = tail call fast float @asinf(float %vecext.3)
539  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
540  ret <4 x float> %vecins.3
541}
542
543declare float @llvm.asin.f32(float)
544
545; We cannot vectorize the llvm.asin intrinsic calls since RISCV has no such instruction.
546define <4 x float> @int_asin_4x(ptr %a) {
547; CHECK-LABEL: define <4 x float> @int_asin_4x
548; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
549; CHECK-NEXT:  entry:
550; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
551; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
552; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
553; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
554; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
555; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
556; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
557; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
558; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
559; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
560; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
561; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
562; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
563; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
564;
565; DEFAULT-LABEL: define <4 x float> @int_asin_4x
566; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
567; DEFAULT-NEXT:  entry:
568; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
569; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
570; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
571; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
572; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
573; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
574; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
575; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
576; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
577; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
578; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
579; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
580; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
581; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
582;
583entry:
584  %0 = load <4 x float>, ptr %a, align 16
585  %vecext = extractelement <4 x float> %0, i32 0
586  %1 = tail call fast float @llvm.asin.f32(float %vecext)
587  %vecins = insertelement <4 x float> undef, float %1, i32 0
588  %vecext.1 = extractelement <4 x float> %0, i32 1
589  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
590  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
591  %vecext.2 = extractelement <4 x float> %0, i32 2
592  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
593  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
594  %vecext.3 = extractelement <4 x float> %0, i32 3
595  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
596  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
597  ret <4 x float> %vecins.3
598}
599
600declare float @cosf(float) readonly nounwind willreturn
601
602; We cannot vectorize the cosf calls since RISCV has no such instruction.
603define <4 x float> @cos_4x(ptr %a) {
604; CHECK-LABEL: define <4 x float> @cos_4x
605; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
606; CHECK-NEXT:  entry:
607; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
608; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
609; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
610; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
611; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
612; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
613; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
614; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
615; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
616; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
617; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
618; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
619; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
620; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
621;
622; DEFAULT-LABEL: define <4 x float> @cos_4x
623; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
624; DEFAULT-NEXT:  entry:
625; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
626; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
627; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
628; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
629; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
630; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
631; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
632; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
633; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
634; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
635; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
636; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
637; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
638; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
639;
640entry:
641  %0 = load <4 x float>, ptr %a, align 16
642  %vecext = extractelement <4 x float> %0, i32 0
643  %1 = tail call fast float @cosf(float %vecext)
644  %vecins = insertelement <4 x float> undef, float %1, i32 0
645  %vecext.1 = extractelement <4 x float> %0, i32 1
646  %2 = tail call fast float @cosf(float %vecext.1)
647  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
648  %vecext.2 = extractelement <4 x float> %0, i32 2
649  %3 = tail call fast float @cosf(float %vecext.2)
650  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
651  %vecext.3 = extractelement <4 x float> %0, i32 3
652  %4 = tail call fast float @cosf(float %vecext.3)
653  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
654  ret <4 x float> %vecins.3
655}
656
657declare float @llvm.cos.f32(float)
658
659; We cannot vectorize the llvm.cos intrinsic calls since RISCV has no such instruction.
660define <4 x float> @int_cos_4x(ptr %a) {
661; CHECK-LABEL: define <4 x float> @int_cos_4x
662; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
663; CHECK-NEXT:  entry:
664; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
665; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
666; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
667; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
668; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
669; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
670; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
671; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
672; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
673; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
674; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
675; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
676; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
677; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
678;
679; DEFAULT-LABEL: define <4 x float> @int_cos_4x
680; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
681; DEFAULT-NEXT:  entry:
682; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
683; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
684; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
685; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
686; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
687; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
688; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
689; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
690; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
691; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
692; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
693; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
694; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
695; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
696;
697entry:
698  %0 = load <4 x float>, ptr %a, align 16
699  %vecext = extractelement <4 x float> %0, i32 0
700  %1 = tail call fast float @llvm.cos.f32(float %vecext)
701  %vecins = insertelement <4 x float> undef, float %1, i32 0
702  %vecext.1 = extractelement <4 x float> %0, i32 1
703  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
704  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
705  %vecext.2 = extractelement <4 x float> %0, i32 2
706  %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
707  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
708  %vecext.3 = extractelement <4 x float> %0, i32 3
709  %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
710  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
711  ret <4 x float> %vecins.3
712}
713
714declare float @acosf(float) readonly nounwind willreturn
715
716; We cannot vectorize the acosf calls since RISCV has no such instruction.
717define <4 x float> @acos_4x(ptr %a) {
718; CHECK-LABEL: define <4 x float> @acos_4x
719; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
720; CHECK-NEXT:  entry:
721; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
722; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
723; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
724; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
725; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
726; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
727; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
728; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
729; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
730; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
731; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
732; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
733; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
734; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
735;
736; DEFAULT-LABEL: define <4 x float> @acos_4x
737; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
738; DEFAULT-NEXT:  entry:
739; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
740; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
741; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
742; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
743; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
744; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
745; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
746; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
747; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
748; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
749; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
750; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
751; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
752; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
753;
754entry:
755  %0 = load <4 x float>, ptr %a, align 16
756  %vecext = extractelement <4 x float> %0, i32 0
757  %1 = tail call fast float @acosf(float %vecext)
758  %vecins = insertelement <4 x float> undef, float %1, i32 0
759  %vecext.1 = extractelement <4 x float> %0, i32 1
760  %2 = tail call fast float @acosf(float %vecext.1)
761  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
762  %vecext.2 = extractelement <4 x float> %0, i32 2
763  %3 = tail call fast float @acosf(float %vecext.2)
764  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
765  %vecext.3 = extractelement <4 x float> %0, i32 3
766  %4 = tail call fast float @acosf(float %vecext.3)
767  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
768  ret <4 x float> %vecins.3
769}
770
771declare float @llvm.acos.f32(float)
772
773; We cannot vectorize the llvm.acos intrinsic calls since RISCV has no such instruction.
774define <4 x float> @int_acos_4x(ptr %a) {
775; CHECK-LABEL: define <4 x float> @int_acos_4x
776; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
777; CHECK-NEXT:  entry:
778; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
779; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
780; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
781; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
782; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
783; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
784; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
785; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
786; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
787; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
788; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
789; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
790; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
791; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
792;
793; DEFAULT-LABEL: define <4 x float> @int_acos_4x
794; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
795; DEFAULT-NEXT:  entry:
796; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
797; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
798; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
799; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
800; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
801; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
802; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
803; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
804; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
805; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
806; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
807; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
808; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
809; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
810;
811entry:
812  %0 = load <4 x float>, ptr %a, align 16
813  %vecext = extractelement <4 x float> %0, i32 0
814  %1 = tail call fast float @llvm.acos.f32(float %vecext)
815  %vecins = insertelement <4 x float> undef, float %1, i32 0
816  %vecext.1 = extractelement <4 x float> %0, i32 1
817  %2 = tail call fast float @llvm.acos.f32(float %vecext.1)
818  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
819  %vecext.2 = extractelement <4 x float> %0, i32 2
820  %3 = tail call fast float @llvm.acos.f32(float %vecext.2)
821  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
822  %vecext.3 = extractelement <4 x float> %0, i32 3
823  %4 = tail call fast float @llvm.acos.f32(float %vecext.3)
824  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
825  ret <4 x float> %vecins.3
826}
827
828declare float @tanf(float) readonly nounwind willreturn
829
830; We cannot vectorize the tanf calls since RISCV has no such instruction.
831define <4 x float> @tan_4x(ptr %a) {
832; CHECK-LABEL: define <4 x float> @tan_4x
833; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
834; CHECK-NEXT:  entry:
835; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
836; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
837; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
838; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
839; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
840; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
841; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
842; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
843; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
844; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
845; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
846; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
847; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
848; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
849;
850; DEFAULT-LABEL: define <4 x float> @tan_4x
851; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
852; DEFAULT-NEXT:  entry:
853; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
854; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
855; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
856; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
857; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
858; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
859; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
860; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
861; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
862; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
863; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
864; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
865; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
866; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
867;
868entry:
869  %0 = load <4 x float>, ptr %a, align 16
870  %vecext = extractelement <4 x float> %0, i32 0
871  %1 = tail call fast float @tanf(float %vecext)
872  %vecins = insertelement <4 x float> undef, float %1, i32 0
873  %vecext.1 = extractelement <4 x float> %0, i32 1
874  %2 = tail call fast float @tanf(float %vecext.1)
875  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
876  %vecext.2 = extractelement <4 x float> %0, i32 2
877  %3 = tail call fast float @tanf(float %vecext.2)
878  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
879  %vecext.3 = extractelement <4 x float> %0, i32 3
880  %4 = tail call fast float @tanf(float %vecext.3)
881  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
882  ret <4 x float> %vecins.3
883}
884
885declare float @llvm.tan.f32(float)
886
887; We cannot vectorize the llvm.tan intrinsic calls since RISCV has no such instruction.
888define <4 x float> @int_tan_4x(ptr %a) {
889; CHECK-LABEL: define <4 x float> @int_tan_4x
890; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
891; CHECK-NEXT:  entry:
892; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
893; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
894; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
895; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
896; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
897; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
898; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
899; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
900; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
901; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
902; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
903; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
904; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
905; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
906;
907; DEFAULT-LABEL: define <4 x float> @int_tan_4x
908; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
909; DEFAULT-NEXT:  entry:
910; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
911; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
912; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
913; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
914; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
915; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
916; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
917; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
918; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
919; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
920; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
921; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
922; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
923; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
924;
925entry:
926  %0 = load <4 x float>, ptr %a, align 16
927  %vecext = extractelement <4 x float> %0, i32 0
928  %1 = tail call fast float @llvm.tan.f32(float %vecext)
929  %vecins = insertelement <4 x float> undef, float %1, i32 0
930  %vecext.1 = extractelement <4 x float> %0, i32 1
931  %2 = tail call fast float @llvm.tan.f32(float %vecext.1)
932  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
933  %vecext.2 = extractelement <4 x float> %0, i32 2
934  %3 = tail call fast float @llvm.tan.f32(float %vecext.2)
935  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
936  %vecext.3 = extractelement <4 x float> %0, i32 3
937  %4 = tail call fast float @llvm.tan.f32(float %vecext.3)
938  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
939  ret <4 x float> %vecins.3
940}
941
942declare float @atanf(float) readonly nounwind willreturn
943
944; We cannot vectorize the atanf calls since RISCV has no such instruction.
945define <4 x float> @atan_4x(ptr %a) {
946; CHECK-LABEL: define <4 x float> @atan_4x
947; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
948; CHECK-NEXT:  entry:
949; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
950; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
951; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
952; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
953; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
954; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
955; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
956; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
957; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
958; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
959; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
960; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
961; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
962; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
963;
964; DEFAULT-LABEL: define <4 x float> @atan_4x
965; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
966; DEFAULT-NEXT:  entry:
967; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
968; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
969; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
970; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
971; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
972; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
973; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
974; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
975; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
976; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
977; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
978; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
979; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
980; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
981;
982entry:
983  %0 = load <4 x float>, ptr %a, align 16
984  %vecext = extractelement <4 x float> %0, i32 0
985  %1 = tail call fast float @atanf(float %vecext)
986  %vecins = insertelement <4 x float> undef, float %1, i32 0
987  %vecext.1 = extractelement <4 x float> %0, i32 1
988  %2 = tail call fast float @atanf(float %vecext.1)
989  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
990  %vecext.2 = extractelement <4 x float> %0, i32 2
991  %3 = tail call fast float @atanf(float %vecext.2)
992  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
993  %vecext.3 = extractelement <4 x float> %0, i32 3
994  %4 = tail call fast float @atanf(float %vecext.3)
995  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
996  ret <4 x float> %vecins.3
997}
998
999declare float @llvm.atan.f32(float)
1000
1001; We cannot vectorize the llvm.atan intrinsic calls since RISCV has no such instruction.
1002define <4 x float> @int_atan_4x(ptr %a) {
1003; CHECK-LABEL: define <4 x float> @int_atan_4x
1004; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1005; CHECK-NEXT:  entry:
1006; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1007; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1008; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
1009; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1010; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1011; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
1012; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1013; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1014; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
1015; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1016; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1017; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
1018; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1019; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1020;
1021; DEFAULT-LABEL: define <4 x float> @int_atan_4x
1022; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1023; DEFAULT-NEXT:  entry:
1024; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1025; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1026; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
1027; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1028; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1029; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
1030; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1031; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1032; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
1033; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1034; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1035; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
1036; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1037; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1038;
1039entry:
1040  %0 = load <4 x float>, ptr %a, align 16
1041  %vecext = extractelement <4 x float> %0, i32 0
1042  %1 = tail call fast float @llvm.atan.f32(float %vecext)
1043  %vecins = insertelement <4 x float> undef, float %1, i32 0
1044  %vecext.1 = extractelement <4 x float> %0, i32 1
1045  %2 = tail call fast float @llvm.atan.f32(float %vecext.1)
1046  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1047  %vecext.2 = extractelement <4 x float> %0, i32 2
1048  %3 = tail call fast float @llvm.atan.f32(float %vecext.2)
1049  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1050  %vecext.3 = extractelement <4 x float> %0, i32 3
1051  %4 = tail call fast float @llvm.atan.f32(float %vecext.3)
1052  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1053  ret <4 x float> %vecins.3
1054}
1055
1056declare float @sinhf(float) readonly nounwind willreturn
1057
1058; We cannot vectorize sinh since RISCV has no such instruction; all four scalar sinhf calls are expected to remain under both the CHECK and DEFAULT prefixes.
1059define <4 x float> @sinh_4x(ptr %a) {
1060; CHECK-LABEL: define <4 x float> @sinh_4x
1061; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1062; CHECK-NEXT:  entry:
1063; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1064; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1065; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
1066; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1067; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1068; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
1069; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1070; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1071; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
1072; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1073; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1074; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
1075; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1076; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1077;
1078; DEFAULT-LABEL: define <4 x float> @sinh_4x
1079; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1080; DEFAULT-NEXT:  entry:
1081; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1082; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1083; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
1084; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1085; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1086; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
1087; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1088; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1089; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
1090; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1091; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1092; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
1093; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1094; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1095;
1096entry:
1097  %0 = load <4 x float>, ptr %a, align 16
1098  %vecext = extractelement <4 x float> %0, i32 0
1099  %1 = tail call fast float @sinhf(float %vecext)
1100  %vecins = insertelement <4 x float> undef, float %1, i32 0
1101  %vecext.1 = extractelement <4 x float> %0, i32 1
1102  %2 = tail call fast float @sinhf(float %vecext.1)
1103  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1104  %vecext.2 = extractelement <4 x float> %0, i32 2
1105  %3 = tail call fast float @sinhf(float %vecext.2)
1106  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1107  %vecext.3 = extractelement <4 x float> %0, i32 3
1108  %4 = tail call fast float @sinhf(float %vecext.3)
1109  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1110  ret <4 x float> %vecins.3
1111}
1112
1113declare float @llvm.sinh.f32(float)
1114
1115; We cannot vectorize sinh since RISCV has no such instruction; all four scalar llvm.sinh.f32 calls are expected to remain under both the CHECK and DEFAULT prefixes.
1116define <4 x float> @int_sinh_4x(ptr %a) {
1117; CHECK-LABEL: define <4 x float> @int_sinh_4x
1118; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1119; CHECK-NEXT:  entry:
1120; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1121; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1122; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
1123; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1124; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1125; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
1126; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1127; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1128; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
1129; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1130; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1131; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
1132; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1133; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1134;
1135; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
1136; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1137; DEFAULT-NEXT:  entry:
1138; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1139; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1140; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
1141; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1142; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1143; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
1144; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1145; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1146; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
1147; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1148; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1149; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
1150; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1151; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1152;
1153entry:
1154  %0 = load <4 x float>, ptr %a, align 16
1155  %vecext = extractelement <4 x float> %0, i32 0
1156  %1 = tail call fast float @llvm.sinh.f32(float %vecext)
1157  %vecins = insertelement <4 x float> undef, float %1, i32 0
1158  %vecext.1 = extractelement <4 x float> %0, i32 1
1159  %2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
1160  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1161  %vecext.2 = extractelement <4 x float> %0, i32 2
1162  %3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
1163  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1164  %vecext.3 = extractelement <4 x float> %0, i32 3
1165  %4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
1166  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1167  ret <4 x float> %vecins.3
1168}
1169
1170declare float @asinhf(float) readonly nounwind willreturn
1171
1172; We cannot vectorize asinh since RISCV has no such instruction; all four scalar asinhf calls are expected to remain under both the CHECK and DEFAULT prefixes.
1173define <4 x float> @asinh_4x(ptr %a) {
1174; CHECK-LABEL: define <4 x float> @asinh_4x
1175; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1176; CHECK-NEXT:  entry:
1177; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1178; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1179; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
1180; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1181; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1182; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
1183; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1184; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1185; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
1186; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1187; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1188; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
1189; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1190; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1191;
1192; DEFAULT-LABEL: define <4 x float> @asinh_4x
1193; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1194; DEFAULT-NEXT:  entry:
1195; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1196; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1197; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
1198; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1199; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1200; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
1201; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1202; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1203; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
1204; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1205; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1206; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
1207; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1208; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1209;
1210entry:
1211  %0 = load <4 x float>, ptr %a, align 16
1212  %vecext = extractelement <4 x float> %0, i32 0
1213  %1 = tail call fast float @asinhf(float %vecext)
1214  %vecins = insertelement <4 x float> undef, float %1, i32 0
1215  %vecext.1 = extractelement <4 x float> %0, i32 1
1216  %2 = tail call fast float @asinhf(float %vecext.1)
1217  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1218  %vecext.2 = extractelement <4 x float> %0, i32 2
1219  %3 = tail call fast float @asinhf(float %vecext.2)
1220  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1221  %vecext.3 = extractelement <4 x float> %0, i32 3
1222  %4 = tail call fast float @asinhf(float %vecext.3)
1223  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1224  ret <4 x float> %vecins.3
1225}
1226
1227declare float @llvm.asinh.f32(float)
1228
1229; We cannot vectorize asinh since RISCV has no such instruction; all four scalar llvm.asinh.f32 calls are expected to remain under both the CHECK and DEFAULT prefixes.
1230define <4 x float> @int_asinh_4x(ptr %a) {
1231; CHECK-LABEL: define <4 x float> @int_asinh_4x
1232; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1233; CHECK-NEXT:  entry:
1234; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1235; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1236; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
1237; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1238; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1239; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
1240; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1241; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1242; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
1243; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1244; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1245; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
1246; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1247; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1248;
1249; DEFAULT-LABEL: define <4 x float> @int_asinh_4x
1250; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1251; DEFAULT-NEXT:  entry:
1252; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1253; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1254; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
1255; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1256; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1257; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
1258; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1259; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1260; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
1261; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1262; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1263; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
1264; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1265; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1266;
1267entry:
1268  %0 = load <4 x float>, ptr %a, align 16
1269  %vecext = extractelement <4 x float> %0, i32 0
1270  %1 = tail call fast float @llvm.asinh.f32(float %vecext)
1271  %vecins = insertelement <4 x float> undef, float %1, i32 0
1272  %vecext.1 = extractelement <4 x float> %0, i32 1
1273  %2 = tail call fast float @llvm.asinh.f32(float %vecext.1)
1274  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1275  %vecext.2 = extractelement <4 x float> %0, i32 2
1276  %3 = tail call fast float @llvm.asinh.f32(float %vecext.2)
1277  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1278  %vecext.3 = extractelement <4 x float> %0, i32 3
1279  %4 = tail call fast float @llvm.asinh.f32(float %vecext.3)
1280  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1281  ret <4 x float> %vecins.3
1282}
1283
1284declare float @coshf(float) readonly nounwind willreturn
1285
1286; We cannot vectorize cosh since RISCV has no such instruction; all four scalar coshf calls are expected to remain under both the CHECK and DEFAULT prefixes.
1287define <4 x float> @cosh_4x(ptr %a) {
1288; CHECK-LABEL: define <4 x float> @cosh_4x
1289; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1290; CHECK-NEXT:  entry:
1291; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1292; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1293; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
1294; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1295; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1296; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
1297; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1298; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1299; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
1300; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1301; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1302; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
1303; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1304; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1305;
1306; DEFAULT-LABEL: define <4 x float> @cosh_4x
1307; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1308; DEFAULT-NEXT:  entry:
1309; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1310; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1311; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
1312; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1313; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1314; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
1315; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1316; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1317; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
1318; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1319; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1320; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
1321; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1322; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1323;
1324entry:
1325  %0 = load <4 x float>, ptr %a, align 16
1326  %vecext = extractelement <4 x float> %0, i32 0
1327  %1 = tail call fast float @coshf(float %vecext)
1328  %vecins = insertelement <4 x float> undef, float %1, i32 0
1329  %vecext.1 = extractelement <4 x float> %0, i32 1
1330  %2 = tail call fast float @coshf(float %vecext.1)
1331  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1332  %vecext.2 = extractelement <4 x float> %0, i32 2
1333  %3 = tail call fast float @coshf(float %vecext.2)
1334  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1335  %vecext.3 = extractelement <4 x float> %0, i32 3
1336  %4 = tail call fast float @coshf(float %vecext.3)
1337  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1338  ret <4 x float> %vecins.3
1339}
1340
1341declare float @llvm.cosh.f32(float)
1342
1343; We cannot vectorize cosh since RISCV has no such instruction; all four scalar llvm.cosh.f32 calls are expected to remain under both the CHECK and DEFAULT prefixes.
1344define <4 x float> @int_cosh_4x(ptr %a) {
1345; CHECK-LABEL: define <4 x float> @int_cosh_4x
1346; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1347; CHECK-NEXT:  entry:
1348; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1349; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1350; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
1351; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1352; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1353; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
1354; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1355; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1356; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
1357; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1358; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1359; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
1360; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1361; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1362;
1363; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
1364; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1365; DEFAULT-NEXT:  entry:
1366; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1367; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1368; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
1369; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1370; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1371; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
1372; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1373; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1374; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
1375; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1376; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1377; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
1378; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1379; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1380;
1381entry:
1382  %0 = load <4 x float>, ptr %a, align 16
1383  %vecext = extractelement <4 x float> %0, i32 0
1384  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
1385  %vecins = insertelement <4 x float> undef, float %1, i32 0
1386  %vecext.1 = extractelement <4 x float> %0, i32 1
1387  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
1388  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1389  %vecext.2 = extractelement <4 x float> %0, i32 2
1390  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
1391  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1392  %vecext.3 = extractelement <4 x float> %0, i32 3
1393  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
1394  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1395  ret <4 x float> %vecins.3
1396}
1397
1398declare float @acoshf(float) readonly nounwind willreturn
1399
1400; We cannot vectorize acosh since RISCV has no such instruction; all four scalar acoshf calls are expected to remain under both the CHECK and DEFAULT prefixes.
1401define <4 x float> @acosh_4x(ptr %a) {
1402; CHECK-LABEL: define <4 x float> @acosh_4x
1403; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1404; CHECK-NEXT:  entry:
1405; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1406; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1407; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
1408; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1409; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1410; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
1411; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1412; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1413; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
1414; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1415; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1416; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
1417; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1418; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1419;
1420; DEFAULT-LABEL: define <4 x float> @acosh_4x
1421; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1422; DEFAULT-NEXT:  entry:
1423; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1424; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1425; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
1426; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1427; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1428; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
1429; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1430; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1431; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
1432; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1433; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1434; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
1435; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1436; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1437;
1438entry:
1439  %0 = load <4 x float>, ptr %a, align 16
1440  %vecext = extractelement <4 x float> %0, i32 0
1441  %1 = tail call fast float @acoshf(float %vecext)
1442  %vecins = insertelement <4 x float> undef, float %1, i32 0
1443  %vecext.1 = extractelement <4 x float> %0, i32 1
1444  %2 = tail call fast float @acoshf(float %vecext.1)
1445  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1446  %vecext.2 = extractelement <4 x float> %0, i32 2
1447  %3 = tail call fast float @acoshf(float %vecext.2)
1448  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1449  %vecext.3 = extractelement <4 x float> %0, i32 3
1450  %4 = tail call fast float @acoshf(float %vecext.3)
1451  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1452  ret <4 x float> %vecins.3
1453}
1454
1455declare float @llvm.acosh.f32(float)
1456
1457; We cannot vectorize acosh since RISCV has no such instruction; all four scalar llvm.acosh.f32 calls are expected to remain under both the CHECK and DEFAULT prefixes.
1458define <4 x float> @int_acosh_4x(ptr %a) {
1459; CHECK-LABEL: define <4 x float> @int_acosh_4x
1460; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1461; CHECK-NEXT:  entry:
1462; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1463; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1464; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
1465; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1466; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1467; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
1468; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1469; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1470; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
1471; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1472; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1473; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
1474; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1475; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1476;
1477; DEFAULT-LABEL: define <4 x float> @int_acosh_4x
1478; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1479; DEFAULT-NEXT:  entry:
1480; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1481; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1482; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
1483; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1484; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1485; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
1486; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1487; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1488; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
1489; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1490; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1491; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
1492; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1493; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1494;
1495entry:
1496  %0 = load <4 x float>, ptr %a, align 16
1497  %vecext = extractelement <4 x float> %0, i32 0
1498  %1 = tail call fast float @llvm.acosh.f32(float %vecext)
1499  %vecins = insertelement <4 x float> undef, float %1, i32 0
1500  %vecext.1 = extractelement <4 x float> %0, i32 1
1501  %2 = tail call fast float @llvm.acosh.f32(float %vecext.1)
1502  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1503  %vecext.2 = extractelement <4 x float> %0, i32 2
1504  %3 = tail call fast float @llvm.acosh.f32(float %vecext.2)
1505  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1506  %vecext.3 = extractelement <4 x float> %0, i32 3
1507  %4 = tail call fast float @llvm.acosh.f32(float %vecext.3)
1508  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1509  ret <4 x float> %vecins.3
1510}
1511
1512declare float @tanhf(float) readonly nounwind willreturn
1513
1514; We cannot vectorize tanh since RISCV has no such instruction.
1515define <4 x float> @tanh_4x(ptr %a) {
1516; CHECK-LABEL: define <4 x float> @tanh_4x
1517; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1518; CHECK-NEXT:  entry:
1519; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1520; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1521; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
1522; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1523; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1524; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
1525; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1526; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1527; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
1528; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1529; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1530; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
1531; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1532; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1533;
1534; DEFAULT-LABEL: define <4 x float> @tanh_4x
1535; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1536; DEFAULT-NEXT:  entry:
1537; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1538; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1539; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
1540; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1541; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1542; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
1543; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1544; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1545; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
1546; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1547; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1548; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
1549; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1550; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1551;
1552entry:
1553  %0 = load <4 x float>, ptr %a, align 16
1554  %vecext = extractelement <4 x float> %0, i32 0
1555  %1 = tail call fast float @tanhf(float %vecext)
1556  %vecins = insertelement <4 x float> undef, float %1, i32 0
1557  %vecext.1 = extractelement <4 x float> %0, i32 1
1558  %2 = tail call fast float @tanhf(float %vecext.1)
1559  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1560  %vecext.2 = extractelement <4 x float> %0, i32 2
1561  %3 = tail call fast float @tanhf(float %vecext.2)
1562  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1563  %vecext.3 = extractelement <4 x float> %0, i32 3
1564  %4 = tail call fast float @tanhf(float %vecext.3)
1565  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1566  ret <4 x float> %vecins.3
1567}
1568
1569declare float @llvm.tanh.f32(float)
1570
1571; We cannot vectorize the llvm.tanh intrinsic since RISC-V has no such instruction.
; Both the CHECK and DEFAULT prefixes expect the four scalar @llvm.tanh.f32 calls to remain unvectorized.
1572define <4 x float> @int_tanh_4x(ptr %a) {
1573; CHECK-LABEL: define <4 x float> @int_tanh_4x
1574; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1575; CHECK-NEXT:  entry:
1576; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1577; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1578; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
1579; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1580; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1581; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
1582; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1583; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1584; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
1585; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1586; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1587; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
1588; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1589; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1590;
1591; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
1592; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1593; DEFAULT-NEXT:  entry:
1594; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1595; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1596; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
1597; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1598; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1599; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
1600; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1601; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1602; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
1603; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1604; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1605; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
1606; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1607; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1608;
1609entry:
1610  %0 = load <4 x float>, ptr %a, align 16
1611  %vecext = extractelement <4 x float> %0, i32 0
1612  %1 = tail call fast float @llvm.tanh.f32(float %vecext)
1613  %vecins = insertelement <4 x float> undef, float %1, i32 0
1614  %vecext.1 = extractelement <4 x float> %0, i32 1
1615  %2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
1616  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1617  %vecext.2 = extractelement <4 x float> %0, i32 2
1618  %3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
1619  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1620  %vecext.3 = extractelement <4 x float> %0, i32 3
1621  %4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
1622  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1623  ret <4 x float> %vecins.3
1624}
1625
1626declare float @atanhf(float) readonly nounwind willreturn
1627
1628; We cannot vectorize atanh since RISC-V has no such instruction.
; Both the CHECK and DEFAULT prefixes expect the four scalar @atanhf calls to remain unvectorized.
1629define <4 x float> @atanh_4x(ptr %a) {
1630; CHECK-LABEL: define <4 x float> @atanh_4x
1631; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1632; CHECK-NEXT:  entry:
1633; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1634; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1635; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
1636; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1637; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1638; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
1639; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1640; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1641; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
1642; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1643; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1644; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
1645; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1646; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1647;
1648; DEFAULT-LABEL: define <4 x float> @atanh_4x
1649; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1650; DEFAULT-NEXT:  entry:
1651; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1652; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1653; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
1654; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1655; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1656; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
1657; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1658; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1659; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
1660; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1661; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1662; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
1663; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1664; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1665;
1666entry:
1667  %0 = load <4 x float>, ptr %a, align 16
1668  %vecext = extractelement <4 x float> %0, i32 0
1669  %1 = tail call fast float @atanhf(float %vecext)
1670  %vecins = insertelement <4 x float> undef, float %1, i32 0
1671  %vecext.1 = extractelement <4 x float> %0, i32 1
1672  %2 = tail call fast float @atanhf(float %vecext.1)
1673  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1674  %vecext.2 = extractelement <4 x float> %0, i32 2
1675  %3 = tail call fast float @atanhf(float %vecext.2)
1676  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1677  %vecext.3 = extractelement <4 x float> %0, i32 3
1678  %4 = tail call fast float @atanhf(float %vecext.3)
1679  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1680  ret <4 x float> %vecins.3
1681}
1682
1683declare float @llvm.atanh.f32(float)
1684
1685; We cannot vectorize the llvm.atanh intrinsic since RISC-V has no such instruction.
; Both the CHECK and DEFAULT prefixes expect the four scalar @llvm.atanh.f32 calls to remain unvectorized.
1686define <4 x float> @int_atanh_4x(ptr %a) {
1687; CHECK-LABEL: define <4 x float> @int_atanh_4x
1688; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1689; CHECK-NEXT:  entry:
1690; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1691; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1692; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
1693; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1694; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1695; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
1696; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1697; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1698; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
1699; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1700; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1701; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
1702; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1703; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1704;
1705; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
1706; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1707; DEFAULT-NEXT:  entry:
1708; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1709; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1710; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
1711; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1712; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1713; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
1714; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1715; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1716; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
1717; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1718; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1719; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
1720; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1721; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1722;
1723entry:
1724  %0 = load <4 x float>, ptr %a, align 16
1725  %vecext = extractelement <4 x float> %0, i32 0
1726  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
1727  %vecins = insertelement <4 x float> undef, float %1, i32 0
1728  %vecext.1 = extractelement <4 x float> %0, i32 1
1729  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
1730  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1731  %vecext.2 = extractelement <4 x float> %0, i32 2
1732  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
1733  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1734  %vecext.3 = extractelement <4 x float> %0, i32 3
1735  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
1736  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1737  ret <4 x float> %vecins.3
1738}
1739