xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll (revision ceb613a8bed218e2c98cd4fad3fd2a4a3217bd77)
1013235a2SBen Shi; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
2013235a2SBen Shi; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
3013235a2SBen Shi; RUN:     -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
4013235a2SBen Shi; RUN:     | FileCheck %s
5013235a2SBen Shi; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
6013235a2SBen Shi; RUN:     | FileCheck %s --check-prefix=DEFAULT
7013235a2SBen Shi
8013235a2SBen Shideclare float @fabsf(float) readonly nounwind willreturn
9013235a2SBen Shi
; Applies the @fabsf library call (with fast-math flags) to each of the four
; lanes of a loaded <4 x float> and rebuilds the vector with insertelement.
; Both test configurations expect the four scalar calls to be replaced by a
; single @llvm.fabs.v4f32 call on the loaded vector.
10013235a2SBen Shidefine <4 x float> @fabs_4x(ptr %a) {
11013235a2SBen Shi; CHECK-LABEL: define <4 x float> @fabs_4x
12013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
13013235a2SBen Shi; CHECK-NEXT:  entry:
14013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
15013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
16013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[TMP1]]
17013235a2SBen Shi;
18013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @fabs_4x
19013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] {
20013235a2SBen Shi; DEFAULT-NEXT:  entry:
21013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
227f26c27eSPhilip Reames; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
237f26c27eSPhilip Reames; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
24013235a2SBen Shi;
25013235a2SBen Shientry:
26013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
27013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
28013235a2SBen Shi  %1 = tail call fast float @fabsf(float %vecext)
29013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
30013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
31013235a2SBen Shi  %2 = tail call fast float @fabsf(float %vecext.1)
32013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
33013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
34013235a2SBen Shi  %3 = tail call fast float @fabsf(float %vecext.2)
35013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
36013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
37013235a2SBen Shi  %4 = tail call fast float @fabsf(float %vecext.3)
38013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
39013235a2SBen Shi  ret <4 x float> %vecins.3
40013235a2SBen Shi}
41013235a2SBen Shi
42013235a2SBen Shideclare float @llvm.fabs.f32(float)
43013235a2SBen Shi
; Same lane-by-lane pattern as the library-call variant, but the scalar calls
; use the @llvm.fabs.f32 intrinsic. Both test configurations expect a single
; @llvm.fabs.v4f32 call on the loaded vector.
44013235a2SBen Shidefine <4 x float> @int_fabs_4x(ptr %a) {
45013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_fabs_4x
46013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
47013235a2SBen Shi; CHECK-NEXT:  entry:
48013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
49013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
50013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[TMP1]]
51013235a2SBen Shi;
52013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_fabs_4x
53013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
54013235a2SBen Shi; DEFAULT-NEXT:  entry:
55013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
567f26c27eSPhilip Reames; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
577f26c27eSPhilip Reames; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
58013235a2SBen Shi;
59013235a2SBen Shientry:
60013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
61013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
62013235a2SBen Shi  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
63013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
64013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
65013235a2SBen Shi  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
66013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
67013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
68013235a2SBen Shi  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
69013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
70013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
71013235a2SBen Shi  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
72013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
73013235a2SBen Shi  ret <4 x float> %vecins.3
74013235a2SBen Shi}
75013235a2SBen Shi
76013235a2SBen Shideclare float @sqrtf(float) readonly nounwind willreturn
77013235a2SBen Shi
; Applies the @sqrtf library call (with fast-math flags) to each of the four
; lanes of a loaded <4 x float> and rebuilds the vector with insertelement.
; Both test configurations expect the four scalar calls to be replaced by a
; single @llvm.sqrt.v4f32 call on the loaded vector.
78013235a2SBen Shidefine <4 x float> @sqrt_4x(ptr %a) {
79013235a2SBen Shi; CHECK-LABEL: define <4 x float> @sqrt_4x
80013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
81013235a2SBen Shi; CHECK-NEXT:  entry:
82013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
83013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
84013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[TMP1]]
85013235a2SBen Shi;
86013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @sqrt_4x
87013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
88013235a2SBen Shi; DEFAULT-NEXT:  entry:
89013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
907f26c27eSPhilip Reames; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
917f26c27eSPhilip Reames; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
92013235a2SBen Shi;
93013235a2SBen Shientry:
94013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
95013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
96013235a2SBen Shi  %1 = tail call fast float @sqrtf(float %vecext)
97013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
98013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
99013235a2SBen Shi  %2 = tail call fast float @sqrtf(float %vecext.1)
100013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
101013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
102013235a2SBen Shi  %3 = tail call fast float @sqrtf(float %vecext.2)
103013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
104013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
105013235a2SBen Shi  %4 = tail call fast float @sqrtf(float %vecext.3)
106013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
107013235a2SBen Shi  ret <4 x float> %vecins.3
108013235a2SBen Shi}
109013235a2SBen Shi
110013235a2SBen Shideclare float @llvm.sqrt.f32(float)
111013235a2SBen Shi
; Same lane-by-lane pattern as the library-call variant, but the scalar calls
; use the @llvm.sqrt.f32 intrinsic. Both test configurations expect a single
; @llvm.sqrt.v4f32 call on the loaded vector.
112013235a2SBen Shidefine <4 x float> @int_sqrt_4x(ptr %a) {
113013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_sqrt_4x
114013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
115013235a2SBen Shi; CHECK-NEXT:  entry:
116013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
117013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
118013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[TMP1]]
119013235a2SBen Shi;
120013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_sqrt_4x
121013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
122013235a2SBen Shi; DEFAULT-NEXT:  entry:
123013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1247f26c27eSPhilip Reames; DEFAULT-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
1257f26c27eSPhilip Reames; DEFAULT-NEXT:    ret <4 x float> [[TMP1]]
126013235a2SBen Shi;
127013235a2SBen Shientry:
128013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
129013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
130013235a2SBen Shi  %1 = tail call fast float @llvm.sqrt.f32(float %vecext)
131013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
132013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
133013235a2SBen Shi  %2 = tail call fast float @llvm.sqrt.f32(float %vecext.1)
134013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
135013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
136013235a2SBen Shi  %3 = tail call fast float @llvm.sqrt.f32(float %vecext.2)
137013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
138013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
139013235a2SBen Shi  %4 = tail call fast float @llvm.sqrt.f32(float %vecext.3)
140013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
141013235a2SBen Shi  ret <4 x float> %vecins.3
142013235a2SBen Shi}
143013235a2SBen Shi
144013235a2SBen Shideclare float @expf(float) readonly nounwind willreturn
145013235a2SBen Shi
146013235a2SBen Shi; We cannot vectorize exp since RISCV has no such instruction.
; Negative test: four fast-math @expf library calls, one per lane of a loaded
; <4 x float>. The expected output under both test configurations keeps the
; same four scalar calls and the extractelement/insertelement chain.
147013235a2SBen Shidefine <4 x float> @exp_4x(ptr %a) {
148013235a2SBen Shi; CHECK-LABEL: define <4 x float> @exp_4x
149013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
150013235a2SBen Shi; CHECK-NEXT:  entry:
151013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
152013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
153013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
154013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
155013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
156013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
157013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
158d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
159d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
160d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
161d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
162d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
163d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
164d70963a7SAlexey Bataev; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
165013235a2SBen Shi;
166013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @exp_4x
167013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
168013235a2SBen Shi; DEFAULT-NEXT:  entry:
169013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
170013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
171013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
172013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
173013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
174013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
175013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
176d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
177d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
178d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
179d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
180d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
181d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
182d70963a7SAlexey Bataev; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
183013235a2SBen Shi;
184013235a2SBen Shientry:
185013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
186013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
187013235a2SBen Shi  %1 = tail call fast float @expf(float %vecext)
188013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
189013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
190013235a2SBen Shi  %2 = tail call fast float @expf(float %vecext.1)
191013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
192013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
193013235a2SBen Shi  %3 = tail call fast float @expf(float %vecext.2)
194013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
195013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
196013235a2SBen Shi  %4 = tail call fast float @expf(float %vecext.3)
197013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
198013235a2SBen Shi  ret <4 x float> %vecins.3
199013235a2SBen Shi}
200013235a2SBen Shi
201013235a2SBen Shideclare float @llvm.exp.f32(float)
202013235a2SBen Shi
203013235a2SBen Shi; We cannot vectorize exp since RISCV has no such instruction.
; Negative test: four fast-math @llvm.exp.f32 intrinsic calls, one per lane of
; a loaded <4 x float>. The expected output under both test configurations
; keeps the same four scalar calls and the extractelement/insertelement chain.
204013235a2SBen Shidefine <4 x float> @int_exp_4x(ptr %a) {
205013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_exp_4x
206013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
207013235a2SBen Shi; CHECK-NEXT:  entry:
208013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
209013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
210013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
211013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
212013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
213013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
214013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
215d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
216d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
217d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
218d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
219d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
220d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
221d70963a7SAlexey Bataev; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
222013235a2SBen Shi;
223013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_exp_4x
224013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
225013235a2SBen Shi; DEFAULT-NEXT:  entry:
226013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
227013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
228013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
229013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
230013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
231013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
232013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
233d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
234d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
235d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
236d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
237d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
238d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
239d70963a7SAlexey Bataev; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
240013235a2SBen Shi;
241013235a2SBen Shientry:
242013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
243013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
244013235a2SBen Shi  %1 = tail call fast float @llvm.exp.f32(float %vecext)
245013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
246013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
247013235a2SBen Shi  %2 = tail call fast float @llvm.exp.f32(float %vecext.1)
248013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
249013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
250013235a2SBen Shi  %3 = tail call fast float @llvm.exp.f32(float %vecext.2)
251013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
252013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
253013235a2SBen Shi  %4 = tail call fast float @llvm.exp.f32(float %vecext.3)
254013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
255013235a2SBen Shi  ret <4 x float> %vecins.3
256013235a2SBen Shi}
257013235a2SBen Shi
258013235a2SBen Shideclare float @logf(float) readonly nounwind willreturn
259013235a2SBen Shi
260013235a2SBen Shi; We cannot vectorize log since RISCV has no such instruction.
; Negative test: four fast-math @logf library calls, one per lane of a loaded
; <4 x float>. The expected output under both test configurations keeps the
; same four scalar calls and the extractelement/insertelement chain.
261013235a2SBen Shidefine <4 x float> @log_4x(ptr %a) {
262013235a2SBen Shi; CHECK-LABEL: define <4 x float> @log_4x
263013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
264013235a2SBen Shi; CHECK-NEXT:  entry:
265013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
266013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
267013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
268013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
269013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
270013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
271013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
272d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
273d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
274d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
275d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
276d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
277d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
278d70963a7SAlexey Bataev; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
279013235a2SBen Shi;
280013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @log_4x
281013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
282013235a2SBen Shi; DEFAULT-NEXT:  entry:
283013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
284013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
285013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
286013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
287013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
288013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
289013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
290d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
291d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
292d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
293d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
294d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
295d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
296d70963a7SAlexey Bataev; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
297013235a2SBen Shi;
298013235a2SBen Shientry:
299013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
300013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
301013235a2SBen Shi  %1 = tail call fast float @logf(float %vecext)
302013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
303013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
304013235a2SBen Shi  %2 = tail call fast float @logf(float %vecext.1)
305013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
306013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
307013235a2SBen Shi  %3 = tail call fast float @logf(float %vecext.2)
308013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
309013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
310013235a2SBen Shi  %4 = tail call fast float @logf(float %vecext.3)
311013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
312013235a2SBen Shi  ret <4 x float> %vecins.3
313013235a2SBen Shi}
314013235a2SBen Shi
315013235a2SBen Shideclare float @llvm.log.f32(float)
316013235a2SBen Shi
317013235a2SBen Shi; We cannot vectorize log since RISCV has no such instruction.
; Negative test: four fast-math @llvm.log.f32 intrinsic calls, one per lane of
; a loaded <4 x float>. The expected output under both test configurations
; keeps the same four scalar calls and the extractelement/insertelement chain.
318013235a2SBen Shidefine <4 x float> @int_log_4x(ptr %a) {
319013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_log_4x
320013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
321013235a2SBen Shi; CHECK-NEXT:  entry:
322013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
323013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
324013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
325013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
326013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
327013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
328013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
329d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
330d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
331d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
332d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
333d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
334d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
335d70963a7SAlexey Bataev; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
336013235a2SBen Shi;
337013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_log_4x
338013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
339013235a2SBen Shi; DEFAULT-NEXT:  entry:
340013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
341013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
342013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
343013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
344013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
345013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]])
346013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
347d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
348d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]])
349d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
350d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
351d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]])
352d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
353d70963a7SAlexey Bataev; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
354013235a2SBen Shi;
355013235a2SBen Shientry:
356013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
357013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
358013235a2SBen Shi  %1 = tail call fast float @llvm.log.f32(float %vecext)
359013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
360013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
361013235a2SBen Shi  %2 = tail call fast float @llvm.log.f32(float %vecext.1)
362013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
363013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
364013235a2SBen Shi  %3 = tail call fast float @llvm.log.f32(float %vecext.2)
365013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
366013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
367013235a2SBen Shi  %4 = tail call fast float @llvm.log.f32(float %vecext.3)
368013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
369013235a2SBen Shi  ret <4 x float> %vecins.3
370013235a2SBen Shi}
371013235a2SBen Shi
372013235a2SBen Shideclare float @sinf(float) readonly nounwind willreturn
373013235a2SBen Shi
374013235a2SBen Shi; We cannot vectorize sin since RISCV has no such instruction.
; Negative test: four fast-math @sinf library calls, one per lane of a loaded
; <4 x float>. The expected output under both test configurations keeps the
; same four scalar calls and the extractelement/insertelement chain.
375013235a2SBen Shidefine <4 x float> @sin_4x(ptr %a) {
376013235a2SBen Shi; CHECK-LABEL: define <4 x float> @sin_4x
377013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
378013235a2SBen Shi; CHECK-NEXT:  entry:
379013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
380013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
381013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
382013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
383013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
384013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
385013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
386d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
387d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
388d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
389d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
390d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
391d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
392d70963a7SAlexey Bataev; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
393013235a2SBen Shi;
394013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @sin_4x
395013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
396013235a2SBen Shi; DEFAULT-NEXT:  entry:
397013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
398013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
399013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
400013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
401013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
402013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
403013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
404d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
405d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
406d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
407d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
408d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
409d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
410d70963a7SAlexey Bataev; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
411013235a2SBen Shi;
412013235a2SBen Shientry:
413013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
414013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
415013235a2SBen Shi  %1 = tail call fast float @sinf(float %vecext)
416013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
417013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
418013235a2SBen Shi  %2 = tail call fast float @sinf(float %vecext.1)
419013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
420013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
421013235a2SBen Shi  %3 = tail call fast float @sinf(float %vecext.2)
422013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
423013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
424013235a2SBen Shi  %4 = tail call fast float @sinf(float %vecext.3)
425013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
426013235a2SBen Shi  ret <4 x float> %vecins.3
427013235a2SBen Shi}
428013235a2SBen Shi
429013235a2SBen Shideclare float @llvm.sin.f32(float)
430013235a2SBen Shi
431013235a2SBen Shi; We cannot vectorize sin since RISCV has no such instruction.
; Expected under both RUN lines: the four scalar @llvm.sin.f32 calls remain as they are;
; no vector call is formed.
432013235a2SBen Shidefine <4 x float> @int_sin_4x(ptr %a) {
433013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_sin_4x
434013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
435013235a2SBen Shi; CHECK-NEXT:  entry:
436013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
437013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
438013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
439013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
440013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
441013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
442013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
443d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
444d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
445d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
446d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
447d70963a7SAlexey Bataev; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
448d70963a7SAlexey Bataev; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
449d70963a7SAlexey Bataev; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
450013235a2SBen Shi;
451013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_sin_4x
452013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
453013235a2SBen Shi; DEFAULT-NEXT:  entry:
454013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
455013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
456013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
457013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
458013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
459013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
460013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
461d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
462d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
463d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
464d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
465d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
466d70963a7SAlexey Bataev; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
467d70963a7SAlexey Bataev; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
468013235a2SBen Shi;
469013235a2SBen Shientry:
  ; Per-lane scalar pattern: extract lane i of %0, call @llvm.sin.f32 on it, and insert
  ; the result at index i of the vector that is finally returned.
470013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
471013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
472013235a2SBen Shi  %1 = tail call fast float @llvm.sin.f32(float %vecext)
473013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
474013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
475013235a2SBen Shi  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
476013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
477013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
478013235a2SBen Shi  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
479013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
480013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
481013235a2SBen Shi  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
482013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
483013235a2SBen Shi  ret <4 x float> %vecins.3
484013235a2SBen Shi}
485013235a2SBen Shi
486013235a2SBen Shideclare float @asinf(float) readonly nounwind willreturn
487013235a2SBen Shi
488013235a2SBen Shi; We cannot vectorize asin since RISCV has no such instruction.
; Expected under both RUN lines: the four scalar @asinf calls remain as they are;
; no vector call is formed.
489013235a2SBen Shidefine <4 x float> @asin_4x(ptr %a) {
490013235a2SBen Shi; CHECK-LABEL: define <4 x float> @asin_4x
491013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
492013235a2SBen Shi; CHECK-NEXT:  entry:
493013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
494013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
495013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
496013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
497013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
498013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
499013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
500013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
501013235a2SBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
502013235a2SBen Shi; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
503013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
504013235a2SBen Shi; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
505013235a2SBen Shi; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
506013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
507013235a2SBen Shi;
508013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @asin_4x
509013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
510013235a2SBen Shi; DEFAULT-NEXT:  entry:
511013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
512013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
513013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
514013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
515013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
516013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
517013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
518013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
519013235a2SBen Shi; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
520013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
521013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
522013235a2SBen Shi; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
523013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
524013235a2SBen Shi; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
525013235a2SBen Shi;
526013235a2SBen Shientry:
  ; Per-lane scalar pattern: extract lane i of %0, call @asinf on it, and insert
  ; the result at index i of the vector that is finally returned.
527013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
528013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
529013235a2SBen Shi  %1 = tail call fast float @asinf(float %vecext)
530013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
531013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
532013235a2SBen Shi  %2 = tail call fast float @asinf(float %vecext.1)
533013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
534013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
535013235a2SBen Shi  %3 = tail call fast float @asinf(float %vecext.2)
536013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
537013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
538013235a2SBen Shi  %4 = tail call fast float @asinf(float %vecext.3)
539013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
540013235a2SBen Shi  ret <4 x float> %vecins.3
541013235a2SBen Shi}
542013235a2SBen Shi
543013235a2SBen Shideclare float @llvm.asin.f32(float)
544013235a2SBen Shi
545013235a2SBen Shi; We cannot vectorize asin since RISCV has no such instruction.
; Expected under both RUN lines: the four scalar @llvm.asin.f32 calls remain as they are;
; no vector call is formed.
546013235a2SBen Shidefine <4 x float> @int_asin_4x(ptr %a) {
547013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_asin_4x
548013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
549013235a2SBen Shi; CHECK-NEXT:  entry:
550013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
551013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
552013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
553013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
554013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
555013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
556013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
557013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
558013235a2SBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
559013235a2SBen Shi; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
560013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
561013235a2SBen Shi; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
562013235a2SBen Shi; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
563013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
564013235a2SBen Shi;
565013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_asin_4x
566013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
567013235a2SBen Shi; DEFAULT-NEXT:  entry:
568013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
569013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
570013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT]])
571013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
572013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
573013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_1]])
574013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
575013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
576013235a2SBen Shi; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_2]])
577013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
578013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
579013235a2SBen Shi; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asin.f32(float [[VECEXT_3]])
580013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
581013235a2SBen Shi; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
582013235a2SBen Shi;
583013235a2SBen Shientry:
  ; Per-lane scalar pattern: extract lane i of %0, call @llvm.asin.f32 on it, and insert
  ; the result at index i of the vector that is finally returned.
584013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
585013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
586013235a2SBen Shi  %1 = tail call fast float @llvm.asin.f32(float %vecext)
587013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
588013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
589013235a2SBen Shi  %2 = tail call fast float @llvm.asin.f32(float %vecext.1)
590013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
591013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
592013235a2SBen Shi  %3 = tail call fast float @llvm.asin.f32(float %vecext.2)
593013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
594013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
595013235a2SBen Shi  %4 = tail call fast float @llvm.asin.f32(float %vecext.3)
596013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
597013235a2SBen Shi  ret <4 x float> %vecins.3
598013235a2SBen Shi}
599013235a2SBen Shi
600*ceb613a8SSimon Pilgrimdeclare float @cosf(float) readonly nounwind willreturn
601*ceb613a8SSimon Pilgrim
602*ceb613a8SSimon Pilgrim; We cannot vectorize cos since RISCV has no such instruction.
; Expected under both RUN lines: the four scalar @cosf calls remain as they are;
; no vector call is formed.
603*ceb613a8SSimon Pilgrimdefine <4 x float> @cos_4x(ptr %a) {
604*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @cos_4x
605*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
606*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
607*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
608*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
609*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
610*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
611*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
612*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
613*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
614*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
615*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
616*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
617*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
618*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
619*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
620*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
621*ceb613a8SSimon Pilgrim;
622*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @cos_4x
623*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
624*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
625*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
626*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
627*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
628*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
629*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
630*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
631*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
632*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
633*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
634*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
635*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
636*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
637*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
638*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
639*ceb613a8SSimon Pilgrim;
640*ceb613a8SSimon Pilgrimentry:
  ; Per-lane scalar pattern: extract lane i of %0, call @cosf on it, and insert
  ; the result at index i of the vector that is finally returned.
641*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
642*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
643*ceb613a8SSimon Pilgrim  %1 = tail call fast float @cosf(float %vecext)
644*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
645*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
646*ceb613a8SSimon Pilgrim  %2 = tail call fast float @cosf(float %vecext.1)
647*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
648*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
649*ceb613a8SSimon Pilgrim  %3 = tail call fast float @cosf(float %vecext.2)
650*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
651*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
652*ceb613a8SSimon Pilgrim  %4 = tail call fast float @cosf(float %vecext.3)
653*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
654*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
655*ceb613a8SSimon Pilgrim}
656*ceb613a8SSimon Pilgrim
657*ceb613a8SSimon Pilgrimdeclare float @llvm.cos.f32(float)
658*ceb613a8SSimon Pilgrim
659*ceb613a8SSimon Pilgrim; We cannot vectorize cos since RISCV has no such instruction.
; Expected under both RUN lines: the four scalar @llvm.cos.f32 calls remain as they are;
; no vector call is formed.
660*ceb613a8SSimon Pilgrimdefine <4 x float> @int_cos_4x(ptr %a) {
661*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_cos_4x
662*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
663*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
664*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
665*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
666*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
667*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
668*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
669*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
670*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
671*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
672*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
673*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
674*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
675*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
676*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
677*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
678*ceb613a8SSimon Pilgrim;
679*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_cos_4x
680*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
681*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
682*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
683*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
684*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]])
685*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
686*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
687*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]])
688*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
689*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
690*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_2]])
691*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
692*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
693*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_3]])
694*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
695*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
696*ceb613a8SSimon Pilgrim;
697*ceb613a8SSimon Pilgrimentry:
  ; Per-lane scalar pattern: extract lane i of %0, call @llvm.cos.f32 on it, and insert
  ; the result at index i of the vector that is finally returned.
698*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
699*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
700*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.cos.f32(float %vecext)
701*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
702*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
703*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.cos.f32(float %vecext.1)
704*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
705*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
706*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.cos.f32(float %vecext.2)
707*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
708*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
709*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.cos.f32(float %vecext.3)
710*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
711*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
712*ceb613a8SSimon Pilgrim}
713*ceb613a8SSimon Pilgrim
714*ceb613a8SSimon Pilgrimdeclare float @acosf(float) readonly nounwind willreturn
715*ceb613a8SSimon Pilgrim
716*ceb613a8SSimon Pilgrim; We cannot vectorize acos since RISCV has no such instruction.
; Expected under both RUN lines: the four scalar @acosf calls remain as they are;
; no vector call is formed.
717*ceb613a8SSimon Pilgrimdefine <4 x float> @acos_4x(ptr %a) {
718*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @acos_4x
719*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
720*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
721*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
722*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
723*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
724*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
725*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
726*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
727*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
728*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
729*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
730*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
731*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
732*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
733*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
734*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
735*ceb613a8SSimon Pilgrim;
736*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @acos_4x
737*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
738*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
739*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
740*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
741*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
742*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
743*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
744*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
745*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
746*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
747*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
748*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
749*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
750*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
751*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
752*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
753*ceb613a8SSimon Pilgrim;
754*ceb613a8SSimon Pilgrimentry:
  ; Per-lane scalar pattern: extract lane i of %0, call @acosf on it, and insert
  ; the result at index i of the vector that is finally returned.
755*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
756*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
757*ceb613a8SSimon Pilgrim  %1 = tail call fast float @acosf(float %vecext)
758*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
759*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
760*ceb613a8SSimon Pilgrim  %2 = tail call fast float @acosf(float %vecext.1)
761*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
762*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
763*ceb613a8SSimon Pilgrim  %3 = tail call fast float @acosf(float %vecext.2)
764*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
765*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
766*ceb613a8SSimon Pilgrim  %4 = tail call fast float @acosf(float %vecext.3)
767*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
768*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
769*ceb613a8SSimon Pilgrim}
770*ceb613a8SSimon Pilgrim
771*ceb613a8SSimon Pilgrimdeclare float @llvm.acos.f32(float)
772*ceb613a8SSimon Pilgrim
773*ceb613a8SSimon Pilgrim; We cannot vectorize acos since RISCV has no such instruction.
; Expected under both RUN lines: the four scalar @llvm.acos.f32 calls remain as they are;
; no vector call is formed.
774*ceb613a8SSimon Pilgrimdefine <4 x float> @int_acos_4x(ptr %a) {
775*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_acos_4x
776*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
777*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
778*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
779*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
780*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
781*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
782*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
783*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
784*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
785*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
786*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
787*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
788*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
789*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
790*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
791*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
792*ceb613a8SSimon Pilgrim;
793*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_acos_4x
794*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
795*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
796*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
797*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
798*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT]])
799*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
800*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
801*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_1]])
802*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
803*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
804*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_2]])
805*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
806*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
807*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acos.f32(float [[VECEXT_3]])
808*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
809*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
810*ceb613a8SSimon Pilgrim;
811*ceb613a8SSimon Pilgrimentry:
  ; Per-lane scalar pattern: extract lane i of %0, call @llvm.acos.f32 on it, and insert
  ; the result at index i of the vector that is finally returned.
812*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
813*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
814*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.acos.f32(float %vecext)
815*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
816*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
817*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.acos.f32(float %vecext.1)
818*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
819*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
820*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.acos.f32(float %vecext.2)
821*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
822*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
823*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.acos.f32(float %vecext.3)
824*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
825*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
826*ceb613a8SSimon Pilgrim}
827*ceb613a8SSimon Pilgrim
828*ceb613a8SSimon Pilgrimdeclare float @tanf(float) readonly nounwind willreturn
829*ceb613a8SSimon Pilgrim
830*ceb613a8SSimon Pilgrim; We cannot vectorize tan since RISCV has no such instruction: the four scalar @tanf calls below are expected to be left unchanged under both the CHECK and DEFAULT run lines.
831*ceb613a8SSimon Pilgrimdefine <4 x float> @tan_4x(ptr %a) {
832*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @tan_4x
833*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
834*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
835*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
836*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
837*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
838*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
839*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
840*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
841*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
842*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
843*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
844*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
845*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
846*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
847*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
848*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
849*ceb613a8SSimon Pilgrim;
850*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @tan_4x
851*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
852*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
853*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
854*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
855*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
856*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
857*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
858*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
859*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
860*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
861*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
862*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
863*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
864*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
865*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
866*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
867*ceb613a8SSimon Pilgrim;
868*ceb613a8SSimon Pilgrimentry:
869*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
870*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
871*ceb613a8SSimon Pilgrim  %1 = tail call fast float @tanf(float %vecext)
872*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
873*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
874*ceb613a8SSimon Pilgrim  %2 = tail call fast float @tanf(float %vecext.1)
875*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
876*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
877*ceb613a8SSimon Pilgrim  %3 = tail call fast float @tanf(float %vecext.2)
878*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
879*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
880*ceb613a8SSimon Pilgrim  %4 = tail call fast float @tanf(float %vecext.3)
881*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
882*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
883*ceb613a8SSimon Pilgrim}
884*ceb613a8SSimon Pilgrim
885*ceb613a8SSimon Pilgrimdeclare float @llvm.tan.f32(float)
886*ceb613a8SSimon Pilgrim
887*ceb613a8SSimon Pilgrim; We cannot vectorize the tan intrinsic since RISCV has no such instruction: the four scalar @llvm.tan.f32 calls below are expected to be left unchanged under both the CHECK and DEFAULT run lines.
888*ceb613a8SSimon Pilgrimdefine <4 x float> @int_tan_4x(ptr %a) {
889*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_tan_4x
890*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
891*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
892*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
893*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
894*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
895*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
896*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
897*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
898*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
899*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
900*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
901*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
902*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
903*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
904*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
905*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
906*ceb613a8SSimon Pilgrim;
907*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_tan_4x
908*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
909*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
910*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
911*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
912*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT]])
913*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
914*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
915*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_1]])
916*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
917*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
918*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_2]])
919*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
920*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
921*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tan.f32(float [[VECEXT_3]])
922*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
923*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
924*ceb613a8SSimon Pilgrim;
925*ceb613a8SSimon Pilgrimentry:
926*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
927*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
928*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.tan.f32(float %vecext)
929*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
930*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
931*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.tan.f32(float %vecext.1)
932*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
933*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
934*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.tan.f32(float %vecext.2)
935*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
936*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
937*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.tan.f32(float %vecext.3)
938*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
939*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
940*ceb613a8SSimon Pilgrim}
941*ceb613a8SSimon Pilgrim
942*ceb613a8SSimon Pilgrimdeclare float @atanf(float) readonly nounwind willreturn
943*ceb613a8SSimon Pilgrim
944*ceb613a8SSimon Pilgrim; We cannot vectorize atan since RISCV has no such instruction: the four scalar @atanf calls below are expected to be left unchanged under both the CHECK and DEFAULT run lines.
945*ceb613a8SSimon Pilgrimdefine <4 x float> @atan_4x(ptr %a) {
946*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @atan_4x
947*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
948*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
949*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
950*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
951*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
952*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
953*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
954*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
955*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
956*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
957*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
958*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
959*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
960*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
961*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
962*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
963*ceb613a8SSimon Pilgrim;
964*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @atan_4x
965*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
966*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
967*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
968*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
969*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
970*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
971*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
972*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
973*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
974*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
975*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
976*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
977*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
978*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
979*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
980*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
981*ceb613a8SSimon Pilgrim;
982*ceb613a8SSimon Pilgrimentry:
983*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
984*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
985*ceb613a8SSimon Pilgrim  %1 = tail call fast float @atanf(float %vecext)
986*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
987*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
988*ceb613a8SSimon Pilgrim  %2 = tail call fast float @atanf(float %vecext.1)
989*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
990*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
991*ceb613a8SSimon Pilgrim  %3 = tail call fast float @atanf(float %vecext.2)
992*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
993*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
994*ceb613a8SSimon Pilgrim  %4 = tail call fast float @atanf(float %vecext.3)
995*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
996*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
997*ceb613a8SSimon Pilgrim}
998*ceb613a8SSimon Pilgrim
999*ceb613a8SSimon Pilgrimdeclare float @llvm.atan.f32(float)
1000*ceb613a8SSimon Pilgrim
1001*ceb613a8SSimon Pilgrim; We cannot vectorize the atan intrinsic since RISCV has no such instruction: the four scalar @llvm.atan.f32 calls below are expected to be left unchanged under both the CHECK and DEFAULT run lines.
1002*ceb613a8SSimon Pilgrimdefine <4 x float> @int_atan_4x(ptr %a) {
1003*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_atan_4x
1004*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1005*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1006*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1007*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1008*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
1009*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1010*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1011*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
1012*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1013*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1014*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
1015*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1016*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1017*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
1018*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1019*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1020*ceb613a8SSimon Pilgrim;
1021*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_atan_4x
1022*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1023*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1024*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1025*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1026*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT]])
1027*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1028*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1029*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_1]])
1030*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1031*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1032*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_2]])
1033*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1034*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1035*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atan.f32(float [[VECEXT_3]])
1036*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1037*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1038*ceb613a8SSimon Pilgrim;
1039*ceb613a8SSimon Pilgrimentry:
1040*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1041*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1042*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.atan.f32(float %vecext)
1043*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1044*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1045*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.atan.f32(float %vecext.1)
1046*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1047*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1048*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.atan.f32(float %vecext.2)
1049*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1050*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1051*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.atan.f32(float %vecext.3)
1052*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1053*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1054*ceb613a8SSimon Pilgrim}
1055*ceb613a8SSimon Pilgrim
1056*ceb613a8SSimon Pilgrimdeclare float @sinhf(float) readonly nounwind willreturn
1057*ceb613a8SSimon Pilgrim
1058*ceb613a8SSimon Pilgrim; We cannot vectorize sinh since RISCV has no such instruction: the four scalar @sinhf calls below are expected to be left unchanged under both the CHECK and DEFAULT run lines.
1059*ceb613a8SSimon Pilgrimdefine <4 x float> @sinh_4x(ptr %a) {
1060*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @sinh_4x
1061*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1062*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1063*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1064*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1065*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
1066*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1067*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1068*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
1069*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1070*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1071*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
1072*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1073*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1074*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
1075*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1076*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1077*ceb613a8SSimon Pilgrim;
1078*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @sinh_4x
1079*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1080*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1081*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1082*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1083*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
1084*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1085*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1086*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
1087*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1088*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1089*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
1090*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1091*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1092*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
1093*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1094*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1095*ceb613a8SSimon Pilgrim;
1096*ceb613a8SSimon Pilgrimentry:
1097*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1098*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1099*ceb613a8SSimon Pilgrim  %1 = tail call fast float @sinhf(float %vecext)
1100*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1101*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1102*ceb613a8SSimon Pilgrim  %2 = tail call fast float @sinhf(float %vecext.1)
1103*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1104*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1105*ceb613a8SSimon Pilgrim  %3 = tail call fast float @sinhf(float %vecext.2)
1106*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1107*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1108*ceb613a8SSimon Pilgrim  %4 = tail call fast float @sinhf(float %vecext.3)
1109*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1110*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1111*ceb613a8SSimon Pilgrim}
1112*ceb613a8SSimon Pilgrim
1113*ceb613a8SSimon Pilgrimdeclare float @llvm.sinh.f32(float)
1114*ceb613a8SSimon Pilgrim
1115*ceb613a8SSimon Pilgrim; We cannot vectorize the sinh intrinsic since RISCV has no such instruction: the four scalar @llvm.sinh.f32 calls below are expected to be left unchanged under both the CHECK and DEFAULT run lines.
1116*ceb613a8SSimon Pilgrimdefine <4 x float> @int_sinh_4x(ptr %a) {
1117*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_sinh_4x
1118*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1119*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1120*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1121*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1122*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
1123*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1124*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1125*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
1126*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1127*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1128*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
1129*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1130*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1131*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
1132*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1133*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1134*ceb613a8SSimon Pilgrim;
1135*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_sinh_4x
1136*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1137*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1138*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1139*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1140*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT]])
1141*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1142*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1143*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_1]])
1144*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1145*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1146*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_2]])
1147*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1148*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1149*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sinh.f32(float [[VECEXT_3]])
1150*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1151*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1152*ceb613a8SSimon Pilgrim;
1153*ceb613a8SSimon Pilgrimentry:
1154*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1155*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1156*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.sinh.f32(float %vecext)
1157*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1158*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1159*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.sinh.f32(float %vecext.1)
1160*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1161*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1162*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.sinh.f32(float %vecext.2)
1163*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1164*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1165*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.sinh.f32(float %vecext.3)
1166*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1167*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1168*ceb613a8SSimon Pilgrim}
1169*ceb613a8SSimon Pilgrim
1170*ceb613a8SSimon Pilgrimdeclare float @asinhf(float) readonly nounwind willreturn
1171*ceb613a8SSimon Pilgrim
1172*ceb613a8SSimon Pilgrim; We cannot vectorize asinh since RISCV has no such instruction.
1173*ceb613a8SSimon Pilgrimdefine <4 x float> @asinh_4x(ptr %a) {
1174*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @asinh_4x
1175*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1176*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1177*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1178*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1179*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
1180*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1181*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1182*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
1183*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1184*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1185*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
1186*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1187*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1188*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
1189*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1190*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1191*ceb613a8SSimon Pilgrim;
1192*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @asinh_4x
1193*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1194*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1195*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1196*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1197*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
1198*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1199*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1200*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
1201*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1202*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1203*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
1204*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1205*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1206*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
1207*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1208*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1209*ceb613a8SSimon Pilgrim;
1210*ceb613a8SSimon Pilgrimentry:
1211*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1212*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1213*ceb613a8SSimon Pilgrim  %1 = tail call fast float @asinhf(float %vecext)
1214*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1215*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1216*ceb613a8SSimon Pilgrim  %2 = tail call fast float @asinhf(float %vecext.1)
1217*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1218*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1219*ceb613a8SSimon Pilgrim  %3 = tail call fast float @asinhf(float %vecext.2)
1220*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1221*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1222*ceb613a8SSimon Pilgrim  %4 = tail call fast float @asinhf(float %vecext.3)
1223*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1224*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1225*ceb613a8SSimon Pilgrim}
1226*ceb613a8SSimon Pilgrim
1227*ceb613a8SSimon Pilgrimdeclare float @llvm.asinh.f32(float)
1228*ceb613a8SSimon Pilgrim
1229*ceb613a8SSimon Pilgrim; We cannot vectorize asinh since RISCV has no such instruction.
; The input applies the scalar @llvm.asinh.f32 intrinsic to each lane of one
; <4 x float> load; under both RUN configurations the four calls are expected
; to stay scalar.
1230*ceb613a8SSimon Pilgrimdefine <4 x float> @int_asinh_4x(ptr %a) {
1231*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_asinh_4x
1232*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1233*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1234*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1235*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1236*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
1237*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1238*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1239*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
1240*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1241*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1242*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
1243*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1244*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1245*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
1246*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1247*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1248*ceb613a8SSimon Pilgrim;
1249*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_asinh_4x
1250*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1251*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1252*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1253*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1254*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT]])
1255*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1256*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1257*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_1]])
1258*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1259*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1260*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_2]])
1261*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1262*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1263*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.asinh.f32(float [[VECEXT_3]])
1264*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1265*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1266*ceb613a8SSimon Pilgrim;
1267*ceb613a8SSimon Pilgrimentry:
1268*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1269*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1270*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.asinh.f32(float %vecext)
1271*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1272*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1273*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.asinh.f32(float %vecext.1)
1274*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1275*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1276*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.asinh.f32(float %vecext.2)
1277*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1278*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1279*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.asinh.f32(float %vecext.3)
1280*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1281*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1282*ceb613a8SSimon Pilgrim}
1283*ceb613a8SSimon Pilgrim
1284013235a2SBen Shideclare float @coshf(float) readonly nounwind willreturn
1285013235a2SBen Shi
1286013235a2SBen Shi; We cannot vectorize cosh since RISCV has no such instruction.
; The input applies the scalar @coshf function to each lane of one <4 x float>
; load; under both RUN configurations the four calls are expected to stay scalar.
1287013235a2SBen Shidefine <4 x float> @cosh_4x(ptr %a) {
1288013235a2SBen Shi; CHECK-LABEL: define <4 x float> @cosh_4x
1289013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1290013235a2SBen Shi; CHECK-NEXT:  entry:
1291013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1292013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1293013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
1294013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1295013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1296013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
1297013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1298013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1299013235a2SBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
1300013235a2SBen Shi; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1301013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1302013235a2SBen Shi; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
1303013235a2SBen Shi; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1304013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1305013235a2SBen Shi;
1306013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @cosh_4x
1307013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1308013235a2SBen Shi; DEFAULT-NEXT:  entry:
1309013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1310013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1311013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
1312013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1313013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1314013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
1315013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1316013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1317013235a2SBen Shi; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
1318013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1319013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1320013235a2SBen Shi; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
1321013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1322013235a2SBen Shi; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1323013235a2SBen Shi;
1324013235a2SBen Shientry:
1325013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
1326013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
1327013235a2SBen Shi  %1 = tail call fast float @coshf(float %vecext)
1328013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
1329013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
1330013235a2SBen Shi  %2 = tail call fast float @coshf(float %vecext.1)
1331013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1332013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
1333013235a2SBen Shi  %3 = tail call fast float @coshf(float %vecext.2)
1334013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1335013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
1336013235a2SBen Shi  %4 = tail call fast float @coshf(float %vecext.3)
1337013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1338013235a2SBen Shi  ret <4 x float> %vecins.3
1339013235a2SBen Shi}
1340013235a2SBen Shi
1341013235a2SBen Shideclare float @llvm.cosh.f32(float)
1342013235a2SBen Shi
1343013235a2SBen Shi; We cannot vectorize cosh since RISCV has no such instruction.
; The input applies the scalar @llvm.cosh.f32 intrinsic to each lane of one
; <4 x float> load; under both RUN configurations the four calls are expected
; to stay scalar.
1344013235a2SBen Shidefine <4 x float> @int_cosh_4x(ptr %a) {
1345013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_cosh_4x
1346013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1347013235a2SBen Shi; CHECK-NEXT:  entry:
1348013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1349013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1350013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
1351013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1352013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1353013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
1354013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1355013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1356013235a2SBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
1357013235a2SBen Shi; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1358013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1359013235a2SBen Shi; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
1360013235a2SBen Shi; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1361013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1362013235a2SBen Shi;
1363013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_cosh_4x
1364013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1365013235a2SBen Shi; DEFAULT-NEXT:  entry:
1366013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1367013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1368013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT]])
1369013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1370013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1371013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_1]])
1372013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1373013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1374013235a2SBen Shi; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_2]])
1375013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1376013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1377013235a2SBen Shi; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.cosh.f32(float [[VECEXT_3]])
1378013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1379013235a2SBen Shi; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1380013235a2SBen Shi;
1381013235a2SBen Shientry:
1382013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
1383013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
1384013235a2SBen Shi  %1 = tail call fast float @llvm.cosh.f32(float %vecext)
1385013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
1386013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
1387013235a2SBen Shi  %2 = tail call fast float @llvm.cosh.f32(float %vecext.1)
1388013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1389013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
1390013235a2SBen Shi  %3 = tail call fast float @llvm.cosh.f32(float %vecext.2)
1391013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1392013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
1393013235a2SBen Shi  %4 = tail call fast float @llvm.cosh.f32(float %vecext.3)
1394013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1395013235a2SBen Shi  ret <4 x float> %vecins.3
1396013235a2SBen Shi}
1397013235a2SBen Shi
1398*ceb613a8SSimon Pilgrimdeclare float @acoshf(float) readonly nounwind willreturn
1399*ceb613a8SSimon Pilgrim
1400*ceb613a8SSimon Pilgrim; We cannot vectorize acosh since RISCV has no such instruction.
; The input applies the scalar @acoshf function to each lane of one <4 x float>
; load; under both RUN configurations the four calls are expected to stay scalar.
1401*ceb613a8SSimon Pilgrimdefine <4 x float> @acosh_4x(ptr %a) {
1402*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @acosh_4x
1403*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1404*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1405*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1406*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1407*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
1408*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1409*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1410*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
1411*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1412*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1413*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
1414*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1415*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1416*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
1417*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1418*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1419*ceb613a8SSimon Pilgrim;
1420*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @acosh_4x
1421*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1422*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1423*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1424*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1425*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
1426*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1427*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1428*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
1429*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1430*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1431*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
1432*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1433*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1434*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
1435*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1436*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1437*ceb613a8SSimon Pilgrim;
1438*ceb613a8SSimon Pilgrimentry:
1439*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1440*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1441*ceb613a8SSimon Pilgrim  %1 = tail call fast float @acoshf(float %vecext)
1442*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1443*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1444*ceb613a8SSimon Pilgrim  %2 = tail call fast float @acoshf(float %vecext.1)
1445*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1446*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1447*ceb613a8SSimon Pilgrim  %3 = tail call fast float @acoshf(float %vecext.2)
1448*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1449*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1450*ceb613a8SSimon Pilgrim  %4 = tail call fast float @acoshf(float %vecext.3)
1451*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1452*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1453*ceb613a8SSimon Pilgrim}
1454*ceb613a8SSimon Pilgrim
1455*ceb613a8SSimon Pilgrimdeclare float @llvm.acosh.f32(float)
1456*ceb613a8SSimon Pilgrim
1457*ceb613a8SSimon Pilgrim; We cannot vectorize acosh since RISCV has no such instruction.
; The input applies the scalar @llvm.acosh.f32 intrinsic to each lane of one
; <4 x float> load; under both RUN configurations the four calls are expected
; to stay scalar.
1458*ceb613a8SSimon Pilgrimdefine <4 x float> @int_acosh_4x(ptr %a) {
1459*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_acosh_4x
1460*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1461*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1462*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1463*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1464*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
1465*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1466*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1467*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
1468*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1469*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1470*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
1471*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1472*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1473*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
1474*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1475*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1476*ceb613a8SSimon Pilgrim;
1477*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_acosh_4x
1478*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1479*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1480*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1481*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1482*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT]])
1483*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1484*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1485*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_1]])
1486*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1487*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1488*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_2]])
1489*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1490*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1491*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.acosh.f32(float [[VECEXT_3]])
1492*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1493*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1494*ceb613a8SSimon Pilgrim;
1495*ceb613a8SSimon Pilgrimentry:
1496*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1497*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1498*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.acosh.f32(float %vecext)
1499*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1500*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1501*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.acosh.f32(float %vecext.1)
1502*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1503*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1504*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.acosh.f32(float %vecext.2)
1505*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1506*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1507*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.acosh.f32(float %vecext.3)
1508*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1509*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1510*ceb613a8SSimon Pilgrim}
1511*ceb613a8SSimon Pilgrim
1512*ceb613a8SSimon Pilgrimdeclare float @tanhf(float) readonly nounwind willreturn
1513*ceb613a8SSimon Pilgrim
1514*ceb613a8SSimon Pilgrim; We cannot vectorize tanh since RISCV has no such instruction.
; The input applies the scalar @tanhf function to each lane of one <4 x float>
; load; under both RUN configurations the four calls are expected to stay scalar.
1515*ceb613a8SSimon Pilgrimdefine <4 x float> @tanh_4x(ptr %a) {
1516*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @tanh_4x
1517*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1518*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1519*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1520*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1521*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
1522*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1523*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1524*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
1525*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1526*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1527*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
1528*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1529*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1530*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
1531*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1532*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1533*ceb613a8SSimon Pilgrim;
1534*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @tanh_4x
1535*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1536*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1537*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1538*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1539*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
1540*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1541*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1542*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
1543*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1544*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1545*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
1546*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1547*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1548*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
1549*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1550*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1551*ceb613a8SSimon Pilgrim;
1552*ceb613a8SSimon Pilgrimentry:
1553*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1554*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1555*ceb613a8SSimon Pilgrim  %1 = tail call fast float @tanhf(float %vecext)
1556*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1557*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1558*ceb613a8SSimon Pilgrim  %2 = tail call fast float @tanhf(float %vecext.1)
1559*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1560*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1561*ceb613a8SSimon Pilgrim  %3 = tail call fast float @tanhf(float %vecext.2)
1562*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1563*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1564*ceb613a8SSimon Pilgrim  %4 = tail call fast float @tanhf(float %vecext.3)
1565*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1566*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1567*ceb613a8SSimon Pilgrim}
1568*ceb613a8SSimon Pilgrim
1569*ceb613a8SSimon Pilgrimdeclare float @llvm.tanh.f32(float)
1570*ceb613a8SSimon Pilgrim
1571*ceb613a8SSimon Pilgrim; We cannot vectorize tanh since RISCV has no such instruction; the four scalar llvm.tanh.f32 calls are expected to remain after SLP vectorization under both RUN configurations.
1572*ceb613a8SSimon Pilgrimdefine <4 x float> @int_tanh_4x(ptr %a) {
1573*ceb613a8SSimon Pilgrim; CHECK-LABEL: define <4 x float> @int_tanh_4x
1574*ceb613a8SSimon Pilgrim; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1575*ceb613a8SSimon Pilgrim; CHECK-NEXT:  entry:
1576*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1577*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1578*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
1579*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1580*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1581*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
1582*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1583*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1584*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
1585*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1586*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1587*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
1588*ceb613a8SSimon Pilgrim; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1589*ceb613a8SSimon Pilgrim; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1590*ceb613a8SSimon Pilgrim;
1591*ceb613a8SSimon Pilgrim; DEFAULT-LABEL: define <4 x float> @int_tanh_4x
1592*ceb613a8SSimon Pilgrim; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1593*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:  entry:
1594*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1595*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1596*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT]])
1597*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1598*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1599*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_1]])
1600*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1601*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1602*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_2]])
1603*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1604*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1605*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.tanh.f32(float [[VECEXT_3]])
1606*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1607*ceb613a8SSimon Pilgrim; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1608*ceb613a8SSimon Pilgrim;
1609*ceb613a8SSimon Pilgrimentry:
1610*ceb613a8SSimon Pilgrim  %0 = load <4 x float>, ptr %a, align 16
1611*ceb613a8SSimon Pilgrim  %vecext = extractelement <4 x float> %0, i32 0
1612*ceb613a8SSimon Pilgrim  %1 = tail call fast float @llvm.tanh.f32(float %vecext)
1613*ceb613a8SSimon Pilgrim  %vecins = insertelement <4 x float> undef, float %1, i32 0
1614*ceb613a8SSimon Pilgrim  %vecext.1 = extractelement <4 x float> %0, i32 1
1615*ceb613a8SSimon Pilgrim  %2 = tail call fast float @llvm.tanh.f32(float %vecext.1)
1616*ceb613a8SSimon Pilgrim  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1617*ceb613a8SSimon Pilgrim  %vecext.2 = extractelement <4 x float> %0, i32 2
1618*ceb613a8SSimon Pilgrim  %3 = tail call fast float @llvm.tanh.f32(float %vecext.2)
1619*ceb613a8SSimon Pilgrim  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1620*ceb613a8SSimon Pilgrim  %vecext.3 = extractelement <4 x float> %0, i32 3
1621*ceb613a8SSimon Pilgrim  %4 = tail call fast float @llvm.tanh.f32(float %vecext.3)
1622*ceb613a8SSimon Pilgrim  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1623*ceb613a8SSimon Pilgrim  ret <4 x float> %vecins.3
1624*ceb613a8SSimon Pilgrim}
1625*ceb613a8SSimon Pilgrim
1626013235a2SBen Shideclare float @atanhf(float) readonly nounwind willreturn
1627013235a2SBen Shi
1628013235a2SBen Shi; We cannot vectorize atanh since RISCV has no such instruction; the four scalar atanhf calls are expected to remain after SLP vectorization under both RUN configurations.
1629013235a2SBen Shidefine <4 x float> @atanh_4x(ptr %a) {
1630013235a2SBen Shi; CHECK-LABEL: define <4 x float> @atanh_4x
1631013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1632013235a2SBen Shi; CHECK-NEXT:  entry:
1633013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1634013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1635013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
1636013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1637013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1638013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
1639013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1640013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1641013235a2SBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
1642013235a2SBen Shi; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1643013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1644013235a2SBen Shi; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
1645013235a2SBen Shi; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1646013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1647013235a2SBen Shi;
1648013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @atanh_4x
1649013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1650013235a2SBen Shi; DEFAULT-NEXT:  entry:
1651013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1652013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1653013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
1654013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1655013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1656013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
1657013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1658013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1659013235a2SBen Shi; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
1660013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1661013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1662013235a2SBen Shi; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
1663013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1664013235a2SBen Shi; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1665013235a2SBen Shi;
1666013235a2SBen Shientry:
1667013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
1668013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
1669013235a2SBen Shi  %1 = tail call fast float @atanhf(float %vecext)
1670013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
1671013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
1672013235a2SBen Shi  %2 = tail call fast float @atanhf(float %vecext.1)
1673013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1674013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
1675013235a2SBen Shi  %3 = tail call fast float @atanhf(float %vecext.2)
1676013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1677013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
1678013235a2SBen Shi  %4 = tail call fast float @atanhf(float %vecext.3)
1679013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1680013235a2SBen Shi  ret <4 x float> %vecins.3
1681013235a2SBen Shi}
1682013235a2SBen Shi
1683013235a2SBen Shideclare float @llvm.atanh.f32(float)
1684013235a2SBen Shi
1685013235a2SBen Shi; We cannot vectorize atanh since RISCV has no such instruction; the four scalar llvm.atanh.f32 calls are expected to remain after SLP vectorization under both RUN configurations.
1686013235a2SBen Shidefine <4 x float> @int_atanh_4x(ptr %a) {
1687013235a2SBen Shi; CHECK-LABEL: define <4 x float> @int_atanh_4x
1688013235a2SBen Shi; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1689013235a2SBen Shi; CHECK-NEXT:  entry:
1690013235a2SBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1691013235a2SBen Shi; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1692013235a2SBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
1693013235a2SBen Shi; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1694013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1695013235a2SBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
1696013235a2SBen Shi; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1697013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1698013235a2SBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
1699013235a2SBen Shi; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1700013235a2SBen Shi; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1701013235a2SBen Shi; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
1702013235a2SBen Shi; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1703013235a2SBen Shi; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
1704013235a2SBen Shi;
1705013235a2SBen Shi; DEFAULT-LABEL: define <4 x float> @int_atanh_4x
1706013235a2SBen Shi; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
1707013235a2SBen Shi; DEFAULT-NEXT:  entry:
1708013235a2SBen Shi; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
1709013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
1710013235a2SBen Shi; DEFAULT-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT]])
1711013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
1712013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
1713013235a2SBen Shi; DEFAULT-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_1]])
1714013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
1715013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
1716013235a2SBen Shi; DEFAULT-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_2]])
1717013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
1718013235a2SBen Shi; DEFAULT-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
1719013235a2SBen Shi; DEFAULT-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.atanh.f32(float [[VECEXT_3]])
1720013235a2SBen Shi; DEFAULT-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
1721013235a2SBen Shi; DEFAULT-NEXT:    ret <4 x float> [[VECINS_3]]
1722013235a2SBen Shi;
1723013235a2SBen Shientry:
1724013235a2SBen Shi  %0 = load <4 x float>, ptr %a, align 16
1725013235a2SBen Shi  %vecext = extractelement <4 x float> %0, i32 0
1726013235a2SBen Shi  %1 = tail call fast float @llvm.atanh.f32(float %vecext)
1727013235a2SBen Shi  %vecins = insertelement <4 x float> undef, float %1, i32 0
1728013235a2SBen Shi  %vecext.1 = extractelement <4 x float> %0, i32 1
1729013235a2SBen Shi  %2 = tail call fast float @llvm.atanh.f32(float %vecext.1)
1730013235a2SBen Shi  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
1731013235a2SBen Shi  %vecext.2 = extractelement <4 x float> %0, i32 2
1732013235a2SBen Shi  %3 = tail call fast float @llvm.atanh.f32(float %vecext.2)
1733013235a2SBen Shi  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
1734013235a2SBen Shi  %vecext.3 = extractelement <4 x float> %0, i32 3
1735013235a2SBen Shi  %4 = tail call fast float @llvm.atanh.f32(float %vecext.3)
1736013235a2SBen Shi  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
1737013235a2SBen Shi  ret <4 x float> %vecins.3
1738013235a2SBen Shi}
1739