// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s

#include <lsxintrin.h>
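// Each function below wraps one __lsx_* alias from <lsxintrin.h>; the autogenerated
// assertions check that it lowers to the matching @llvm.loongarch.lsx.* intrinsic,
// with the 128-bit vector arguments and results coerced through i128.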

// CHECK-LABEL: @vsll_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); }
// CHECK-LABEL: @vsll_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); }
// CHECK-LABEL: @vsll_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); }
// CHECK-LABEL: @vsll_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); }
// CHECK-LABEL: @vslli_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); }
// CHECK-LABEL: @vslli_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); }
// CHECK-LABEL: @vslli_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); }
// CHECK-LABEL: @vslli_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); }
// CHECK-LABEL: @vsra_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); }
// CHECK-LABEL: @vsra_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); }
// CHECK-LABEL: @vsra_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); }
// CHECK-LABEL: @vsra_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); }
// CHECK-LABEL: @vsrai_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); }
// CHECK-LABEL: @vsrai_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); }
// CHECK-LABEL: @vsrai_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); }
// CHECK-LABEL: @vsrai_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); }
// CHECK-LABEL: @vsrar_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); }
// CHECK-LABEL: @vsrar_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); }
// CHECK-LABEL: @vsrar_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); }
// CHECK-LABEL: @vsrar_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); }
// CHECK-LABEL: @vsrari_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); }
// CHECK-LABEL: @vsrari_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); }
// CHECK-LABEL: @vsrari_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); }
// CHECK-LABEL: @vsrari_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); }
// CHECK-LABEL: @vsrl_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); }
// CHECK-LABEL: @vsrl_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); }
// CHECK-LABEL: @vsrl_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); }
// CHECK-LABEL: @vsrl_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); }
// CHECK-LABEL: @vsrli_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); }
// CHECK-LABEL: @vsrli_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); }
// CHECK-LABEL: @vsrli_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); }
// CHECK-LABEL: @vsrli_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); }
// CHECK-LABEL: @vsrlr_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); }
// CHECK-LABEL: @vsrlr_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); }
// CHECK-LABEL: @vsrlr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); }
// CHECK-LABEL: @vsrlr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); }
// CHECK-LABEL: @vsrlri_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); }
// CHECK-LABEL: @vsrlri_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); }
// CHECK-LABEL: @vsrlri_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); }
// CHECK-LABEL: @vsrlri_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); }
// CHECK-LABEL: @vbitclr_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); }
// CHECK-LABEL: @vbitclr_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); }
// CHECK-LABEL: @vbitclr_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); }
// CHECK-LABEL: @vbitclr_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); }
// CHECK-LABEL: @vbitclri_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); }
// CHECK-LABEL: @vbitclri_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); }
// CHECK-LABEL: @vbitclri_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); }
// CHECK-LABEL: @vbitclri_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); }
// CHECK-LABEL: @vbitset_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); }
// CHECK-LABEL: @vbitset_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); }
// CHECK-LABEL: @vbitset_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); }
// CHECK-LABEL: @vbitset_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); }
// CHECK-LABEL: @vbitseti_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); }
// CHECK-LABEL: @vbitseti_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); }
// CHECK-LABEL: @vbitseti_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); }
// CHECK-LABEL: @vbitseti_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); }
// CHECK-LABEL: @vbitrev_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); }
// CHECK-LABEL: @vbitrev_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); }
// CHECK-LABEL: @vbitrev_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); }
// CHECK-LABEL: @vbitrev_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); }
// CHECK-LABEL: @vbitrevi_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); }
// CHECK-LABEL: @vbitrevi_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); }
// CHECK-LABEL: @vbitrevi_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); }
// CHECK-LABEL: @vbitrevi_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); }
// CHECK-LABEL: @vadd_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); }
// CHECK-LABEL: @vadd_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); }
// CHECK-LABEL: @vadd_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); }
// CHECK-LABEL: @vadd_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); }
// CHECK-LABEL: @vaddi_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); }
// CHECK-LABEL: @vaddi_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); }
// CHECK-LABEL: @vaddi_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); }
// CHECK-LABEL: @vaddi_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); }
// CHECK-LABEL: @vsub_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); }
// CHECK-LABEL: @vsub_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); }
// CHECK-LABEL: @vsub_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); }
// CHECK-LABEL: @vsub_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); }
// CHECK-LABEL: @vsubi_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); }
// CHECK-LABEL: @vsubi_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); }
// CHECK-LABEL: @vsubi_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); }
// CHECK-LABEL: @vsubi_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); }
// CHECK-LABEL: @vmax_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); }
// CHECK-LABEL: @vmax_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); }
// CHECK-LABEL: @vmax_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); }
// CHECK-LABEL: @vmax_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); }
// CHECK-LABEL: @vmaxi_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); }
// CHECK-LABEL: @vmaxi_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); }
// CHECK-LABEL: @vmaxi_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); }
// CHECK-LABEL: @vmaxi_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); }
// CHECK-LABEL: @vmax_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); }
// CHECK-LABEL: @vmax_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); }
// CHECK-LABEL: @vmax_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); }
// CHECK-LABEL: @vmax_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); }
// CHECK-LABEL: @vmaxi_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); }
// CHECK-LABEL: @vmaxi_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); }
// CHECK-LABEL: @vmaxi_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); }
// CHECK-LABEL: @vmaxi_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); }
// CHECK-LABEL: @vmin_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); }
// CHECK-LABEL: @vmin_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); }
// CHECK-LABEL: @vmin_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); }
// CHECK-LABEL: @vmin_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); }
// CHECK-LABEL: @vmini_b(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); }
// CHECK-LABEL: @vmini_h(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); }
// CHECK-LABEL: @vmini_w(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); }
// CHECK-LABEL: @vmini_d(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); }
// CHECK-LABEL: @vmin_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); }
// CHECK-LABEL: @vmin_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); }
// CHECK-LABEL: @vmin_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); }
// CHECK-LABEL: @vmin_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
// CHECK-NEXT:    ret i128 [[TMP3]]
//
v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); }
// CHECK-LABEL: @vmini_bu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); }
// CHECK-LABEL: @vmini_hu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); }
// CHECK-LABEL: @vmini_wu(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); }
// CHECK-LABEL: @vmini_du(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
// CHECK-NEXT:    ret i128 [[TMP2]]
//
v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); }
958 // CHECK-LABEL: @vseq_b(
959 // CHECK-NEXT:  entry:
960 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
961 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
962 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
963 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
964 // CHECK-NEXT:    ret i128 [[TMP3]]
965 //
966 v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); }
967 // CHECK-LABEL: @vseq_h(
968 // CHECK-NEXT:  entry:
969 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
970 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
971 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
972 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
973 // CHECK-NEXT:    ret i128 [[TMP3]]
974 //
975 v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); }
976 // CHECK-LABEL: @vseq_w(
977 // CHECK-NEXT:  entry:
978 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
979 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
980 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
981 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
982 // CHECK-NEXT:    ret i128 [[TMP3]]
983 //
984 v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); }
985 // CHECK-LABEL: @vseq_d(
986 // CHECK-NEXT:  entry:
987 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
988 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
989 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
990 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
991 // CHECK-NEXT:    ret i128 [[TMP3]]
992 //
993 v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); }
994 // CHECK-LABEL: @vseqi_b(
995 // CHECK-NEXT:  entry:
996 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
997 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1)
998 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
999 // CHECK-NEXT:    ret i128 [[TMP2]]
1000 //
1001 v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); }
1002 // CHECK-LABEL: @vseqi_h(
1003 // CHECK-NEXT:  entry:
1004 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1005 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1)
1006 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1007 // CHECK-NEXT:    ret i128 [[TMP2]]
1008 //
1009 v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); }
1010 // CHECK-LABEL: @vseqi_w(
1011 // CHECK-NEXT:  entry:
1012 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1013 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1)
1014 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1015 // CHECK-NEXT:    ret i128 [[TMP2]]
1016 //
1017 v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); }
1018 // CHECK-LABEL: @vseqi_d(
1019 // CHECK-NEXT:  entry:
1020 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1021 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1)
1022 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1023 // CHECK-NEXT:    ret i128 [[TMP2]]
1024 //
1025 v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); }
1026 // CHECK-LABEL: @vslti_b(
1027 // CHECK-NEXT:  entry:
1028 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1029 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1)
1030 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1031 // CHECK-NEXT:    ret i128 [[TMP2]]
1032 //
1033 v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); }
1034 // CHECK-LABEL: @vslt_b(
1035 // CHECK-NEXT:  entry:
1036 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1037 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1038 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1039 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1040 // CHECK-NEXT:    ret i128 [[TMP3]]
1041 //
1042 v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); }
1043 // CHECK-LABEL: @vslt_h(
1044 // CHECK-NEXT:  entry:
1045 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1046 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1047 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1048 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1049 // CHECK-NEXT:    ret i128 [[TMP3]]
1050 //
1051 v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); }
1052 // CHECK-LABEL: @vslt_w(
1053 // CHECK-NEXT:  entry:
1054 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1055 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1056 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1057 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1058 // CHECK-NEXT:    ret i128 [[TMP3]]
1059 //
1060 v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); }
1061 // CHECK-LABEL: @vslt_d(
1062 // CHECK-NEXT:  entry:
1063 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1064 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1065 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1066 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1067 // CHECK-NEXT:    ret i128 [[TMP3]]
1068 //
1069 v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); }
1070 // CHECK-LABEL: @vslti_h(
1071 // CHECK-NEXT:  entry:
1072 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1073 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1)
1074 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1075 // CHECK-NEXT:    ret i128 [[TMP2]]
1076 //
1077 v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); }
1078 // CHECK-LABEL: @vslti_w(
1079 // CHECK-NEXT:  entry:
1080 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1081 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1)
1082 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1083 // CHECK-NEXT:    ret i128 [[TMP2]]
1084 //
1085 v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); }
1086 // CHECK-LABEL: @vslti_d(
1087 // CHECK-NEXT:  entry:
1088 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1089 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1)
1090 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1091 // CHECK-NEXT:    ret i128 [[TMP2]]
1092 //
1093 v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); }
1094 // CHECK-LABEL: @vslt_bu(
1095 // CHECK-NEXT:  entry:
1096 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1097 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1098 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1099 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1100 // CHECK-NEXT:    ret i128 [[TMP3]]
1101 //
1102 v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); }
1103 // CHECK-LABEL: @vslt_hu(
1104 // CHECK-NEXT:  entry:
1105 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1106 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1107 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1108 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1109 // CHECK-NEXT:    ret i128 [[TMP3]]
1110 //
1111 v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); }
1112 // CHECK-LABEL: @vslt_wu(
1113 // CHECK-NEXT:  entry:
1114 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1115 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1116 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1117 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1118 // CHECK-NEXT:    ret i128 [[TMP3]]
1119 //
1120 v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); }
1121 // CHECK-LABEL: @vslt_du(
1122 // CHECK-NEXT:  entry:
1123 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1124 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1125 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1126 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1127 // CHECK-NEXT:    ret i128 [[TMP3]]
1128 //
1129 v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); }
1130 // CHECK-LABEL: @vslti_bu(
1131 // CHECK-NEXT:  entry:
1132 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1133 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1)
1134 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1135 // CHECK-NEXT:    ret i128 [[TMP2]]
1136 //
1137 v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); }
1138 // CHECK-LABEL: @vslti_hu(
1139 // CHECK-NEXT:  entry:
1140 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1141 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1)
1142 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1143 // CHECK-NEXT:    ret i128 [[TMP2]]
1144 //
1145 v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); }
1146 // CHECK-LABEL: @vslti_wu(
1147 // CHECK-NEXT:  entry:
1148 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1149 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1)
1150 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1151 // CHECK-NEXT:    ret i128 [[TMP2]]
1152 //
1153 v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); }
1154 // CHECK-LABEL: @vslti_du(
1155 // CHECK-NEXT:  entry:
1156 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1157 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1)
1158 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1159 // CHECK-NEXT:    ret i128 [[TMP2]]
1160 //
1161 v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); }
1162 // CHECK-LABEL: @vsle_b(
1163 // CHECK-NEXT:  entry:
1164 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1165 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1166 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1167 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1168 // CHECK-NEXT:    ret i128 [[TMP3]]
1169 //
1170 v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); }
1171 // CHECK-LABEL: @vsle_h(
1172 // CHECK-NEXT:  entry:
1173 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1174 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1175 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1176 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1177 // CHECK-NEXT:    ret i128 [[TMP3]]
1178 //
1179 v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); }
1180 // CHECK-LABEL: @vsle_w(
1181 // CHECK-NEXT:  entry:
1182 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1183 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1184 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1185 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1186 // CHECK-NEXT:    ret i128 [[TMP3]]
1187 //
1188 v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); }
1189 // CHECK-LABEL: @vsle_d(
1190 // CHECK-NEXT:  entry:
1191 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1192 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1193 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1194 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1195 // CHECK-NEXT:    ret i128 [[TMP3]]
1196 //
1197 v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); }
1198 // CHECK-LABEL: @vslei_b(
1199 // CHECK-NEXT:  entry:
1200 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1201 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1)
1202 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1203 // CHECK-NEXT:    ret i128 [[TMP2]]
1204 //
1205 v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); }
1206 // CHECK-LABEL: @vslei_h(
1207 // CHECK-NEXT:  entry:
1208 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1209 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1)
1210 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1211 // CHECK-NEXT:    ret i128 [[TMP2]]
1212 //
1213 v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); }
1214 // CHECK-LABEL: @vslei_w(
1215 // CHECK-NEXT:  entry:
1216 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1217 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1)
1218 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1219 // CHECK-NEXT:    ret i128 [[TMP2]]
1220 //
1221 v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); }
1222 // CHECK-LABEL: @vslei_d(
1223 // CHECK-NEXT:  entry:
1224 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1225 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1)
1226 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1227 // CHECK-NEXT:    ret i128 [[TMP2]]
1228 //
1229 v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); }
1230 // CHECK-LABEL: @vsle_bu(
1231 // CHECK-NEXT:  entry:
1232 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1233 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1234 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1235 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1236 // CHECK-NEXT:    ret i128 [[TMP3]]
1237 //
1238 v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); }
1239 // CHECK-LABEL: @vsle_hu(
1240 // CHECK-NEXT:  entry:
1241 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1242 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1243 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1244 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1245 // CHECK-NEXT:    ret i128 [[TMP3]]
1246 //
1247 v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); }
1248 // CHECK-LABEL: @vsle_wu(
1249 // CHECK-NEXT:  entry:
1250 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1251 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1252 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1253 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1254 // CHECK-NEXT:    ret i128 [[TMP3]]
1255 //
1256 v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); }
1257 // CHECK-LABEL: @vsle_du(
1258 // CHECK-NEXT:  entry:
1259 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1260 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1261 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1262 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1263 // CHECK-NEXT:    ret i128 [[TMP3]]
1264 //
1265 v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); }
1266 // CHECK-LABEL: @vslei_bu(
1267 // CHECK-NEXT:  entry:
1268 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1269 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1)
1270 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1271 // CHECK-NEXT:    ret i128 [[TMP2]]
1272 //
1273 v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); }
1274 // CHECK-LABEL: @vslei_hu(
1275 // CHECK-NEXT:  entry:
1276 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1277 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1)
1278 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1279 // CHECK-NEXT:    ret i128 [[TMP2]]
1280 //
1281 v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); }
1282 // CHECK-LABEL: @vslei_wu(
1283 // CHECK-NEXT:  entry:
1284 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1285 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1)
1286 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1287 // CHECK-NEXT:    ret i128 [[TMP2]]
1288 //
1289 v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); }
1290 // CHECK-LABEL: @vslei_du(
1291 // CHECK-NEXT:  entry:
1292 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1293 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1)
1294 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1295 // CHECK-NEXT:    ret i128 [[TMP2]]
1296 //
1297 v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); }
1298 // CHECK-LABEL: @vsat_b(
1299 // CHECK-NEXT:  entry:
1300 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1301 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1)
1302 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1303 // CHECK-NEXT:    ret i128 [[TMP2]]
1304 //
1305 v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); }
1306 // CHECK-LABEL: @vsat_h(
1307 // CHECK-NEXT:  entry:
1308 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1309 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1)
1310 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1311 // CHECK-NEXT:    ret i128 [[TMP2]]
1312 //
1313 v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); }
1314 // CHECK-LABEL: @vsat_w(
1315 // CHECK-NEXT:  entry:
1316 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1317 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1)
1318 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1319 // CHECK-NEXT:    ret i128 [[TMP2]]
1320 //
1321 v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); }
1322 // CHECK-LABEL: @vsat_d(
1323 // CHECK-NEXT:  entry:
1324 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1325 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1)
1326 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1327 // CHECK-NEXT:    ret i128 [[TMP2]]
1328 //
1329 v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); }
1330 // CHECK-LABEL: @vsat_bu(
1331 // CHECK-NEXT:  entry:
1332 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1333 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1)
1334 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
1335 // CHECK-NEXT:    ret i128 [[TMP2]]
1336 //
1337 v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); }
1338 // CHECK-LABEL: @vsat_hu(
1339 // CHECK-NEXT:  entry:
1340 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1341 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1)
1342 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
1343 // CHECK-NEXT:    ret i128 [[TMP2]]
1344 //
1345 v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); }
1346 // CHECK-LABEL: @vsat_wu(
1347 // CHECK-NEXT:  entry:
1348 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1349 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1)
1350 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1351 // CHECK-NEXT:    ret i128 [[TMP2]]
1352 //
1353 v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); }
1354 // CHECK-LABEL: @vsat_du(
1355 // CHECK-NEXT:  entry:
1356 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1357 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1)
1358 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1359 // CHECK-NEXT:    ret i128 [[TMP2]]
1360 //
1361 v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); }
1362 // CHECK-LABEL: @vadda_b(
1363 // CHECK-NEXT:  entry:
1364 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1365 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1366 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1367 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1368 // CHECK-NEXT:    ret i128 [[TMP3]]
1369 //
1370 v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); }
1371 // CHECK-LABEL: @vadda_h(
1372 // CHECK-NEXT:  entry:
1373 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1374 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1375 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1376 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1377 // CHECK-NEXT:    ret i128 [[TMP3]]
1378 //
1379 v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); }
1380 // CHECK-LABEL: @vadda_w(
1381 // CHECK-NEXT:  entry:
1382 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1383 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1384 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1385 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1386 // CHECK-NEXT:    ret i128 [[TMP3]]
1387 //
1388 v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); }
1389 // CHECK-LABEL: @vadda_d(
1390 // CHECK-NEXT:  entry:
1391 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1392 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1393 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1394 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1395 // CHECK-NEXT:    ret i128 [[TMP3]]
1396 //
1397 v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); }
1398 // CHECK-LABEL: @vsadd_b(
1399 // CHECK-NEXT:  entry:
1400 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1401 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1402 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1403 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1404 // CHECK-NEXT:    ret i128 [[TMP3]]
1405 //
1406 v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); }
1407 // CHECK-LABEL: @vsadd_h(
1408 // CHECK-NEXT:  entry:
1409 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1410 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1411 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1412 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1413 // CHECK-NEXT:    ret i128 [[TMP3]]
1414 //
1415 v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); }
1416 // CHECK-LABEL: @vsadd_w(
1417 // CHECK-NEXT:  entry:
1418 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1419 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1420 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1421 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1422 // CHECK-NEXT:    ret i128 [[TMP3]]
1423 //
1424 v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); }
1425 // CHECK-LABEL: @vsadd_d(
1426 // CHECK-NEXT:  entry:
1427 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1428 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1429 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1430 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1431 // CHECK-NEXT:    ret i128 [[TMP3]]
1432 //
1433 v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); }
1434 // CHECK-LABEL: @vsadd_bu(
1435 // CHECK-NEXT:  entry:
1436 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1437 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1438 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1439 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1440 // CHECK-NEXT:    ret i128 [[TMP3]]
1441 //
1442 v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); }
1443 // CHECK-LABEL: @vsadd_hu(
1444 // CHECK-NEXT:  entry:
1445 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1446 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1447 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1448 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1449 // CHECK-NEXT:    ret i128 [[TMP3]]
1450 //
1451 v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); }
1452 // CHECK-LABEL: @vsadd_wu(
1453 // CHECK-NEXT:  entry:
1454 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1455 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1456 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1457 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1458 // CHECK-NEXT:    ret i128 [[TMP3]]
1459 //
1460 v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); }
1461 // CHECK-LABEL: @vsadd_du(
1462 // CHECK-NEXT:  entry:
1463 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1464 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1465 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1466 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1467 // CHECK-NEXT:    ret i128 [[TMP3]]
1468 //
1469 v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); }
1470 // CHECK-LABEL: @vavg_b(
1471 // CHECK-NEXT:  entry:
1472 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1473 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1474 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1475 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1476 // CHECK-NEXT:    ret i128 [[TMP3]]
1477 //
1478 v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); }
1479 // CHECK-LABEL: @vavg_h(
1480 // CHECK-NEXT:  entry:
1481 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1482 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1483 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1484 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1485 // CHECK-NEXT:    ret i128 [[TMP3]]
1486 //
1487 v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); }
1488 // CHECK-LABEL: @vavg_w(
1489 // CHECK-NEXT:  entry:
1490 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1491 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1492 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1493 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1494 // CHECK-NEXT:    ret i128 [[TMP3]]
1495 //
1496 v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); }
1497 // CHECK-LABEL: @vavg_d(
1498 // CHECK-NEXT:  entry:
1499 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1500 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1501 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1502 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1503 // CHECK-NEXT:    ret i128 [[TMP3]]
1504 //
1505 v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); }
1506 // CHECK-LABEL: @vavg_bu(
1507 // CHECK-NEXT:  entry:
1508 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1509 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1510 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1511 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1512 // CHECK-NEXT:    ret i128 [[TMP3]]
1513 //
1514 v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); }
1515 // CHECK-LABEL: @vavg_hu(
1516 // CHECK-NEXT:  entry:
1517 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1518 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1519 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1520 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1521 // CHECK-NEXT:    ret i128 [[TMP3]]
1522 //
1523 v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); }
1524 // CHECK-LABEL: @vavg_wu(
1525 // CHECK-NEXT:  entry:
1526 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1527 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1528 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1529 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1530 // CHECK-NEXT:    ret i128 [[TMP3]]
1531 //
1532 v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); }
1533 // CHECK-LABEL: @vavg_du(
1534 // CHECK-NEXT:  entry:
1535 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1536 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1537 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1538 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1539 // CHECK-NEXT:    ret i128 [[TMP3]]
1540 //
1541 v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); }
1542 // CHECK-LABEL: @vavgr_b(
1543 // CHECK-NEXT:  entry:
1544 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1545 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1546 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1547 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1548 // CHECK-NEXT:    ret i128 [[TMP3]]
1549 //
1550 v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); }
1551 // CHECK-LABEL: @vavgr_h(
1552 // CHECK-NEXT:  entry:
1553 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1554 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1555 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1556 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1557 // CHECK-NEXT:    ret i128 [[TMP3]]
1558 //
1559 v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); }
1560 // CHECK-LABEL: @vavgr_w(
1561 // CHECK-NEXT:  entry:
1562 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1563 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1564 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1565 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1566 // CHECK-NEXT:    ret i128 [[TMP3]]
1567 //
1568 v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); }
1569 // CHECK-LABEL: @vavgr_d(
1570 // CHECK-NEXT:  entry:
1571 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1572 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1573 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1574 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1575 // CHECK-NEXT:    ret i128 [[TMP3]]
1576 //
1577 v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); }
1578 // CHECK-LABEL: @vavgr_bu(
1579 // CHECK-NEXT:  entry:
1580 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1581 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1582 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1583 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1584 // CHECK-NEXT:    ret i128 [[TMP3]]
1585 //
1586 v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); }
1587 // CHECK-LABEL: @vavgr_hu(
1588 // CHECK-NEXT:  entry:
1589 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1590 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1591 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1592 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1593 // CHECK-NEXT:    ret i128 [[TMP3]]
1594 //
1595 v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); }
1596 // CHECK-LABEL: @vavgr_wu(
1597 // CHECK-NEXT:  entry:
1598 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1599 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1600 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1601 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1602 // CHECK-NEXT:    ret i128 [[TMP3]]
1603 //
1604 v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); }
1605 // CHECK-LABEL: @vavgr_du(
1606 // CHECK-NEXT:  entry:
1607 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1608 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1609 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1610 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1611 // CHECK-NEXT:    ret i128 [[TMP3]]
1612 //
1613 v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); }
1614 // CHECK-LABEL: @vssub_b(
1615 // CHECK-NEXT:  entry:
1616 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1617 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1618 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1619 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1620 // CHECK-NEXT:    ret i128 [[TMP3]]
1621 //
1622 v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); }
1623 // CHECK-LABEL: @vssub_h(
1624 // CHECK-NEXT:  entry:
1625 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1626 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1627 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1628 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1629 // CHECK-NEXT:    ret i128 [[TMP3]]
1630 //
1631 v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); }
1632 // CHECK-LABEL: @vssub_w(
1633 // CHECK-NEXT:  entry:
1634 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1635 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1636 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1637 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1638 // CHECK-NEXT:    ret i128 [[TMP3]]
1639 //
1640 v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); }
1641 // CHECK-LABEL: @vssub_d(
1642 // CHECK-NEXT:  entry:
1643 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1644 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1645 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1646 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1647 // CHECK-NEXT:    ret i128 [[TMP3]]
1648 //
1649 v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); }
1650 // CHECK-LABEL: @vssub_bu(
1651 // CHECK-NEXT:  entry:
1652 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1653 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1654 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1655 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1656 // CHECK-NEXT:    ret i128 [[TMP3]]
1657 //
1658 v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); }
1659 // CHECK-LABEL: @vssub_hu(
1660 // CHECK-NEXT:  entry:
1661 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1662 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1663 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1664 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1665 // CHECK-NEXT:    ret i128 [[TMP3]]
1666 //
1667 v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); }
1668 // CHECK-LABEL: @vssub_wu(
1669 // CHECK-NEXT:  entry:
1670 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1671 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1672 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1673 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1674 // CHECK-NEXT:    ret i128 [[TMP3]]
1675 //
1676 v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); }
1677 // CHECK-LABEL: @vssub_du(
1678 // CHECK-NEXT:  entry:
1679 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1680 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1681 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1682 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1683 // CHECK-NEXT:    ret i128 [[TMP3]]
1684 //
1685 v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); }
1686 // CHECK-LABEL: @vabsd_b(
1687 // CHECK-NEXT:  entry:
1688 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1689 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1690 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1691 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1692 // CHECK-NEXT:    ret i128 [[TMP3]]
1693 //
1694 v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); }
1695 // CHECK-LABEL: @vabsd_h(
1696 // CHECK-NEXT:  entry:
1697 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1698 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1699 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1700 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1701 // CHECK-NEXT:    ret i128 [[TMP3]]
1702 //
1703 v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); }
1704 // CHECK-LABEL: @vabsd_w(
1705 // CHECK-NEXT:  entry:
1706 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1707 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1708 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1709 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1710 // CHECK-NEXT:    ret i128 [[TMP3]]
1711 //
1712 v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); }
1713 // CHECK-LABEL: @vabsd_d(
1714 // CHECK-NEXT:  entry:
1715 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1716 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1717 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1718 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1719 // CHECK-NEXT:    ret i128 [[TMP3]]
1720 //
1721 v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); }
1722 // CHECK-LABEL: @vabsd_bu(
1723 // CHECK-NEXT:  entry:
1724 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1725 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1726 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1727 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1728 // CHECK-NEXT:    ret i128 [[TMP3]]
1729 //
1730 v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); }
1731 // CHECK-LABEL: @vabsd_hu(
1732 // CHECK-NEXT:  entry:
1733 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1734 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1735 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1736 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1737 // CHECK-NEXT:    ret i128 [[TMP3]]
1738 //
1739 v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); }
1740 // CHECK-LABEL: @vabsd_wu(
1741 // CHECK-NEXT:  entry:
1742 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1743 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1744 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1745 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1746 // CHECK-NEXT:    ret i128 [[TMP3]]
1747 //
1748 v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); }
1749 // CHECK-LABEL: @vabsd_du(
1750 // CHECK-NEXT:  entry:
1751 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1752 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1753 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1754 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1755 // CHECK-NEXT:    ret i128 [[TMP3]]
1756 //
1757 v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); }
1758 // CHECK-LABEL: @vmul_b(
1759 // CHECK-NEXT:  entry:
1760 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1761 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1762 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1763 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1764 // CHECK-NEXT:    ret i128 [[TMP3]]
1765 //
1766 v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); }
1767 // CHECK-LABEL: @vmul_h(
1768 // CHECK-NEXT:  entry:
1769 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1770 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1771 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1772 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1773 // CHECK-NEXT:    ret i128 [[TMP3]]
1774 //
1775 v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); }
1776 // CHECK-LABEL: @vmul_w(
1777 // CHECK-NEXT:  entry:
1778 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1779 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1780 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1781 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1782 // CHECK-NEXT:    ret i128 [[TMP3]]
1783 //
1784 v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); }
1785 // CHECK-LABEL: @vmul_d(
1786 // CHECK-NEXT:  entry:
1787 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1788 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1789 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1790 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1791 // CHECK-NEXT:    ret i128 [[TMP3]]
1792 //
1793 v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); }
1794 // CHECK-LABEL: @vmadd_b(
1795 // CHECK-NEXT:  entry:
1796 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1797 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1798 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
1799 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1800 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
1801 // CHECK-NEXT:    ret i128 [[TMP4]]
1802 //
1803 v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) {
1804   return __lsx_vmadd_b(_1, _2, _3);
1805 }
1806 // CHECK-LABEL: @vmadd_h(
1807 // CHECK-NEXT:  entry:
1808 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1809 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1810 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
1811 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1812 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
1813 // CHECK-NEXT:    ret i128 [[TMP4]]
1814 //
1815 v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) {
1816   return __lsx_vmadd_h(_1, _2, _3);
1817 }
1818 // CHECK-LABEL: @vmadd_w(
1819 // CHECK-NEXT:  entry:
1820 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1821 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1822 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
1823 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
1824 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1825 // CHECK-NEXT:    ret i128 [[TMP4]]
1826 //
1827 v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) {
1828   return __lsx_vmadd_w(_1, _2, _3);
1829 }
1830 // CHECK-LABEL: @vmadd_d(
1831 // CHECK-NEXT:  entry:
1832 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1833 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1834 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
1835 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
1836 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1837 // CHECK-NEXT:    ret i128 [[TMP4]]
1838 //
1839 v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) {
1840   return __lsx_vmadd_d(_1, _2, _3);
1841 }
1842 // CHECK-LABEL: @vmsub_b(
1843 // CHECK-NEXT:  entry:
1844 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1845 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1846 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
1847 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1848 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
1849 // CHECK-NEXT:    ret i128 [[TMP4]]
1850 //
1851 v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) {
1852   return __lsx_vmsub_b(_1, _2, _3);
1853 }
1854 // CHECK-LABEL: @vmsub_h(
1855 // CHECK-NEXT:  entry:
1856 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1857 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1858 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
1859 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1860 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
1861 // CHECK-NEXT:    ret i128 [[TMP4]]
1862 //
1863 v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) {
1864   return __lsx_vmsub_h(_1, _2, _3);
1865 }
1866 // CHECK-LABEL: @vmsub_w(
1867 // CHECK-NEXT:  entry:
1868 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1869 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1870 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
1871 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
1872 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1873 // CHECK-NEXT:    ret i128 [[TMP4]]
1874 //
1875 v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) {
1876   return __lsx_vmsub_w(_1, _2, _3);
1877 }
1878 // CHECK-LABEL: @vmsub_d(
1879 // CHECK-NEXT:  entry:
1880 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1881 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1882 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
1883 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
1884 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1885 // CHECK-NEXT:    ret i128 [[TMP4]]
1886 //
1887 v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) {
1888   return __lsx_vmsub_d(_1, _2, _3);
1889 }
1890 // CHECK-LABEL: @vdiv_b(
1891 // CHECK-NEXT:  entry:
1892 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1893 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1894 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1895 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1896 // CHECK-NEXT:    ret i128 [[TMP3]]
1897 //
1898 v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); }
1899 // CHECK-LABEL: @vdiv_h(
1900 // CHECK-NEXT:  entry:
1901 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1902 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1903 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1904 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1905 // CHECK-NEXT:    ret i128 [[TMP3]]
1906 //
1907 v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); }
1908 // CHECK-LABEL: @vdiv_w(
1909 // CHECK-NEXT:  entry:
1910 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1911 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1912 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1913 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1914 // CHECK-NEXT:    ret i128 [[TMP3]]
1915 //
1916 v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); }
1917 // CHECK-LABEL: @vdiv_d(
1918 // CHECK-NEXT:  entry:
1919 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1920 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1921 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1922 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1923 // CHECK-NEXT:    ret i128 [[TMP3]]
1924 //
1925 v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); }
1926 // CHECK-LABEL: @vdiv_bu(
1927 // CHECK-NEXT:  entry:
1928 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1929 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1930 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1931 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
1932 // CHECK-NEXT:    ret i128 [[TMP3]]
1933 //
1934 v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); }
1935 // CHECK-LABEL: @vdiv_hu(
1936 // CHECK-NEXT:  entry:
1937 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1938 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1939 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1940 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1941 // CHECK-NEXT:    ret i128 [[TMP3]]
1942 //
1943 v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); }
1944 // CHECK-LABEL: @vdiv_wu(
1945 // CHECK-NEXT:  entry:
1946 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1947 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1948 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1949 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1950 // CHECK-NEXT:    ret i128 [[TMP3]]
1951 //
1952 v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); }
1953 // CHECK-LABEL: @vdiv_du(
1954 // CHECK-NEXT:  entry:
1955 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
1956 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
1957 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
1958 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1959 // CHECK-NEXT:    ret i128 [[TMP3]]
1960 //
1961 v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); }
1962 // CHECK-LABEL: @vhaddw_h_b(
1963 // CHECK-NEXT:  entry:
1964 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1965 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1966 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1967 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1968 // CHECK-NEXT:    ret i128 [[TMP3]]
1969 //
1970 v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); }
1971 // CHECK-LABEL: @vhaddw_w_h(
1972 // CHECK-NEXT:  entry:
1973 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
1974 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
1975 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
1976 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1977 // CHECK-NEXT:    ret i128 [[TMP3]]
1978 //
1979 v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); }
1980 // CHECK-LABEL: @vhaddw_d_w(
1981 // CHECK-NEXT:  entry:
1982 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
1983 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
1984 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
1985 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1986 // CHECK-NEXT:    ret i128 [[TMP3]]
1987 //
1988 v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); }
1989 // CHECK-LABEL: @vhaddw_hu_bu(
1990 // CHECK-NEXT:  entry:
1991 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
1992 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
1993 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
1994 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1995 // CHECK-NEXT:    ret i128 [[TMP3]]
1996 //
1997 v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); }
1998 // CHECK-LABEL: @vhaddw_wu_hu(
1999 // CHECK-NEXT:  entry:
2000 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2001 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2002 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2003 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2004 // CHECK-NEXT:    ret i128 [[TMP3]]
2005 //
2006 v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); }
2007 // CHECK-LABEL: @vhaddw_du_wu(
2008 // CHECK-NEXT:  entry:
2009 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2010 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2011 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2012 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2013 // CHECK-NEXT:    ret i128 [[TMP3]]
2014 //
2015 v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); }
2016 // CHECK-LABEL: @vhsubw_h_b(
2017 // CHECK-NEXT:  entry:
2018 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2019 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2020 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2021 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2022 // CHECK-NEXT:    ret i128 [[TMP3]]
2023 //
2024 v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); }
2025 // CHECK-LABEL: @vhsubw_w_h(
2026 // CHECK-NEXT:  entry:
2027 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2028 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2029 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2030 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2031 // CHECK-NEXT:    ret i128 [[TMP3]]
2032 //
2033 v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); }
2034 // CHECK-LABEL: @vhsubw_d_w(
2035 // CHECK-NEXT:  entry:
2036 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2037 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2038 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2039 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2040 // CHECK-NEXT:    ret i128 [[TMP3]]
2041 //
2042 v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); }
2043 // CHECK-LABEL: @vhsubw_hu_bu(
2044 // CHECK-NEXT:  entry:
2045 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2046 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2047 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2048 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2049 // CHECK-NEXT:    ret i128 [[TMP3]]
2050 //
2051 v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); }
2052 // CHECK-LABEL: @vhsubw_wu_hu(
2053 // CHECK-NEXT:  entry:
2054 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2055 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2056 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2057 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2058 // CHECK-NEXT:    ret i128 [[TMP3]]
2059 //
2060 v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); }
2061 // CHECK-LABEL: @vhsubw_du_wu(
2062 // CHECK-NEXT:  entry:
2063 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2064 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2065 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2066 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2067 // CHECK-NEXT:    ret i128 [[TMP3]]
2068 //
2069 v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); }
2070 // CHECK-LABEL: @vmod_b(
2071 // CHECK-NEXT:  entry:
2072 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2073 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2074 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2075 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2076 // CHECK-NEXT:    ret i128 [[TMP3]]
2077 //
2078 v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); }
2079 // CHECK-LABEL: @vmod_h(
2080 // CHECK-NEXT:  entry:
2081 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2082 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2083 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2084 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2085 // CHECK-NEXT:    ret i128 [[TMP3]]
2086 //
2087 v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); }
2088 // CHECK-LABEL: @vmod_w(
2089 // CHECK-NEXT:  entry:
2090 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2091 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2092 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2093 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2094 // CHECK-NEXT:    ret i128 [[TMP3]]
2095 //
2096 v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); }
2097 // CHECK-LABEL: @vmod_d(
2098 // CHECK-NEXT:  entry:
2099 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2100 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2101 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2102 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2103 // CHECK-NEXT:    ret i128 [[TMP3]]
2104 //
2105 v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); }
2106 // CHECK-LABEL: @vmod_bu(
2107 // CHECK-NEXT:  entry:
2108 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2109 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2110 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2111 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2112 // CHECK-NEXT:    ret i128 [[TMP3]]
2113 //
2114 v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); }
2115 // CHECK-LABEL: @vmod_hu(
2116 // CHECK-NEXT:  entry:
2117 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2118 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2119 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2120 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2121 // CHECK-NEXT:    ret i128 [[TMP3]]
2122 //
2123 v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); }
2124 // CHECK-LABEL: @vmod_wu(
2125 // CHECK-NEXT:  entry:
2126 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2127 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2128 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2129 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2130 // CHECK-NEXT:    ret i128 [[TMP3]]
2131 //
2132 v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); }
2133 // CHECK-LABEL: @vmod_du(
2134 // CHECK-NEXT:  entry:
2135 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2136 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2137 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2138 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2139 // CHECK-NEXT:    ret i128 [[TMP3]]
2140 //
2141 v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); }
2142 // CHECK-LABEL: @vreplve_b(
2143 // CHECK-NEXT:  entry:
2144 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2145 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
2146 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2147 // CHECK-NEXT:    ret i128 [[TMP2]]
2148 //
2149 v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); }
2150 // CHECK-LABEL: @vreplve_h(
2151 // CHECK-NEXT:  entry:
2152 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2153 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
2154 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2155 // CHECK-NEXT:    ret i128 [[TMP2]]
2156 //
2157 v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); }
2158 // CHECK-LABEL: @vreplve_w(
2159 // CHECK-NEXT:  entry:
2160 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2161 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
2162 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2163 // CHECK-NEXT:    ret i128 [[TMP2]]
2164 //
2165 v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); }
2166 // CHECK-LABEL: @vreplve_d(
2167 // CHECK-NEXT:  entry:
2168 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2169 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
2170 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2171 // CHECK-NEXT:    ret i128 [[TMP2]]
2172 //
2173 v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); }
2174 // CHECK-LABEL: @vreplvei_b(
2175 // CHECK-NEXT:  entry:
2176 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2177 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
2178 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2179 // CHECK-NEXT:    ret i128 [[TMP2]]
2180 //
2181 v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); }
2182 // CHECK-LABEL: @vreplvei_h(
2183 // CHECK-NEXT:  entry:
2184 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2185 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
2186 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2187 // CHECK-NEXT:    ret i128 [[TMP2]]
2188 //
2189 v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); }
2190 // CHECK-LABEL: @vreplvei_w(
2191 // CHECK-NEXT:  entry:
2192 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2193 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
2194 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2195 // CHECK-NEXT:    ret i128 [[TMP2]]
2196 //
2197 v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); }
2198 // CHECK-LABEL: @vreplvei_d(
2199 // CHECK-NEXT:  entry:
2200 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2201 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
2202 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2203 // CHECK-NEXT:    ret i128 [[TMP2]]
2204 //
2205 v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); }
2206 // CHECK-LABEL: @vpickev_b(
2207 // CHECK-NEXT:  entry:
2208 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2209 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2210 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2211 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2212 // CHECK-NEXT:    ret i128 [[TMP3]]
2213 //
2214 v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); }
2215 // CHECK-LABEL: @vpickev_h(
2216 // CHECK-NEXT:  entry:
2217 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2218 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2219 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2220 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2221 // CHECK-NEXT:    ret i128 [[TMP3]]
2222 //
2223 v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); }
2224 // CHECK-LABEL: @vpickev_w(
2225 // CHECK-NEXT:  entry:
2226 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2227 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2228 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2229 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2230 // CHECK-NEXT:    ret i128 [[TMP3]]
2231 //
2232 v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); }
2233 // CHECK-LABEL: @vpickev_d(
2234 // CHECK-NEXT:  entry:
2235 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2236 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2237 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2238 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2239 // CHECK-NEXT:    ret i128 [[TMP3]]
2240 //
2241 v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); }
2242 // CHECK-LABEL: @vpickod_b(
2243 // CHECK-NEXT:  entry:
2244 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2245 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2246 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2247 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2248 // CHECK-NEXT:    ret i128 [[TMP3]]
2249 //
2250 v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); }
2251 // CHECK-LABEL: @vpickod_h(
2252 // CHECK-NEXT:  entry:
2253 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2254 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2255 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2256 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2257 // CHECK-NEXT:    ret i128 [[TMP3]]
2258 //
2259 v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); }
2260 // CHECK-LABEL: @vpickod_w(
2261 // CHECK-NEXT:  entry:
2262 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2263 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2264 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2265 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2266 // CHECK-NEXT:    ret i128 [[TMP3]]
2267 //
2268 v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); }
2269 // CHECK-LABEL: @vpickod_d(
2270 // CHECK-NEXT:  entry:
2271 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2272 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2273 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2274 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2275 // CHECK-NEXT:    ret i128 [[TMP3]]
2276 //
2277 v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); }
2278 // CHECK-LABEL: @vilvh_b(
2279 // CHECK-NEXT:  entry:
2280 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2281 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2282 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2283 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2284 // CHECK-NEXT:    ret i128 [[TMP3]]
2285 //
2286 v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); }
2287 // CHECK-LABEL: @vilvh_h(
2288 // CHECK-NEXT:  entry:
2289 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2290 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2291 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2292 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2293 // CHECK-NEXT:    ret i128 [[TMP3]]
2294 //
2295 v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); }
2296 // CHECK-LABEL: @vilvh_w(
2297 // CHECK-NEXT:  entry:
2298 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2299 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2300 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2301 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2302 // CHECK-NEXT:    ret i128 [[TMP3]]
2303 //
2304 v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); }
2305 // CHECK-LABEL: @vilvh_d(
2306 // CHECK-NEXT:  entry:
2307 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2308 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2309 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2310 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2311 // CHECK-NEXT:    ret i128 [[TMP3]]
2312 //
2313 v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); }
2314 // CHECK-LABEL: @vilvl_b(
2315 // CHECK-NEXT:  entry:
2316 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2317 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2318 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2319 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2320 // CHECK-NEXT:    ret i128 [[TMP3]]
2321 //
2322 v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); }
2323 // CHECK-LABEL: @vilvl_h(
2324 // CHECK-NEXT:  entry:
2325 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2326 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2327 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2328 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2329 // CHECK-NEXT:    ret i128 [[TMP3]]
2330 //
2331 v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); }
2332 // CHECK-LABEL: @vilvl_w(
2333 // CHECK-NEXT:  entry:
2334 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2335 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2336 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2337 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2338 // CHECK-NEXT:    ret i128 [[TMP3]]
2339 //
2340 v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); }
2341 // CHECK-LABEL: @vilvl_d(
2342 // CHECK-NEXT:  entry:
2343 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2344 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2345 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2346 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2347 // CHECK-NEXT:    ret i128 [[TMP3]]
2348 //
2349 v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); }
2350 // CHECK-LABEL: @vpackev_b(
2351 // CHECK-NEXT:  entry:
2352 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2353 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2354 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2355 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2356 // CHECK-NEXT:    ret i128 [[TMP3]]
2357 //
2358 v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); }
2359 // CHECK-LABEL: @vpackev_h(
2360 // CHECK-NEXT:  entry:
2361 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2362 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2363 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2364 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2365 // CHECK-NEXT:    ret i128 [[TMP3]]
2366 //
2367 v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); }
2368 // CHECK-LABEL: @vpackev_w(
2369 // CHECK-NEXT:  entry:
2370 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2371 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2372 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2373 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2374 // CHECK-NEXT:    ret i128 [[TMP3]]
2375 //
2376 v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); }
2377 // CHECK-LABEL: @vpackev_d(
2378 // CHECK-NEXT:  entry:
2379 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2380 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2381 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2382 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2383 // CHECK-NEXT:    ret i128 [[TMP3]]
2384 //
2385 v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); }
2386 // CHECK-LABEL: @vpackod_b(
2387 // CHECK-NEXT:  entry:
2388 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2389 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2390 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2391 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2392 // CHECK-NEXT:    ret i128 [[TMP3]]
2393 //
2394 v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); }
2395 // CHECK-LABEL: @vpackod_h(
2396 // CHECK-NEXT:  entry:
2397 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2398 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2399 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2400 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2401 // CHECK-NEXT:    ret i128 [[TMP3]]
2402 //
2403 v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); }
2404 // CHECK-LABEL: @vpackod_w(
2405 // CHECK-NEXT:  entry:
2406 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2407 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2408 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
2409 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
2410 // CHECK-NEXT:    ret i128 [[TMP3]]
2411 //
2412 v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); }
2413 // CHECK-LABEL: @vpackod_d(
2414 // CHECK-NEXT:  entry:
2415 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2416 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2417 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
2418 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2419 // CHECK-NEXT:    ret i128 [[TMP3]]
2420 //
2421 v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); }
2422 // CHECK-LABEL: @vshuf_h(
2423 // CHECK-NEXT:  entry:
2424 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2425 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
2426 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
2427 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2428 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
2429 // CHECK-NEXT:    ret i128 [[TMP4]]
2430 //
2431 v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
2432   return __lsx_vshuf_h(_1, _2, _3);
2433 }
2434 // CHECK-LABEL: @vshuf_w(
2435 // CHECK-NEXT:  entry:
2436 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2437 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
2438 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
2439 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2440 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
2441 // CHECK-NEXT:    ret i128 [[TMP4]]
2442 //
2443 v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
2444   return __lsx_vshuf_w(_1, _2, _3);
2445 }
2446 // CHECK-LABEL: @vshuf_d(
2447 // CHECK-NEXT:  entry:
2448 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2449 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
2450 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
2451 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2452 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2453 // CHECK-NEXT:    ret i128 [[TMP4]]
2454 //
2455 v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
2456   return __lsx_vshuf_d(_1, _2, _3);
2457 }
2458 // CHECK-LABEL: @vand_v(
2459 // CHECK-NEXT:  entry:
2460 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2461 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2462 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2463 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2464 // CHECK-NEXT:    ret i128 [[TMP3]]
2465 //
2466 v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); }
2467 // CHECK-LABEL: @vandi_b(
2468 // CHECK-NEXT:  entry:
2469 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2470 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1)
2471 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2472 // CHECK-NEXT:    ret i128 [[TMP2]]
2473 //
2474 v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); }
2475 // CHECK-LABEL: @vor_v(
2476 // CHECK-NEXT:  entry:
2477 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2478 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2479 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2480 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2481 // CHECK-NEXT:    ret i128 [[TMP3]]
2482 //
2483 v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); }
2484 // CHECK-LABEL: @vori_b(
2485 // CHECK-NEXT:  entry:
2486 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2487 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1)
2488 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2489 // CHECK-NEXT:    ret i128 [[TMP2]]
2490 //
2491 v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); }
2492 // CHECK-LABEL: @vnor_v(
2493 // CHECK-NEXT:  entry:
2494 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2495 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2496 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2497 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2498 // CHECK-NEXT:    ret i128 [[TMP3]]
2499 //
2500 v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); }
2501 // CHECK-LABEL: @vnori_b(
2502 // CHECK-NEXT:  entry:
2503 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2504 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1)
2505 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2506 // CHECK-NEXT:    ret i128 [[TMP2]]
2507 //
2508 v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); }
2509 // CHECK-LABEL: @vxor_v(
2510 // CHECK-NEXT:  entry:
2511 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2512 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2513 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2514 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2515 // CHECK-NEXT:    ret i128 [[TMP3]]
2516 //
2517 v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); }
2518 // CHECK-LABEL: @vxori_b(
2519 // CHECK-NEXT:  entry:
2520 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2521 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1)
2522 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2523 // CHECK-NEXT:    ret i128 [[TMP2]]
2524 //
2525 v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); }
2526 // CHECK-LABEL: @vbitsel_v(
2527 // CHECK-NEXT:  entry:
2528 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2529 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2530 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
2531 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2532 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
2533 // CHECK-NEXT:    ret i128 [[TMP4]]
2534 //
2535 v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
2536   return __lsx_vbitsel_v(_1, _2, _3);
2537 }
2538 // CHECK-LABEL: @vbitseli_b(
2539 // CHECK-NEXT:  entry:
2540 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2541 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
2542 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
2543 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
2544 // CHECK-NEXT:    ret i128 [[TMP3]]
2545 //
2546 v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); }
2547 // CHECK-LABEL: @vshuf4i_b(
2548 // CHECK-NEXT:  entry:
2549 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2550 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1)
2551 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2552 // CHECK-NEXT:    ret i128 [[TMP2]]
2553 //
2554 v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); }
2555 // CHECK-LABEL: @vshuf4i_h(
2556 // CHECK-NEXT:  entry:
2557 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2558 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1)
2559 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2560 // CHECK-NEXT:    ret i128 [[TMP2]]
2561 //
2562 v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); }
2563 // CHECK-LABEL: @vshuf4i_w(
2564 // CHECK-NEXT:  entry:
2565 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2566 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1)
2567 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2568 // CHECK-NEXT:    ret i128 [[TMP2]]
2569 //
2570 v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); }
2571 // CHECK-LABEL: @vreplgr2vr_b(
2572 // CHECK-NEXT:  entry:
2573 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]])
2574 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
2575 // CHECK-NEXT:    ret i128 [[TMP1]]
2576 //
2577 v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); }
2578 // CHECK-LABEL: @vreplgr2vr_h(
2579 // CHECK-NEXT:  entry:
2580 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]])
2581 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
2582 // CHECK-NEXT:    ret i128 [[TMP1]]
2583 //
2584 v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); }
2585 // CHECK-LABEL: @vreplgr2vr_w(
2586 // CHECK-NEXT:  entry:
2587 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]])
2588 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
2589 // CHECK-NEXT:    ret i128 [[TMP1]]
2590 //
2591 v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); }
2592 // CHECK-LABEL: @vreplgr2vr_d(
2593 // CHECK-NEXT:  entry:
2594 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
2595 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
2596 // CHECK-NEXT:    ret i128 [[TMP1]]
2597 //
2598 v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); }
2599 // CHECK-LABEL: @vpcnt_b(
2600 // CHECK-NEXT:  entry:
2601 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2602 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]])
2603 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2604 // CHECK-NEXT:    ret i128 [[TMP2]]
2605 //
2606 v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); }
2607 // CHECK-LABEL: @vpcnt_h(
2608 // CHECK-NEXT:  entry:
2609 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2610 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]])
2611 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2612 // CHECK-NEXT:    ret i128 [[TMP2]]
2613 //
2614 v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); }
2615 // CHECK-LABEL: @vpcnt_w(
2616 // CHECK-NEXT:  entry:
2617 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2618 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]])
2619 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2620 // CHECK-NEXT:    ret i128 [[TMP2]]
2621 //
2622 v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); }
2623 // CHECK-LABEL: @vpcnt_d(
2624 // CHECK-NEXT:  entry:
2625 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2626 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]])
2627 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2628 // CHECK-NEXT:    ret i128 [[TMP2]]
2629 //
2630 v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); }
2631 // CHECK-LABEL: @vclo_b(
2632 // CHECK-NEXT:  entry:
2633 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2634 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]])
2635 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2636 // CHECK-NEXT:    ret i128 [[TMP2]]
2637 //
2638 v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); }
2639 // CHECK-LABEL: @vclo_h(
2640 // CHECK-NEXT:  entry:
2641 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2642 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]])
2643 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2644 // CHECK-NEXT:    ret i128 [[TMP2]]
2645 //
2646 v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); }
2647 // CHECK-LABEL: @vclo_w(
2648 // CHECK-NEXT:  entry:
2649 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2650 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]])
2651 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2652 // CHECK-NEXT:    ret i128 [[TMP2]]
2653 //
2654 v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); }
2655 // CHECK-LABEL: @vclo_d(
2656 // CHECK-NEXT:  entry:
2657 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2658 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]])
2659 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2660 // CHECK-NEXT:    ret i128 [[TMP2]]
2661 //
2662 v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); }
2663 // CHECK-LABEL: @vclz_b(
2664 // CHECK-NEXT:  entry:
2665 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2666 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]])
2667 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2668 // CHECK-NEXT:    ret i128 [[TMP2]]
2669 //
2670 v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); }
2671 // CHECK-LABEL: @vclz_h(
2672 // CHECK-NEXT:  entry:
2673 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2674 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]])
2675 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2676 // CHECK-NEXT:    ret i128 [[TMP2]]
2677 //
2678 v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); }
2679 // CHECK-LABEL: @vclz_w(
2680 // CHECK-NEXT:  entry:
2681 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2682 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]])
2683 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2684 // CHECK-NEXT:    ret i128 [[TMP2]]
2685 //
2686 v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); }
2687 // CHECK-LABEL: @vclz_d(
2688 // CHECK-NEXT:  entry:
2689 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2690 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]])
2691 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2692 // CHECK-NEXT:    ret i128 [[TMP2]]
2693 //
2694 v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); }
2695 // CHECK-LABEL: @vpickve2gr_b(
2696 // CHECK-NEXT:  entry:
2697 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2698 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1)
2699 // CHECK-NEXT:    ret i32 [[TMP1]]
2700 //
2701 int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); }
2702 // CHECK-LABEL: @vpickve2gr_h(
2703 // CHECK-NEXT:  entry:
2704 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2705 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1)
2706 // CHECK-NEXT:    ret i32 [[TMP1]]
2707 //
2708 int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); }
2709 // CHECK-LABEL: @vpickve2gr_w(
2710 // CHECK-NEXT:  entry:
2711 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2712 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1)
2713 // CHECK-NEXT:    ret i32 [[TMP1]]
2714 //
2715 int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); }
2716 // CHECK-LABEL: @vpickve2gr_d(
2717 // CHECK-NEXT:  entry:
2718 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2719 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1)
2720 // CHECK-NEXT:    ret i64 [[TMP1]]
2721 //
2722 long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); }
2723 // CHECK-LABEL: @vpickve2gr_bu(
2724 // CHECK-NEXT:  entry:
2725 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2726 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1)
2727 // CHECK-NEXT:    ret i32 [[TMP1]]
2728 //
2729 unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); }
2730 // CHECK-LABEL: @vpickve2gr_hu(
2731 // CHECK-NEXT:  entry:
2732 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2733 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1)
2734 // CHECK-NEXT:    ret i32 [[TMP1]]
2735 //
2736 unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); }
2737 // CHECK-LABEL: @vpickve2gr_wu(
2738 // CHECK-NEXT:  entry:
2739 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2740 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1)
2741 // CHECK-NEXT:    ret i32 [[TMP1]]
2742 //
2743 unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); }
2744 // CHECK-LABEL: @vpickve2gr_du(
2745 // CHECK-NEXT:  entry:
2746 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2747 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1)
2748 // CHECK-NEXT:    ret i64 [[TMP1]]
2749 //
2750 unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); }
2751 // CHECK-LABEL: @vinsgr2vr_b(
2752 // CHECK-NEXT:  entry:
2753 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
2754 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1)
2755 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
2756 // CHECK-NEXT:    ret i128 [[TMP2]]
2757 //
2758 v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); }
2759 // CHECK-LABEL: @vinsgr2vr_h(
2760 // CHECK-NEXT:  entry:
2761 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
2762 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1)
2763 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
2764 // CHECK-NEXT:    ret i128 [[TMP2]]
2765 //
2766 v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); }
2767 // CHECK-LABEL: @vinsgr2vr_w(
2768 // CHECK-NEXT:  entry:
2769 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
2770 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1)
2771 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2772 // CHECK-NEXT:    ret i128 [[TMP2]]
2773 //
2774 v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); }
2775 // CHECK-LABEL: @vinsgr2vr_d(
2776 // CHECK-NEXT:  entry:
2777 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
2778 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
2779 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2780 // CHECK-NEXT:    ret i128 [[TMP2]]
2781 //
2782 v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); }
2783 // CHECK-LABEL: @vfadd_s(
2784 // CHECK-NEXT:  entry:
2785 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2786 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2787 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2788 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2789 // CHECK-NEXT:    ret i128 [[TMP3]]
2790 //
2791 v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); }
2792 // CHECK-LABEL: @vfadd_d(
2793 // CHECK-NEXT:  entry:
2794 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2795 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2796 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2797 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2798 // CHECK-NEXT:    ret i128 [[TMP3]]
2799 //
2800 v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); }
2801 // CHECK-LABEL: @vfsub_s(
2802 // CHECK-NEXT:  entry:
2803 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2804 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2805 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2806 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2807 // CHECK-NEXT:    ret i128 [[TMP3]]
2808 //
2809 v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); }
2810 // CHECK-LABEL: @vfsub_d(
2811 // CHECK-NEXT:  entry:
2812 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2813 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2814 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2815 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2816 // CHECK-NEXT:    ret i128 [[TMP3]]
2817 //
2818 v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); }
2819 // CHECK-LABEL: @vfmul_s(
2820 // CHECK-NEXT:  entry:
2821 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2822 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2823 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2824 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2825 // CHECK-NEXT:    ret i128 [[TMP3]]
2826 //
2827 v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); }
2828 // CHECK-LABEL: @vfmul_d(
2829 // CHECK-NEXT:  entry:
2830 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2831 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2832 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2833 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2834 // CHECK-NEXT:    ret i128 [[TMP3]]
2835 //
2836 v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); }
2837 // CHECK-LABEL: @vfdiv_s(
2838 // CHECK-NEXT:  entry:
2839 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2840 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2841 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2842 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2843 // CHECK-NEXT:    ret i128 [[TMP3]]
2844 //
2845 v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); }
2846 // CHECK-LABEL: @vfdiv_d(
2847 // CHECK-NEXT:  entry:
2848 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2849 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2850 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2851 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2852 // CHECK-NEXT:    ret i128 [[TMP3]]
2853 //
2854 v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); }
2855 // CHECK-LABEL: @vfcvt_h_s(
2856 // CHECK-NEXT:  entry:
2857 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2858 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2859 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2860 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
2861 // CHECK-NEXT:    ret i128 [[TMP3]]
2862 //
2863 v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); }
2864 // CHECK-LABEL: @vfcvt_s_d(
2865 // CHECK-NEXT:  entry:
2866 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2867 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2868 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2869 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2870 // CHECK-NEXT:    ret i128 [[TMP3]]
2871 //
2872 v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); }
2873 // CHECK-LABEL: @vfmin_s(
2874 // CHECK-NEXT:  entry:
2875 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2876 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2877 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2878 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2879 // CHECK-NEXT:    ret i128 [[TMP3]]
2880 //
2881 v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); }
2882 // CHECK-LABEL: @vfmin_d(
2883 // CHECK-NEXT:  entry:
2884 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2885 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2886 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2887 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2888 // CHECK-NEXT:    ret i128 [[TMP3]]
2889 //
2890 v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); }
2891 // CHECK-LABEL: @vfmina_s(
2892 // CHECK-NEXT:  entry:
2893 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2894 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2895 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2896 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2897 // CHECK-NEXT:    ret i128 [[TMP3]]
2898 //
2899 v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); }
2900 // CHECK-LABEL: @vfmina_d(
2901 // CHECK-NEXT:  entry:
2902 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2903 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2904 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2905 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2906 // CHECK-NEXT:    ret i128 [[TMP3]]
2907 //
2908 v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); }
2909 // CHECK-LABEL: @vfmax_s(
2910 // CHECK-NEXT:  entry:
2911 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2912 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2913 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2914 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2915 // CHECK-NEXT:    ret i128 [[TMP3]]
2916 //
2917 v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); }
2918 // CHECK-LABEL: @vfmax_d(
2919 // CHECK-NEXT:  entry:
2920 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2921 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2922 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2923 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2924 // CHECK-NEXT:    ret i128 [[TMP3]]
2925 //
2926 v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); }
2927 // CHECK-LABEL: @vfmaxa_s(
2928 // CHECK-NEXT:  entry:
2929 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2930 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
2931 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
2932 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
2933 // CHECK-NEXT:    ret i128 [[TMP3]]
2934 //
2935 v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); }
2936 // CHECK-LABEL: @vfmaxa_d(
2937 // CHECK-NEXT:  entry:
2938 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2939 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
2940 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
2941 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
2942 // CHECK-NEXT:    ret i128 [[TMP3]]
2943 //
2944 v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); }
2945 // CHECK-LABEL: @vfclass_s(
2946 // CHECK-NEXT:  entry:
2947 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2948 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
2949 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2950 // CHECK-NEXT:    ret i128 [[TMP2]]
2951 //
2952 v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); }
2953 // CHECK-LABEL: @vfclass_d(
2954 // CHECK-NEXT:  entry:
2955 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2956 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
2957 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
2958 // CHECK-NEXT:    ret i128 [[TMP2]]
2959 //
2960 v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); }
2961 // CHECK-LABEL: @vfsqrt_s(
2962 // CHECK-NEXT:  entry:
2963 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2964 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]])
2965 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
2966 // CHECK-NEXT:    ret i128 [[TMP2]]
2967 //
2968 v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); }
2969 // CHECK-LABEL: @vfsqrt_d(
2970 // CHECK-NEXT:  entry:
2971 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2972 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]])
2973 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
2974 // CHECK-NEXT:    ret i128 [[TMP2]]
2975 //
2976 v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); }
2977 // CHECK-LABEL: @vfrecip_s(
2978 // CHECK-NEXT:  entry:
2979 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2980 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]])
2981 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
2982 // CHECK-NEXT:    ret i128 [[TMP2]]
2983 //
2984 v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); }
2985 // CHECK-LABEL: @vfrecip_d(
2986 // CHECK-NEXT:  entry:
2987 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
2988 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]])
2989 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
2990 // CHECK-NEXT:    ret i128 [[TMP2]]
2991 //
2992 v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); }
2993 // CHECK-LABEL: @vfrint_s(
2994 // CHECK-NEXT:  entry:
2995 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
2996 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]])
2997 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
2998 // CHECK-NEXT:    ret i128 [[TMP2]]
2999 //
3000 v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); }
3001 // CHECK-LABEL: @vfrint_d(
3002 // CHECK-NEXT:  entry:
3003 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3004 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]])
3005 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3006 // CHECK-NEXT:    ret i128 [[TMP2]]
3007 //
3008 v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); }
3009 // CHECK-LABEL: @vfrsqrt_s(
3010 // CHECK-NEXT:  entry:
3011 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3012 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]])
3013 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3014 // CHECK-NEXT:    ret i128 [[TMP2]]
3015 //
3016 v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); }
3017 // CHECK-LABEL: @vfrsqrt_d(
3018 // CHECK-NEXT:  entry:
3019 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3020 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]])
3021 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3022 // CHECK-NEXT:    ret i128 [[TMP2]]
3023 //
3024 v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); }
3025 // CHECK-LABEL: @vflogb_s(
3026 // CHECK-NEXT:  entry:
3027 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3028 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]])
3029 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3030 // CHECK-NEXT:    ret i128 [[TMP2]]
3031 //
3032 v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); }
3033 // CHECK-LABEL: @vflogb_d(
3034 // CHECK-NEXT:  entry:
3035 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3036 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]])
3037 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3038 // CHECK-NEXT:    ret i128 [[TMP2]]
3039 //
3040 v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); }
3041 // CHECK-LABEL: @vfcvth_s_h(
3042 // CHECK-NEXT:  entry:
3043 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3044 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]])
3045 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3046 // CHECK-NEXT:    ret i128 [[TMP2]]
3047 //
3048 v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); }
3049 // CHECK-LABEL: @vfcvth_d_s(
3050 // CHECK-NEXT:  entry:
3051 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3052 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]])
3053 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3054 // CHECK-NEXT:    ret i128 [[TMP2]]
3055 //
3056 v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); }
3057 // CHECK-LABEL: @vfcvtl_s_h(
3058 // CHECK-NEXT:  entry:
3059 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3060 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]])
3061 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3062 // CHECK-NEXT:    ret i128 [[TMP2]]
3063 //
3064 v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); }
3065 // CHECK-LABEL: @vfcvtl_d_s(
3066 // CHECK-NEXT:  entry:
3067 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3068 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]])
3069 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3070 // CHECK-NEXT:    ret i128 [[TMP2]]
3071 //
3072 v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); }
3073 // CHECK-LABEL: @vftint_w_s(
3074 // CHECK-NEXT:  entry:
3075 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3076 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]])
3077 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3078 // CHECK-NEXT:    ret i128 [[TMP2]]
3079 //
3080 v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); }
3081 // CHECK-LABEL: @vftint_l_d(
3082 // CHECK-NEXT:  entry:
3083 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3084 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]])
3085 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3086 // CHECK-NEXT:    ret i128 [[TMP2]]
3087 //
3088 v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); }
3089 // CHECK-LABEL: @vftint_wu_s(
3090 // CHECK-NEXT:  entry:
3091 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3092 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]])
3093 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3094 // CHECK-NEXT:    ret i128 [[TMP2]]
3095 //
3096 v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); }
3097 // CHECK-LABEL: @vftint_lu_d(
3098 // CHECK-NEXT:  entry:
3099 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3100 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]])
3101 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3102 // CHECK-NEXT:    ret i128 [[TMP2]]
3103 //
3104 v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); }
3105 // CHECK-LABEL: @vftintrz_w_s(
3106 // CHECK-NEXT:  entry:
3107 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3108 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]])
3109 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3110 // CHECK-NEXT:    ret i128 [[TMP2]]
3111 //
3112 v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); }
3113 // CHECK-LABEL: @vftintrz_l_d(
3114 // CHECK-NEXT:  entry:
3115 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3116 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]])
3117 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3118 // CHECK-NEXT:    ret i128 [[TMP2]]
3119 //
3120 v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); }
3121 // CHECK-LABEL: @vftintrz_wu_s(
3122 // CHECK-NEXT:  entry:
3123 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3124 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]])
3125 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3126 // CHECK-NEXT:    ret i128 [[TMP2]]
3127 //
3128 v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); }
3129 // CHECK-LABEL: @vftintrz_lu_d(
3130 // CHECK-NEXT:  entry:
3131 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3132 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]])
3133 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3134 // CHECK-NEXT:    ret i128 [[TMP2]]
3135 //
3136 v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); }
3137 // CHECK-LABEL: @vffint_s_w(
3138 // CHECK-NEXT:  entry:
3139 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3140 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]])
3141 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3142 // CHECK-NEXT:    ret i128 [[TMP2]]
3143 //
3144 v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); }
3145 // CHECK-LABEL: @vffint_d_l(
3146 // CHECK-NEXT:  entry:
3147 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3148 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]])
3149 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3150 // CHECK-NEXT:    ret i128 [[TMP2]]
3151 //
3152 v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); }
3153 // CHECK-LABEL: @vffint_s_wu(
3154 // CHECK-NEXT:  entry:
3155 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3156 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]])
3157 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
3158 // CHECK-NEXT:    ret i128 [[TMP2]]
3159 //
3160 v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); }
3161 // CHECK-LABEL: @vffint_d_lu(
3162 // CHECK-NEXT:  entry:
3163 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3164 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]])
3165 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3166 // CHECK-NEXT:    ret i128 [[TMP2]]
3167 //
3168 v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); }
3169 // CHECK-LABEL: @vandn_v(
3170 // CHECK-NEXT:  entry:
3171 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3172 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3173 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3174 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3175 // CHECK-NEXT:    ret i128 [[TMP3]]
3176 //
3177 v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); }
3178 // CHECK-LABEL: @vneg_b(
3179 // CHECK-NEXT:  entry:
3180 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3181 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]])
3182 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3183 // CHECK-NEXT:    ret i128 [[TMP2]]
3184 //
3185 v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); }
3186 // CHECK-LABEL: @vneg_h(
3187 // CHECK-NEXT:  entry:
3188 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3189 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]])
3190 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3191 // CHECK-NEXT:    ret i128 [[TMP2]]
3192 //
3193 v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); }
3194 // CHECK-LABEL: @vneg_w(
3195 // CHECK-NEXT:  entry:
3196 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3197 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]])
3198 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3199 // CHECK-NEXT:    ret i128 [[TMP2]]
3200 //
3201 v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); }
3202 // CHECK-LABEL: @vneg_d(
3203 // CHECK-NEXT:  entry:
3204 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3205 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]])
3206 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3207 // CHECK-NEXT:    ret i128 [[TMP2]]
3208 //
3209 v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); }
3210 // CHECK-LABEL: @vmuh_b(
3211 // CHECK-NEXT:  entry:
3212 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3213 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3214 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3215 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3216 // CHECK-NEXT:    ret i128 [[TMP3]]
3217 //
3218 v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); }
3219 // CHECK-LABEL: @vmuh_h(
3220 // CHECK-NEXT:  entry:
3221 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3222 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3223 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3224 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3225 // CHECK-NEXT:    ret i128 [[TMP3]]
3226 //
3227 v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); }
3228 // CHECK-LABEL: @vmuh_w(
3229 // CHECK-NEXT:  entry:
3230 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3231 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3232 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3233 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3234 // CHECK-NEXT:    ret i128 [[TMP3]]
3235 //
3236 v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); }
3237 // CHECK-LABEL: @vmuh_d(
3238 // CHECK-NEXT:  entry:
3239 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3240 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3241 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3242 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3243 // CHECK-NEXT:    ret i128 [[TMP3]]
3244 //
3245 v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); }
3246 // CHECK-LABEL: @vmuh_bu(
3247 // CHECK-NEXT:  entry:
3248 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3249 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3250 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3251 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3252 // CHECK-NEXT:    ret i128 [[TMP3]]
3253 //
3254 v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); }
3255 // CHECK-LABEL: @vmuh_hu(
3256 // CHECK-NEXT:  entry:
3257 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3258 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3259 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3260 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3261 // CHECK-NEXT:    ret i128 [[TMP3]]
3262 //
3263 v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); }
3264 // CHECK-LABEL: @vmuh_wu(
3265 // CHECK-NEXT:  entry:
3266 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3267 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3268 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3269 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3270 // CHECK-NEXT:    ret i128 [[TMP3]]
3271 //
3272 v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); }
3273 // CHECK-LABEL: @vmuh_du(
3274 // CHECK-NEXT:  entry:
3275 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3276 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3277 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3278 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3279 // CHECK-NEXT:    ret i128 [[TMP3]]
3280 //
3281 v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); }
3282 // CHECK-LABEL: @vsllwil_h_b(
3283 // CHECK-NEXT:  entry:
3284 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3285 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1)
3286 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3287 // CHECK-NEXT:    ret i128 [[TMP2]]
3288 //
3289 v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); }
3290 // CHECK-LABEL: @vsllwil_w_h(
3291 // CHECK-NEXT:  entry:
3292 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3293 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1)
3294 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3295 // CHECK-NEXT:    ret i128 [[TMP2]]
3296 //
3297 v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); }
3298 // CHECK-LABEL: @vsllwil_d_w(
3299 // CHECK-NEXT:  entry:
3300 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3301 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1)
3302 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3303 // CHECK-NEXT:    ret i128 [[TMP2]]
3304 //
3305 v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); }
3306 // CHECK-LABEL: @vsllwil_hu_bu(
3307 // CHECK-NEXT:  entry:
3308 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3309 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1)
3310 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3311 // CHECK-NEXT:    ret i128 [[TMP2]]
3312 //
3313 v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); }
3314 // CHECK-LABEL: @vsllwil_wu_hu(
3315 // CHECK-NEXT:  entry:
3316 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3317 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1)
3318 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3319 // CHECK-NEXT:    ret i128 [[TMP2]]
3320 //
3321 v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); }
3322 // CHECK-LABEL: @vsllwil_du_wu(
3323 // CHECK-NEXT:  entry:
3324 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3325 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1)
3326 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3327 // CHECK-NEXT:    ret i128 [[TMP2]]
3328 //
3329 v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); }
3330 // CHECK-LABEL: @vsran_b_h(
3331 // CHECK-NEXT:  entry:
3332 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3333 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3334 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3335 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3336 // CHECK-NEXT:    ret i128 [[TMP3]]
3337 //
3338 v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); }
3339 // CHECK-LABEL: @vsran_h_w(
3340 // CHECK-NEXT:  entry:
3341 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3342 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3343 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3344 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3345 // CHECK-NEXT:    ret i128 [[TMP3]]
3346 //
3347 v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); }
3348 // CHECK-LABEL: @vsran_w_d(
3349 // CHECK-NEXT:  entry:
3350 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3351 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3352 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3353 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3354 // CHECK-NEXT:    ret i128 [[TMP3]]
3355 //
3356 v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); }
3357 // CHECK-LABEL: @vssran_b_h(
3358 // CHECK-NEXT:  entry:
3359 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3360 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3361 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3362 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3363 // CHECK-NEXT:    ret i128 [[TMP3]]
3364 //
3365 v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); }
3366 // CHECK-LABEL: @vssran_h_w(
3367 // CHECK-NEXT:  entry:
3368 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3369 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3370 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3371 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3372 // CHECK-NEXT:    ret i128 [[TMP3]]
3373 //
3374 v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); }
3375 // CHECK-LABEL: @vssran_w_d(
3376 // CHECK-NEXT:  entry:
3377 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3378 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3379 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3380 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3381 // CHECK-NEXT:    ret i128 [[TMP3]]
3382 //
3383 v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); }
3384 // CHECK-LABEL: @vssran_bu_h(
3385 // CHECK-NEXT:  entry:
3386 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3387 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3388 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3389 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3390 // CHECK-NEXT:    ret i128 [[TMP3]]
3391 //
3392 v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); }
3393 // CHECK-LABEL: @vssran_hu_w(
3394 // CHECK-NEXT:  entry:
3395 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3396 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3397 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3398 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3399 // CHECK-NEXT:    ret i128 [[TMP3]]
3400 //
3401 v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); }
3402 // CHECK-LABEL: @vssran_wu_d(
3403 // CHECK-NEXT:  entry:
3404 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3405 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3406 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3407 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3408 // CHECK-NEXT:    ret i128 [[TMP3]]
3409 //
3410 v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); }
3411 // CHECK-LABEL: @vsrarn_b_h(
3412 // CHECK-NEXT:  entry:
3413 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3414 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3415 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3416 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3417 // CHECK-NEXT:    ret i128 [[TMP3]]
3418 //
3419 v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); }
3420 // CHECK-LABEL: @vsrarn_h_w(
3421 // CHECK-NEXT:  entry:
3422 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3423 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3424 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3425 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3426 // CHECK-NEXT:    ret i128 [[TMP3]]
3427 //
3428 v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); }
3429 // CHECK-LABEL: @vsrarn_w_d(
3430 // CHECK-NEXT:  entry:
3431 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3432 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3433 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3434 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3435 // CHECK-NEXT:    ret i128 [[TMP3]]
3436 //
3437 v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); }
3438 // CHECK-LABEL: @vssrarn_b_h(
3439 // CHECK-NEXT:  entry:
3440 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3441 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3442 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3443 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3444 // CHECK-NEXT:    ret i128 [[TMP3]]
3445 //
3446 v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); }
3447 // CHECK-LABEL: @vssrarn_h_w(
3448 // CHECK-NEXT:  entry:
3449 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3450 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3451 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3452 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3453 // CHECK-NEXT:    ret i128 [[TMP3]]
3454 //
3455 v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); }
3456 // CHECK-LABEL: @vssrarn_w_d(
3457 // CHECK-NEXT:  entry:
3458 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3459 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3460 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3461 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3462 // CHECK-NEXT:    ret i128 [[TMP3]]
3463 //
3464 v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); }
3465 // CHECK-LABEL: @vssrarn_bu_h(
3466 // CHECK-NEXT:  entry:
3467 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3468 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3469 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3470 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3471 // CHECK-NEXT:    ret i128 [[TMP3]]
3472 //
3473 v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); }
3474 // CHECK-LABEL: @vssrarn_hu_w(
3475 // CHECK-NEXT:  entry:
3476 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3477 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3478 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3479 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3480 // CHECK-NEXT:    ret i128 [[TMP3]]
3481 //
3482 v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); }
3483 // CHECK-LABEL: @vssrarn_wu_d(
3484 // CHECK-NEXT:  entry:
3485 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3486 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3487 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3488 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3489 // CHECK-NEXT:    ret i128 [[TMP3]]
3490 //
3491 v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); }
3492 // CHECK-LABEL: @vsrln_b_h(
3493 // CHECK-NEXT:  entry:
3494 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3495 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3496 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3497 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3498 // CHECK-NEXT:    ret i128 [[TMP3]]
3499 //
3500 v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); }
3501 // CHECK-LABEL: @vsrln_h_w(
3502 // CHECK-NEXT:  entry:
3503 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3504 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3505 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3506 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3507 // CHECK-NEXT:    ret i128 [[TMP3]]
3508 //
3509 v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); }
3510 // CHECK-LABEL: @vsrln_w_d(
3511 // CHECK-NEXT:  entry:
3512 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3513 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3514 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3515 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3516 // CHECK-NEXT:    ret i128 [[TMP3]]
3517 //
3518 v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); }
3519 // CHECK-LABEL: @vssrln_bu_h(
3520 // CHECK-NEXT:  entry:
3521 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3522 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3523 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3524 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3525 // CHECK-NEXT:    ret i128 [[TMP3]]
3526 //
3527 v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); }
3528 // CHECK-LABEL: @vssrln_hu_w(
3529 // CHECK-NEXT:  entry:
3530 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3531 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3532 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3533 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3534 // CHECK-NEXT:    ret i128 [[TMP3]]
3535 //
3536 v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); }
3537 // CHECK-LABEL: @vssrln_wu_d(
3538 // CHECK-NEXT:  entry:
3539 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3540 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3541 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3542 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3543 // CHECK-NEXT:    ret i128 [[TMP3]]
3544 //
3545 v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); }
3546 // CHECK-LABEL: @vsrlrn_b_h(
3547 // CHECK-NEXT:  entry:
3548 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3549 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3550 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3551 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3552 // CHECK-NEXT:    ret i128 [[TMP3]]
3553 //
3554 v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); }
3555 // CHECK-LABEL: @vsrlrn_h_w(
3556 // CHECK-NEXT:  entry:
3557 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3558 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3559 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3560 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3561 // CHECK-NEXT:    ret i128 [[TMP3]]
3562 //
3563 v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); }
3564 // CHECK-LABEL: @vsrlrn_w_d(
3565 // CHECK-NEXT:  entry:
3566 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3567 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3568 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3569 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3570 // CHECK-NEXT:    ret i128 [[TMP3]]
3571 //
3572 v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); }
3573 // CHECK-LABEL: @vssrlrn_bu_h(
3574 // CHECK-NEXT:  entry:
3575 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3576 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3577 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3578 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3579 // CHECK-NEXT:    ret i128 [[TMP3]]
3580 //
3581 v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); }
3582 // CHECK-LABEL: @vssrlrn_hu_w(
3583 // CHECK-NEXT:  entry:
3584 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3585 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3586 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3587 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3588 // CHECK-NEXT:    ret i128 [[TMP3]]
3589 //
3590 v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); }
3591 // CHECK-LABEL: @vssrlrn_wu_d(
3592 // CHECK-NEXT:  entry:
3593 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3594 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3595 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3596 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3597 // CHECK-NEXT:    ret i128 [[TMP3]]
3598 //
3599 v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); }
3600 // CHECK-LABEL: @vfrstpi_b(
3601 // CHECK-NEXT:  entry:
3602 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3603 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3604 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
3605 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3606 // CHECK-NEXT:    ret i128 [[TMP3]]
3607 //
3608 v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); }
3609 // CHECK-LABEL: @vfrstpi_h(
3610 // CHECK-NEXT:  entry:
3611 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3612 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3613 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
3614 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3615 // CHECK-NEXT:    ret i128 [[TMP3]]
3616 //
3617 v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); }
3618 // CHECK-LABEL: @vfrstp_b(
3619 // CHECK-NEXT:  entry:
3620 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3621 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3622 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
3623 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
3624 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
3625 // CHECK-NEXT:    ret i128 [[TMP4]]
3626 //
3627 v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
3628   return __lsx_vfrstp_b(_1, _2, _3);
3629 }
3630 // CHECK-LABEL: @vfrstp_h(
3631 // CHECK-NEXT:  entry:
3632 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3633 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3634 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
3635 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
3636 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
3637 // CHECK-NEXT:    ret i128 [[TMP4]]
3638 //
3639 v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
3640   return __lsx_vfrstp_h(_1, _2, _3);
3641 }
3642 // CHECK-LABEL: @vshuf4i_d(
3643 // CHECK-NEXT:  entry:
3644 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3645 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3646 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
3647 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3648 // CHECK-NEXT:    ret i128 [[TMP3]]
3649 //
3650 v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); }
3651 // CHECK-LABEL: @vbsrl_v(
3652 // CHECK-NEXT:  entry:
3653 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3654 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
3655 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3656 // CHECK-NEXT:    ret i128 [[TMP2]]
3657 //
3658 v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); }
3659 // CHECK-LABEL: @vbsll_v(
3660 // CHECK-NEXT:  entry:
3661 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3662 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
3663 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3664 // CHECK-NEXT:    ret i128 [[TMP2]]
3665 //
3666 v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); }
3667 // CHECK-LABEL: @vextrins_b(
3668 // CHECK-NEXT:  entry:
3669 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3670 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3671 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
3672 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3673 // CHECK-NEXT:    ret i128 [[TMP3]]
3674 //
3675 v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); }
3676 // CHECK-LABEL: @vextrins_h(
3677 // CHECK-NEXT:  entry:
3678 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3679 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3680 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
3681 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3682 // CHECK-NEXT:    ret i128 [[TMP3]]
3683 //
3684 v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); }
3685 // CHECK-LABEL: @vextrins_w(
3686 // CHECK-NEXT:  entry:
3687 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3688 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3689 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
3690 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3691 // CHECK-NEXT:    ret i128 [[TMP3]]
3692 //
3693 v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); }
3694 // CHECK-LABEL: @vextrins_d(
3695 // CHECK-NEXT:  entry:
3696 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3697 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3698 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
3699 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3700 // CHECK-NEXT:    ret i128 [[TMP3]]
3701 //
3702 v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); }
3703 // CHECK-LABEL: @vmskltz_b(
3704 // CHECK-NEXT:  entry:
3705 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3706 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
3707 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
3708 // CHECK-NEXT:    ret i128 [[TMP2]]
3709 //
3710 v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); }
3711 // CHECK-LABEL: @vmskltz_h(
3712 // CHECK-NEXT:  entry:
3713 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3714 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
3715 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
3716 // CHECK-NEXT:    ret i128 [[TMP2]]
3717 //
3718 v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); }
3719 // CHECK-LABEL: @vmskltz_w(
3720 // CHECK-NEXT:  entry:
3721 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3722 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
3723 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3724 // CHECK-NEXT:    ret i128 [[TMP2]]
3725 //
3726 v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); }
3727 // CHECK-LABEL: @vmskltz_d(
3728 // CHECK-NEXT:  entry:
3729 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3730 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]])
3731 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3732 // CHECK-NEXT:    ret i128 [[TMP2]]
3733 //
3734 v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); }
3735 // CHECK-LABEL: @vsigncov_b(
3736 // CHECK-NEXT:  entry:
3737 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
3738 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
3739 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
3740 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
3741 // CHECK-NEXT:    ret i128 [[TMP3]]
3742 //
3743 v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); }
3744 // CHECK-LABEL: @vsigncov_h(
3745 // CHECK-NEXT:  entry:
3746 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
3747 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
3748 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3749 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
3750 // CHECK-NEXT:    ret i128 [[TMP3]]
3751 //
3752 v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); }
3753 // CHECK-LABEL: @vsigncov_w(
3754 // CHECK-NEXT:  entry:
3755 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3756 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
3757 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
3758 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3759 // CHECK-NEXT:    ret i128 [[TMP3]]
3760 //
3761 v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); }
3762 // CHECK-LABEL: @vsigncov_d(
3763 // CHECK-NEXT:  entry:
3764 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3765 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3766 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3767 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
3768 // CHECK-NEXT:    ret i128 [[TMP3]]
3769 //
3770 v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); }
3771 // CHECK-LABEL: @vfmadd_s(
3772 // CHECK-NEXT:  entry:
3773 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3774 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3775 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3776 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3777 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3778 // CHECK-NEXT:    ret i128 [[TMP4]]
3779 //
3780 v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3781   return __lsx_vfmadd_s(_1, _2, _3);
3782 }
3783 // CHECK-LABEL: @vfmadd_d(
3784 // CHECK-NEXT:  entry:
3785 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3786 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3787 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3788 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3789 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3790 // CHECK-NEXT:    ret i128 [[TMP4]]
3791 //
3792 v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3793   return __lsx_vfmadd_d(_1, _2, _3);
3794 }
3795 // CHECK-LABEL: @vfmsub_s(
3796 // CHECK-NEXT:  entry:
3797 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3798 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3799 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3800 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3801 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3802 // CHECK-NEXT:    ret i128 [[TMP4]]
3803 //
3804 v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3805   return __lsx_vfmsub_s(_1, _2, _3);
3806 }
3807 // CHECK-LABEL: @vfmsub_d(
3808 // CHECK-NEXT:  entry:
3809 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3810 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3811 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3812 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3813 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3814 // CHECK-NEXT:    ret i128 [[TMP4]]
3815 //
3816 v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3817   return __lsx_vfmsub_d(_1, _2, _3);
3818 }
3819 // CHECK-LABEL: @vfnmadd_s(
3820 // CHECK-NEXT:  entry:
3821 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3822 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3823 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3824 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3825 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3826 // CHECK-NEXT:    ret i128 [[TMP4]]
3827 //
3828 v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3829   return __lsx_vfnmadd_s(_1, _2, _3);
3830 }
3831 // CHECK-LABEL: @vfnmadd_d(
3832 // CHECK-NEXT:  entry:
3833 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3834 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3835 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3836 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3837 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3838 // CHECK-NEXT:    ret i128 [[TMP4]]
3839 //
3840 v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3841   return __lsx_vfnmadd_d(_1, _2, _3);
3842 }
3843 // CHECK-LABEL: @vfnmsub_s(
3844 // CHECK-NEXT:  entry:
3845 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3846 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
3847 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
3848 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
3849 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
3850 // CHECK-NEXT:    ret i128 [[TMP4]]
3851 //
3852 v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
3853   return __lsx_vfnmsub_s(_1, _2, _3);
3854 }
3855 // CHECK-LABEL: @vfnmsub_d(
3856 // CHECK-NEXT:  entry:
3857 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3858 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3859 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
3860 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
3861 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
3862 // CHECK-NEXT:    ret i128 [[TMP4]]
3863 //
3864 v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
3865   return __lsx_vfnmsub_d(_1, _2, _3);
3866 }
3867 // CHECK-LABEL: @vftintrne_w_s(
3868 // CHECK-NEXT:  entry:
3869 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3870 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]])
3871 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3872 // CHECK-NEXT:    ret i128 [[TMP2]]
3873 //
3874 v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); }
3875 // CHECK-LABEL: @vftintrne_l_d(
3876 // CHECK-NEXT:  entry:
3877 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3878 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]])
3879 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3880 // CHECK-NEXT:    ret i128 [[TMP2]]
3881 //
3882 v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); }
3883 // CHECK-LABEL: @vftintrp_w_s(
3884 // CHECK-NEXT:  entry:
3885 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3886 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]])
3887 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3888 // CHECK-NEXT:    ret i128 [[TMP2]]
3889 //
3890 v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); }
3891 // CHECK-LABEL: @vftintrp_l_d(
3892 // CHECK-NEXT:  entry:
3893 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3894 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]])
3895 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3896 // CHECK-NEXT:    ret i128 [[TMP2]]
3897 //
3898 v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); }
3899 // CHECK-LABEL: @vftintrm_w_s(
3900 // CHECK-NEXT:  entry:
3901 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3902 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]])
3903 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3904 // CHECK-NEXT:    ret i128 [[TMP2]]
3905 //
3906 v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); }
3907 // CHECK-LABEL: @vftintrm_l_d(
3908 // CHECK-NEXT:  entry:
3909 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3910 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]])
3911 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3912 // CHECK-NEXT:    ret i128 [[TMP2]]
3913 //
3914 v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); }
3915 // CHECK-LABEL: @vftint_w_d(
3916 // CHECK-NEXT:  entry:
3917 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3918 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3919 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3920 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3921 // CHECK-NEXT:    ret i128 [[TMP3]]
3922 //
3923 v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); }
3924 // CHECK-LABEL: @vffint_s_l(
3925 // CHECK-NEXT:  entry:
3926 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
3927 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
3928 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
3929 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
3930 // CHECK-NEXT:    ret i128 [[TMP3]]
3931 //
3932 v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); }
3933 // CHECK-LABEL: @vftintrz_w_d(
3934 // CHECK-NEXT:  entry:
3935 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3936 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3937 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3938 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3939 // CHECK-NEXT:    ret i128 [[TMP3]]
3940 //
3941 v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); }
3942 // CHECK-LABEL: @vftintrp_w_d(
3943 // CHECK-NEXT:  entry:
3944 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3945 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3946 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3947 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3948 // CHECK-NEXT:    ret i128 [[TMP3]]
3949 //
3950 v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); }
3951 // CHECK-LABEL: @vftintrm_w_d(
3952 // CHECK-NEXT:  entry:
3953 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3954 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3955 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3956 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3957 // CHECK-NEXT:    ret i128 [[TMP3]]
3958 //
3959 v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); }
3960 // CHECK-LABEL: @vftintrne_w_d(
3961 // CHECK-NEXT:  entry:
3962 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
3963 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
3964 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
3965 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
3966 // CHECK-NEXT:    ret i128 [[TMP3]]
3967 //
3968 v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); }
3969 // CHECK-LABEL: @vftintl_l_s(
3970 // CHECK-NEXT:  entry:
3971 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3972 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]])
3973 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3974 // CHECK-NEXT:    ret i128 [[TMP2]]
3975 //
3976 v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); }
3977 // CHECK-LABEL: @vftinth_l_s(
3978 // CHECK-NEXT:  entry:
3979 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
3980 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]])
3981 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
3982 // CHECK-NEXT:    ret i128 [[TMP2]]
3983 //
3984 v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); }
3985 // CHECK-LABEL: @vffinth_d_w(
3986 // CHECK-NEXT:  entry:
3987 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3988 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]])
3989 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3990 // CHECK-NEXT:    ret i128 [[TMP2]]
3991 //
3992 v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); }
3993 // CHECK-LABEL: @vffintl_d_w(
3994 // CHECK-NEXT:  entry:
3995 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
3996 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]])
3997 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
3998 // CHECK-NEXT:    ret i128 [[TMP2]]
3999 //
4000 v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); }
4001 // CHECK-LABEL: @vftintrzl_l_s(
4002 // CHECK-NEXT:  entry:
4003 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4004 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]])
4005 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4006 // CHECK-NEXT:    ret i128 [[TMP2]]
4007 //
4008 v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); }
4009 // CHECK-LABEL: @vftintrzh_l_s(
4010 // CHECK-NEXT:  entry:
4011 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4012 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]])
4013 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4014 // CHECK-NEXT:    ret i128 [[TMP2]]
4015 //
4016 v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); }
4017 // CHECK-LABEL: @vftintrpl_l_s(
4018 // CHECK-NEXT:  entry:
4019 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4020 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]])
4021 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4022 // CHECK-NEXT:    ret i128 [[TMP2]]
4023 //
4024 v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); }
4025 // CHECK-LABEL: @vftintrph_l_s(
4026 // CHECK-NEXT:  entry:
4027 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4028 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]])
4029 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4030 // CHECK-NEXT:    ret i128 [[TMP2]]
4031 //
4032 v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); }
4033 // CHECK-LABEL: @vftintrml_l_s(
4034 // CHECK-NEXT:  entry:
4035 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4036 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]])
4037 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4038 // CHECK-NEXT:    ret i128 [[TMP2]]
4039 //
4040 v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); }
4041 // CHECK-LABEL: @vftintrmh_l_s(
4042 // CHECK-NEXT:  entry:
4043 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4044 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]])
4045 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4046 // CHECK-NEXT:    ret i128 [[TMP2]]
4047 //
4048 v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); }
4049 // CHECK-LABEL: @vftintrnel_l_s(
4050 // CHECK-NEXT:  entry:
4051 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4052 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]])
4053 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4054 // CHECK-NEXT:    ret i128 [[TMP2]]
4055 //
4056 v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); }
4057 // CHECK-LABEL: @vftintrneh_l_s(
4058 // CHECK-NEXT:  entry:
4059 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4060 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]])
4061 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
4062 // CHECK-NEXT:    ret i128 [[TMP2]]
4063 //
4064 v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); }
4065 // CHECK-LABEL: @vfrintrne_s(
4066 // CHECK-NEXT:  entry:
4067 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4068 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]])
4069 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4070 // CHECK-NEXT:    ret i128 [[TMP2]]
4071 //
4072 v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); }
4073 // CHECK-LABEL: @vfrintrne_d(
4074 // CHECK-NEXT:  entry:
4075 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4076 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]])
4077 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4078 // CHECK-NEXT:    ret i128 [[TMP2]]
4079 //
4080 v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); }
4081 // CHECK-LABEL: @vfrintrz_s(
4082 // CHECK-NEXT:  entry:
4083 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4084 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]])
4085 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4086 // CHECK-NEXT:    ret i128 [[TMP2]]
4087 //
4088 v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); }
4089 // CHECK-LABEL: @vfrintrz_d(
4090 // CHECK-NEXT:  entry:
4091 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4092 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]])
4093 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4094 // CHECK-NEXT:    ret i128 [[TMP2]]
4095 //
4096 v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); }
4097 // CHECK-LABEL: @vfrintrp_s(
4098 // CHECK-NEXT:  entry:
4099 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4100 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]])
4101 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4102 // CHECK-NEXT:    ret i128 [[TMP2]]
4103 //
4104 v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); }
4105 // CHECK-LABEL: @vfrintrp_d(
4106 // CHECK-NEXT:  entry:
4107 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4108 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]])
4109 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4110 // CHECK-NEXT:    ret i128 [[TMP2]]
4111 //
4112 v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); }
4113 // CHECK-LABEL: @vfrintrm_s(
4114 // CHECK-NEXT:  entry:
4115 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
4116 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]])
4117 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
4118 // CHECK-NEXT:    ret i128 [[TMP2]]
4119 //
4120 v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); }
4121 // CHECK-LABEL: @vfrintrm_d(
4122 // CHECK-NEXT:  entry:
4123 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
4124 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]])
4125 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
4126 // CHECK-NEXT:    ret i128 [[TMP2]]
4127 //
4128 v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); }
4129 // CHECK-LABEL: @vstelm_b(
4130 // CHECK-NEXT:  entry:
4131 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4132 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1)
4133 // CHECK-NEXT:    ret void
4134 //
4135 void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); }
4136 // CHECK-LABEL: @vstelm_h(
4137 // CHECK-NEXT:  entry:
4138 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4139 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1)
4140 // CHECK-NEXT:    ret void
4141 //
4142 void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); }
4143 // CHECK-LABEL: @vstelm_w(
4144 // CHECK-NEXT:  entry:
4145 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4146 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1)
4147 // CHECK-NEXT:    ret void
4148 //
4149 void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); }
4150 // CHECK-LABEL: @vstelm_d(
4151 // CHECK-NEXT:  entry:
4152 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4153 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1)
4154 // CHECK-NEXT:    ret void
4155 //
4156 void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); }
4157 // CHECK-LABEL: @vaddwev_d_w(
4158 // CHECK-NEXT:  entry:
4159 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4160 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4161 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4162 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4163 // CHECK-NEXT:    ret i128 [[TMP3]]
4164 //
4165 v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); }
4166 // CHECK-LABEL: @vaddwev_w_h(
4167 // CHECK-NEXT:  entry:
4168 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4169 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4170 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4171 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4172 // CHECK-NEXT:    ret i128 [[TMP3]]
4173 //
4174 v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); }
4175 // CHECK-LABEL: @vaddwev_h_b(
4176 // CHECK-NEXT:  entry:
4177 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4178 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4179 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4180 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4181 // CHECK-NEXT:    ret i128 [[TMP3]]
4182 //
4183 v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); }
4184 // CHECK-LABEL: @vaddwod_d_w(
4185 // CHECK-NEXT:  entry:
4186 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4187 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4188 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4189 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4190 // CHECK-NEXT:    ret i128 [[TMP3]]
4191 //
4192 v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); }
4193 // CHECK-LABEL: @vaddwod_w_h(
4194 // CHECK-NEXT:  entry:
4195 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4196 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4197 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4198 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4199 // CHECK-NEXT:    ret i128 [[TMP3]]
4200 //
4201 v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); }
4202 // CHECK-LABEL: @vaddwod_h_b(
4203 // CHECK-NEXT:  entry:
4204 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4205 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4206 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4207 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4208 // CHECK-NEXT:    ret i128 [[TMP3]]
4209 //
4210 v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); }
4211 // CHECK-LABEL: @vaddwev_d_wu(
4212 // CHECK-NEXT:  entry:
4213 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4214 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4215 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4216 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4217 // CHECK-NEXT:    ret i128 [[TMP3]]
4218 //
4219 v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); }
4220 // CHECK-LABEL: @vaddwev_w_hu(
4221 // CHECK-NEXT:  entry:
4222 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4223 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4224 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4225 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4226 // CHECK-NEXT:    ret i128 [[TMP3]]
4227 //
4228 v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); }
4229 // CHECK-LABEL: @vaddwev_h_bu(
4230 // CHECK-NEXT:  entry:
4231 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4232 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4233 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4234 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4235 // CHECK-NEXT:    ret i128 [[TMP3]]
4236 //
4237 v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); }
4238 // CHECK-LABEL: @vaddwod_d_wu(
4239 // CHECK-NEXT:  entry:
4240 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4241 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4242 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4243 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4244 // CHECK-NEXT:    ret i128 [[TMP3]]
4245 //
4246 v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); }
4247 // CHECK-LABEL: @vaddwod_w_hu(
4248 // CHECK-NEXT:  entry:
4249 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4250 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4251 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4252 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4253 // CHECK-NEXT:    ret i128 [[TMP3]]
4254 //
4255 v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); }
4256 // CHECK-LABEL: @vaddwod_h_bu(
4257 // CHECK-NEXT:  entry:
4258 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4259 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4260 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4261 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4262 // CHECK-NEXT:    ret i128 [[TMP3]]
4263 //
4264 v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); }
4265 // CHECK-LABEL: @vaddwev_d_wu_w(
4266 // CHECK-NEXT:  entry:
4267 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4268 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4269 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4270 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4271 // CHECK-NEXT:    ret i128 [[TMP3]]
4272 //
4273 v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
4274   return __lsx_vaddwev_d_wu_w(_1, _2);
4275 }
4276 // CHECK-LABEL: @vaddwev_w_hu_h(
4277 // CHECK-NEXT:  entry:
4278 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4279 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4280 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4281 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4282 // CHECK-NEXT:    ret i128 [[TMP3]]
4283 //
4284 v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
4285   return __lsx_vaddwev_w_hu_h(_1, _2);
4286 }
4287 // CHECK-LABEL: @vaddwev_h_bu_b(
4288 // CHECK-NEXT:  entry:
4289 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4290 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4291 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4292 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4293 // CHECK-NEXT:    ret i128 [[TMP3]]
4294 //
4295 v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
4296   return __lsx_vaddwev_h_bu_b(_1, _2);
4297 }
4298 // CHECK-LABEL: @vaddwod_d_wu_w(
4299 // CHECK-NEXT:  entry:
4300 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4301 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4302 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4303 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4304 // CHECK-NEXT:    ret i128 [[TMP3]]
4305 //
4306 v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
4307   return __lsx_vaddwod_d_wu_w(_1, _2);
4308 }
4309 // CHECK-LABEL: @vaddwod_w_hu_h(
4310 // CHECK-NEXT:  entry:
4311 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4312 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4313 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4314 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4315 // CHECK-NEXT:    ret i128 [[TMP3]]
4316 //
4317 v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
4318   return __lsx_vaddwod_w_hu_h(_1, _2);
4319 }
4320 // CHECK-LABEL: @vaddwod_h_bu_b(
4321 // CHECK-NEXT:  entry:
4322 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4323 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4324 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4325 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4326 // CHECK-NEXT:    ret i128 [[TMP3]]
4327 //
4328 v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
4329   return __lsx_vaddwod_h_bu_b(_1, _2);
4330 }
4331 // CHECK-LABEL: @vsubwev_d_w(
4332 // CHECK-NEXT:  entry:
4333 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4334 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4335 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4336 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4337 // CHECK-NEXT:    ret i128 [[TMP3]]
4338 //
4339 v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); }
4340 // CHECK-LABEL: @vsubwev_w_h(
4341 // CHECK-NEXT:  entry:
4342 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4343 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4344 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4345 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4346 // CHECK-NEXT:    ret i128 [[TMP3]]
4347 //
4348 v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); }
4349 // CHECK-LABEL: @vsubwev_h_b(
4350 // CHECK-NEXT:  entry:
4351 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4352 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4353 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4354 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4355 // CHECK-NEXT:    ret i128 [[TMP3]]
4356 //
4357 v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); }
4358 // CHECK-LABEL: @vsubwod_d_w(
4359 // CHECK-NEXT:  entry:
4360 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4361 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4362 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4363 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4364 // CHECK-NEXT:    ret i128 [[TMP3]]
4365 //
4366 v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); }
4367 // CHECK-LABEL: @vsubwod_w_h(
4368 // CHECK-NEXT:  entry:
4369 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4370 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4371 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4372 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4373 // CHECK-NEXT:    ret i128 [[TMP3]]
4374 //
4375 v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); }
4376 // CHECK-LABEL: @vsubwod_h_b(
4377 // CHECK-NEXT:  entry:
4378 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4379 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4380 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4381 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4382 // CHECK-NEXT:    ret i128 [[TMP3]]
4383 //
4384 v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); }
4385 // CHECK-LABEL: @vsubwev_d_wu(
4386 // CHECK-NEXT:  entry:
4387 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4388 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4389 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4390 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4391 // CHECK-NEXT:    ret i128 [[TMP3]]
4392 //
4393 v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); }
4394 // CHECK-LABEL: @vsubwev_w_hu(
4395 // CHECK-NEXT:  entry:
4396 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4397 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4398 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4399 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4400 // CHECK-NEXT:    ret i128 [[TMP3]]
4401 //
4402 v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); }
4403 // CHECK-LABEL: @vsubwev_h_bu(
4404 // CHECK-NEXT:  entry:
4405 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4406 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4407 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4408 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4409 // CHECK-NEXT:    ret i128 [[TMP3]]
4410 //
4411 v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); }
4412 // CHECK-LABEL: @vsubwod_d_wu(
4413 // CHECK-NEXT:  entry:
4414 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4415 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4416 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4417 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4418 // CHECK-NEXT:    ret i128 [[TMP3]]
4419 //
4420 v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); }
4421 // CHECK-LABEL: @vsubwod_w_hu(
4422 // CHECK-NEXT:  entry:
4423 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4424 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4425 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4426 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4427 // CHECK-NEXT:    ret i128 [[TMP3]]
4428 //
4429 v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); }
4430 // CHECK-LABEL: @vsubwod_h_bu(
4431 // CHECK-NEXT:  entry:
4432 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4433 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4434 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4435 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4436 // CHECK-NEXT:    ret i128 [[TMP3]]
4437 //
4438 v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); }
4439 // CHECK-LABEL: @vaddwev_q_d(
4440 // CHECK-NEXT:  entry:
4441 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4442 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4443 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4444 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4445 // CHECK-NEXT:    ret i128 [[TMP3]]
4446 //
4447 v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); }
4448 // CHECK-LABEL: @vaddwod_q_d(
4449 // CHECK-NEXT:  entry:
4450 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4451 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4452 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4453 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4454 // CHECK-NEXT:    ret i128 [[TMP3]]
4455 //
4456 v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); }
4457 // CHECK-LABEL: @vaddwev_q_du(
4458 // CHECK-NEXT:  entry:
4459 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4460 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4461 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4462 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4463 // CHECK-NEXT:    ret i128 [[TMP3]]
4464 //
4465 v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); }
4466 // CHECK-LABEL: @vaddwod_q_du(
4467 // CHECK-NEXT:  entry:
4468 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4469 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4470 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4471 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4472 // CHECK-NEXT:    ret i128 [[TMP3]]
4473 //
4474 v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); }
4475 // CHECK-LABEL: @vsubwev_q_d(
4476 // CHECK-NEXT:  entry:
4477 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4478 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4479 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4480 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4481 // CHECK-NEXT:    ret i128 [[TMP3]]
4482 //
4483 v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); }
4484 // CHECK-LABEL: @vsubwod_q_d(
4485 // CHECK-NEXT:  entry:
4486 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4487 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4488 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4489 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4490 // CHECK-NEXT:    ret i128 [[TMP3]]
4491 //
4492 v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); }
4493 // CHECK-LABEL: @vsubwev_q_du(
4494 // CHECK-NEXT:  entry:
4495 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4496 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4497 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4498 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4499 // CHECK-NEXT:    ret i128 [[TMP3]]
4500 //
4501 v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); }
4502 // CHECK-LABEL: @vsubwod_q_du(
4503 // CHECK-NEXT:  entry:
4504 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4505 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4506 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4507 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4508 // CHECK-NEXT:    ret i128 [[TMP3]]
4509 //
4510 v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); }
4511 // CHECK-LABEL: @vaddwev_q_du_d(
4512 // CHECK-NEXT:  entry:
4513 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4514 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4515 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4516 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4517 // CHECK-NEXT:    ret i128 [[TMP3]]
4518 //
4519 v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
4520   return __lsx_vaddwev_q_du_d(_1, _2);
4521 }
4522 // CHECK-LABEL: @vaddwod_q_du_d(
4523 // CHECK-NEXT:  entry:
4524 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4525 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4526 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4527 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4528 // CHECK-NEXT:    ret i128 [[TMP3]]
4529 //
4530 v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
4531   return __lsx_vaddwod_q_du_d(_1, _2);
4532 }
4533 // CHECK-LABEL: @vmulwev_d_w(
4534 // CHECK-NEXT:  entry:
4535 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4536 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4537 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4538 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4539 // CHECK-NEXT:    ret i128 [[TMP3]]
4540 //
4541 v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); }
4542 // CHECK-LABEL: @vmulwev_w_h(
4543 // CHECK-NEXT:  entry:
4544 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4545 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4546 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4547 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4548 // CHECK-NEXT:    ret i128 [[TMP3]]
4549 //
4550 v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); }
4551 // CHECK-LABEL: @vmulwev_h_b(
4552 // CHECK-NEXT:  entry:
4553 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4554 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4555 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4556 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4557 // CHECK-NEXT:    ret i128 [[TMP3]]
4558 //
4559 v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); }
4560 // CHECK-LABEL: @vmulwod_d_w(
4561 // CHECK-NEXT:  entry:
4562 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4563 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4564 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4565 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4566 // CHECK-NEXT:    ret i128 [[TMP3]]
4567 //
4568 v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); }
4569 // CHECK-LABEL: @vmulwod_w_h(
4570 // CHECK-NEXT:  entry:
4571 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4572 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4573 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4574 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4575 // CHECK-NEXT:    ret i128 [[TMP3]]
4576 //
4577 v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); }
4578 // CHECK-LABEL: @vmulwod_h_b(
4579 // CHECK-NEXT:  entry:
4580 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4581 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4582 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4583 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4584 // CHECK-NEXT:    ret i128 [[TMP3]]
4585 //
4586 v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); }
4587 // CHECK-LABEL: @vmulwev_d_wu(
4588 // CHECK-NEXT:  entry:
4589 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4590 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4591 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4592 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4593 // CHECK-NEXT:    ret i128 [[TMP3]]
4594 //
4595 v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); }
4596 // CHECK-LABEL: @vmulwev_w_hu(
4597 // CHECK-NEXT:  entry:
4598 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4599 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4600 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4601 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4602 // CHECK-NEXT:    ret i128 [[TMP3]]
4603 //
4604 v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); }
4605 // CHECK-LABEL: @vmulwev_h_bu(
4606 // CHECK-NEXT:  entry:
4607 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4608 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4609 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4610 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4611 // CHECK-NEXT:    ret i128 [[TMP3]]
4612 //
4613 v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); }
4614 // CHECK-LABEL: @vmulwod_d_wu(
4615 // CHECK-NEXT:  entry:
4616 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4617 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4618 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4619 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4620 // CHECK-NEXT:    ret i128 [[TMP3]]
4621 //
4622 v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); }
4623 // CHECK-LABEL: @vmulwod_w_hu(
4624 // CHECK-NEXT:  entry:
4625 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4626 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4627 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4628 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4629 // CHECK-NEXT:    ret i128 [[TMP3]]
4630 //
4631 v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); }
4632 // CHECK-LABEL: @vmulwod_h_bu(
4633 // CHECK-NEXT:  entry:
4634 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4635 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4636 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4637 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4638 // CHECK-NEXT:    ret i128 [[TMP3]]
4639 //
4640 v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); }
4641 // CHECK-LABEL: @vmulwev_d_wu_w(
4642 // CHECK-NEXT:  entry:
4643 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4644 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4645 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4646 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4647 // CHECK-NEXT:    ret i128 [[TMP3]]
4648 //
4649 v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
4650   return __lsx_vmulwev_d_wu_w(_1, _2);
4651 }
4652 // CHECK-LABEL: @vmulwev_w_hu_h(
4653 // CHECK-NEXT:  entry:
4654 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4655 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4656 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4657 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4658 // CHECK-NEXT:    ret i128 [[TMP3]]
4659 //
4660 v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
4661   return __lsx_vmulwev_w_hu_h(_1, _2);
4662 }
4663 // CHECK-LABEL: @vmulwev_h_bu_b(
4664 // CHECK-NEXT:  entry:
4665 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4666 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4667 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4668 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4669 // CHECK-NEXT:    ret i128 [[TMP3]]
4670 //
4671 v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
4672   return __lsx_vmulwev_h_bu_b(_1, _2);
4673 }
4674 // CHECK-LABEL: @vmulwod_d_wu_w(
4675 // CHECK-NEXT:  entry:
4676 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4677 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4678 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
4679 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4680 // CHECK-NEXT:    ret i128 [[TMP3]]
4681 //
4682 v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
4683   return __lsx_vmulwod_d_wu_w(_1, _2);
4684 }
4685 // CHECK-LABEL: @vmulwod_w_hu_h(
4686 // CHECK-NEXT:  entry:
4687 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4688 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4689 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
4690 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4691 // CHECK-NEXT:    ret i128 [[TMP3]]
4692 //
4693 v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
4694   return __lsx_vmulwod_w_hu_h(_1, _2);
4695 }
4696 // CHECK-LABEL: @vmulwod_h_bu_b(
4697 // CHECK-NEXT:  entry:
4698 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
4699 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4700 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
4701 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
4702 // CHECK-NEXT:    ret i128 [[TMP3]]
4703 //
4704 v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
4705   return __lsx_vmulwod_h_bu_b(_1, _2);
4706 }
4707 // CHECK-LABEL: @vmulwev_q_d(
4708 // CHECK-NEXT:  entry:
4709 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4710 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4711 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4712 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4713 // CHECK-NEXT:    ret i128 [[TMP3]]
4714 //
4715 v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); }
4716 // CHECK-LABEL: @vmulwod_q_d(
4717 // CHECK-NEXT:  entry:
4718 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4719 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4720 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4721 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4722 // CHECK-NEXT:    ret i128 [[TMP3]]
4723 //
4724 v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); }
4725 // CHECK-LABEL: @vmulwev_q_du(
4726 // CHECK-NEXT:  entry:
4727 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4728 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4729 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4730 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4731 // CHECK-NEXT:    ret i128 [[TMP3]]
4732 //
4733 v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); }
4734 // CHECK-LABEL: @vmulwod_q_du(
4735 // CHECK-NEXT:  entry:
4736 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4737 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4738 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4739 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4740 // CHECK-NEXT:    ret i128 [[TMP3]]
4741 //
4742 v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); }
4743 // CHECK-LABEL: @vmulwev_q_du_d(
4744 // CHECK-NEXT:  entry:
4745 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4746 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4747 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4748 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4749 // CHECK-NEXT:    ret i128 [[TMP3]]
4750 //
4751 v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) {
4752   return __lsx_vmulwev_q_du_d(_1, _2);
4753 }
4754 // CHECK-LABEL: @vmulwod_q_du_d(
4755 // CHECK-NEXT:  entry:
4756 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4757 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4758 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4759 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4760 // CHECK-NEXT:    ret i128 [[TMP3]]
4761 //
4762 v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) {
4763   return __lsx_vmulwod_q_du_d(_1, _2);
4764 }
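// A minimal hand-written usage sketch (not part of the autogenerated checks
// above): assuming the vmulwev_*/vmulwod_* pair multiplies the even- and
// odd-indexed source lanes and widens the products, adding the two results
// yields a pairwise dot product of adjacent unsigned 32-bit lanes. The helper
// name is illustrative only.
v2u64 dot_pairs_u32(v4u32 _1, v4u32 _2) {
  return (v2u64)__lsx_vmulwev_d_wu(_1, _2) + (v2u64)__lsx_vmulwod_d_wu(_1, _2);
}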
4765 // CHECK-LABEL: @vhaddw_q_d(
4766 // CHECK-NEXT:  entry:
4767 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4768 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4769 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4770 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4771 // CHECK-NEXT:    ret i128 [[TMP3]]
4772 //
4773 v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); }
4774 // CHECK-LABEL: @vhaddw_qu_du(
4775 // CHECK-NEXT:  entry:
4776 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4777 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4778 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4779 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4780 // CHECK-NEXT:    ret i128 [[TMP3]]
4781 //
4782 v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); }
4783 // CHECK-LABEL: @vhsubw_q_d(
4784 // CHECK-NEXT:  entry:
4785 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4786 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4787 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4788 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4789 // CHECK-NEXT:    ret i128 [[TMP3]]
4790 //
4791 v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); }
4792 // CHECK-LABEL: @vhsubw_qu_du(
4793 // CHECK-NEXT:  entry:
4794 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4795 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
4796 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
4797 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4798 // CHECK-NEXT:    ret i128 [[TMP3]]
4799 //
4800 v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); }
4801 // CHECK-LABEL: @vmaddwev_d_w(
4802 // CHECK-NEXT:  entry:
4803 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4804 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4805 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4806 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4807 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4808 // CHECK-NEXT:    ret i128 [[TMP4]]
4809 //
4810 v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
4811   return __lsx_vmaddwev_d_w(_1, _2, _3);
4812 }
4813 // CHECK-LABEL: @vmaddwev_w_h(
4814 // CHECK-NEXT:  entry:
4815 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4816 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4817 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4818 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4819 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4820 // CHECK-NEXT:    ret i128 [[TMP4]]
4821 //
4822 v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
4823   return __lsx_vmaddwev_w_h(_1, _2, _3);
4824 }
4825 // CHECK-LABEL: @vmaddwev_h_b(
4826 // CHECK-NEXT:  entry:
4827 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4828 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4829 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4830 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4831 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4832 // CHECK-NEXT:    ret i128 [[TMP4]]
4833 //
4834 v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
4835   return __lsx_vmaddwev_h_b(_1, _2, _3);
4836 }
4837 // CHECK-LABEL: @vmaddwev_d_wu(
4838 // CHECK-NEXT:  entry:
4839 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4840 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4841 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4842 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4843 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4844 // CHECK-NEXT:    ret i128 [[TMP4]]
4845 //
4846 v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
4847   return __lsx_vmaddwev_d_wu(_1, _2, _3);
4848 }
4849 // CHECK-LABEL: @vmaddwev_w_hu(
4850 // CHECK-NEXT:  entry:
4851 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4852 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4853 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4854 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4855 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4856 // CHECK-NEXT:    ret i128 [[TMP4]]
4857 //
4858 v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
4859   return __lsx_vmaddwev_w_hu(_1, _2, _3);
4860 }
4861 // CHECK-LABEL: @vmaddwev_h_bu(
4862 // CHECK-NEXT:  entry:
4863 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4864 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4865 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4866 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4867 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4868 // CHECK-NEXT:    ret i128 [[TMP4]]
4869 //
4870 v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
4871   return __lsx_vmaddwev_h_bu(_1, _2, _3);
4872 }
4873 // CHECK-LABEL: @vmaddwod_d_w(
4874 // CHECK-NEXT:  entry:
4875 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4876 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4877 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4878 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4879 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4880 // CHECK-NEXT:    ret i128 [[TMP4]]
4881 //
4882 v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
4883   return __lsx_vmaddwod_d_w(_1, _2, _3);
4884 }
4885 // CHECK-LABEL: @vmaddwod_w_h(
4886 // CHECK-NEXT:  entry:
4887 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4888 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4889 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4890 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4891 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4892 // CHECK-NEXT:    ret i128 [[TMP4]]
4893 //
4894 v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
4895   return __lsx_vmaddwod_w_h(_1, _2, _3);
4896 }
4897 // CHECK-LABEL: @vmaddwod_h_b(
4898 // CHECK-NEXT:  entry:
4899 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4900 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4901 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4902 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4903 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4904 // CHECK-NEXT:    ret i128 [[TMP4]]
4905 //
4906 v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
4907   return __lsx_vmaddwod_h_b(_1, _2, _3);
4908 }
4909 // CHECK-LABEL: @vmaddwod_d_wu(
4910 // CHECK-NEXT:  entry:
4911 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4912 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4913 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4914 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4915 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4916 // CHECK-NEXT:    ret i128 [[TMP4]]
4917 //
4918 v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
4919   return __lsx_vmaddwod_d_wu(_1, _2, _3);
4920 }
4921 // CHECK-LABEL: @vmaddwod_w_hu(
4922 // CHECK-NEXT:  entry:
4923 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4924 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4925 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4926 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4927 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4928 // CHECK-NEXT:    ret i128 [[TMP4]]
4929 //
4930 v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
4931   return __lsx_vmaddwod_w_hu(_1, _2, _3);
4932 }
4933 // CHECK-LABEL: @vmaddwod_h_bu(
4934 // CHECK-NEXT:  entry:
4935 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4936 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4937 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4938 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4939 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4940 // CHECK-NEXT:    ret i128 [[TMP4]]
4941 //
4942 v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
4943   return __lsx_vmaddwod_h_bu(_1, _2, _3);
4944 }
4945 // CHECK-LABEL: @vmaddwev_d_wu_w(
4946 // CHECK-NEXT:  entry:
4947 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4948 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4949 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4950 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4951 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4952 // CHECK-NEXT:    ret i128 [[TMP4]]
4953 //
4954 v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
4955   return __lsx_vmaddwev_d_wu_w(_1, _2, _3);
4956 }
4957 // CHECK-LABEL: @vmaddwev_w_hu_h(
4958 // CHECK-NEXT:  entry:
4959 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4960 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4961 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4962 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4963 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4964 // CHECK-NEXT:    ret i128 [[TMP4]]
4965 //
4966 v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
4967   return __lsx_vmaddwev_w_hu_h(_1, _2, _3);
4968 }
4969 // CHECK-LABEL: @vmaddwev_h_bu_b(
4970 // CHECK-NEXT:  entry:
4971 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
4972 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
4973 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
4974 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
4975 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
4976 // CHECK-NEXT:    ret i128 [[TMP4]]
4977 //
4978 v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
4979   return __lsx_vmaddwev_h_bu_b(_1, _2, _3);
4980 }
4981 // CHECK-LABEL: @vmaddwod_d_wu_w(
4982 // CHECK-NEXT:  entry:
4983 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
4984 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
4985 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
4986 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
4987 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
4988 // CHECK-NEXT:    ret i128 [[TMP4]]
4989 //
4990 v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
4991   return __lsx_vmaddwod_d_wu_w(_1, _2, _3);
4992 }
4993 // CHECK-LABEL: @vmaddwod_w_hu_h(
4994 // CHECK-NEXT:  entry:
4995 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
4996 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
4997 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
4998 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
4999 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
5000 // CHECK-NEXT:    ret i128 [[TMP4]]
5001 //
5002 v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
5003   return __lsx_vmaddwod_w_hu_h(_1, _2, _3);
5004 }
5005 // CHECK-LABEL: @vmaddwod_h_bu_b(
5006 // CHECK-NEXT:  entry:
5007 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5008 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5009 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5010 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5011 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
5012 // CHECK-NEXT:    ret i128 [[TMP4]]
5013 //
5014 v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
5015   return __lsx_vmaddwod_h_bu_b(_1, _2, _3);
5016 }
5017 // CHECK-LABEL: @vmaddwev_q_d(
5018 // CHECK-NEXT:  entry:
5019 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5020 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5021 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5022 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5023 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5024 // CHECK-NEXT:    ret i128 [[TMP4]]
5025 //
5026 v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5027   return __lsx_vmaddwev_q_d(_1, _2, _3);
5028 }
5029 // CHECK-LABEL: @vmaddwod_q_d(
5030 // CHECK-NEXT:  entry:
5031 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5032 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5033 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5034 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5035 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5036 // CHECK-NEXT:    ret i128 [[TMP4]]
5037 //
5038 v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
5039   return __lsx_vmaddwod_q_d(_1, _2, _3);
5040 }
5041 // CHECK-LABEL: @vmaddwev_q_du(
5042 // CHECK-NEXT:  entry:
5043 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5044 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5045 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5046 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5047 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5048 // CHECK-NEXT:    ret i128 [[TMP4]]
5049 //
5050 v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5051   return __lsx_vmaddwev_q_du(_1, _2, _3);
5052 }
5053 // CHECK-LABEL: @vmaddwod_q_du(
5054 // CHECK-NEXT:  entry:
5055 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5056 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5057 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5058 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5059 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5060 // CHECK-NEXT:    ret i128 [[TMP4]]
5061 //
5062 v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
5063   return __lsx_vmaddwod_q_du(_1, _2, _3);
5064 }
5065 // CHECK-LABEL: @vmaddwev_q_du_d(
5066 // CHECK-NEXT:  entry:
5067 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5068 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5069 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5070 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5071 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5072 // CHECK-NEXT:    ret i128 [[TMP4]]
5073 //
5074 v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5075   return __lsx_vmaddwev_q_du_d(_1, _2, _3);
5076 }
5077 // CHECK-LABEL: @vmaddwod_q_du_d(
5078 // CHECK-NEXT:  entry:
5079 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5080 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5081 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
5082 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
5083 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5084 // CHECK-NEXT:    ret i128 [[TMP4]]
5085 //
5086 v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
5087   return __lsx_vmaddwod_q_du_d(_1, _2, _3);
5088 }
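// A minimal hand-written usage sketch (not part of the autogenerated checks):
// assuming vmaddwev_d_w/vmaddwod_d_w accumulate the widened even- and odd-lane
// 32x32->64-bit products into their first operand, a signed dot product can be
// carried in two 64-bit partial sums. The helper name and loop are
// illustrative only.
v2i64 dot_i32_sketch(const v4i32 *a, const v4i32 *b, int n) {
  v2i64 acc = {0, 0};
  for (int i = 0; i < n; ++i) {
    acc = __lsx_vmaddwev_d_w(acc, a[i], b[i]);
    acc = __lsx_vmaddwod_d_w(acc, a[i], b[i]);
  }
  return acc; /* acc[0] + acc[1] is the scalar result */
}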
5089 // CHECK-LABEL: @vrotr_b(
5090 // CHECK-NEXT:  entry:
5091 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5092 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5093 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5094 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5095 // CHECK-NEXT:    ret i128 [[TMP3]]
5096 //
5097 v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); }
5098 // CHECK-LABEL: @vrotr_h(
5099 // CHECK-NEXT:  entry:
5100 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5101 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5102 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5103 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5104 // CHECK-NEXT:    ret i128 [[TMP3]]
5105 //
5106 v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); }
5107 // CHECK-LABEL: @vrotr_w(
5108 // CHECK-NEXT:  entry:
5109 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5110 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5111 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5112 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5113 // CHECK-NEXT:    ret i128 [[TMP3]]
5114 //
5115 v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); }
5116 // CHECK-LABEL: @vrotr_d(
5117 // CHECK-NEXT:  entry:
5118 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5119 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5120 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5121 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5122 // CHECK-NEXT:    ret i128 [[TMP3]]
5123 //
5124 v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); }
5125 // CHECK-LABEL: @vadd_q(
5126 // CHECK-NEXT:  entry:
5127 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5128 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5129 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5130 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5131 // CHECK-NEXT:    ret i128 [[TMP3]]
5132 //
5133 v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); }
5134 // CHECK-LABEL: @vsub_q(
5135 // CHECK-NEXT:  entry:
5136 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5137 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5138 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5139 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5140 // CHECK-NEXT:    ret i128 [[TMP3]]
5141 //
5142 v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); }
5143 // CHECK-LABEL: @vldrepl_b(
5144 // CHECK-NEXT:  entry:
5145 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
5146 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5147 // CHECK-NEXT:    ret i128 [[TMP1]]
5148 //
5149 v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); }
5150 // CHECK-LABEL: @vldrepl_h(
5151 // CHECK-NEXT:  entry:
5152 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
5153 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
5154 // CHECK-NEXT:    ret i128 [[TMP1]]
5155 //
5156 v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); }
5157 // CHECK-LABEL: @vldrepl_w(
5158 // CHECK-NEXT:  entry:
5159 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
5160 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
5161 // CHECK-NEXT:    ret i128 [[TMP1]]
5162 //
5163 v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); }
5164 // CHECK-LABEL: @vldrepl_d(
5165 // CHECK-NEXT:  entry:
5166 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
5167 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
5168 // CHECK-NEXT:    ret i128 [[TMP1]]
5169 //
5170 v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); }
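// A minimal hand-written usage sketch (not part of the autogenerated checks):
// assuming the second operand of __lsx_vldrepl_w is a constant byte offset
// from the pointer, this broadcasts the 32-bit value at p to every lane. The
// helper name is illustrative only.
v4i32 splat_i32_from(const int *p) { return __lsx_vldrepl_w((void *)p, 0); }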
5171 // CHECK-LABEL: @vmskgez_b(
5172 // CHECK-NEXT:  entry:
5173 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5174 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
5175 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5176 // CHECK-NEXT:    ret i128 [[TMP2]]
5177 //
5178 v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); }
5179 // CHECK-LABEL: @vmsknz_b(
5180 // CHECK-NEXT:  entry:
5181 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5182 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
5183 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5184 // CHECK-NEXT:    ret i128 [[TMP2]]
5185 //
5186 v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); }
5187 // CHECK-LABEL: @vexth_h_b(
5188 // CHECK-NEXT:  entry:
5189 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5190 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
5191 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5192 // CHECK-NEXT:    ret i128 [[TMP2]]
5193 //
5194 v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); }
5195 // CHECK-LABEL: @vexth_w_h(
5196 // CHECK-NEXT:  entry:
5197 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5198 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
5199 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5200 // CHECK-NEXT:    ret i128 [[TMP2]]
5201 //
5202 v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); }
5203 // CHECK-LABEL: @vexth_d_w(
5204 // CHECK-NEXT:  entry:
5205 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5206 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
5207 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5208 // CHECK-NEXT:    ret i128 [[TMP2]]
5209 //
5210 v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); }
5211 // CHECK-LABEL: @vexth_q_d(
5212 // CHECK-NEXT:  entry:
5213 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5214 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
5215 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5216 // CHECK-NEXT:    ret i128 [[TMP2]]
5217 //
5218 v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); }
5219 // CHECK-LABEL: @vexth_hu_bu(
5220 // CHECK-NEXT:  entry:
5221 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5222 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
5223 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5224 // CHECK-NEXT:    ret i128 [[TMP2]]
5225 //
5226 v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); }
5227 // CHECK-LABEL: @vexth_wu_hu(
5228 // CHECK-NEXT:  entry:
5229 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5230 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
5231 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5232 // CHECK-NEXT:    ret i128 [[TMP2]]
5233 //
5234 v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); }
5235 // CHECK-LABEL: @vexth_du_wu(
5236 // CHECK-NEXT:  entry:
5237 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5238 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
5239 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5240 // CHECK-NEXT:    ret i128 [[TMP2]]
5241 //
5242 v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); }
5243 // CHECK-LABEL: @vexth_qu_du(
5244 // CHECK-NEXT:  entry:
5245 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5246 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
5247 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5248 // CHECK-NEXT:    ret i128 [[TMP2]]
5249 //
5250 v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); }
5251 // CHECK-LABEL: @vrotri_b(
5252 // CHECK-NEXT:  entry:
5253 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5254 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
5255 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
5256 // CHECK-NEXT:    ret i128 [[TMP2]]
5257 //
5258 v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); }
5259 // CHECK-LABEL: @vrotri_h(
5260 // CHECK-NEXT:  entry:
5261 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5262 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
5263 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
5264 // CHECK-NEXT:    ret i128 [[TMP2]]
5265 //
5266 v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); }
5267 // CHECK-LABEL: @vrotri_w(
5268 // CHECK-NEXT:  entry:
5269 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5270 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
5271 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5272 // CHECK-NEXT:    ret i128 [[TMP2]]
5273 //
5274 v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); }
5275 // CHECK-LABEL: @vrotri_d(
5276 // CHECK-NEXT:  entry:
5277 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5278 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
5279 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5280 // CHECK-NEXT:    ret i128 [[TMP2]]
5281 //
5282 v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); }
5283 // CHECK-LABEL: @vextl_q_d(
5284 // CHECK-NEXT:  entry:
5285 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5286 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
5287 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5288 // CHECK-NEXT:    ret i128 [[TMP2]]
5289 //
5290 v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); }
5291 // CHECK-LABEL: @vsrlni_b_h(
5292 // CHECK-NEXT:  entry:
5293 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5294 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5295 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5296 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5297 // CHECK-NEXT:    ret i128 [[TMP3]]
5298 //
5299 v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); }
5300 // CHECK-LABEL: @vsrlni_h_w(
5301 // CHECK-NEXT:  entry:
5302 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5303 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5304 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5305 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5306 // CHECK-NEXT:    ret i128 [[TMP3]]
5307 //
5308 v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); }
5309 // CHECK-LABEL: @vsrlni_w_d(
5310 // CHECK-NEXT:  entry:
5311 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5312 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5313 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5314 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5315 // CHECK-NEXT:    ret i128 [[TMP3]]
5316 //
5317 v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); }
5318 // CHECK-LABEL: @vsrlni_d_q(
5319 // CHECK-NEXT:  entry:
5320 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5321 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5322 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5323 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5324 // CHECK-NEXT:    ret i128 [[TMP3]]
5325 //
5326 v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); }
5327 // CHECK-LABEL: @vsrlrni_b_h(
5328 // CHECK-NEXT:  entry:
5329 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5330 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5331 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5332 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5333 // CHECK-NEXT:    ret i128 [[TMP3]]
5334 //
5335 v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); }
5336 // CHECK-LABEL: @vsrlrni_h_w(
5337 // CHECK-NEXT:  entry:
5338 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5339 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5340 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5341 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5342 // CHECK-NEXT:    ret i128 [[TMP3]]
5343 //
5344 v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); }
5345 // CHECK-LABEL: @vsrlrni_w_d(
5346 // CHECK-NEXT:  entry:
5347 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5348 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5349 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5350 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5351 // CHECK-NEXT:    ret i128 [[TMP3]]
5352 //
5353 v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); }
5354 // CHECK-LABEL: @vsrlrni_d_q(
5355 // CHECK-NEXT:  entry:
5356 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5357 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5358 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5359 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5360 // CHECK-NEXT:    ret i128 [[TMP3]]
5361 //
5362 v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); }
5363 // CHECK-LABEL: @vssrlni_b_h(
5364 // CHECK-NEXT:  entry:
5365 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5366 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5367 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5368 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5369 // CHECK-NEXT:    ret i128 [[TMP3]]
5370 //
5371 v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); }
5372 // CHECK-LABEL: @vssrlni_h_w(
5373 // CHECK-NEXT:  entry:
5374 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5375 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5376 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5377 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5378 // CHECK-NEXT:    ret i128 [[TMP3]]
5379 //
5380 v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); }
5381 // CHECK-LABEL: @vssrlni_w_d(
5382 // CHECK-NEXT:  entry:
5383 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5384 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5385 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5386 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5387 // CHECK-NEXT:    ret i128 [[TMP3]]
5388 //
5389 v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); }
5390 // CHECK-LABEL: @vssrlni_d_q(
5391 // CHECK-NEXT:  entry:
5392 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5393 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5394 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5395 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5396 // CHECK-NEXT:    ret i128 [[TMP3]]
5397 //
5398 v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); }
5399 // CHECK-LABEL: @vssrlni_bu_h(
5400 // CHECK-NEXT:  entry:
5401 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5402 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5403 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5404 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5405 // CHECK-NEXT:    ret i128 [[TMP3]]
5406 //
5407 v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); }
5408 // CHECK-LABEL: @vssrlni_hu_w(
5409 // CHECK-NEXT:  entry:
5410 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5411 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5412 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5413 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5414 // CHECK-NEXT:    ret i128 [[TMP3]]
5415 //
5416 v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); }
5417 // CHECK-LABEL: @vssrlni_wu_d(
5418 // CHECK-NEXT:  entry:
5419 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5420 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5421 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5422 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5423 // CHECK-NEXT:    ret i128 [[TMP3]]
5424 //
5425 v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); }
5426 // CHECK-LABEL: @vssrlni_du_q(
5427 // CHECK-NEXT:  entry:
5428 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5429 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5430 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5431 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5432 // CHECK-NEXT:    ret i128 [[TMP3]]
5433 //
5434 v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); }
5435 // CHECK-LABEL: @vssrlrni_b_h(
5436 // CHECK-NEXT:  entry:
5437 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5438 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5439 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5440 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5441 // CHECK-NEXT:    ret i128 [[TMP3]]
5442 //
5443 v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); }
5444 // CHECK-LABEL: @vssrlrni_h_w(
5445 // CHECK-NEXT:  entry:
5446 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5447 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5448 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5449 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5450 // CHECK-NEXT:    ret i128 [[TMP3]]
5451 //
5452 v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); }
5453 // CHECK-LABEL: @vssrlrni_w_d(
5454 // CHECK-NEXT:  entry:
5455 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5456 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5457 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5458 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5459 // CHECK-NEXT:    ret i128 [[TMP3]]
5460 //
5461 v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); }
5462 // CHECK-LABEL: @vssrlrni_d_q(
5463 // CHECK-NEXT:  entry:
5464 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5465 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5466 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5467 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5468 // CHECK-NEXT:    ret i128 [[TMP3]]
5469 //
5470 v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); }
5471 // CHECK-LABEL: @vssrlrni_bu_h(
5472 // CHECK-NEXT:  entry:
5473 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5474 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5475 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5476 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5477 // CHECK-NEXT:    ret i128 [[TMP3]]
5478 //
5479 v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
5480   return __lsx_vssrlrni_bu_h(_1, _2, 1);
5481 }
5482 // CHECK-LABEL: @vssrlrni_hu_w(
5483 // CHECK-NEXT:  entry:
5484 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5485 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5486 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5487 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5488 // CHECK-NEXT:    ret i128 [[TMP3]]
5489 //
5490 v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
5491   return __lsx_vssrlrni_hu_w(_1, _2, 1);
5492 }
5493 // CHECK-LABEL: @vssrlrni_wu_d(
5494 // CHECK-NEXT:  entry:
5495 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5496 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5497 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5498 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5499 // CHECK-NEXT:    ret i128 [[TMP3]]
5500 //
5501 v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
5502   return __lsx_vssrlrni_wu_d(_1, _2, 1);
5503 }
5504 // CHECK-LABEL: @vssrlrni_du_q(
5505 // CHECK-NEXT:  entry:
5506 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5507 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5508 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5509 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5510 // CHECK-NEXT:    ret i128 [[TMP3]]
5511 //
5512 v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
5513   return __lsx_vssrlrni_du_q(_1, _2, 1);
5514 }
5515 // CHECK-LABEL: @vsrani_b_h(
5516 // CHECK-NEXT:  entry:
5517 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5518 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5519 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5520 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5521 // CHECK-NEXT:    ret i128 [[TMP3]]
5522 //
5523 v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); }
5524 // CHECK-LABEL: @vsrani_h_w(
5525 // CHECK-NEXT:  entry:
5526 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5527 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5528 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5529 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5530 // CHECK-NEXT:    ret i128 [[TMP3]]
5531 //
5532 v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); }
5533 // CHECK-LABEL: @vsrani_w_d(
5534 // CHECK-NEXT:  entry:
5535 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5536 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5537 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5538 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5539 // CHECK-NEXT:    ret i128 [[TMP3]]
5540 //
5541 v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); }
5542 // CHECK-LABEL: @vsrani_d_q(
5543 // CHECK-NEXT:  entry:
5544 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5545 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5546 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5547 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5548 // CHECK-NEXT:    ret i128 [[TMP3]]
5549 //
5550 v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); }
5551 // CHECK-LABEL: @vsrarni_b_h(
5552 // CHECK-NEXT:  entry:
5553 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5554 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5555 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5556 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5557 // CHECK-NEXT:    ret i128 [[TMP3]]
5558 //
5559 v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); }
5560 // CHECK-LABEL: @vsrarni_h_w(
5561 // CHECK-NEXT:  entry:
5562 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5563 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5564 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5565 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5566 // CHECK-NEXT:    ret i128 [[TMP3]]
5567 //
5568 v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); }
5569 // CHECK-LABEL: @vsrarni_w_d(
5570 // CHECK-NEXT:  entry:
5571 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5572 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5573 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5574 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5575 // CHECK-NEXT:    ret i128 [[TMP3]]
5576 //
5577 v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); }
5578 // CHECK-LABEL: @vsrarni_d_q(
5579 // CHECK-NEXT:  entry:
5580 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5581 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5582 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5583 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5584 // CHECK-NEXT:    ret i128 [[TMP3]]
5585 //
5586 v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); }
5587 // CHECK-LABEL: @vssrani_b_h(
5588 // CHECK-NEXT:  entry:
5589 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5590 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5591 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5592 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5593 // CHECK-NEXT:    ret i128 [[TMP3]]
5594 //
5595 v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); }
5596 // CHECK-LABEL: @vssrani_h_w(
5597 // CHECK-NEXT:  entry:
5598 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5599 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5600 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5601 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5602 // CHECK-NEXT:    ret i128 [[TMP3]]
5603 //
5604 v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); }
5605 // CHECK-LABEL: @vssrani_w_d(
5606 // CHECK-NEXT:  entry:
5607 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5608 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5609 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5610 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5611 // CHECK-NEXT:    ret i128 [[TMP3]]
5612 //
5613 v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); }
5614 // CHECK-LABEL: @vssrani_d_q(
5615 // CHECK-NEXT:  entry:
5616 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5617 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5618 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5619 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5620 // CHECK-NEXT:    ret i128 [[TMP3]]
5621 //
5622 v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); }
5623 // CHECK-LABEL: @vssrani_bu_h(
5624 // CHECK-NEXT:  entry:
5625 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5626 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5627 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5628 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5629 // CHECK-NEXT:    ret i128 [[TMP3]]
5630 //
5631 v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); }
5632 // CHECK-LABEL: @vssrani_hu_w(
5633 // CHECK-NEXT:  entry:
5634 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5635 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5636 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5637 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5638 // CHECK-NEXT:    ret i128 [[TMP3]]
5639 //
5640 v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); }
5641 // CHECK-LABEL: @vssrani_wu_d(
5642 // CHECK-NEXT:  entry:
5643 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5644 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5645 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5646 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5647 // CHECK-NEXT:    ret i128 [[TMP3]]
5648 //
5649 v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); }
5650 // CHECK-LABEL: @vssrani_du_q(
5651 // CHECK-NEXT:  entry:
5652 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5653 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5654 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5655 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5656 // CHECK-NEXT:    ret i128 [[TMP3]]
5657 //
5658 v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); }
5659 // CHECK-LABEL: @vssrarni_b_h(
5660 // CHECK-NEXT:  entry:
5661 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5662 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5663 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5664 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5665 // CHECK-NEXT:    ret i128 [[TMP3]]
5666 //
5667 v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); }
5668 // CHECK-LABEL: @vssrarni_h_w(
5669 // CHECK-NEXT:  entry:
5670 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5671 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5672 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5673 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5674 // CHECK-NEXT:    ret i128 [[TMP3]]
5675 //
5676 v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); }
5677 // CHECK-LABEL: @vssrarni_w_d(
5678 // CHECK-NEXT:  entry:
5679 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5680 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5681 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5682 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5683 // CHECK-NEXT:    ret i128 [[TMP3]]
5684 //
5685 v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); }
5686 // CHECK-LABEL: @vssrarni_d_q(
5687 // CHECK-NEXT:  entry:
5688 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5689 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5690 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5691 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5692 // CHECK-NEXT:    ret i128 [[TMP3]]
5693 //
5694 v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); }
5695 // CHECK-LABEL: @vssrarni_bu_h(
5696 // CHECK-NEXT:  entry:
5697 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5698 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5699 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
5700 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5701 // CHECK-NEXT:    ret i128 [[TMP3]]
5702 //
5703 v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
5704   return __lsx_vssrarni_bu_h(_1, _2, 1);
5705 }
5706 // CHECK-LABEL: @vssrarni_hu_w(
5707 // CHECK-NEXT:  entry:
5708 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5709 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5710 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
5711 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5712 // CHECK-NEXT:    ret i128 [[TMP3]]
5713 //
5714 v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
5715   return __lsx_vssrarni_hu_w(_1, _2, 1);
5716 }
5717 // CHECK-LABEL: @vssrarni_wu_d(
5718 // CHECK-NEXT:  entry:
5719 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5720 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5721 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5722 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5723 // CHECK-NEXT:    ret i128 [[TMP3]]
5724 //
5725 v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
5726   return __lsx_vssrarni_wu_d(_1, _2, 1);
5727 }
5728 // CHECK-LABEL: @vssrarni_du_q(
5729 // CHECK-NEXT:  entry:
5730 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5731 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5732 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
5733 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5734 // CHECK-NEXT:    ret i128 [[TMP3]]
5735 //
5736 v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
5737   return __lsx_vssrarni_du_q(_1, _2, 1);
5738 }
5739 // CHECK-LABEL: @vpermi_w(
5740 // CHECK-NEXT:  entry:
5741 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5742 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5743 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
5744 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5745 // CHECK-NEXT:    ret i128 [[TMP3]]
5746 //
5747 v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); }
5748 // CHECK-LABEL: @vld(
5749 // CHECK-NEXT:  entry:
5750 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
5751 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5752 // CHECK-NEXT:    ret i128 [[TMP1]]
5753 //
5754 v16i8 vld(void *_1) { return __lsx_vld(_1, 1); }
5755 // CHECK-LABEL: @vst(
5756 // CHECK-NEXT:  entry:
5757 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5758 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1)
5759 // CHECK-NEXT:    ret void
5760 //
5761 void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); }
5762 // CHECK-LABEL: @vssrlrn_b_h(
5763 // CHECK-NEXT:  entry:
5764 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5765 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5766 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5767 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5768 // CHECK-NEXT:    ret i128 [[TMP3]]
5769 //
5770 v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); }
5771 // CHECK-LABEL: @vssrlrn_h_w(
5772 // CHECK-NEXT:  entry:
5773 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5774 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5775 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5776 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5777 // CHECK-NEXT:    ret i128 [[TMP3]]
5778 //
5779 v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); }
5780 // CHECK-LABEL: @vssrlrn_w_d(
5781 // CHECK-NEXT:  entry:
5782 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5783 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5784 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5785 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5786 // CHECK-NEXT:    ret i128 [[TMP3]]
5787 //
5788 v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); }
5789 // CHECK-LABEL: @vssrln_b_h(
5790 // CHECK-NEXT:  entry:
5791 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5792 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
5793 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
5794 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5795 // CHECK-NEXT:    ret i128 [[TMP3]]
5796 //
5797 v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); }
5798 // CHECK-LABEL: @vssrln_h_w(
5799 // CHECK-NEXT:  entry:
5800 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5801 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
5802 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
5803 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
5804 // CHECK-NEXT:    ret i128 [[TMP3]]
5805 //
5806 v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); }
5807 // CHECK-LABEL: @vssrln_w_d(
5808 // CHECK-NEXT:  entry:
5809 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5810 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
5811 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
5812 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5813 // CHECK-NEXT:    ret i128 [[TMP3]]
5814 //
5815 v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); }
5816 // CHECK-LABEL: @vorn_v(
5817 // CHECK-NEXT:  entry:
5818 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5819 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5820 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
5821 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
5822 // CHECK-NEXT:    ret i128 [[TMP3]]
5823 //
5824 v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); }
5825 // CHECK-LABEL: @vldi(
5826 // CHECK-NEXT:  entry:
5827 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
5828 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
5829 // CHECK-NEXT:    ret i128 [[TMP1]]
5830 //
5831 v2i64 vldi() { return __lsx_vldi(1); }
5832 // CHECK-LABEL: @vshuf_b(
5833 // CHECK-NEXT:  entry:
5834 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5835 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
5836 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
5837 // CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
5838 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
5839 // CHECK-NEXT:    ret i128 [[TMP4]]
5840 //
5841 v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
5842   return __lsx_vshuf_b(_1, _2, _3);
5843 }
5844 // CHECK-LABEL: @vldx(
5845 // CHECK-NEXT:  entry:
5846 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
5847 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
5848 // CHECK-NEXT:    ret i128 [[TMP1]]
5849 //
5850 v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); }
5851 // CHECK-LABEL: @vstx(
5852 // CHECK-NEXT:  entry:
5853 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5854 // CHECK-NEXT:    tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1)
5855 // CHECK-NEXT:    ret void
5856 //
5857 void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); }
5858 // CHECK-LABEL: @vextl_qu_du(
5859 // CHECK-NEXT:  entry:
5860 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5861 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]])
5862 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5863 // CHECK-NEXT:    ret i128 [[TMP2]]
5864 //
5865 v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); }
5866 // CHECK-LABEL: @bnz_b(
5867 // CHECK-NEXT:  entry:
5868 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5869 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]])
5870 // CHECK-NEXT:    ret i32 [[TMP1]]
5871 //
5872 int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); }
5873 // CHECK-LABEL: @bnz_d(
5874 // CHECK-NEXT:  entry:
5875 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5876 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]])
5877 // CHECK-NEXT:    ret i32 [[TMP1]]
5878 //
5879 int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); }
5880 // CHECK-LABEL: @bnz_h(
5881 // CHECK-NEXT:  entry:
5882 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5883 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]])
5884 // CHECK-NEXT:    ret i32 [[TMP1]]
5885 //
5886 int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); }
5887 // CHECK-LABEL: @bnz_v(
5888 // CHECK-NEXT:  entry:
5889 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5890 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]])
5891 // CHECK-NEXT:    ret i32 [[TMP1]]
5892 //
5893 int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); }
5894 // CHECK-LABEL: @bnz_w(
5895 // CHECK-NEXT:  entry:
5896 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5897 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]])
5898 // CHECK-NEXT:    ret i32 [[TMP1]]
5899 //
5900 int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); }
5901 // CHECK-LABEL: @bz_b(
5902 // CHECK-NEXT:  entry:
5903 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5904 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]])
5905 // CHECK-NEXT:    ret i32 [[TMP1]]
5906 //
5907 int bz_b(v16u8 _1) { return __lsx_bz_b(_1); }
5908 // CHECK-LABEL: @bz_d(
5909 // CHECK-NEXT:  entry:
5910 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
5911 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]])
5912 // CHECK-NEXT:    ret i32 [[TMP1]]
5913 //
5914 int bz_d(v2u64 _1) { return __lsx_bz_d(_1); }
5915 // CHECK-LABEL: @bz_h(
5916 // CHECK-NEXT:  entry:
5917 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
5918 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]])
5919 // CHECK-NEXT:    ret i32 [[TMP1]]
5920 //
5921 int bz_h(v8u16 _1) { return __lsx_bz_h(_1); }
5922 // CHECK-LABEL: @bz_v(
5923 // CHECK-NEXT:  entry:
5924 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
5925 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]])
5926 // CHECK-NEXT:    ret i32 [[TMP1]]
5927 //
5928 int bz_v(v16u8 _1) { return __lsx_bz_v(_1); }
5929 // CHECK-LABEL: @bz_w(
5930 // CHECK-NEXT:  entry:
5931 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
5932 // CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]])
5933 // CHECK-NEXT:    ret i32 [[TMP1]]
5934 //
5935 int bz_w(v4u32 _1) { return __lsx_bz_w(_1); }
5936 // CHECK-LABEL: @vfcmp_caf_d(
5937 // CHECK-NEXT:  entry:
5938 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5939 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5940 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5941 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5942 // CHECK-NEXT:    ret i128 [[TMP3]]
5943 //
5944 v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); }
5945 // CHECK-LABEL: @vfcmp_caf_s(
5946 // CHECK-NEXT:  entry:
5947 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
5948 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
5949 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
5950 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5951 // CHECK-NEXT:    ret i128 [[TMP3]]
5952 //
5953 v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); }
5954 // CHECK-LABEL: @vfcmp_ceq_d(
5955 // CHECK-NEXT:  entry:
5956 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5957 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5958 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5959 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5960 // CHECK-NEXT:    ret i128 [[TMP3]]
5961 //
5962 v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); }
5963 // CHECK-LABEL: @vfcmp_ceq_s(
5964 // CHECK-NEXT:  entry:
5965 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
5966 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
5967 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
5968 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5969 // CHECK-NEXT:    ret i128 [[TMP3]]
5970 //
5971 v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); }
5972 // CHECK-LABEL: @vfcmp_cle_d(
5973 // CHECK-NEXT:  entry:
5974 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5975 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5976 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5977 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5978 // CHECK-NEXT:    ret i128 [[TMP3]]
5979 //
5980 v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); }
5981 // CHECK-LABEL: @vfcmp_cle_s(
5982 // CHECK-NEXT:  entry:
5983 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
5984 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
5985 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
5986 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5987 // CHECK-NEXT:    ret i128 [[TMP3]]
5988 //
5989 v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); }
5990 // CHECK-LABEL: @vfcmp_clt_d(
5991 // CHECK-NEXT:  entry:
5992 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
5993 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
5994 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
5995 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5996 // CHECK-NEXT:    ret i128 [[TMP3]]
5997 //
5998 v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); }
5999 // CHECK-LABEL: @vfcmp_clt_s(
6000 // CHECK-NEXT:  entry:
6001 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6002 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6003 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6004 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6005 // CHECK-NEXT:    ret i128 [[TMP3]]
6006 //
6007 v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); }
6008 // CHECK-LABEL: @vfcmp_cne_d(
6009 // CHECK-NEXT:  entry:
6010 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6011 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6012 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6013 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6014 // CHECK-NEXT:    ret i128 [[TMP3]]
6015 //
6016 v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); }
6017 // CHECK-LABEL: @vfcmp_cne_s(
6018 // CHECK-NEXT:  entry:
6019 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6020 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6021 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6022 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6023 // CHECK-NEXT:    ret i128 [[TMP3]]
6024 //
6025 v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); }
6026 // CHECK-LABEL: @vfcmp_cor_d(
6027 // CHECK-NEXT:  entry:
6028 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6029 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6030 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6031 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6032 // CHECK-NEXT:    ret i128 [[TMP3]]
6033 //
6034 v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); }
6035 // CHECK-LABEL: @vfcmp_cor_s(
6036 // CHECK-NEXT:  entry:
6037 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6038 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6039 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6040 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6041 // CHECK-NEXT:    ret i128 [[TMP3]]
6042 //
6043 v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); }
6044 // CHECK-LABEL: @vfcmp_cueq_d(
6045 // CHECK-NEXT:  entry:
6046 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6047 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6048 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6049 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6050 // CHECK-NEXT:    ret i128 [[TMP3]]
6051 //
6052 v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); }
6053 // CHECK-LABEL: @vfcmp_cueq_s(
6054 // CHECK-NEXT:  entry:
6055 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6056 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6057 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6058 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6059 // CHECK-NEXT:    ret i128 [[TMP3]]
6060 //
6061 v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); }
6062 // CHECK-LABEL: @vfcmp_cule_d(
6063 // CHECK-NEXT:  entry:
6064 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6065 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6066 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6067 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6068 // CHECK-NEXT:    ret i128 [[TMP3]]
6069 //
6070 v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); }
6071 // CHECK-LABEL: @vfcmp_cule_s(
6072 // CHECK-NEXT:  entry:
6073 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6074 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6075 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6076 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6077 // CHECK-NEXT:    ret i128 [[TMP3]]
6078 //
6079 v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); }
6080 // CHECK-LABEL: @vfcmp_cult_d(
6081 // CHECK-NEXT:  entry:
6082 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6083 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6084 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6085 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6086 // CHECK-NEXT:    ret i128 [[TMP3]]
6087 //
6088 v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); }
6089 // CHECK-LABEL: @vfcmp_cult_s(
6090 // CHECK-NEXT:  entry:
6091 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6092 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6093 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6094 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6095 // CHECK-NEXT:    ret i128 [[TMP3]]
6096 //
6097 v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); }
6098 // CHECK-LABEL: @vfcmp_cun_d(
6099 // CHECK-NEXT:  entry:
6100 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6101 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6102 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6103 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6104 // CHECK-NEXT:    ret i128 [[TMP3]]
6105 //
6106 v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); }
6107 // CHECK-LABEL: @vfcmp_cune_d(
6108 // CHECK-NEXT:  entry:
6109 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6110 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6111 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6112 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6113 // CHECK-NEXT:    ret i128 [[TMP3]]
6114 //
6115 v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); }
6116 // CHECK-LABEL: @vfcmp_cune_s(
6117 // CHECK-NEXT:  entry:
6118 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6119 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6120 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6121 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6122 // CHECK-NEXT:    ret i128 [[TMP3]]
6123 //
6124 v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); }
6125 // CHECK-LABEL: @vfcmp_cun_s(
6126 // CHECK-NEXT:  entry:
6127 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6128 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6129 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6130 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6131 // CHECK-NEXT:    ret i128 [[TMP3]]
6132 //
6133 v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); }
6134 // CHECK-LABEL: @vfcmp_saf_d(
6135 // CHECK-NEXT:  entry:
6136 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6137 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6138 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6139 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6140 // CHECK-NEXT:    ret i128 [[TMP3]]
6141 //
6142 v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); }
6143 // CHECK-LABEL: @vfcmp_saf_s(
6144 // CHECK-NEXT:  entry:
6145 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6146 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6147 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6148 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6149 // CHECK-NEXT:    ret i128 [[TMP3]]
6150 //
6151 v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); }
6152 // CHECK-LABEL: @vfcmp_seq_d(
6153 // CHECK-NEXT:  entry:
6154 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6155 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6156 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6157 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6158 // CHECK-NEXT:    ret i128 [[TMP3]]
6159 //
6160 v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); }
6161 // CHECK-LABEL: @vfcmp_seq_s(
6162 // CHECK-NEXT:  entry:
6163 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6164 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6165 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6166 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6167 // CHECK-NEXT:    ret i128 [[TMP3]]
6168 //
6169 v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); }
6170 // CHECK-LABEL: @vfcmp_sle_d(
6171 // CHECK-NEXT:  entry:
6172 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6173 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6174 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6175 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6176 // CHECK-NEXT:    ret i128 [[TMP3]]
6177 //
6178 v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); }
6179 // CHECK-LABEL: @vfcmp_sle_s(
6180 // CHECK-NEXT:  entry:
6181 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6182 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6183 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6184 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6185 // CHECK-NEXT:    ret i128 [[TMP3]]
6186 //
6187 v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); }
6188 // CHECK-LABEL: @vfcmp_slt_d(
6189 // CHECK-NEXT:  entry:
6190 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6191 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6192 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6193 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6194 // CHECK-NEXT:    ret i128 [[TMP3]]
6195 //
6196 v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); }
6197 // CHECK-LABEL: @vfcmp_slt_s(
6198 // CHECK-NEXT:  entry:
6199 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6200 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6201 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6202 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6203 // CHECK-NEXT:    ret i128 [[TMP3]]
6204 //
6205 v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); }
6206 // CHECK-LABEL: @vfcmp_sne_d(
6207 // CHECK-NEXT:  entry:
6208 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6209 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6210 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6211 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6212 // CHECK-NEXT:    ret i128 [[TMP3]]
6213 //
6214 v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); }
6215 // CHECK-LABEL: @vfcmp_sne_s(
6216 // CHECK-NEXT:  entry:
6217 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6218 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6219 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6220 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6221 // CHECK-NEXT:    ret i128 [[TMP3]]
6222 //
6223 v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); }
6224 // CHECK-LABEL: @vfcmp_sor_d(
6225 // CHECK-NEXT:  entry:
6226 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6227 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6228 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6229 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6230 // CHECK-NEXT:    ret i128 [[TMP3]]
6231 //
6232 v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); }
6233 // CHECK-LABEL: @vfcmp_sor_s(
6234 // CHECK-NEXT:  entry:
6235 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6236 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6237 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6238 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6239 // CHECK-NEXT:    ret i128 [[TMP3]]
6240 //
6241 v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); }
6242 // CHECK-LABEL: @vfcmp_sueq_d(
6243 // CHECK-NEXT:  entry:
6244 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6245 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6246 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6247 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6248 // CHECK-NEXT:    ret i128 [[TMP3]]
6249 //
6250 v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); }
6251 // CHECK-LABEL: @vfcmp_sueq_s(
6252 // CHECK-NEXT:  entry:
6253 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6254 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6255 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6256 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6257 // CHECK-NEXT:    ret i128 [[TMP3]]
6258 //
6259 v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); }
6260 // CHECK-LABEL: @vfcmp_sule_d(
6261 // CHECK-NEXT:  entry:
6262 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6263 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6264 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6265 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6266 // CHECK-NEXT:    ret i128 [[TMP3]]
6267 //
6268 v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); }
6269 // CHECK-LABEL: @vfcmp_sule_s(
6270 // CHECK-NEXT:  entry:
6271 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6272 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6273 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6274 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6275 // CHECK-NEXT:    ret i128 [[TMP3]]
6276 //
6277 v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); }
6278 // CHECK-LABEL: @vfcmp_sult_d(
6279 // CHECK-NEXT:  entry:
6280 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6281 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6282 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6283 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6284 // CHECK-NEXT:    ret i128 [[TMP3]]
6285 //
6286 v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); }
6287 // CHECK-LABEL: @vfcmp_sult_s(
6288 // CHECK-NEXT:  entry:
6289 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6290 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6291 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6292 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6293 // CHECK-NEXT:    ret i128 [[TMP3]]
6294 //
6295 v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); }
6296 // CHECK-LABEL: @vfcmp_sun_d(
6297 // CHECK-NEXT:  entry:
6298 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6299 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6300 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6301 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6302 // CHECK-NEXT:    ret i128 [[TMP3]]
6303 //
6304 v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); }
6305 // CHECK-LABEL: @vfcmp_sune_d(
6306 // CHECK-NEXT:  entry:
6307 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
6308 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
6309 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
6310 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
6311 // CHECK-NEXT:    ret i128 [[TMP3]]
6312 //
6313 v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); }
6314 // CHECK-LABEL: @vfcmp_sune_s(
6315 // CHECK-NEXT:  entry:
6316 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6317 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6318 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6319 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6320 // CHECK-NEXT:    ret i128 [[TMP3]]
6321 //
6322 v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); }
6323 // CHECK-LABEL: @vfcmp_sun_s(
6324 // CHECK-NEXT:  entry:
6325 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
6326 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
6327 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
6328 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
6329 // CHECK-NEXT:    ret i128 [[TMP3]]
6330 //
6331 v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); }
6332 // CHECK-LABEL: @vrepli_b(
6333 // CHECK-NEXT:  entry:
6334 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1)
6335 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
6336 // CHECK-NEXT:    ret i128 [[TMP1]]
6337 //
6338 v16i8 vrepli_b() { return __lsx_vrepli_b(1); }
6339 // CHECK-LABEL: @vrepli_d(
6340 // CHECK-NEXT:  entry:
6341 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1)
6342 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
6343 // CHECK-NEXT:    ret i128 [[TMP1]]
6344 //
6345 v2i64 vrepli_d() { return __lsx_vrepli_d(1); }
6346 // CHECK-LABEL: @vrepli_h(
6347 // CHECK-NEXT:  entry:
6348 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
6349 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
6350 // CHECK-NEXT:    ret i128 [[TMP1]]
6351 //
6352 v8i16 vrepli_h() { return __lsx_vrepli_h(1); }
6353 // CHECK-LABEL: @vrepli_w(
6354 // CHECK-NEXT:  entry:
6355 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
6356 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
6357 // CHECK-NEXT:    ret i128 [[TMP1]]
6358 //
6359 v4i32 vrepli_w() { return __lsx_vrepli_w(1); }