// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: webassembly-registered-target, asserts

// FIXME: This should not be using -O2 and implicitly testing the entire IR opt pipeline.

// RUN: %clang %s -O2 -emit-llvm -S -o - -target wasm32-unknown-unknown -msimd128 -Wall -Weverything -Wno-missing-prototypes -fno-lax-vector-conversions -Werror | FileCheck %s

#include <wasm_simd128.h>
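
// Note: wasm_simd128.h defines v128_t as a generic 16-byte vector of int32_t,
// so it lowers to <4 x i32> in LLVM IR; intrinsics operating on lanes of other
// widths therefore show up in the checks below as bitcasts to and from
// <4 x i32>.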

// CHECK-LABEL: @test_v128_load(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2:![0-9]+]]
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_v128_load(const void *mem) {
  return wasm_v128_load(mem);
}

// CHECK-LABEL: @test_v128_load8_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i64 0
// CHECK-NEXT:    [[VECINIT16_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[VECINIT16_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_v128_load8_splat(const void *mem) {
  return wasm_v128_load8_splat(mem);
}

// CHECK-LABEL: @test_v128_load16_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
// CHECK-NEXT:    [[VECINIT8_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT8_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_v128_load16_splat(const void *mem) {
  return wasm_v128_load16_splat(mem);
}

// CHECK-LABEL: @test_v128_load32_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
// CHECK-NEXT:    [[VECINIT4_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT:    ret <4 x i32> [[VECINIT4_I]]
//
v128_t test_v128_load32_splat(const void *mem) {
  return wasm_v128_load32_splat(mem);
}

// CHECK-LABEL: @test_v128_load64_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_v128_load64_splat(const void *mem) {
  return wasm_v128_load64_splat(mem);
}

// CHECK-LABEL: @test_i16x8_load8x8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i16x8_load8x8(const void *mem) {
  return wasm_i16x8_load8x8(mem);
}

// CHECK-LABEL: @test_u16x8_load8x8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_u16x8_load8x8(const void *mem) {
  return wasm_u16x8_load8x8(mem);
}

// CHECK-LABEL: @test_i32x4_load16x4(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
//
v128_t test_i32x4_load16x4(const void *mem) {
  return wasm_i32x4_load16x4(mem);
}

// CHECK-LABEL: @test_u32x4_load16x4(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
//
v128_t test_u32x4_load16x4(const void *mem) {
  return wasm_u32x4_load16x4(mem);
}

// CHECK-LABEL: @test_i64x2_load32x2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i64x2_load32x2(const void *mem) {
  return wasm_i64x2_load32x2(mem);
}

// CHECK-LABEL: @test_u64x2_load32x2(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[TMP0]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_u64x2_load32x2(const void *mem) {
  return wasm_u64x2_load32x2(mem);
}

// CHECK-LABEL: @test_v128_load32_zero(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i64 0
// CHECK-NEXT:    ret <4 x i32> [[VECINIT4_I]]
//
v128_t test_v128_load32_zero(const void *mem) {
  return wasm_v128_load32_zero(mem);
}

// CHECK-LABEL: @test_v128_load64_zero(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i64 0
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINIT2_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_v128_load64_zero(const void *mem) {
  return wasm_v128_load64_zero(mem);
}

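// Note: the lane index argument to the load/store lane intrinsics must be a
// constant expression in range for the lane width; the tests below use the
// highest valid index for each width.
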
// CHECK-LABEL: @test_v128_load8_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[TMP0]], i64 15
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_v128_load8_lane(const uint8_t *ptr, v128_t vec) {
  return wasm_v128_load8_lane(ptr, vec, 15);
}

// CHECK-LABEL: @test_v128_load16_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[TMP0]], i64 7
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_v128_load16_lane(const uint16_t *ptr, v128_t vec) {
  return wasm_v128_load16_lane(ptr, vec, 7);
}

// CHECK-LABEL: @test_v128_load32_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x i32> [[VEC:%.*]], i32 [[TMP0]], i64 3
// CHECK-NEXT:    ret <4 x i32> [[VECINS_I]]
//
v128_t test_v128_load32_lane(const uint32_t *ptr, v128_t vec) {
  return wasm_v128_load32_lane(ptr, vec, 3);
}

// CHECK-LABEL: @test_v128_load64_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP0]], i64 1
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_v128_load64_lane(const uint64_t *ptr, v128_t vec) {
  return wasm_v128_load64_lane(ptr, vec, 1);
}

// CHECK-LABEL: @test_v128_store(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    store <4 x i32> [[A:%.*]], ptr [[MEM:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test_v128_store(void *mem, v128_t a) {
  wasm_v128_store(mem, a);
}

// CHECK-LABEL: @test_v128_store8_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <16 x i8>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT:    store i8 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test_v128_store8_lane(uint8_t *ptr, v128_t vec) {
  wasm_v128_store8_lane(ptr, vec, 15);
}

// CHECK-LABEL: @test_v128_store16_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <8 x i16>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT:    store i16 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test_v128_store16_lane(uint16_t *ptr, v128_t vec) {
  wasm_v128_store16_lane(ptr, vec, 7);
}

// CHECK-LABEL: @test_v128_store32_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x i32> [[VEC:%.*]], i64 3
// CHECK-NEXT:    store i32 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test_v128_store32_lane(uint32_t *ptr, v128_t vec) {
  wasm_v128_store32_lane(ptr, vec, 3);
}

// CHECK-LABEL: @test_v128_store64_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[VEC:%.*]] to <2 x i64>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT:    store i64 [[VECEXT_I]], ptr [[PTR:%.*]], align 1, !tbaa [[TBAA2]]
// CHECK-NEXT:    ret void
//
void test_v128_store64_lane(uint64_t *ptr, v128_t vec) {
  wasm_v128_store64_lane(ptr, vec, 1);
}

// CHECK-LABEL: @test_i8x16_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3
// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4
// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5
// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6
// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i64 7
// CHECK-NEXT:    [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8
// CHECK-NEXT:    [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9
// CHECK-NEXT:    [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10
// CHECK-NEXT:    [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11
// CHECK-NEXT:    [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12
// CHECK-NEXT:    [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13
// CHECK-NEXT:    [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14
// CHECK-NEXT:    [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i8x16_make(int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12, int8_t c13, int8_t c14, int8_t c15) {
  return wasm_i8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15);
}

// CHECK-LABEL: @test_u8x16_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 [[C2:%.*]], i64 2
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 [[C3:%.*]], i64 3
// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 [[C4:%.*]], i64 4
// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 [[C5:%.*]], i64 5
// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 [[C6:%.*]], i64 6
// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 [[C7:%.*]], i64 7
// CHECK-NEXT:    [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 [[C8:%.*]], i64 8
// CHECK-NEXT:    [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 [[C9:%.*]], i64 9
// CHECK-NEXT:    [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 [[C10:%.*]], i64 10
// CHECK-NEXT:    [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 [[C11:%.*]], i64 11
// CHECK-NEXT:    [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 [[C12:%.*]], i64 12
// CHECK-NEXT:    [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 [[C13:%.*]], i64 13
// CHECK-NEXT:    [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 [[C14:%.*]], i64 14
// CHECK-NEXT:    [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 [[C15:%.*]], i64 15
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u8x16_make(uint8_t c0, uint8_t c1, uint8_t c2, uint8_t c3, uint8_t c4, uint8_t c5, uint8_t c6, uint8_t c7, uint8_t c8, uint8_t c9, uint8_t c10, uint8_t c11, uint8_t c12, uint8_t c13, uint8_t c14, uint8_t c15) {
  return wasm_u8x16_make(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15);
}

// CHECK-LABEL: @test_i16x8_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3
// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4
// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5
// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6
// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4, int16_t c5, int16_t c6, int16_t c7) {
  return wasm_i16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
}

// CHECK-LABEL: @test_u16x8_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[C2:%.*]], i64 2
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[C3:%.*]], i64 3
// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[C4:%.*]], i64 4
// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[C5:%.*]], i64 5
// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[C6:%.*]], i64 6
// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[C7:%.*]], i64 7
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u16x8_make(uint16_t c0, uint16_t c1, uint16_t c2, uint16_t c3, uint16_t c4, uint16_t c5, uint16_t c6, uint16_t c7) {
  return wasm_u16x8_make(c0, c1, c2, c3, c4, c5, c6, c7);
}

// CHECK-LABEL: @test_i32x4_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3
// CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
//
v128_t test_i32x4_make(int32_t c0, int32_t c1, int32_t c2, int32_t c3) {
  return wasm_i32x4_make(c0, c1, c2, c3);
}

// CHECK-LABEL: @test_u32x4_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[C2:%.*]], i64 2
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[C3:%.*]], i64 3
// CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
//
v128_t test_u32x4_make(uint32_t c0, uint32_t c1, uint32_t c2, uint32_t c3) {
  return wasm_u32x4_make(c0, c1, c2, c3);
}

// CHECK-LABEL: @test_i64x2_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i64x2_make(int64_t c0, int64_t c1) {
  return wasm_i64x2_make(c0, c1);
}

// CHECK-LABEL: @test_u64x2_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 [[C1:%.*]], i64 1
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u64x2_make(uint64_t c0, uint64_t c1) {
  return wasm_u64x2_make(c0, c1);
}

// CHECK-LABEL: @test_f32x4_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[C1:%.*]], i64 1
// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[C2:%.*]], i64 2
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[C3:%.*]], i64 3
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT3_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_f32x4_make(float c0, float c1, float c2, float c3) {
  return wasm_f32x4_make(c0, c1, c2, c3);
}

// CHECK-LABEL: @test_f64x2_make(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[C0:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[C1:%.*]], i64 1
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_f64x2_make(double c0, double c1) {
  return wasm_f64x2_make(c0, c1);
}

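// Note: the wasm_*_const intrinsics require constant arguments, so each
// result below folds to a single vector constant, shown re-interpreted as
// <4 x i32>.
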
// CHECK-LABEL: @test_i8x16_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
//
v128_t test_i8x16_const(void) {
  return wasm_i8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

// CHECK-LABEL: @test_u8x16_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 50462976, i32 117835012, i32 185207048, i32 252579084>
//
v128_t test_u8x16_const(void) {
  return wasm_u8x16_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

// CHECK-LABEL: @test_i16x8_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
//
v128_t test_i16x8_const(void) {
  return wasm_i16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
}

// CHECK-LABEL: @test_u16x8_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 65536, i32 196610, i32 327684, i32 458758>
//
v128_t test_u16x8_const(void) {
  return wasm_u16x8_const(0, 1, 2, 3, 4, 5, 6, 7);
}

// CHECK-LABEL: @test_i32x4_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//
v128_t test_i32x4_const(void) {
  return wasm_i32x4_const(0, 1, 2, 3);
}

// CHECK-LABEL: @test_u32x4_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//
v128_t test_u32x4_const(void) {
  return wasm_u32x4_const(0, 1, 2, 3);
}

// CHECK-LABEL: @test_i64x2_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
//
v128_t test_i64x2_const(void) {
  return wasm_i64x2_const(0, 1);
}

// CHECK-LABEL: @test_u64x2_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 1, i32 0>
//
v128_t test_u64x2_const(void) {
  return wasm_u64x2_const(0, 1);
}

// CHECK-LABEL: @test_f32x4_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>
//
v128_t test_f32x4_const(void) {
  return wasm_f32x4_const(0, 1, 2, 3);
}

// CHECK-LABEL: @test_f64x2_const(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 1072693248>
//
v128_t test_f64x2_const(void) {
  return wasm_f64x2_const(0, 1);
}

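// The const_splat forms fold the same way; 64-bit splats print as explicit
// <4 x i32> element lists below because their byte pattern is not a uniform
// i32 splat.
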
// CHECK-LABEL: @test_i8x16_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> splat (i32 707406378)
//
v128_t test_i8x16_const_splat(void) {
  return wasm_i8x16_const_splat(42);
}

// CHECK-LABEL: @test_u8x16_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> splat (i32 707406378)
//
v128_t test_u8x16_const_splat(void) {
  return wasm_u8x16_const_splat(42);
}

// CHECK-LABEL: @test_i16x8_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> splat (i32 2752554)
//
v128_t test_i16x8_const_splat(void) {
  return wasm_i16x8_const_splat(42);
}

// CHECK-LABEL: @test_u16x8_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> splat (i32 2752554)
//
v128_t test_u16x8_const_splat(void) {
  return wasm_u16x8_const_splat(42);
}

// CHECK-LABEL: @test_i32x4_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> splat (i32 42)
//
v128_t test_i32x4_const_splat(void) {
  return wasm_i32x4_const_splat(42);
}

// CHECK-LABEL: @test_u32x4_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> splat (i32 42)
//
v128_t test_u32x4_const_splat(void) {
  return wasm_u32x4_const_splat(42);
}

// CHECK-LABEL: @test_i64x2_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
//
v128_t test_i64x2_const_splat(void) {
  return wasm_i64x2_const_splat(42);
}

// CHECK-LABEL: @test_u64x2_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 42, i32 0, i32 42, i32 0>
//
v128_t test_u64x2_const_splat(void) {
  return wasm_u64x2_const_splat(42);
}

// CHECK-LABEL: @test_f32x4_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> splat (i32 1109917696)
//
v128_t test_f32x4_const_splat(void) {
  return wasm_f32x4_const_splat(42);
}

// CHECK-LABEL: @test_f64x2_const_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    ret <4 x i32> <i32 0, i32 1078263808, i32 0, i32 1078263808>
//
v128_t test_f64x2_const_splat(void) {
  return wasm_f64x2_const_splat(42);
}

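// Non-constant splats take a runtime scalar and lower to an insertelement
// into lane 0 followed by a zero-mask shufflevector that broadcasts it.
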
// CHECK-LABEL: @test_i8x16_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i8x16_splat(int8_t a) {
  return wasm_i8x16_splat(a);
}

// CHECK-LABEL: @test_u8x16_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT15_I:%.*]] = shufflevector <16 x i8> [[VECINIT_I]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[VECINIT15_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u8x16_splat(uint8_t a) {
  return wasm_u8x16_splat(a);
}

// CHECK-LABEL: @test_i8x16_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT:    ret i8 [[VECEXT_I]]
//
int8_t test_i8x16_extract_lane(v128_t a) {
  return wasm_i8x16_extract_lane(a, 15);
}

// CHECK-LABEL: @test_u8x16_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <16 x i8> [[TMP0]], i64 15
// CHECK-NEXT:    ret i8 [[VECEXT_I]]
//
uint8_t test_u8x16_extract_lane(v128_t a) {
  return wasm_u8x16_extract_lane(a, 15);
}

// CHECK-LABEL: @test_i8x16_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i8x16_replace_lane(v128_t a, int8_t b) {
  return wasm_i8x16_replace_lane(a, 15, b);
}

// CHECK-LABEL: @test_u8x16_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <16 x i8> [[TMP0]], i8 [[B:%.*]], i64 15
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_u8x16_replace_lane(v128_t a, uint8_t b) {
  return wasm_u8x16_replace_lane(a, 15, b);
}

// CHECK-LABEL: @test_i16x8_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i16x8_splat(int16_t a) {
  return wasm_i16x8_splat(a);
}

// CHECK-LABEL: @test_u16x8_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x i16> poison, i16 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT7_I:%.*]] = shufflevector <8 x i16> [[VECINIT_I]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u16x8_splat(uint16_t a) {
  return wasm_u16x8_splat(a);
}

// CHECK-LABEL: @test_i16x8_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT:    ret i16 [[VECEXT_I]]
//
int16_t test_i16x8_extract_lane(v128_t a) {
  return wasm_i16x8_extract_lane(a, 7);
}

// CHECK-LABEL: @test_u16x8_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <8 x i16> [[TMP0]], i64 7
// CHECK-NEXT:    ret i16 [[VECEXT_I]]
//
uint16_t test_u16x8_extract_lane(v128_t a) {
  return wasm_u16x8_extract_lane(a, 7);
}

// CHECK-LABEL: @test_i16x8_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i16x8_replace_lane(v128_t a, int16_t b) {
  return wasm_i16x8_replace_lane(a, 7, b);
}

// CHECK-LABEL: @test_u16x8_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[B:%.*]], i64 7
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_u16x8_replace_lane(v128_t a, uint16_t b) {
  return wasm_u16x8_replace_lane(a, 7, b);
}

// CHECK-LABEL: @test_i32x4_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
//
v128_t test_i32x4_splat(int32_t a) {
  return wasm_i32x4_splat(a);
}

// CHECK-LABEL: @test_u32x4_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT3_I:%.*]] = shufflevector <4 x i32> [[VECINIT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT:    ret <4 x i32> [[VECINIT3_I]]
//
v128_t test_u32x4_splat(uint32_t a) {
  return wasm_u32x4_splat(a);
}

// CHECK-LABEL: @test_i32x4_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3
// CHECK-NEXT:    ret i32 [[VECEXT_I]]
//
int32_t test_i32x4_extract_lane(v128_t a) {
  return wasm_i32x4_extract_lane(a, 3);
}

// CHECK-LABEL: @test_u32x4_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x i32> [[A:%.*]], i64 3
// CHECK-NEXT:    ret i32 [[VECEXT_I]]
//
uint32_t test_u32x4_extract_lane(v128_t a) {
  return wasm_u32x4_extract_lane(a, 3);
}

// CHECK-LABEL: @test_i32x4_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3
// CHECK-NEXT:    ret <4 x i32> [[VECINS_I]]
//
v128_t test_i32x4_replace_lane(v128_t a, int32_t b) {
  return wasm_i32x4_replace_lane(a, 3, b);
}

// CHECK-LABEL: @test_u32x4_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x i32> [[A:%.*]], i32 [[B:%.*]], i64 3
// CHECK-NEXT:    ret <4 x i32> [[VECINS_I]]
//
v128_t test_u32x4_replace_lane(v128_t a, uint32_t b) {
  return wasm_u32x4_replace_lane(a, 3, b);
}

// CHECK-LABEL: @test_i64x2_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i64x2_splat(int64_t a) {
  return wasm_i64x2_splat(a);
}

// CHECK-LABEL: @test_u64x2_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = shufflevector <2 x i64> [[VECINIT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u64x2_splat(uint64_t a) {
  return wasm_u64x2_splat(a);
}

// CHECK-LABEL: @test_i64x2_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT:    ret i64 [[VECEXT_I]]
//
int64_t test_i64x2_extract_lane(v128_t a) {
  return wasm_i64x2_extract_lane(a, 1);
}

// CHECK-LABEL: @test_u64x2_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
// CHECK-NEXT:    ret i64 [[VECEXT_I]]
//
uint64_t test_u64x2_extract_lane(v128_t a) {
  return wasm_u64x2_extract_lane(a, 1);
}

// CHECK-LABEL: @test_i64x2_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i64x2_replace_lane(v128_t a, int64_t b) {
  return wasm_i64x2_replace_lane(a, 1, b);
}

// CHECK-LABEL: @test_u64x2_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_u64x2_replace_lane(v128_t a, uint64_t b) {
  return wasm_u64x2_replace_lane(a, 1, b);
}

// CHECK-LABEL: @test_f32x4_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[VECINIT_I]] to <4 x i32>
// CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_f32x4_splat(float a) {
  return wasm_f32x4_splat(a);
}

// CHECK-LABEL: @test_f32x4_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3
// CHECK-NEXT:    ret float [[VECEXT_I]]
//
float test_f32x4_extract_lane(v128_t a) {
  return wasm_f32x4_extract_lane(a, 3);
}

// CHECK-LABEL: @test_f32x4_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <4 x float> [[TMP0]], float [[B:%.*]], i64 3
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_f32x4_replace_lane(v128_t a, float b) {
  return wasm_f32x4_replace_lane(a, 3, b);
}

// CHECK-LABEL: @test_f64x2_splat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0
// CHECK-NEXT:    [[VECINIT1_I:%.*]] = shufflevector <2 x double> [[VECINIT_I]], <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_f64x2_splat(double a) {
  return wasm_f64x2_splat(a);
}

// CHECK-LABEL: @test_f64x2_extract_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i64 1
// CHECK-NEXT:    ret double [[VECEXT_I]]
//
double test_f64x2_extract_lane(v128_t a) {
  return wasm_f64x2_extract_lane(a, 1);
}

// CHECK-LABEL: @test_f64x2_replace_lane(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[VECINS_I:%.*]] = insertelement <2 x double> [[TMP0]], double [[B:%.*]], i64 1
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[VECINS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_f64x2_replace_lane(v128_t a, double b) {
  return wasm_f64x2_replace_lane(a, 1, b);
}

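// Comparisons return a per-lane mask (all ones or all zeros), which appears
// in the IR below as an icmp/fcmp followed by a sign extension of the
// <N x i1> result.
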
875 // CHECK-LABEL: @test_i8x16_eq(
876 // CHECK-NEXT:  entry:
877 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
878 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
879 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <16 x i8> [[TMP0]], [[TMP1]]
880 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
881 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
882 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
883 //
884 v128_t test_i8x16_eq(v128_t a, v128_t b) {
885   return wasm_i8x16_eq(a, b);
886 }
887 
888 // CHECK-LABEL: @test_i8x16_ne(
889 // CHECK-NEXT:  entry:
890 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
891 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
892 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <16 x i8> [[TMP0]], [[TMP1]]
893 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
894 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
895 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
896 //
897 v128_t test_i8x16_ne(v128_t a, v128_t b) {
898   return wasm_i8x16_ne(a, b);
899 }
900 
901 // CHECK-LABEL: @test_i8x16_lt(
902 // CHECK-NEXT:  entry:
903 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
904 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
905 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <16 x i8> [[TMP0]], [[TMP1]]
906 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
907 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
908 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
909 //
910 v128_t test_i8x16_lt(v128_t a, v128_t b) {
911   return wasm_i8x16_lt(a, b);
912 }
913 
914 // CHECK-LABEL: @test_u8x16_lt(
915 // CHECK-NEXT:  entry:
916 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
917 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
918 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <16 x i8> [[TMP0]], [[TMP1]]
919 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
920 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
921 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
922 //
923 v128_t test_u8x16_lt(v128_t a, v128_t b) {
924   return wasm_u8x16_lt(a, b);
925 }
926 
927 // CHECK-LABEL: @test_i8x16_gt(
928 // CHECK-NEXT:  entry:
929 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
930 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
931 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <16 x i8> [[TMP0]], [[TMP1]]
932 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
933 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
934 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
935 //
936 v128_t test_i8x16_gt(v128_t a, v128_t b) {
937   return wasm_i8x16_gt(a, b);
938 }
939 
940 // CHECK-LABEL: @test_u8x16_gt(
941 // CHECK-NEXT:  entry:
942 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
943 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
944 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <16 x i8> [[TMP0]], [[TMP1]]
945 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
946 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
947 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
948 //
949 v128_t test_u8x16_gt(v128_t a, v128_t b) {
950   return wasm_u8x16_gt(a, b);
951 }
952 
953 // CHECK-LABEL: @test_i8x16_le(
954 // CHECK-NEXT:  entry:
955 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
956 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
957 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <16 x i8> [[TMP0]], [[TMP1]]
958 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
959 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
960 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
961 //
962 v128_t test_i8x16_le(v128_t a, v128_t b) {
963   return wasm_i8x16_le(a, b);
964 }
965 
966 // CHECK-LABEL: @test_u8x16_le(
967 // CHECK-NEXT:  entry:
968 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
969 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
970 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <16 x i8> [[TMP0]], [[TMP1]]
971 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
972 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
973 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
974 //
975 v128_t test_u8x16_le(v128_t a, v128_t b) {
976   return wasm_u8x16_le(a, b);
977 }
978 
979 // CHECK-LABEL: @test_i8x16_ge(
980 // CHECK-NEXT:  entry:
981 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
982 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
983 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <16 x i8> [[TMP0]], [[TMP1]]
984 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
985 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
986 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
987 //
988 v128_t test_i8x16_ge(v128_t a, v128_t b) {
989   return wasm_i8x16_ge(a, b);
990 }
991 
992 // CHECK-LABEL: @test_u8x16_ge(
993 // CHECK-NEXT:  entry:
994 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
995 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
996 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <16 x i8> [[TMP0]], [[TMP1]]
997 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
998 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SEXT_I]] to <4 x i32>
999 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1000 //
1001 v128_t test_u8x16_ge(v128_t a, v128_t b) {
1002   return wasm_u8x16_ge(a, b);
1003 }
1004 
1005 // CHECK-LABEL: @test_i16x8_eq(
1006 // CHECK-NEXT:  entry:
1007 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1008 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1009 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <8 x i16> [[TMP0]], [[TMP1]]
1010 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1011 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1012 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1013 //
1014 v128_t test_i16x8_eq(v128_t a, v128_t b) {
1015   return wasm_i16x8_eq(a, b);
1016 }
1017 
1018 // CHECK-LABEL: @test_i16x8_ne(
1019 // CHECK-NEXT:  entry:
1020 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1021 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1022 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <8 x i16> [[TMP0]], [[TMP1]]
1023 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1024 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1025 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1026 //
1027 v128_t test_i16x8_ne(v128_t a, v128_t b) {
1028   return wasm_i16x8_ne(a, b);
1029 }
1030 
1031 // CHECK-LABEL: @test_i16x8_lt(
1032 // CHECK-NEXT:  entry:
1033 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1034 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1035 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <8 x i16> [[TMP0]], [[TMP1]]
1036 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1037 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1038 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1039 //
1040 v128_t test_i16x8_lt(v128_t a, v128_t b) {
1041   return wasm_i16x8_lt(a, b);
1042 }
1043 
1044 // CHECK-LABEL: @test_u16x8_lt(
1045 // CHECK-NEXT:  entry:
1046 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1047 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1048 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <8 x i16> [[TMP0]], [[TMP1]]
1049 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1050 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1051 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1052 //
1053 v128_t test_u16x8_lt(v128_t a, v128_t b) {
1054   return wasm_u16x8_lt(a, b);
1055 }
1056 
1057 // CHECK-LABEL: @test_i16x8_gt(
1058 // CHECK-NEXT:  entry:
1059 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1060 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1061 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <8 x i16> [[TMP0]], [[TMP1]]
1062 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1063 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1064 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1065 //
1066 v128_t test_i16x8_gt(v128_t a, v128_t b) {
1067   return wasm_i16x8_gt(a, b);
1068 }
1069 
1070 // CHECK-LABEL: @test_u16x8_gt(
1071 // CHECK-NEXT:  entry:
1072 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1073 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1074 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <8 x i16> [[TMP0]], [[TMP1]]
1075 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1076 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1077 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1078 //
1079 v128_t test_u16x8_gt(v128_t a, v128_t b) {
1080   return wasm_u16x8_gt(a, b);
1081 }
1082 
1083 // CHECK-LABEL: @test_i16x8_le(
1084 // CHECK-NEXT:  entry:
1085 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1086 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1087 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <8 x i16> [[TMP0]], [[TMP1]]
1088 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1089 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1090 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1091 //
1092 v128_t test_i16x8_le(v128_t a, v128_t b) {
1093   return wasm_i16x8_le(a, b);
1094 }
1095 
1096 // CHECK-LABEL: @test_u16x8_le(
1097 // CHECK-NEXT:  entry:
1098 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1099 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1100 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <8 x i16> [[TMP0]], [[TMP1]]
1101 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1102 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1103 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1104 //
1105 v128_t test_u16x8_le(v128_t a, v128_t b) {
1106   return wasm_u16x8_le(a, b);
1107 }
1108 
1109 // CHECK-LABEL: @test_i16x8_ge(
1110 // CHECK-NEXT:  entry:
1111 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1112 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1113 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <8 x i16> [[TMP0]], [[TMP1]]
1114 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1115 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1116 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1117 //
1118 v128_t test_i16x8_ge(v128_t a, v128_t b) {
1119   return wasm_i16x8_ge(a, b);
1120 }
1121 
1122 // CHECK-LABEL: @test_u16x8_ge(
1123 // CHECK-NEXT:  entry:
1124 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
1125 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
1126 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <8 x i16> [[TMP0]], [[TMP1]]
1127 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
1128 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SEXT_I]] to <4 x i32>
1129 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
1130 //
1131 v128_t test_u16x8_ge(v128_t a, v128_t b) {
1132   return wasm_u16x8_ge(a, b);
1133 }
1134 
1135 // CHECK-LABEL: @test_i32x4_eq(
1136 // CHECK-NEXT:  entry:
1137 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <4 x i32> [[A:%.*]], [[B:%.*]]
1138 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1139 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1140 //
1141 v128_t test_i32x4_eq(v128_t a, v128_t b) {
1142   return wasm_i32x4_eq(a, b);
1143 }
1144 
1145 // CHECK-LABEL: @test_i32x4_ne(
1146 // CHECK-NEXT:  entry:
1147 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <4 x i32> [[A:%.*]], [[B:%.*]]
1148 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1149 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1150 //
1151 v128_t test_i32x4_ne(v128_t a, v128_t b) {
1152   return wasm_i32x4_ne(a, b);
1153 }
1154 
1155 // CHECK-LABEL: @test_i32x4_lt(
1156 // CHECK-NEXT:  entry:
1157 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <4 x i32> [[A:%.*]], [[B:%.*]]
1158 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1159 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1160 //
1161 v128_t test_i32x4_lt(v128_t a, v128_t b) {
1162   return wasm_i32x4_lt(a, b);
1163 }
1164 
1165 // CHECK-LABEL: @test_u32x4_lt(
1166 // CHECK-NEXT:  entry:
1167 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ult <4 x i32> [[A:%.*]], [[B:%.*]]
1168 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1169 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1170 //
1171 v128_t test_u32x4_lt(v128_t a, v128_t b) {
1172   return wasm_u32x4_lt(a, b);
1173 }
1174 
1175 // CHECK-LABEL: @test_i32x4_gt(
1176 // CHECK-NEXT:  entry:
1177 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <4 x i32> [[A:%.*]], [[B:%.*]]
1178 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1179 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1180 //
1181 v128_t test_i32x4_gt(v128_t a, v128_t b) {
1182   return wasm_i32x4_gt(a, b);
1183 }
1184 
1185 // CHECK-LABEL: @test_u32x4_gt(
1186 // CHECK-NEXT:  entry:
1187 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
1188 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1189 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1190 //
1191 v128_t test_u32x4_gt(v128_t a, v128_t b) {
1192   return wasm_u32x4_gt(a, b);
1193 }
1194 
1195 // CHECK-LABEL: @test_i32x4_le(
1196 // CHECK-NEXT:  entry:
1197 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <4 x i32> [[A:%.*]], [[B:%.*]]
1198 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1199 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1200 //
1201 v128_t test_i32x4_le(v128_t a, v128_t b) {
1202   return wasm_i32x4_le(a, b);
1203 }
1204 
1205 // CHECK-LABEL: @test_u32x4_le(
1206 // CHECK-NEXT:  entry:
1207 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
1208 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1209 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1210 //
1211 v128_t test_u32x4_le(v128_t a, v128_t b) {
1212   return wasm_u32x4_le(a, b);
1213 }
1214 
1215 // CHECK-LABEL: @test_i32x4_ge(
1216 // CHECK-NEXT:  entry:
1217 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <4 x i32> [[A:%.*]], [[B:%.*]]
1218 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1219 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1220 //
1221 v128_t test_i32x4_ge(v128_t a, v128_t b) {
1222   return wasm_i32x4_ge(a, b);
1223 }
1224 
1225 // CHECK-LABEL: @test_u32x4_ge(
1226 // CHECK-NEXT:  entry:
1227 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp uge <4 x i32> [[A:%.*]], [[B:%.*]]
1228 // CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
1229 // CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
1230 //
1231 v128_t test_u32x4_ge(v128_t a, v128_t b) {
1232   return wasm_u32x4_ge(a, b);
1233 }
1234 
// CHECK-LABEL: @test_i64x2_eq(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_eq(v128_t a, v128_t b) {
  return wasm_i64x2_eq(a, b);
}

// CHECK-LABEL: @test_i64x2_ne(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[CMP_I:%.*]] = icmp ne <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_ne(v128_t a, v128_t b) {
  return wasm_i64x2_ne(a, b);
}

// CHECK-LABEL: @test_i64x2_lt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_lt(v128_t a, v128_t b) {
  return wasm_i64x2_lt(a, b);
}

// CHECK-LABEL: @test_i64x2_gt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sgt <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_gt(v128_t a, v128_t b) {
  return wasm_i64x2_gt(a, b);
}

// CHECK-LABEL: @test_i64x2_le(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sle <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_le(v128_t a, v128_t b) {
  return wasm_i64x2_le(a, b);
}

// CHECK-LABEL: @test_i64x2_ge(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[CMP_I:%.*]] = icmp sge <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_ge(v128_t a, v128_t b) {
  return wasm_i64x2_ge(a, b);
}

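// The floating-point comparisons use ordered fcmp predicates (oeq, olt, ogt,
// ole, oge), so lanes with a NaN operand compare false; the exception is
// *_ne, which uses the unordered predicate une and compares true for NaN.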
// CHECK-LABEL: @test_f32x4_eq(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oeq <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
//
v128_t test_f32x4_eq(v128_t a, v128_t b) {
  return wasm_f32x4_eq(a, b);
}

// CHECK-LABEL: @test_f32x4_ne(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp une <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
//
v128_t test_f32x4_ne(v128_t a, v128_t b) {
  return wasm_f32x4_ne(a, b);
}

// CHECK-LABEL: @test_f32x4_lt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
//
v128_t test_f32x4_lt(v128_t a, v128_t b) {
  return wasm_f32x4_lt(a, b);
}

// CHECK-LABEL: @test_f32x4_gt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ogt <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
//
v128_t test_f32x4_gt(v128_t a, v128_t b) {
  return wasm_f32x4_gt(a, b);
}

// CHECK-LABEL: @test_f32x4_le(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ole <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
//
v128_t test_f32x4_le(v128_t a, v128_t b) {
  return wasm_f32x4_le(a, b);
}

// CHECK-LABEL: @test_f32x4_ge(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oge <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[SEXT_I]]
//
v128_t test_f32x4_ge(v128_t a, v128_t b) {
  return wasm_f32x4_ge(a, b);
}

// CHECK-LABEL: @test_f64x2_eq(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oeq <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_eq(v128_t a, v128_t b) {
  return wasm_f64x2_eq(a, b);
}

// CHECK-LABEL: @test_f64x2_ne(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp une <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_ne(v128_t a, v128_t b) {
  return wasm_f64x2_ne(a, b);
}

// CHECK-LABEL: @test_f64x2_lt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp olt <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_lt(v128_t a, v128_t b) {
  return wasm_f64x2_lt(a, b);
}

// CHECK-LABEL: @test_f64x2_gt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ogt <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_gt(v128_t a, v128_t b) {
  return wasm_f64x2_gt(a, b);
}

// CHECK-LABEL: @test_f64x2_le(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp ole <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_le(v128_t a, v128_t b) {
  return wasm_f64x2_le(a, b);
}

// CHECK-LABEL: @test_f64x2_ge(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[CMP_I:%.*]] = fcmp oge <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SEXT_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_ge(v128_t a, v128_t b) {
  return wasm_f64x2_ge(a, b);
}

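// The bitwise ops need no lane bitcasts since they act on the <4 x i32>
// representation directly. Note the argument order of andnot:
// wasm_v128_andnot(a, b) computes a & ~b, i.e. the complement applies to the
// second operand.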
// CHECK-LABEL: @test_v128_not(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1)
// CHECK-NEXT:    ret <4 x i32> [[NOT_I]]
//
v128_t test_v128_not(v128_t a) {
  return wasm_v128_not(a);
}

// CHECK-LABEL: @test_v128_and(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT:    ret <4 x i32> [[AND_I]]
//
v128_t test_v128_and(v128_t a, v128_t b) {
  return wasm_v128_and(a, b);
}

// CHECK-LABEL: @test_v128_or(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[OR_I:%.*]] = or <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT:    ret <4 x i32> [[OR_I]]
//
v128_t test_v128_or(v128_t a, v128_t b) {
  return wasm_v128_or(a, b);
}

// CHECK-LABEL: @test_v128_xor(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[XOR_I:%.*]] = xor <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT:    ret <4 x i32> [[XOR_I]]
//
v128_t test_v128_xor(v128_t a, v128_t b) {
  return wasm_v128_xor(a, b);
}

// CHECK-LABEL: @test_v128_andnot(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1)
// CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[A:%.*]], [[NOT_I]]
// CHECK-NEXT:    ret <4 x i32> [[AND_I]]
//
v128_t test_v128_andnot(v128_t a, v128_t b) {
  return wasm_v128_andnot(a, b);
}

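// any_true and all_true lower to the Wasm-specific @llvm.wasm.anytrue /
// @llvm.wasm.alltrue intrinsics, whose i32 result is reduced to a C bool via
// an icmp against zero; bitselect likewise maps to @llvm.wasm.bitselect.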
// CHECK-LABEL: @test_v128_any_true(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
//
bool test_v128_any_true(v128_t a) {
  return wasm_v128_any_true(a);
}

// CHECK-LABEL: @test_v128_bitselect(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]])
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
  return wasm_v128_bitselect(a, b, mask);
}

// CHECK-LABEL: @test_i8x16_abs(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[ABS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i8x16_abs(v128_t a) {
  return wasm_i8x16_abs(a);
}

// CHECK-LABEL: @test_i8x16_neg(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, [[TMP0]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i8x16_neg(v128_t a) {
  return wasm_i8x16_neg(a);
}

// CHECK-LABEL: @test_i8x16_all_true(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
//
bool test_i8x16_all_true(v128_t a) {
  return wasm_i8x16_all_true(a);
}

// CHECK-LABEL: @test_i8x16_bitmask(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
uint32_t test_i8x16_bitmask(v128_t a) {
  return wasm_i8x16_bitmask(a);
}

// CHECK-LABEL: @test_i8x16_popcnt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call range(i8 0, 9) <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i8x16_popcnt(v128_t a) {
  return wasm_i8x16_popcnt(a);
}

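// As required by the Wasm spec, the shifts take the shift count modulo the
// lane width: the scalar amount is masked with 7, 15, 31, or 63 for 8-, 16-,
// 32-, and 64-bit lanes respectively before being splatted. For example,
// wasm_i8x16_shl(v, 9) shifts each lane by 1, not by 9.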
// CHECK-LABEL: @test_i8x16_shl(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
// CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 7
// CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT:    [[SHL_I:%.*]] = shl <16 x i8> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[SHL_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
//
v128_t test_i8x16_shl(v128_t a, uint32_t b) {
  return wasm_i8x16_shl(a, b);
}

// CHECK-LABEL: @test_i8x16_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
// CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 7
// CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <16 x i8> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
//
v128_t test_i8x16_shr(v128_t a, uint32_t b) {
  return wasm_i8x16_shr(a, b);
}

// CHECK-LABEL: @test_u8x16_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i8
// CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], 7
// CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i8> poison, i8 [[TMP2]], i64 0
// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <16 x i8> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[SHR_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
//
v128_t test_u8x16_shr(v128_t a, uint32_t b) {
  return wasm_u8x16_shr(a, b);
}

// CHECK-LABEL: @test_i8x16_add(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[ADD_I:%.*]] = add <16 x i8> [[TMP1]], [[TMP0]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[ADD_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i8x16_add(v128_t a, v128_t b) {
  return wasm_i8x16_add(a, b);
}

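// The saturating add/sub forms lower to the generic @llvm.sadd.sat /
// @llvm.uadd.sat (and matching ssub/usub) intrinsics rather than
// Wasm-specific ones.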
// CHECK-LABEL: @test_i8x16_add_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i8x16_add_sat(v128_t a, v128_t b) {
  return wasm_i8x16_add_sat(a, b);
}

// CHECK-LABEL: @test_u8x16_add_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u8x16_add_sat(v128_t a, v128_t b) {
  return wasm_u8x16_add_sat(a, b);
}

// CHECK-LABEL: @test_i8x16_sub(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <16 x i8> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i8x16_sub(v128_t a, v128_t b) {
  return wasm_i8x16_sub(a, b);
}

// CHECK-LABEL: @test_i8x16_sub_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i8x16_sub_sat(v128_t a, v128_t b) {
  return wasm_i8x16_sub_sat(a, b);
}

// CHECK-LABEL: @test_u8x16_sub_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u8x16_sub_sat(v128_t a, v128_t b) {
  return wasm_u8x16_sub_sat(a, b);
}

// CHECK-LABEL: @test_i8x16_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i8x16_min(v128_t a, v128_t b) {
  return wasm_i8x16_min(a, b);
}

// CHECK-LABEL: @test_u8x16_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u8x16_min(v128_t a, v128_t b) {
  return wasm_u8x16_min(a, b);
}

// CHECK-LABEL: @test_i8x16_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i8x16_max(v128_t a, v128_t b) {
  return wasm_i8x16_max(a, b);
}

// CHECK-LABEL: @test_u8x16_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u8x16_max(v128_t a, v128_t b) {
  return wasm_u8x16_max(a, b);
}

// CHECK-LABEL: @test_u8x16_avgr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u8x16_avgr(v128_t a, v128_t b) {
  return wasm_u8x16_avgr(a, b);
}

// CHECK-LABEL: @test_i16x8_abs(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[ABS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i16x8_abs(v128_t a) {
  return wasm_i16x8_abs(a);
}

// CHECK-LABEL: @test_i16x8_neg(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, [[TMP0]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i16x8_neg(v128_t a) {
  return wasm_i16x8_neg(a);
}

// CHECK-LABEL: @test_i16x8_all_true(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]])
// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
//
bool test_i16x8_all_true(v128_t a) {
  return wasm_i16x8_all_true(a);
}

// CHECK-LABEL: @test_i16x8_bitmask(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
uint32_t test_i16x8_bitmask(v128_t a) {
  return wasm_i16x8_bitmask(a);
}

// CHECK-LABEL: @test_i16x8_shl(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
// CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
// CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT:    [[SHL_I:%.*]] = shl <8 x i16> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[SHL_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
//
v128_t test_i16x8_shl(v128_t a, uint32_t b) {
  return wasm_i16x8_shl(a, b);
}

// CHECK-LABEL: @test_i16x8_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
// CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
// CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <8 x i16> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
//
v128_t test_i16x8_shr(v128_t a, uint32_t b) {
  return wasm_i16x8_shr(a, b);
}

// CHECK-LABEL: @test_u16x8_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[B:%.*]] to i16
// CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], 15
// CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
// CHECK-NEXT:    [[SH_PROM_I:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> poison, <8 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <8 x i16> [[TMP0]], [[SH_PROM_I]]
// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[SHR_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
//
v128_t test_u16x8_shr(v128_t a, uint32_t b) {
  return wasm_u16x8_shr(a, b);
}

// CHECK-LABEL: @test_i16x8_add(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[ADD_I:%.*]] = add <8 x i16> [[TMP1]], [[TMP0]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[ADD_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_add(v128_t a, v128_t b) {
  return wasm_i16x8_add(a, b);
}

// CHECK-LABEL: @test_i16x8_add_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i16x8_add_sat(v128_t a, v128_t b) {
  return wasm_i16x8_add_sat(a, b);
}

// CHECK-LABEL: @test_u16x8_add_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u16x8_add_sat(v128_t a, v128_t b) {
  return wasm_u16x8_add_sat(a, b);
}

// CHECK-LABEL: @test_i16x8_sub(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_sub(v128_t a, v128_t b) {
  return wasm_i16x8_sub(a, b);
}

// CHECK-LABEL: @test_i16x8_sub_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i16x8_sub_sat(v128_t a, v128_t b) {
  return wasm_i16x8_sub_sat(a, b);
}

// CHECK-LABEL: @test_u16x8_sub_sat(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u16x8_sub_sat(v128_t a, v128_t b) {
  return wasm_u16x8_sub_sat(a, b);
}

// CHECK-LABEL: @test_i16x8_mul(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[MUL_I:%.*]] = mul <8 x i16> [[TMP1]], [[TMP0]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i16x8_mul(v128_t a, v128_t b) {
  return wasm_i16x8_mul(a, b);
}

// CHECK-LABEL: @test_i16x8_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i16x8_min(v128_t a, v128_t b) {
  return wasm_i16x8_min(a, b);
}

// CHECK-LABEL: @test_u16x8_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u16x8_min(v128_t a, v128_t b) {
  return wasm_u16x8_min(a, b);
}

// CHECK-LABEL: @test_i16x8_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_i16x8_max(v128_t a, v128_t b) {
  return wasm_i16x8_max(a, b);
}

// CHECK-LABEL: @test_u16x8_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u16x8_max(v128_t a, v128_t b) {
  return wasm_u16x8_max(a, b);
}

// CHECK-LABEL: @test_u16x8_avgr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_u16x8_avgr(v128_t a, v128_t b) {
  return wasm_u16x8_avgr(a, b);
}

// CHECK-LABEL: @test_i32x4_abs(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false)
// CHECK-NEXT:    ret <4 x i32> [[ABS_I]]
//
v128_t test_i32x4_abs(v128_t a) {
  return wasm_i32x4_abs(a);
}

// CHECK-LABEL: @test_i32x4_neg(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, [[A:%.*]]
// CHECK-NEXT:    ret <4 x i32> [[SUB_I]]
//
v128_t test_i32x4_neg(v128_t a) {
  return wasm_i32x4_neg(a);
}

// CHECK-LABEL: @test_i32x4_all_true(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]])
// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
//
bool test_i32x4_all_true(v128_t a) {
  return wasm_i32x4_all_true(a);
}

// CHECK-LABEL: @test_i32x4_bitmask(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]])
// CHECK-NEXT:    ret i32 [[TMP0]]
//
uint32_t test_i32x4_bitmask(v128_t a) {
  return wasm_i32x4_bitmask(a);
}

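// At 32-bit lanes the operands already have the <4 x i32> type of v128_t, so
// the ops below need neither lane bitcasts nor a trunc of the shift amount.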
// CHECK-LABEL: @test_i32x4_shl(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[B:%.*]], 31
// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT:    [[SHL_I:%.*]] = shl <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT:    ret <4 x i32> [[SHL_I]]
//
v128_t test_i32x4_shl(v128_t a, uint32_t b) {
  return wasm_i32x4_shl(a, b);
}

// CHECK-LABEL: @test_i32x4_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[B:%.*]], 31
// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
//
v128_t test_i32x4_shr(v128_t a, uint32_t b) {
  return wasm_i32x4_shr(a, b);
}

// CHECK-LABEL: @test_u32x4_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[AND_I:%.*]] = and i32 [[B:%.*]], 31
// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <4 x i32> poison, i32 [[AND_I]], i64 0
// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <4 x i32> [[SPLAT_SPLATINSERT_I]], <4 x i32> poison, <4 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <4 x i32> [[A:%.*]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT:    ret <4 x i32> [[SHR_I]]
//
v128_t test_u32x4_shr(v128_t a, uint32_t b) {
  return wasm_u32x4_shr(a, b);
}

// CHECK-LABEL: @test_i32x4_add(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[ADD_I:%.*]] = add <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT:    ret <4 x i32> [[ADD_I]]
//
v128_t test_i32x4_add(v128_t a, v128_t b) {
  return wasm_i32x4_add(a, b);
}

// CHECK-LABEL: @test_i32x4_sub(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT:    ret <4 x i32> [[SUB_I]]
//
v128_t test_i32x4_sub(v128_t a, v128_t b) {
  return wasm_i32x4_sub(a, b);
}

// CHECK-LABEL: @test_i32x4_mul(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[MUL_I:%.*]] = mul <4 x i32> [[B:%.*]], [[A:%.*]]
// CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
//
v128_t test_i32x4_mul(v128_t a, v128_t b) {
  return wasm_i32x4_mul(a, b);
}

// CHECK-LABEL: @test_i32x4_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i32x4_min(v128_t a, v128_t b) {
  return wasm_i32x4_min(a, b);
}

// CHECK-LABEL: @test_u32x4_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u32x4_min(v128_t a, v128_t b) {
  return wasm_u32x4_min(a, b);
}

// CHECK-LABEL: @test_i32x4_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_i32x4_max(v128_t a, v128_t b) {
  return wasm_i32x4_max(a, b);
}

// CHECK-LABEL: @test_u32x4_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
//
v128_t test_u32x4_max(v128_t a, v128_t b) {
  return wasm_u32x4_max(a, b);
}

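// The dot product is Wasm-specific and lowers to @llvm.wasm.dot: lane-wise
// i16 products are pairwise summed into the four i32 result lanes.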
// CHECK-LABEL: @test_i32x4_dot_i16x8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
  return wasm_i32x4_dot_i16x8(a, b);
}

// CHECK-LABEL: @test_i64x2_abs(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[ABS_I:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[ABS_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i64x2_abs(v128_t a) {
  return wasm_i64x2_abs(a);
}

// CHECK-LABEL: @test_i64x2_neg(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <2 x i64> zeroinitializer, [[TMP0]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_i64x2_neg(v128_t a) {
  return wasm_i64x2_neg(a);
}

// CHECK-LABEL: @test_i64x2_all_true(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]])
// CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK-NEXT:    ret i1 [[TOBOOL_I]]
//
bool test_i64x2_all_true(v128_t a) {
  return wasm_i64x2_all_true(a);
}

// CHECK-LABEL: @test_i64x2_bitmask(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]])
// CHECK-NEXT:    ret i32 [[TMP1]]
//
uint32_t test_i64x2_bitmask(v128_t a) {
  return wasm_i64x2_bitmask(a);
}

// CHECK-LABEL: @test_i64x2_shl(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 63
// CHECK-NEXT:    [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64
// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT:    [[SHL_I:%.*]] = shl <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SHL_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_shl(v128_t a, uint32_t b) {
  return wasm_i64x2_shl(a, b);
}

// CHECK-LABEL: @test_i64x2_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 63
// CHECK-NEXT:    [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64
// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = ashr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_shr(v128_t a, uint32_t b) {
  return wasm_i64x2_shr(a, b);
}

// CHECK-LABEL: @test_u64x2_shr(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[B:%.*]], 63
// CHECK-NEXT:    [[AND_I:%.*]] = zext nneg i32 [[TMP1]] to i64
// CHECK-NEXT:    [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i64> poison, i64 [[AND_I]], i64 0
// CHECK-NEXT:    [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i64> [[SPLAT_SPLATINSERT_I]], <2 x i64> poison, <2 x i32> zeroinitializer
// CHECK-NEXT:    [[SHR_I:%.*]] = lshr <2 x i64> [[TMP0]], [[SPLAT_SPLAT_I]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SHR_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_u64x2_shr(v128_t a, uint32_t b) {
  return wasm_u64x2_shr(a, b);
}

// CHECK-LABEL: @test_i64x2_add(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP0]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[ADD_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_add(v128_t a, v128_t b) {
  return wasm_i64x2_add(a, b);
}

// CHECK-LABEL: @test_i64x2_sub(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[SUB_I:%.*]] = sub <2 x i64> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_sub(v128_t a, v128_t b) {
  return wasm_i64x2_sub(a, b);
}

// CHECK-LABEL: @test_i64x2_mul(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x i64>
// CHECK-NEXT:    [[MUL_I:%.*]] = mul <2 x i64> [[TMP1]], [[TMP0]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_i64x2_mul(v128_t a, v128_t b) {
  return wasm_i64x2_mul(a, b);
}

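// The unary float ops map to generic LLVM intrinsics: fabs, sqrt, ceil,
// floor, and trunc by name, while wasm_f32x4_nearest (round-to-nearest,
// ties-to-even) is represented as @llvm.nearbyint here.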
// CHECK-LABEL: @test_f32x4_abs(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_abs(v128_t a) {
  return wasm_f32x4_abs(a);
}

// CHECK-LABEL: @test_f32x4_neg(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <4 x float> [[TMP0]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[FNEG_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_f32x4_neg(v128_t a) {
  return wasm_f32x4_neg(a);
}

// CHECK-LABEL: @test_f32x4_sqrt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_sqrt(v128_t a) {
  return wasm_f32x4_sqrt(a);
}

// CHECK-LABEL: @test_f32x4_ceil(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_ceil(v128_t a) {
  return wasm_f32x4_ceil(a);
}

// CHECK-LABEL: @test_f32x4_floor(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_floor(v128_t a) {
  return wasm_f32x4_floor(a);
}

// CHECK-LABEL: @test_f32x4_trunc(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_trunc(v128_t a) {
  return wasm_f32x4_trunc(a);
}

// CHECK-LABEL: @test_f32x4_nearest(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_nearest(v128_t a) {
  return wasm_f32x4_nearest(a);
}

// CHECK-LABEL: @test_f32x4_add(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[ADD_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_add(v128_t a, v128_t b) {
  return wasm_f32x4_add(a, b);
}

// CHECK-LABEL: @test_f32x4_sub(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_sub(v128_t a, v128_t b) {
  return wasm_f32x4_sub(a, b);
}

// CHECK-LABEL: @test_f32x4_mul(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[MUL_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_mul(v128_t a, v128_t b) {
  return wasm_f32x4_mul(a, b);
}

// CHECK-LABEL: @test_f32x4_div(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[DIV_I:%.*]] = fdiv <4 x float> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x float> [[DIV_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f32x4_div(v128_t a, v128_t b) {
  return wasm_f32x4_div(a, b);
}

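// min/max lower to @llvm.minimum/@llvm.maximum, the NaN-propagating variants
// matching Wasm semantics. pmin/pmax ("pseudo-minimum/maximum", defined as
// b < a ? b : a and a < b ? b : a) keep Wasm-specific intrinsics instead.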
// CHECK-LABEL: @test_f32x4_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f32x4_min(v128_t a, v128_t b) {
  return wasm_f32x4_min(a, b);
}

// CHECK-LABEL: @test_f32x4_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f32x4_max(v128_t a, v128_t b) {
  return wasm_f32x4_max(a, b);
}

// CHECK-LABEL: @test_f32x4_pmin(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f32x4_pmin(v128_t a, v128_t b) {
  return wasm_f32x4_pmin(a, b);
}

// CHECK-LABEL: @test_f32x4_pmax(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f32x4_pmax(v128_t a, v128_t b) {
  return wasm_f32x4_pmax(a, b);
}

// CHECK-LABEL: @test_f64x2_abs(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_abs(v128_t a) {
  return wasm_f64x2_abs(a);
}

// CHECK-LABEL: @test_f64x2_neg(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[FNEG_I:%.*]] = fneg <2 x double> [[TMP0]]
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[FNEG_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP1]]
//
v128_t test_f64x2_neg(v128_t a) {
  return wasm_f64x2_neg(a);
}

// CHECK-LABEL: @test_f64x2_sqrt(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_sqrt(v128_t a) {
  return wasm_f64x2_sqrt(a);
}

// CHECK-LABEL: @test_f64x2_ceil(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_ceil(v128_t a) {
  return wasm_f64x2_ceil(a);
}

// CHECK-LABEL: @test_f64x2_floor(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_floor(v128_t a) {
  return wasm_f64x2_floor(a);
}

// CHECK-LABEL: @test_f64x2_trunc(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_trunc(v128_t a) {
  return wasm_f64x2_trunc(a);
}

// CHECK-LABEL: @test_f64x2_nearest(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_nearest(v128_t a) {
  return wasm_f64x2_nearest(a);
}

// CHECK-LABEL: @test_f64x2_add(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[ADD_I:%.*]] = fadd <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[ADD_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_add(v128_t a, v128_t b) {
  return wasm_f64x2_add(a, b);
}

// CHECK-LABEL: @test_f64x2_sub(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[SUB_I:%.*]] = fsub <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[SUB_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_sub(v128_t a, v128_t b) {
  return wasm_f64x2_sub(a, b);
}

// CHECK-LABEL: @test_f64x2_mul(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[MUL_I:%.*]] = fmul <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[MUL_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_mul(v128_t a, v128_t b) {
  return wasm_f64x2_mul(a, b);
}

// CHECK-LABEL: @test_f64x2_div(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[DIV_I:%.*]] = fdiv <2 x double> [[TMP0]], [[TMP1]]
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x double> [[DIV_I]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
//
v128_t test_f64x2_div(v128_t a, v128_t b) {
  return wasm_f64x2_div(a, b);
}

// CHECK-LABEL: @test_f64x2_min(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f64x2_min(v128_t a, v128_t b) {
  return wasm_f64x2_min(a, b);
}

// CHECK-LABEL: @test_f64x2_max(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f64x2_max(v128_t a, v128_t b) {
  return wasm_f64x2_max(a, b);
}

// CHECK-LABEL: @test_f64x2_pmin(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f64x2_pmin(v128_t a, v128_t b) {
  return wasm_f64x2_pmin(a, b);
}

// CHECK-LABEL: @test_f64x2_pmax(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
//
v128_t test_f64x2_pmax(v128_t a, v128_t b) {
  return wasm_f64x2_pmax(a, b);
2607 }
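
// Note: wasm f64x2.min/max propagate NaN and treat -0 as less than +0, which
// is exactly @llvm.minimum/@llvm.maximum, so the generic intrinsics are used
// above. pmin/pmax instead implement the C-style selects b < a ? b : a and
// a < b ? b : a, which behave differently on NaN and signed-zero inputs, so
// they remain on the target-specific @llvm.wasm.pmin/@llvm.wasm.pmax.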
2608 
2609 // CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
2610 // CHECK-NEXT:  entry:
2611 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2612 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
2613 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2614 //
2615 v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
2616   return wasm_i32x4_trunc_sat_f32x4(a);
2617 }
2618 
2619 // CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
2620 // CHECK-NEXT:  entry:
2621 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2622 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]])
2623 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2624 //
2625 v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
2626   return wasm_u32x4_trunc_sat_f32x4(a);
2627 }
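
// Note: the saturating conversions use the generic @llvm.fptosi.sat and
// @llvm.fptoui.sat intrinsics, which clamp out-of-range values to the integer
// range and map NaN to 0; this matches the wasm trunc_sat semantics, so no
// target-specific intrinsic is needed.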
2628 
2629 // CHECK-LABEL: @test_f32x4_convert_i32x4(
2630 // CHECK-NEXT:  entry:
2631 // CHECK-NEXT:    [[CONV_I:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
2632 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2633 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2634 //
2635 v128_t test_f32x4_convert_i32x4(v128_t a) {
2636   return wasm_f32x4_convert_i32x4(a);
2637 }
2638 
2639 // CHECK-LABEL: @test_f32x4_convert_u32x4(
2640 // CHECK-NEXT:  entry:
2641 // CHECK-NEXT:    [[CONV_I:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
2642 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2643 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2644 //
2645 v128_t test_f32x4_convert_u32x4(v128_t a) {
2646   return wasm_f32x4_convert_u32x4(a);
2647 }
2648 
2649 // CHECK-LABEL: @test_f64x2_convert_low_i32x4(
2650 // CHECK-NEXT:  entry:
2651 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
2652 // CHECK-NEXT:    [[CONV_I:%.*]] = sitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
2653 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2654 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2655 //
2656 v128_t test_f64x2_convert_low_i32x4(v128_t a) {
2657   return wasm_f64x2_convert_low_i32x4(a);
2658 }
2659 
2660 // CHECK-LABEL: @test_f64x2_convert_low_u32x4(
2661 // CHECK-NEXT:  entry:
2662 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
2663 // CHECK-NEXT:    [[CONV_I:%.*]] = uitofp <2 x i32> [[VECINIT2_I]] to <2 x double>
2664 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2665 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2666 //
2667 v128_t test_f64x2_convert_low_u32x4(v128_t a) {
2668   return wasm_f64x2_convert_low_u32x4(a);
2669 }
2670 
2671 // CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
2672 // CHECK-NEXT:  entry:
2673 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2674 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]])
2675 // CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2676 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2677 //
2678 v128_t test_i32x4_trunc_sat_f64x2_zero(v128_t a) {
2679   return wasm_i32x4_trunc_sat_f64x2_zero(a);
2680 }
2681 
2682 // CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
2683 // CHECK-NEXT:  entry:
2684 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2685 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]])
2686 // CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2687 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2688 //
2689 v128_t test_u32x4_trunc_sat_f64x2_zero(v128_t a) {
2690   return wasm_u32x4_trunc_sat_f64x2_zero(a);
2691 }
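
// Note: the _zero forms reuse the same generic saturating conversions, just
// at <2 x i32>, and then concatenate with zeroinitializer via shufflevector,
// so lanes 2 and 3 of the result are guaranteed to be zero.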
2692 
2693 // CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
2694 // CHECK-NEXT:  entry:
2695 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
2696 // CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2697 // CHECK-NEXT:    [[CONV_I:%.*]] = fptrunc <4 x double> [[SHUFFLE_I]] to <4 x float>
2698 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[CONV_I]] to <4 x i32>
2699 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2700 //
2701 v128_t test_f32x4_demote_f64x2_zero(v128_t a) {
2702   return wasm_f32x4_demote_f64x2_zero(a);
2703 }
2704 
2705 // CHECK-LABEL: @test_f64x2_promote_low_f32x4(
2706 // CHECK-NEXT:  entry:
2707 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
2708 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
2709 // CHECK-NEXT:    [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double>
2710 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
2711 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2712 //
2713 v128_t test_f64x2_promote_low_f32x4(v128_t a) {
2714   return wasm_f64x2_promote_low_f32x4(a);
2715 }
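
// Note: convert_low and promote_low select the low two lanes with a
// shufflevector before converting; demote_f64x2_zero instead widens to
// <4 x double> by concatenating a zero vector before the fptrunc, so lanes 2
// and 3 of the result are exactly +0.0f.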
2716 
2717 // CHECK-LABEL: @test_i8x16_shuffle(
2718 // CHECK-NEXT:  entry:
2719 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2720 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2721 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0)
2722 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2723 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2724 //
2725 v128_t test_i8x16_shuffle(v128_t a, v128_t b) {
2726   return wasm_i8x16_shuffle(a, b, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
2727 }
2728 
2729 // CHECK-LABEL: @test_i16x8_shuffle(
2730 // CHECK-NEXT:  entry:
2731 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2732 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2733 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1)
2734 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2735 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2736 //
2737 v128_t test_i16x8_shuffle(v128_t a, v128_t b) {
2738   return wasm_i16x8_shuffle(a, b, 7, 6, 5, 4, 3, 2, 1, 0);
2739 }
2740 
2741 // CHECK-LABEL: @test_i32x4_shuffle(
2742 // CHECK-NEXT:  entry:
2743 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2744 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2745 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3)
2746 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2747 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2748 //
2749 v128_t test_i32x4_shuffle(v128_t a, v128_t b) {
2750   return wasm_i32x4_shuffle(a, b, 3, 2, 1, 0);
2751 }
2752 
2753 // CHECK-LABEL: @test_i64x2_shuffle(
2754 // CHECK-NEXT:  entry:
2755 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2756 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2757 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.shuffle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
2758 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2759 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2760 //
2761 v128_t test_i64x2_shuffle(v128_t a, v128_t b) {
2762   return wasm_i64x2_shuffle(a, b, 1, 0);
2763 }
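
// Note: every shuffle width funnels into the byte-granular @llvm.wasm.shuffle
// intrinsic; a lane index L for an N-byte element is expanded at compile time
// into the byte indices N*L .. N*L+N-1, as the masks above show (e.g. i32
// lane 3 becomes bytes 12, 13, 14, 15).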
2764 
2765 // CHECK-LABEL: @test_i8x16_swizzle(
2766 // CHECK-NEXT:  entry:
2767 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2768 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
2769 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
2770 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2771 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2772 //
2773 v128_t test_i8x16_swizzle(v128_t a, v128_t b) {
2774   return wasm_i8x16_swizzle(a, b);
2775 }
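
// Note: unlike the shuffles above, swizzle takes its lane indices from a
// runtime operand, so it cannot be expressed as a constant shufflevector and
// stays on @llvm.wasm.swizzle. A scalar sketch of the lane semantics (for
// exposition only, not exercised by the checks): for each i in 0..15,
// out[i] = idx[i] < 16 ? a[idx[i]] : 0.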
2776 
2777 // CHECK-LABEL: @test_i8x16_narrow_i16x8(
2778 // CHECK-NEXT:  entry:
2779 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2780 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2781 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2782 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2783 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2784 //
2785 v128_t test_i8x16_narrow_i16x8(v128_t a, v128_t b) {
2786   return wasm_i8x16_narrow_i16x8(a, b);
2787 }
2788 
2789 // CHECK-LABEL: @test_u8x16_narrow_i16x8(
2790 // CHECK-NEXT:  entry:
2791 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2792 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
2793 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
2794 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
2795 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2796 //
2797 v128_t test_u8x16_narrow_i16x8(v128_t a, v128_t b) {
2798   return wasm_u8x16_narrow_i16x8(a, b);
2799 }
2800 
2801 // CHECK-LABEL: @test_i16x8_narrow_i32x4(
2802 // CHECK-NEXT:  entry:
2803 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
2804 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
2805 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2806 //
2807 v128_t test_i16x8_narrow_i32x4(v128_t a, v128_t b) {
2808   return wasm_i16x8_narrow_i32x4(a, b);
2809 }
2810 
2811 // CHECK-LABEL: @test_u16x8_narrow_i32x4(
2812 // CHECK-NEXT:  entry:
2813 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
2814 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
2815 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2816 //
2817 v128_t test_u16x8_narrow_i32x4(v128_t a, v128_t b) {
2818   return wasm_u16x8_narrow_i32x4(a, b);
2819 }
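
// Note: the narrowing operations saturate each input lane to the result
// range (the inputs are interpreted as signed even for the _u forms, which
// clamp to the unsigned range) and pack a's lanes into the low half of the
// result and b's into the high half; the saturation keeps these on target
// intrinsics. The i32x4 sources above need no bitcast because v128_t is
// already <4 x i32>.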
2820 
2821 // CHECK-LABEL: @test_i16x8_extend_low_i8x16(
2822 // CHECK-NEXT:  entry:
2823 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2824 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2825 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2826 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2827 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2828 //
2829 v128_t test_i16x8_extend_low_i8x16(v128_t a) {
2830   return wasm_i16x8_extend_low_i8x16(a);
2831 }
2832 
2833 // CHECK-LABEL: @test_i16x8_extend_high_i8x16(
2834 // CHECK-NEXT:  entry:
2835 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2836 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2837 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2838 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2839 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2840 //
2841 v128_t test_i16x8_extend_high_i8x16(v128_t a) {
2842   return wasm_i16x8_extend_high_i8x16(a);
2843 }
2844 
2845 // CHECK-LABEL: @test_u16x8_extend_low_u8x16(
2846 // CHECK-NEXT:  entry:
2847 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2848 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2849 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2850 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2851 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2852 //
2853 v128_t test_u16x8_extend_low_u8x16(v128_t a) {
2854   return wasm_u16x8_extend_low_u8x16(a);
2855 }
2856 
2857 // CHECK-LABEL: @test_u16x8_extend_high_u8x16(
2858 // CHECK-NEXT:  entry:
2859 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2860 // CHECK-NEXT:    [[VECINIT14_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2861 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <8 x i8> [[VECINIT14_I]] to <8 x i16>
2862 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[CONV_I]] to <4 x i32>
2863 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2864 //
2865 v128_t test_u16x8_extend_high_u8x16(v128_t a) {
2866   return wasm_u16x8_extend_high_u8x16(a);
2867 }
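
// Note: each extend_low/high is expressed as plain IR, a shufflevector that
// selects one half of the lanes followed by sext or zext to double the lane
// width; the same pattern repeats at every width below, and the backend can
// select it directly to the corresponding extend instructions.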
2868 
2869 // CHECK-LABEL: @test_i32x4_extend_low_i16x8(
2870 // CHECK-NEXT:  entry:
2871 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2872 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2873 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2874 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2875 //
2876 v128_t test_i32x4_extend_low_i16x8(v128_t a) {
2877   return wasm_i32x4_extend_low_i16x8(a);
2878 }
2879 
2880 // CHECK-LABEL: @test_i32x4_extend_high_i16x8(
2881 // CHECK-NEXT:  entry:
2882 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2883 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2884 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2885 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2886 //
2887 v128_t test_i32x4_extend_high_i16x8(v128_t a) {
2888   return wasm_i32x4_extend_high_i16x8(a);
2889 }
2890 
2891 // CHECK-LABEL: @test_u32x4_extend_low_u16x8(
2892 // CHECK-NEXT:  entry:
2893 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2894 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2895 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2896 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2897 //
2898 v128_t test_u32x4_extend_low_u16x8(v128_t a) {
2899   return wasm_u32x4_extend_low_u16x8(a);
2900 }
2901 
2902 // CHECK-LABEL: @test_u32x4_extend_high_u16x8(
2903 // CHECK-NEXT:  entry:
2904 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2905 // CHECK-NEXT:    [[VECINIT6_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
2906 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <4 x i16> [[VECINIT6_I]] to <4 x i32>
2907 // CHECK-NEXT:    ret <4 x i32> [[CONV_I]]
2908 //
2909 v128_t test_u32x4_extend_high_u16x8(v128_t a) {
2910   return wasm_u32x4_extend_high_u16x8(a);
2911 }
2912 
2913 // CHECK-LABEL: @test_i64x2_extend_low_i32x4(
2914 // CHECK-NEXT:  entry:
2915 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
2916 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2917 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2918 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2919 //
2920 v128_t test_i64x2_extend_low_i32x4(v128_t a) {
2921   return wasm_i64x2_extend_low_i32x4(a);
2922 }
2923 
2924 // CHECK-LABEL: @test_i64x2_extend_high_i32x4(
2925 // CHECK-NEXT:  entry:
2926 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
2927 // CHECK-NEXT:    [[CONV_I:%.*]] = sext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2928 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2929 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2930 //
2931 v128_t test_i64x2_extend_high_i32x4(v128_t a) {
2932   return wasm_i64x2_extend_high_i32x4(a);
2933 }
2934 
2935 // CHECK-LABEL: @test_u64x2_extend_low_u32x4(
2936 // CHECK-NEXT:  entry:
2937 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
2938 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2939 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2940 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2941 //
2942 v128_t test_u64x2_extend_low_u32x4(v128_t a) {
2943   return wasm_u64x2_extend_low_u32x4(a);
2944 }
2945 
2946 // CHECK-LABEL: @test_u64x2_extend_high_u32x4(
2947 // CHECK-NEXT:  entry:
2948 // CHECK-NEXT:    [[VECINIT2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
2949 // CHECK-NEXT:    [[CONV_I:%.*]] = zext <2 x i32> [[VECINIT2_I]] to <2 x i64>
2950 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[CONV_I]] to <4 x i32>
2951 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
2952 //
2953 v128_t test_u64x2_extend_high_u32x4(v128_t a) {
2954   return wasm_u64x2_extend_high_u32x4(a);
2955 }
2956 
2957 // CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
2958 // CHECK-NEXT:  entry:
2959 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2960 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]])
2961 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
2962 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2963 //
2964 v128_t test_i16x8_extadd_pairwise_i8x16(v128_t a) {
2965   return wasm_i16x8_extadd_pairwise_i8x16(a);
2966 }
2967 
2968 // CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
2969 // CHECK-NEXT:  entry:
2970 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
2971 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]])
2972 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
2973 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
2974 //
2975 v128_t test_u16x8_extadd_pairwise_u8x16(v128_t a) {
2976   return wasm_u16x8_extadd_pairwise_u8x16(a);
2977 }
2978 
2979 // CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
2980 // CHECK-NEXT:  entry:
2981 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2982 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]])
2983 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2984 //
2985 v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
2986   return wasm_i32x4_extadd_pairwise_i16x8(a);
2987 }
2988 
2989 // CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
2990 // CHECK-NEXT:  entry:
2991 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
2992 // CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]])
2993 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
2994 //
2995 v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
2996   return wasm_u32x4_extadd_pairwise_u16x8(a);
2997 }
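
// Note: extadd_pairwise has no generic LLVM equivalent, so it remains a
// target intrinsic; lane-wise it computes
// result[i] = extend(a[2*i]) + extend(a[2*i+1]) at double the input width.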
2998 
2999 // CHECK-LABEL: @test_i16x8_extmul_low_i8x16(
3000 // CHECK-NEXT:  entry:
3001 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
3002 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3003 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
3004 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
3005 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3006 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
3007 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
3008 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
3009 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
3010 //
3011 v128_t test_i16x8_extmul_low_i8x16(v128_t a, v128_t b) {
3012   return wasm_i16x8_extmul_low_i8x16(a, b);
3013 }
3014 
3015 // CHECK-LABEL: @test_i16x8_extmul_high_i8x16(
3016 // CHECK-NEXT:  entry:
3017 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
3018 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3019 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
3020 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
3021 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3022 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
3023 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
3024 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
3025 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
3026 //
3027 v128_t test_i16x8_extmul_high_i8x16(v128_t a, v128_t b) {
3028   return wasm_i16x8_extmul_high_i8x16(a, b);
3029 }
3030 
3031 // CHECK-LABEL: @test_u16x8_extmul_low_u8x16(
3032 // CHECK-NEXT:  entry:
3033 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
3034 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3035 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
3036 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
3037 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3038 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
3039 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
3040 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
3041 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
3042 //
3043 v128_t test_u16x8_extmul_low_u8x16(v128_t a, v128_t b) {
3044   return wasm_u16x8_extmul_low_u8x16(a, b);
3045 }
3046 
3047 // CHECK-LABEL: @test_u16x8_extmul_high_u8x16(
3048 // CHECK-NEXT:  entry:
3049 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
3050 // CHECK-NEXT:    [[VECINIT14_I2_I:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3051 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <8 x i8> [[VECINIT14_I2_I]] to <8 x i16>
3052 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
3053 // CHECK-NEXT:    [[VECINIT14_I_I:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3054 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <8 x i8> [[VECINIT14_I_I]] to <8 x i16>
3055 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <8 x i16> [[CONV_I_I]], [[CONV_I3_I]]
3056 // CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[MUL_I]] to <4 x i32>
3057 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
3058 //
3059 v128_t test_u16x8_extmul_high_u8x16(v128_t a, v128_t b) {
3060   return wasm_u16x8_extmul_high_u8x16(a, b);
3061 }
3062 
3063 // CHECK-LABEL: @test_i32x4_extmul_low_i16x8(
3064 // CHECK-NEXT:  entry:
3065 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
3066 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3067 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
3068 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
3069 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3070 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
3071 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
3072 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
3073 //
3074 v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
3075   return wasm_i32x4_extmul_low_i16x8(a, b);
3076 }
3077 
3078 // CHECK-LABEL: @test_i32x4_extmul_high_i16x8(
3079 // CHECK-NEXT:  entry:
3080 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
3081 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3082 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
3083 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
3084 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3085 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
3086 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
3087 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
3088 //
3089 v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
3090   return wasm_i32x4_extmul_high_i16x8(a, b);
3091 }
3092 
3093 // CHECK-LABEL: @test_u32x4_extmul_low_u16x8(
3094 // CHECK-NEXT:  entry:
3095 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
3096 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3097 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
3098 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
3099 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3100 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
3101 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
3102 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
3103 //
3104 v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
3105   return wasm_u32x4_extmul_low_u16x8(a, b);
3106 }
3107 
3108 // CHECK-LABEL: @test_u32x4_extmul_high_u16x8(
3109 // CHECK-NEXT:  entry:
3110 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
3111 // CHECK-NEXT:    [[VECINIT6_I2_I:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3112 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <4 x i16> [[VECINIT6_I2_I]] to <4 x i32>
3113 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
3114 // CHECK-NEXT:    [[VECINIT6_I_I:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
3115 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <4 x i16> [[VECINIT6_I_I]] to <4 x i32>
3116 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <4 x i32> [[CONV_I_I]], [[CONV_I3_I]]
3117 // CHECK-NEXT:    ret <4 x i32> [[MUL_I]]
3118 //
3119 v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
3120   return wasm_u32x4_extmul_high_u16x8(a, b);
3121 }
3122 
3123 // CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
3124 // CHECK-NEXT:  entry:
3125 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
3126 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
3127 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
3128 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
3129 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
3130 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
3131 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
3132 //
3133 v128_t test_i64x2_extmul_low_i32x4(v128_t a, v128_t b) {
3134   return wasm_i64x2_extmul_low_i32x4(a, b);
3135 }
3136 
3137 // CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
3138 // CHECK-NEXT:  entry:
3139 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
3140 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = sext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
3141 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
3142 // CHECK-NEXT:    [[CONV_I_I:%.*]] = sext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
3143 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nsw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
3144 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
3145 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
3146 //
3147 v128_t test_i64x2_extmul_high_i32x4(v128_t a, v128_t b) {
3148   return wasm_i64x2_extmul_high_i32x4(a, b);
3149 }
3150 
3151 // CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
3152 // CHECK-NEXT:  entry:
3153 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
3154 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
3155 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
3156 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
3157 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
3158 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
3159 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
3160 //
3161 v128_t test_u64x2_extmul_low_u32x4(v128_t a, v128_t b) {
3162   return wasm_u64x2_extmul_low_u32x4(a, b);
3163 }
3164 
3165 // CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
3166 // CHECK-NEXT:  entry:
3167 // CHECK-NEXT:    [[VECINIT2_I2_I:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
3168 // CHECK-NEXT:    [[CONV_I3_I:%.*]] = zext <2 x i32> [[VECINIT2_I2_I]] to <2 x i64>
3169 // CHECK-NEXT:    [[VECINIT2_I_I:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
3170 // CHECK-NEXT:    [[CONV_I_I:%.*]] = zext <2 x i32> [[VECINIT2_I_I]] to <2 x i64>
3171 // CHECK-NEXT:    [[MUL_I:%.*]] = mul nuw <2 x i64> [[CONV_I_I]], [[CONV_I3_I]]
3172 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[MUL_I]] to <4 x i32>
3173 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
3174 //
3175 v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
3176   return wasm_u64x2_extmul_high_u32x4(a, b);
3177 }
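
// Note: the extmul family expands to a half-select shufflevector, a
// sext/zext, and a plain mul. The nsw/nuw flags are valid because the product
// of two sign- or zero-extended half-width values can never overflow the
// doubled width (e.g. 255 * 255 = 65025 < 2^16), and they help the backend
// fold the whole sequence back into a single extmul_low/high instruction.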
3178 
3179 // CHECK-LABEL: @test_i16x8_q15mulr_sat(
3180 // CHECK-NEXT:  entry:
3181 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
3182 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
3183 // CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
3184 // CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
3185 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
3186 //
3187 v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) {
3188   return wasm_i16x8_q15mulr_sat(a, b);
3189 }
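
// Note: q15mulr_sat is a rounding Q15 fixed-point multiply; per the wasm SIMD
// semantics each lane computes sat16((a[i] * b[i] + 0x4000) >> 15). The
// saturation only fires for -32768 * -32768, the classic Q15 overflow case,
// and since there is no generic LLVM equivalent it stays on a target
// intrinsic.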
3190