xref: /llvm-project/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll (revision 76c4529515c30626aa91ad63ee5a09bc6d6e7bb6)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s
3
4; Test that the logic to choose between v128.const vector
5; initialization and splat vector initialization and to optimize the
6; choice of splat value works correctly.
7
8target triple = "wasm32-unknown-unknown"
9
; Inserts the variable %x into lane 5 of an otherwise all-42 constant
; <8 x i16>. Expected output: one v128.const carrying the constant lanes
; (lane 5 written as 0) followed by a single i16x8.replace_lane on lane 5.
10define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
11; CHECK-LABEL: same_const_one_replaced_i16x8:
12; CHECK:         .functype same_const_one_replaced_i16x8 (i32) -> (v128)
13; CHECK-NEXT:  # %bb.0:
14; CHECK-NEXT:    v128.const $push0=, 42, 42, 42, 42, 42, 0, 42, 42
15; CHECK-NEXT:    i16x8.replace_lane $push1=, $pop0, 5, $0
16; CHECK-NEXT:    return $pop1
17  %v = insertelement
18    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
19    i16 %x,
20    i32 5
21  ret <8 x i16> %v
22}
23
; Inserts the variable %x into lane 5 of a constant <8 x i16> whose lanes
; all differ. Expected output: one v128.const with the original constants
; (the overwritten lane 5, -6 in the IR, is written as 0) followed by a
; single i16x8.replace_lane on lane 5.
24define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
25; CHECK-LABEL: different_const_one_replaced_i16x8:
26; CHECK:         .functype different_const_one_replaced_i16x8 (i32) -> (v128)
27; CHECK-NEXT:  # %bb.0:
28; CHECK-NEXT:    v128.const $push0=, 1, -2, 3, -4, 5, 0, 7, -8
29; CHECK-NEXT:    i16x8.replace_lane $push1=, $pop0, 5, $0
30; CHECK-NEXT:    return $pop1
31  %v = insertelement
32    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
33    i16 %x,
34    i32 5
35  ret <8 x i16> %v
36}
37
; Floating-point counterpart of the single-lane replacement test: inserts
; %x into lane 2 of an all-42.0 constant <4 x float>. Expected output: one
; v128.const (lane 2 written as 0x0p0) followed by a single
; f32x4.replace_lane on lane 2.
38define <4 x float> @same_const_one_replaced_f32x4(float %x) {
39; CHECK-LABEL: same_const_one_replaced_f32x4:
40; CHECK:         .functype same_const_one_replaced_f32x4 (f32) -> (v128)
41; CHECK-NEXT:  # %bb.0:
42; CHECK-NEXT:    v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
43; CHECK-NEXT:    f32x4.replace_lane $push1=, $pop0, 2, $0
44; CHECK-NEXT:    return $pop1
45  %v = insertelement
46    <4 x float> <float 42., float 42., float 42., float 42.>,
47    float %x,
48    i32 2
49  ret <4 x float> %v
50}
51
; Inserts %x into lane 2 of the constant <4 x float> <1.0, 2.0, 3.0, 4.0>.
; Expected output: one v128.const holding lanes 0, 1 and 3 (lane 2 written
; as 0x0p0) followed by a single f32x4.replace_lane on lane 2.
52define <4 x float> @different_const_one_replaced_f32x4(float %x) {
53; CHECK-LABEL: different_const_one_replaced_f32x4:
54; CHECK:         .functype different_const_one_replaced_f32x4 (f32) -> (v128)
55; CHECK-NEXT:  # %bb.0:
56; CHECK-NEXT:    v128.const $push0=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
57; CHECK-NEXT:    f32x4.replace_lane $push1=, $pop0, 2, $0
58; CHECK-NEXT:    return $pop1
59  %v = insertelement
60    <4 x float> <float 1., float 2., float 3., float 4.>,
61    float %x,
62    i32 2
63  ret <4 x float> %v
64}
65
; Returns a constant <4 x i32> whose lane 0 is undef. Expected output: a
; single v128.const in which the undef lane is written as 0; no splat or
; replace_lane instructions appear.
66define <4 x i32> @splat_common_const_i32x4() {
67; CHECK-LABEL: splat_common_const_i32x4:
68; CHECK:         .functype splat_common_const_i32x4 () -> (v128)
69; CHECK-NEXT:  # %bb.0:
70; CHECK-NEXT:    v128.const $push0=, 0, 3, 3, 1
71; CHECK-NEXT:    return $pop0
72  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
73}
74
; Builds an <8 x i16> from three arguments: %c fills lanes 1, 3, 5 and 6,
; %b fills lanes 0, 4 and 7, and %a fills lane 2. Expected output: the most
; frequently used argument (%c, operand $2) is splatted, and the remaining
; four lanes are patched with i16x8.replace_lane.
75define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
76; CHECK-LABEL: splat_common_arg_i16x8:
77; CHECK:         .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
78; CHECK-NEXT:  # %bb.0:
79; CHECK-NEXT:    i16x8.splat $push0=, $2
80; CHECK-NEXT:    i16x8.replace_lane $push1=, $pop0, 0, $1
81; CHECK-NEXT:    i16x8.replace_lane $push2=, $pop1, 2, $0
82; CHECK-NEXT:    i16x8.replace_lane $push3=, $pop2, 4, $1
83; CHECK-NEXT:    i16x8.replace_lane $push4=, $pop3, 7, $1
84; CHECK-NEXT:    return $pop4
85  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
86  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
87  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
88  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
89  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
90  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
91  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
92  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
93  ret <8 x i16> %v7
94}
95
; Reads lane 0 of %mask, uses it as a variable index into %src, and inserts
; the selected byte into lane 0 of an undef vector. Expected output: %src is
; stored 16 bytes below __stack_pointer, the index is masked with 15 and
; ORed into that address, and the byte is read back with v128.load8_splat.
; NOTE(review): despite the function name, the recorded output contains no
; i8x16.swizzle instruction -- confirm this stack-based lowering is the
; intended result at this revision.
96define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
97; CHECK-LABEL: swizzle_one_i8x16:
98; CHECK:         .functype swizzle_one_i8x16 (v128, v128) -> (v128)
99; CHECK-NEXT:  # %bb.0:
100; CHECK-NEXT:    global.get $push5=, __stack_pointer
101; CHECK-NEXT:    i32.const $push6=, 16
102; CHECK-NEXT:    i32.sub $push8=, $pop5, $pop6
103; CHECK-NEXT:    local.tee $push7=, $2=, $pop8
104; CHECK-NEXT:    v128.store 0($pop7), $0
105; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
106; CHECK-NEXT:    i32.const $push1=, 15
107; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
108; CHECK-NEXT:    i32.or $push3=, $2, $pop2
109; CHECK-NEXT:    v128.load8_splat $push4=, 0($pop3)
110; CHECK-NEXT:    return $pop4
111  %m0 = extractelement <16 x i8> %mask, i32 0
112  %s0 = extractelement <16 x i8> %src, i8 %m0
113  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
114  ret <16 x i8> %v0
115}
116
; Full 16-lane version of the variable-index pattern: for every lane i,
; result[i] = %src[%mask[i]]. Expected output: %src is stored once 16 bytes
; below __stack_pointer; each mask lane is extracted, masked with 15 and
; ORed into that address (lanes 15 down to 0); lane 0 is then loaded with
; v128.load8_splat and lanes 1..15 with v128.load8_lane.
; NOTE(review): despite the function name, the recorded output contains no
; i8x16.swizzle instruction -- confirm this stack-based lowering is the
; intended result at this revision.
117define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
118; CHECK-LABEL: swizzle_all_i8x16:
119; CHECK:         .functype swizzle_all_i8x16 (v128, v128) -> (v128)
120; CHECK-NEXT:  # %bb.0:
121; CHECK-NEXT:    global.get $push65=, __stack_pointer
122; CHECK-NEXT:    i32.const $push66=, 16
123; CHECK-NEXT:    i32.sub $push83=, $pop65, $pop66
124; CHECK-NEXT:    local.tee $push82=, $2=, $pop83
125; CHECK-NEXT:    v128.store 0($pop82), $0
126; CHECK-NEXT:    i8x16.extract_lane_u $push61=, $1, 15
127; CHECK-NEXT:    i32.const $push1=, 15
128; CHECK-NEXT:    i32.and $push62=, $pop61, $pop1
129; CHECK-NEXT:    i32.or $push63=, $2, $pop62
130; CHECK-NEXT:    i8x16.extract_lane_u $push57=, $1, 14
131; CHECK-NEXT:    i32.const $push81=, 15
132; CHECK-NEXT:    i32.and $push58=, $pop57, $pop81
133; CHECK-NEXT:    i32.or $push59=, $2, $pop58
134; CHECK-NEXT:    i8x16.extract_lane_u $push53=, $1, 13
135; CHECK-NEXT:    i32.const $push80=, 15
136; CHECK-NEXT:    i32.and $push54=, $pop53, $pop80
137; CHECK-NEXT:    i32.or $push55=, $2, $pop54
138; CHECK-NEXT:    i8x16.extract_lane_u $push49=, $1, 12
139; CHECK-NEXT:    i32.const $push79=, 15
140; CHECK-NEXT:    i32.and $push50=, $pop49, $pop79
141; CHECK-NEXT:    i32.or $push51=, $2, $pop50
142; CHECK-NEXT:    i8x16.extract_lane_u $push45=, $1, 11
143; CHECK-NEXT:    i32.const $push78=, 15
144; CHECK-NEXT:    i32.and $push46=, $pop45, $pop78
145; CHECK-NEXT:    i32.or $push47=, $2, $pop46
146; CHECK-NEXT:    i8x16.extract_lane_u $push41=, $1, 10
147; CHECK-NEXT:    i32.const $push77=, 15
148; CHECK-NEXT:    i32.and $push42=, $pop41, $pop77
149; CHECK-NEXT:    i32.or $push43=, $2, $pop42
150; CHECK-NEXT:    i8x16.extract_lane_u $push37=, $1, 9
151; CHECK-NEXT:    i32.const $push76=, 15
152; CHECK-NEXT:    i32.and $push38=, $pop37, $pop76
153; CHECK-NEXT:    i32.or $push39=, $2, $pop38
154; CHECK-NEXT:    i8x16.extract_lane_u $push33=, $1, 8
155; CHECK-NEXT:    i32.const $push75=, 15
156; CHECK-NEXT:    i32.and $push34=, $pop33, $pop75
157; CHECK-NEXT:    i32.or $push35=, $2, $pop34
158; CHECK-NEXT:    i8x16.extract_lane_u $push29=, $1, 7
159; CHECK-NEXT:    i32.const $push74=, 15
160; CHECK-NEXT:    i32.and $push30=, $pop29, $pop74
161; CHECK-NEXT:    i32.or $push31=, $2, $pop30
162; CHECK-NEXT:    i8x16.extract_lane_u $push25=, $1, 6
163; CHECK-NEXT:    i32.const $push73=, 15
164; CHECK-NEXT:    i32.and $push26=, $pop25, $pop73
165; CHECK-NEXT:    i32.or $push27=, $2, $pop26
166; CHECK-NEXT:    i8x16.extract_lane_u $push21=, $1, 5
167; CHECK-NEXT:    i32.const $push72=, 15
168; CHECK-NEXT:    i32.and $push22=, $pop21, $pop72
169; CHECK-NEXT:    i32.or $push23=, $2, $pop22
170; CHECK-NEXT:    i8x16.extract_lane_u $push17=, $1, 4
171; CHECK-NEXT:    i32.const $push71=, 15
172; CHECK-NEXT:    i32.and $push18=, $pop17, $pop71
173; CHECK-NEXT:    i32.or $push19=, $2, $pop18
174; CHECK-NEXT:    i8x16.extract_lane_u $push13=, $1, 3
175; CHECK-NEXT:    i32.const $push70=, 15
176; CHECK-NEXT:    i32.and $push14=, $pop13, $pop70
177; CHECK-NEXT:    i32.or $push15=, $2, $pop14
178; CHECK-NEXT:    i8x16.extract_lane_u $push9=, $1, 2
179; CHECK-NEXT:    i32.const $push69=, 15
180; CHECK-NEXT:    i32.and $push10=, $pop9, $pop69
181; CHECK-NEXT:    i32.or $push11=, $2, $pop10
182; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 1
183; CHECK-NEXT:    i32.const $push68=, 15
184; CHECK-NEXT:    i32.and $push2=, $pop0, $pop68
185; CHECK-NEXT:    i32.or $push3=, $2, $pop2
186; CHECK-NEXT:    i8x16.extract_lane_u $push4=, $1, 0
187; CHECK-NEXT:    i32.const $push67=, 15
188; CHECK-NEXT:    i32.and $push5=, $pop4, $pop67
189; CHECK-NEXT:    i32.or $push6=, $2, $pop5
190; CHECK-NEXT:    v128.load8_splat $push7=, 0($pop6)
191; CHECK-NEXT:    v128.load8_lane $push8=, 0($pop3), $pop7, 1
192; CHECK-NEXT:    v128.load8_lane $push12=, 0($pop11), $pop8, 2
193; CHECK-NEXT:    v128.load8_lane $push16=, 0($pop15), $pop12, 3
194; CHECK-NEXT:    v128.load8_lane $push20=, 0($pop19), $pop16, 4
195; CHECK-NEXT:    v128.load8_lane $push24=, 0($pop23), $pop20, 5
196; CHECK-NEXT:    v128.load8_lane $push28=, 0($pop27), $pop24, 6
197; CHECK-NEXT:    v128.load8_lane $push32=, 0($pop31), $pop28, 7
198; CHECK-NEXT:    v128.load8_lane $push36=, 0($pop35), $pop32, 8
199; CHECK-NEXT:    v128.load8_lane $push40=, 0($pop39), $pop36, 9
200; CHECK-NEXT:    v128.load8_lane $push44=, 0($pop43), $pop40, 10
201; CHECK-NEXT:    v128.load8_lane $push48=, 0($pop47), $pop44, 11
202; CHECK-NEXT:    v128.load8_lane $push52=, 0($pop51), $pop48, 12
203; CHECK-NEXT:    v128.load8_lane $push56=, 0($pop55), $pop52, 13
204; CHECK-NEXT:    v128.load8_lane $push60=, 0($pop59), $pop56, 14
205; CHECK-NEXT:    v128.load8_lane $push64=, 0($pop63), $pop60, 15
206; CHECK-NEXT:    return $pop64
207  %m0 = extractelement <16 x i8> %mask, i32 0
208  %s0 = extractelement <16 x i8> %src, i8 %m0
209  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
210  %m1 = extractelement <16 x i8> %mask, i32 1
211  %s1 = extractelement <16 x i8> %src, i8 %m1
212  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
213  %m2 = extractelement <16 x i8> %mask, i32 2
214  %s2 = extractelement <16 x i8> %src, i8 %m2
215  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
216  %m3 = extractelement <16 x i8> %mask, i32 3
217  %s3 = extractelement <16 x i8> %src, i8 %m3
218  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
219  %m4 = extractelement <16 x i8> %mask, i32 4
220  %s4 = extractelement <16 x i8> %src, i8 %m4
221  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
222  %m5 = extractelement <16 x i8> %mask, i32 5
223  %s5 = extractelement <16 x i8> %src, i8 %m5
224  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
225  %m6 = extractelement <16 x i8> %mask, i32 6
226  %s6 = extractelement <16 x i8> %src, i8 %m6
227  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
228  %m7 = extractelement <16 x i8> %mask, i32 7
229  %s7 = extractelement <16 x i8> %src, i8 %m7
230  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
231  %m8 = extractelement <16 x i8> %mask, i32 8
232  %s8 = extractelement <16 x i8> %src, i8 %m8
233  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
234  %m9 = extractelement <16 x i8> %mask, i32 9
235  %s9 = extractelement <16 x i8> %src, i8 %m9
236  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
237  %m10 = extractelement <16 x i8> %mask, i32 10
238  %s10 = extractelement <16 x i8> %src, i8 %m10
239  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
240  %m11 = extractelement <16 x i8> %mask, i32 11
241  %s11 = extractelement <16 x i8> %src, i8 %m11
242  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
243  %m12 = extractelement <16 x i8> %mask, i32 12
244  %s12 = extractelement <16 x i8> %src, i8 %m12
245  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
246  %m13 = extractelement <16 x i8> %mask, i32 13
247  %s13 = extractelement <16 x i8> %src, i8 %m13
248  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
249  %m14 = extractelement <16 x i8> %mask, i32 14
250  %s14 = extractelement <16 x i8> %src, i8 %m14
251  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
252  %m15 = extractelement <16 x i8> %mask, i32 15
253  %s15 = extractelement <16 x i8> %src, i8 %m15
254  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
255  ret <16 x i8> %v15
256}
257
258; Ensure we don't use swizzle
; 16-bit-lane variant of the variable-index pattern: reads lane 0 of %mask,
; uses it to index %src, and inserts the result into lane 0 of an undef
; <8 x i16>. Expected output: %src is stored 16 bytes below __stack_pointer,
; the index is masked with 7 and shifted left by 1 (a byte offset for
; 2-byte lanes), ORed into that address, and read with v128.load16_splat.
259define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
260; CHECK-LABEL: swizzle_one_i16x8:
261; CHECK:         .functype swizzle_one_i16x8 (v128, v128) -> (v128)
262; CHECK-NEXT:  # %bb.0:
263; CHECK-NEXT:    global.get $push7=, __stack_pointer
264; CHECK-NEXT:    i32.const $push8=, 16
265; CHECK-NEXT:    i32.sub $push10=, $pop7, $pop8
266; CHECK-NEXT:    local.tee $push9=, $2=, $pop10
267; CHECK-NEXT:    v128.store 0($pop9), $0
268; CHECK-NEXT:    i16x8.extract_lane_u $push0=, $1, 0
269; CHECK-NEXT:    i32.const $push1=, 7
270; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
271; CHECK-NEXT:    i32.const $push3=, 1
272; CHECK-NEXT:    i32.shl $push4=, $pop2, $pop3
273; CHECK-NEXT:    i32.or $push5=, $2, $pop4
274; CHECK-NEXT:    v128.load16_splat $push6=, 0($pop5)
275; CHECK-NEXT:    return $pop6
276  %m0 = extractelement <8 x i16> %mask, i32 0
277  %s0 = extractelement <8 x i16> %src, i16 %m0
278  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
279  ret <8 x i16> %v0
280}
281
; Builds a <4 x i32> in which two lanes come from fixed lanes of %src
; (lane 1 <- src lane 2, lane 2 <- src lane 0) and two lanes are constants
; (lane 0 = 0, lane 3 = 3). Expected output: one i8x16.shuffle of %src with
; itself places the source lanes, then two i32x4.replace_lane instructions
; write the constants into lanes 0 and 3.
282define <4 x i32> @half_shuffle_i32x4(<4 x i32> %src) {
283; CHECK-LABEL: half_shuffle_i32x4:
284; CHECK:         .functype half_shuffle_i32x4 (v128) -> (v128)
285; CHECK-NEXT:  # %bb.0:
286; CHECK-NEXT:    i8x16.shuffle $push0=, $0, $0, 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 0, 1, 2, 3
287; CHECK-NEXT:    i32.const $push1=, 0
288; CHECK-NEXT:    i32x4.replace_lane $push2=, $pop0, 0, $pop1
289; CHECK-NEXT:    i32.const $push3=, 3
290; CHECK-NEXT:    i32x4.replace_lane $push4=, $pop2, 3, $pop3
291; CHECK-NEXT:    return $pop4
292  %s0 = extractelement <4 x i32> %src, i32 0
293  %s2 = extractelement <4 x i32> %src, i32 2
294  %v0 = insertelement <4 x i32> undef, i32 0, i32 0
295  %v1 = insertelement <4 x i32> %v0, i32 %s2, i32 1
296  %v2 = insertelement <4 x i32> %v1, i32 %s0, i32 2
297  %v3 = insertelement <4 x i32> %v2, i32 3, i32 3
298  ret <4 x i32> %v3
299}
300
; Mixes three kinds of lane sources in one <16 x i8>: two variable-index
; lookups into %src (lanes 0 and 7), two copies of the scalar %splatted
; (lanes 3 and 12), and two constant-42 lanes (4 and 14); all other lanes
; are undef. Expected output: a v128.const with 42 in lanes 4 and 14 is the
; base, the two looked-up bytes are loaded from a stack copy of %src with
; v128.load8_lane, and %splatted is written with i8x16.replace_lane.
301define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
302  ; swizzle 0
303; CHECK-LABEL: mashup_swizzle_i8x16:
304; CHECK:         .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
305; CHECK-NEXT:  # %bb.0:
306; CHECK-NEXT:    global.get $push12=, __stack_pointer
307; CHECK-NEXT:    i32.const $push13=, 16
308; CHECK-NEXT:    i32.sub $push16=, $pop12, $pop13
309; CHECK-NEXT:    local.tee $push15=, $3=, $pop16
310; CHECK-NEXT:    v128.store 0($pop15), $0
311; CHECK-NEXT:    i8x16.extract_lane_u $push7=, $1, 7
312; CHECK-NEXT:    i32.const $push1=, 15
313; CHECK-NEXT:    i32.and $push8=, $pop7, $pop1
314; CHECK-NEXT:    i32.or $push9=, $3, $pop8
315; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
316; CHECK-NEXT:    i32.const $push14=, 15
317; CHECK-NEXT:    i32.and $push2=, $pop0, $pop14
318; CHECK-NEXT:    i32.or $push3=, $3, $pop2
319; CHECK-NEXT:    v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
320; CHECK-NEXT:    v128.load8_lane $push5=, 0($pop3), $pop4, 0
321; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop5, 3, $2
322; CHECK-NEXT:    v128.load8_lane $push10=, 0($pop9), $pop6, 7
323; CHECK-NEXT:    i8x16.replace_lane $push11=, $pop10, 12, $2
324; CHECK-NEXT:    return $pop11
325  %m0 = extractelement <16 x i8> %mask, i32 0
326  %s0 = extractelement <16 x i8> %src, i8 %m0
327  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
328  ; swizzle 7
329  %m1 = extractelement <16 x i8> %mask, i32 7
330  %s1 = extractelement <16 x i8> %src, i8 %m1
331  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
332  ; splat 3
333  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
334  ; splat 12
335  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
336  ; const 4
337  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
338  ; const 14
339  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
340  ret <16 x i8> %v5
341}
342
; Mixed-source <16 x i8> with one variable-index lookup into %src (lane 0),
; two copies of %splatted (lanes 3 and 12), and two constant-42 lanes
; (4 and 14). Expected output: a v128.const with 42 in lanes 4 and 14 is
; the base, lane 0 is filled by v128.load8_lane from a stack copy of %src,
; and %splatted is written to lanes 3 and 12 with i8x16.replace_lane.
343define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
344  ; swizzle 0
345; CHECK-LABEL: mashup_const_i8x16:
346; CHECK:         .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
347; CHECK-NEXT:  # %bb.0:
348; CHECK-NEXT:    global.get $push8=, __stack_pointer
349; CHECK-NEXT:    i32.const $push9=, 16
350; CHECK-NEXT:    i32.sub $push11=, $pop8, $pop9
351; CHECK-NEXT:    local.tee $push10=, $3=, $pop11
352; CHECK-NEXT:    v128.store 0($pop10), $0
353; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
354; CHECK-NEXT:    i32.const $push1=, 15
355; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
356; CHECK-NEXT:    i32.or $push3=, $3, $pop2
357; CHECK-NEXT:    v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
358; CHECK-NEXT:    v128.load8_lane $push5=, 0($pop3), $pop4, 0
359; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop5, 3, $2
360; CHECK-NEXT:    i8x16.replace_lane $push7=, $pop6, 12, $2
361; CHECK-NEXT:    return $pop7
362  %m0 = extractelement <16 x i8> %mask, i32 0
363  %s0 = extractelement <16 x i8> %src, i8 %m0
364  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
365  ; splat 3
366  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
367  ; splat 12
368  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
369  ; const 4
370  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
371  ; const 14
372  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
373  ret <16 x i8> %v4
374}
375
; Mixed-source <16 x i8> with one variable-index lookup into %src (lane 0),
; two copies of %splatted (lanes 3 and 12), and a single constant-42 lane
; (4). Expected output: i8x16.splat of %splatted (operand $2) is the base,
; lane 0 is filled by v128.load8_lane from a stack copy of %src, and the
; constant 42 is written to lane 4 with i8x16.replace_lane.
376define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
377  ; swizzle 0
378; CHECK-LABEL: mashup_splat_i8x16:
379; CHECK:         .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
380; CHECK-NEXT:  # %bb.0:
381; CHECK-NEXT:    global.get $push8=, __stack_pointer
382; CHECK-NEXT:    i32.const $push9=, 16
383; CHECK-NEXT:    i32.sub $push11=, $pop8, $pop9
384; CHECK-NEXT:    local.tee $push10=, $3=, $pop11
385; CHECK-NEXT:    v128.store 0($pop10), $0
386; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
387; CHECK-NEXT:    i32.const $push1=, 15
388; CHECK-NEXT:    i32.and $push2=, $pop0, $pop1
389; CHECK-NEXT:    i32.or $push3=, $3, $pop2
390; CHECK-NEXT:    i8x16.splat $push4=, $2
391; CHECK-NEXT:    v128.load8_lane $push5=, 0($pop3), $pop4, 0
392; CHECK-NEXT:    i32.const $push6=, 42
393; CHECK-NEXT:    i8x16.replace_lane $push7=, $pop5, 4, $pop6
394; CHECK-NEXT:    return $pop7
395  %m0 = extractelement <16 x i8> %mask, i32 0
396  %s0 = extractelement <16 x i8> %src, i8 %m0
397  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
398  ; splat 3
399  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
400  ; splat 12
401  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
402  ; const 4
403  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
404  ret <16 x i8> %v3
405}
406
; Inserts the constant 42.0 into lane 1 of an undef <4 x float>. Expected
; output: a single v128.const with 0x1.5p5 in every lane, i.e. the undef
; lanes are filled with the same constant and no replace_lane is emitted.
407define <4 x float> @undef_const_insert_f32x4() {
408; CHECK-LABEL: undef_const_insert_f32x4:
409; CHECK:         .functype undef_const_insert_f32x4 () -> (v128)
410; CHECK-NEXT:  # %bb.0:
411; CHECK-NEXT:    v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5
412; CHECK-NEXT:    return $pop0
413  %v = insertelement <4 x float> undef, float 42., i32 1
414  ret <4 x float> %v
415}
416
; Inserts the argument %x into lane 3 of an undef <4 x i32>. Expected
; output: a single i32x4.splat of the argument, with no replace_lane.
417define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
418; CHECK-LABEL: undef_arg_insert_i32x4:
419; CHECK:         .functype undef_arg_insert_i32x4 (i32) -> (v128)
420; CHECK-NEXT:  # %bb.0:
421; CHECK-NEXT:    i32x4.splat $push0=, $0
422; CHECK-NEXT:    return $pop0
423  %v = insertelement <4 x i32> undef, i32 %x, i32 3
424  ret <4 x i32> %v
425}
426
; Inserts an undef element into an undef <16 x i8>, so every lane is undef.
; Expected output: only `return $0`; no constant, splat, or replace_lane
; instruction is emitted to build the vector.
427define <16 x i8> @all_undef_i8x16() {
428; CHECK-LABEL: all_undef_i8x16:
429; CHECK:         .functype all_undef_i8x16 () -> (v128)
430; CHECK-NEXT:  # %bb.0:
431; CHECK-NEXT:    return $0
432  %v = insertelement <16 x i8> undef, i8 undef, i32 4
433  ret <16 x i8> %v
434}
435
; Returns a literal undef <2 x double>. Expected output: only `return $0`;
; no vector-building instruction is emitted.
436define <2 x double> @all_undef_f64x2() {
437; CHECK-LABEL: all_undef_f64x2:
438; CHECK:         .functype all_undef_f64x2 () -> (v128)
439; CHECK-NEXT:  # %bb.0:
440; CHECK-NEXT:    return $0
441  ret <2 x double> undef
442}
443
; Builds a <4 x i32> from four i32 values, each loaded from its own pointer
; argument. Expected output: lane 0 comes from v128.load32_zero on the
; first address, and lanes 1..3 are filled by v128.load32_lane directly
; from the remaining addresses (no separate scalar loads).
444define <4 x i32> @load_zero_lane_i32x4(ptr %addr.a, ptr %addr.b, ptr %addr.c, ptr %addr.d) {
445; CHECK-LABEL: load_zero_lane_i32x4:
446; CHECK:         .functype load_zero_lane_i32x4 (i32, i32, i32, i32) -> (v128)
447; CHECK-NEXT:  # %bb.0:
448; CHECK-NEXT:    v128.load32_zero $push0=, 0($0)
449; CHECK-NEXT:    v128.load32_lane $push1=, 0($1), $pop0, 1
450; CHECK-NEXT:    v128.load32_lane $push2=, 0($2), $pop1, 2
451; CHECK-NEXT:    v128.load32_lane $push3=, 0($3), $pop2, 3
452; CHECK-NEXT:    return $pop3
453  %a = load i32, ptr %addr.a
454  %b = load i32, ptr %addr.b
455  %c = load i32, ptr %addr.c
456  %d = load i32, ptr %addr.d
457  %v = insertelement <4 x i32> undef, i32 %a, i32 0
458  %v.1 = insertelement <4 x i32> %v, i32 %b, i32 1
459  %v.2 = insertelement <4 x i32> %v.1, i32 %c, i32 2
460  %v.3 = insertelement <4 x i32> %v.2, i32 %d, i32 3
461  ret <4 x i32> %v.3
462}
463
; Builds a <2 x i64> from two i64 values, each loaded from its own pointer
; argument. Expected output: lane 0 comes from v128.load64_zero on the
; first address and lane 1 from v128.load64_lane on the second.
464define <2 x i64> @load_zero_lane_i64x2(ptr %addr.a, ptr %addr.b) {
465; CHECK-LABEL: load_zero_lane_i64x2:
466; CHECK:         .functype load_zero_lane_i64x2 (i32, i32) -> (v128)
467; CHECK-NEXT:  # %bb.0:
468; CHECK-NEXT:    v128.load64_zero $push0=, 0($0)
469; CHECK-NEXT:    v128.load64_lane $push1=, 0($1), $pop0, 1
470; CHECK-NEXT:    return $pop1
471  %a = load i64, ptr %addr.a
472  %b = load i64, ptr %addr.b
473  %v = insertelement <2 x i64> undef, i64 %a, i32 0
474  %v.1 = insertelement <2 x i64> %v, i64 %b, i32 1
475  ret <2 x i64> %v.1
476}
477
; Builds a <4 x float> from four floats, each loaded from its own pointer
; argument. Expected output: lane 0 comes from v128.load32_zero on the
; first address, and lanes 1..3 are filled by v128.load32_lane from the
; remaining addresses.
478define <4 x float> @load_zero_lane_f32x4(ptr %addr.a, ptr %addr.b, ptr %addr.c, ptr %addr.d) {
479; CHECK-LABEL: load_zero_lane_f32x4:
480; CHECK:         .functype load_zero_lane_f32x4 (i32, i32, i32, i32) -> (v128)
481; CHECK-NEXT:  # %bb.0:
482; CHECK-NEXT:    v128.load32_zero $push0=, 0($0)
483; CHECK-NEXT:    v128.load32_lane $push1=, 0($1), $pop0, 1
484; CHECK-NEXT:    v128.load32_lane $push2=, 0($2), $pop1, 2
485; CHECK-NEXT:    v128.load32_lane $push3=, 0($3), $pop2, 3
486; CHECK-NEXT:    return $pop3
487  %a = load float, ptr %addr.a
488  %b = load float, ptr %addr.b
489  %c = load float, ptr %addr.c
490  %d = load float, ptr %addr.d
491  %v = insertelement <4 x float> undef, float %a, i32 0
492  %v.1 = insertelement <4 x float> %v, float %b, i32 1
493  %v.2 = insertelement <4 x float> %v.1, float %c, i32 2
494  %v.3 = insertelement <4 x float> %v.2, float %d, i32 3
495  ret <4 x float> %v.3
496}
497
; Builds a <4 x float> in which only lanes 1 and 3 are defined, each from a
; loaded float; lanes 0 and 2 stay undef. Expected output: the first value
; is loaded with v128.load32_splat (not load32_zero, since lane 0 is not
; the one being defined) and the second is placed in lane 3 with
; v128.load32_lane.
498define <4 x float> @load_zero_undef_lane_f32x4(ptr %addr.a, ptr %addr.b) {
499; CHECK-LABEL: load_zero_undef_lane_f32x4:
500; CHECK:         .functype load_zero_undef_lane_f32x4 (i32, i32) -> (v128)
501; CHECK-NEXT:  # %bb.0:
502; CHECK-NEXT:    v128.load32_splat $push0=, 0($0)
503; CHECK-NEXT:    v128.load32_lane $push1=, 0($1), $pop0, 3
504; CHECK-NEXT:    return $pop1
505  %a = load float, ptr %addr.a
506  %b = load float, ptr %addr.b
507  %v = insertelement <4 x float> undef, float %a, i32 1
508  %v.1 = insertelement <4 x float> %v, float %b, i32 3
509  ret <4 x float> %v.1
510}
511
; Builds a <2 x double> from two doubles, each loaded from its own pointer
; argument. Expected output: lane 0 comes from v128.load64_zero on the
; first address and lane 1 from v128.load64_lane on the second.
512define <2 x double> @load_zero_lane_f64x2(ptr %addr.a, ptr %addr.b) {
513; CHECK-LABEL: load_zero_lane_f64x2:
514; CHECK:         .functype load_zero_lane_f64x2 (i32, i32) -> (v128)
515; CHECK-NEXT:  # %bb.0:
516; CHECK-NEXT:    v128.load64_zero $push0=, 0($0)
517; CHECK-NEXT:    v128.load64_lane $push1=, 0($1), $pop0, 1
518; CHECK-NEXT:    return $pop1
519  %a = load double, ptr %addr.a
520  %b = load double, ptr %addr.b
521  %v = insertelement <2 x double> undef, double %a, i32 0
522  %v.1 = insertelement <2 x double> %v, double %b, i32 1
523  ret <2 x double> %v.1
524}
525
526