xref: /llvm-project/llvm/test/CodeGen/WebAssembly/simd-conversions.ll (revision 0807bc7e07f0430bd5b048d5c08f09442aab3b7d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mcpu=mvp -mattr=+simd128 | FileCheck %s
3
4; Test that vector float-to-int, int-to-float, and float-to-float (demote/promote) conversions lower correctly
5
6target triple = "wasm32-unknown-unknown"
7
; Signed <4 x i32> -> <4 x float> conversion (sitofp); the expected output is a
; single f32x4.convert_i32x4_s applied to the argument.
8define <4 x float> @convert_s_v4f32(<4 x i32> %x) {
9; CHECK-LABEL: convert_s_v4f32:
10; CHECK:         .functype convert_s_v4f32 (v128) -> (v128)
11; CHECK-NEXT:  # %bb.0:
12; CHECK-NEXT:    local.get 0
13; CHECK-NEXT:    f32x4.convert_i32x4_s
14; CHECK-NEXT:    # fallthrough-return
15  %a = sitofp <4 x i32> %x to <4 x float>
16  ret <4 x float> %a
17}
18
; Unsigned <4 x i32> -> <4 x float> conversion (uitofp); the expected output is
; a single f32x4.convert_i32x4_u applied to the argument.
19define <4 x float> @convert_u_v4f32(<4 x i32> %x) {
20; CHECK-LABEL: convert_u_v4f32:
21; CHECK:         .functype convert_u_v4f32 (v128) -> (v128)
22; CHECK-NEXT:  # %bb.0:
23; CHECK-NEXT:    local.get 0
24; CHECK-NEXT:    f32x4.convert_i32x4_u
25; CHECK-NEXT:    # fallthrough-return
26  %a = uitofp <4 x i32> %x to <4 x float>
27  ret <4 x float> %a
28}
29
; Signed <2 x i64> -> <2 x double> conversion (sitofp). The expected output is
; scalarized: each i64 lane is extracted, converted with the scalar
; f64.convert_i64_s, and the result vector is rebuilt with f64x2.splat (lane 0)
; followed by f64x2.replace_lane 1.
30define <2 x double> @convert_s_v2f64(<2 x i64> %x) {
31; CHECK-LABEL: convert_s_v2f64:
32; CHECK:         .functype convert_s_v2f64 (v128) -> (v128)
33; CHECK-NEXT:  # %bb.0:
34; CHECK-NEXT:    local.get 0
35; CHECK-NEXT:    i64x2.extract_lane 0
36; CHECK-NEXT:    f64.convert_i64_s
37; CHECK-NEXT:    f64x2.splat
38; CHECK-NEXT:    local.get 0
39; CHECK-NEXT:    i64x2.extract_lane 1
40; CHECK-NEXT:    f64.convert_i64_s
41; CHECK-NEXT:    f64x2.replace_lane 1
42; CHECK-NEXT:    # fallthrough-return
43  %a = sitofp <2 x i64> %x to <2 x double>
44  ret <2 x double> %a
45}
46
; Unsigned <2 x i64> -> <2 x double> conversion (uitofp). Unlike the signed
; case, the expected output stays in vector form: the low 32 bits of each lane
; are masked out and the high 32 bits are shifted down, each half is OR-ed with
; a constant (the IEEE-754 double bit patterns of 2^52 and 2^84 respectively),
; and the two halves are combined with f64x2.sub / f64x2.add.
47define <2 x double> @convert_u_v2f64(<2 x i64> %x) {
48; CHECK-LABEL: convert_u_v2f64:
49; CHECK:         .functype convert_u_v2f64 (v128) -> (v128)
50; CHECK-NEXT:  # %bb.0:
51; CHECK-NEXT:    local.get 0
52; CHECK-NEXT:    v128.const 4294967295, 4294967295
53; CHECK-NEXT:    v128.and
54; CHECK-NEXT:    v128.const 4841369599423283200, 4841369599423283200
55; CHECK-NEXT:    v128.or
56; CHECK-NEXT:    local.get 0
57; CHECK-NEXT:    i32.const 32
58; CHECK-NEXT:    i64x2.shr_u
59; CHECK-NEXT:    v128.const 4985484787499139072, 4985484787499139072
60; CHECK-NEXT:    v128.or
61; CHECK-NEXT:    v128.const 0x1.00000001p84, 0x1.00000001p84
62; CHECK-NEXT:    f64x2.sub
63; CHECK-NEXT:    f64x2.add
64; CHECK-NEXT:    # fallthrough-return
65  %a = uitofp <2 x i64> %x to <2 x double>
66  ret <2 x double> %a
67}
68
; Signed <4 x float> -> <4 x i32> conversion (fptosi); the expected output is a
; single i32x4.trunc_sat_f32x4_s applied to the argument.
69define <4 x i32> @trunc_sat_s_v4i32(<4 x float> %x) {
70; CHECK-LABEL: trunc_sat_s_v4i32:
71; CHECK:         .functype trunc_sat_s_v4i32 (v128) -> (v128)
72; CHECK-NEXT:  # %bb.0:
73; CHECK-NEXT:    local.get 0
74; CHECK-NEXT:    i32x4.trunc_sat_f32x4_s
75; CHECK-NEXT:    # fallthrough-return
76  %a = fptosi <4 x float> %x to <4 x i32>
77  ret <4 x i32> %a
78}
79
; Unsigned <4 x float> -> <4 x i32> conversion (fptoui); the expected output is
; a single i32x4.trunc_sat_f32x4_u applied to the argument.
80define <4 x i32> @trunc_sat_u_v4i32(<4 x float> %x) {
81; CHECK-LABEL: trunc_sat_u_v4i32:
82; CHECK:         .functype trunc_sat_u_v4i32 (v128) -> (v128)
83; CHECK-NEXT:  # %bb.0:
84; CHECK-NEXT:    local.get 0
85; CHECK-NEXT:    i32x4.trunc_sat_f32x4_u
86; CHECK-NEXT:    # fallthrough-return
87  %a = fptoui <4 x float> %x to <4 x i32>
88  ret <4 x i32> %a
89}
90
; Signed <2 x double> -> <2 x i64> conversion (fptosi). The expected output is
; scalarized with explicit range guards: for each lane (lane 1 first, then
; lane 0) the value is extracted and, if its absolute value is below 0x1p63,
; converted with the scalar i64.trunc_f64_s; otherwise the lane result is the
; constant -9223372036854775808. The two lane results are recombined with
; i64x2.splat (lane 0) and i64x2.replace_lane 1.
; NOTE(review): the guarded scalar form presumably follows from the RUN line
; using -mcpu=mvp (no non-trapping scalar conversions) -- confirm.
91define <2 x i64> @trunc_sat_s_v2i64(<2 x double> %x) {
92; CHECK-LABEL: trunc_sat_s_v2i64:
93; CHECK:         .functype trunc_sat_s_v2i64 (v128) -> (v128)
94; CHECK-NEXT:    .local f64, i64, i64
95; CHECK-NEXT:  # %bb.0:
96; CHECK-NEXT:    block
97; CHECK-NEXT:    block
98; CHECK-NEXT:    local.get 0
99; CHECK-NEXT:    f64x2.extract_lane 1
100; CHECK-NEXT:    local.tee 1
101; CHECK-NEXT:    f64.abs
102; CHECK-NEXT:    f64.const 0x1p63
103; CHECK-NEXT:    f64.lt
104; CHECK-NEXT:    i32.eqz
105; CHECK-NEXT:    br_if 0 # 0: down to label1
106; CHECK-NEXT:  # %bb.1:
107; CHECK-NEXT:    local.get 1
108; CHECK-NEXT:    i64.trunc_f64_s
109; CHECK-NEXT:    local.set 2
110; CHECK-NEXT:    br 1 # 1: down to label0
111; CHECK-NEXT:  .LBB6_2:
112; CHECK-NEXT:    end_block # label1:
113; CHECK-NEXT:    i64.const -9223372036854775808
114; CHECK-NEXT:    local.set 2
115; CHECK-NEXT:  .LBB6_3:
116; CHECK-NEXT:    end_block # label0:
117; CHECK-NEXT:    block
118; CHECK-NEXT:    block
119; CHECK-NEXT:    local.get 0
120; CHECK-NEXT:    f64x2.extract_lane 0
121; CHECK-NEXT:    local.tee 1
122; CHECK-NEXT:    f64.abs
123; CHECK-NEXT:    f64.const 0x1p63
124; CHECK-NEXT:    f64.lt
125; CHECK-NEXT:    i32.eqz
126; CHECK-NEXT:    br_if 0 # 0: down to label3
127; CHECK-NEXT:  # %bb.4:
128; CHECK-NEXT:    local.get 1
129; CHECK-NEXT:    i64.trunc_f64_s
130; CHECK-NEXT:    local.set 3
131; CHECK-NEXT:    br 1 # 1: down to label2
132; CHECK-NEXT:  .LBB6_5:
133; CHECK-NEXT:    end_block # label3:
134; CHECK-NEXT:    i64.const -9223372036854775808
135; CHECK-NEXT:    local.set 3
136; CHECK-NEXT:  .LBB6_6:
137; CHECK-NEXT:    end_block # label2:
138; CHECK-NEXT:    local.get 3
139; CHECK-NEXT:    i64x2.splat
140; CHECK-NEXT:    local.get 2
141; CHECK-NEXT:    i64x2.replace_lane 1
142; CHECK-NEXT:    # fallthrough-return
143  %a = fptosi <2 x double> %x to <2 x i64>
144  ret <2 x i64> %a
145}
146
; Unsigned <2 x double> -> <2 x i64> conversion (fptoui). The expected output
; is scalarized with explicit range guards: for each lane (lane 1 first, then
; lane 0) the value is extracted and, if it is both less than 0x1p64 and
; greater than or equal to 0x0p0, converted with the scalar i64.trunc_f64_u;
; otherwise the lane result is the constant 0. The two lane results are
; recombined with i64x2.splat (lane 0) and i64x2.replace_lane 1.
147define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) {
148; CHECK-LABEL: trunc_sat_u_v2i64:
149; CHECK:         .functype trunc_sat_u_v2i64 (v128) -> (v128)
150; CHECK-NEXT:    .local f64, i64, i64
151; CHECK-NEXT:  # %bb.0:
152; CHECK-NEXT:    block
153; CHECK-NEXT:    block
154; CHECK-NEXT:    local.get 0
155; CHECK-NEXT:    f64x2.extract_lane 1
156; CHECK-NEXT:    local.tee 1
157; CHECK-NEXT:    f64.const 0x1p64
158; CHECK-NEXT:    f64.lt
159; CHECK-NEXT:    local.get 1
160; CHECK-NEXT:    f64.const 0x0p0
161; CHECK-NEXT:    f64.ge
162; CHECK-NEXT:    i32.and
163; CHECK-NEXT:    i32.eqz
164; CHECK-NEXT:    br_if 0 # 0: down to label5
165; CHECK-NEXT:  # %bb.1:
166; CHECK-NEXT:    local.get 1
167; CHECK-NEXT:    i64.trunc_f64_u
168; CHECK-NEXT:    local.set 2
169; CHECK-NEXT:    br 1 # 1: down to label4
170; CHECK-NEXT:  .LBB7_2:
171; CHECK-NEXT:    end_block # label5:
172; CHECK-NEXT:    i64.const 0
173; CHECK-NEXT:    local.set 2
174; CHECK-NEXT:  .LBB7_3:
175; CHECK-NEXT:    end_block # label4:
176; CHECK-NEXT:    block
177; CHECK-NEXT:    block
178; CHECK-NEXT:    local.get 0
179; CHECK-NEXT:    f64x2.extract_lane 0
180; CHECK-NEXT:    local.tee 1
181; CHECK-NEXT:    f64.const 0x1p64
182; CHECK-NEXT:    f64.lt
183; CHECK-NEXT:    local.get 1
184; CHECK-NEXT:    f64.const 0x0p0
185; CHECK-NEXT:    f64.ge
186; CHECK-NEXT:    i32.and
187; CHECK-NEXT:    i32.eqz
188; CHECK-NEXT:    br_if 0 # 0: down to label7
189; CHECK-NEXT:  # %bb.4:
190; CHECK-NEXT:    local.get 1
191; CHECK-NEXT:    i64.trunc_f64_u
192; CHECK-NEXT:    local.set 3
193; CHECK-NEXT:    br 1 # 1: down to label6
194; CHECK-NEXT:  .LBB7_5:
195; CHECK-NEXT:    end_block # label7:
196; CHECK-NEXT:    i64.const 0
197; CHECK-NEXT:    local.set 3
198; CHECK-NEXT:  .LBB7_6:
199; CHECK-NEXT:    end_block # label6:
200; CHECK-NEXT:    local.get 3
201; CHECK-NEXT:    i64x2.splat
202; CHECK-NEXT:    local.get 2
203; CHECK-NEXT:    i64x2.replace_lane 1
204; CHECK-NEXT:    # fallthrough-return
205  %a = fptoui <2 x double> %x to <2 x i64>
206  ret <2 x i64> %a
207}
208
; Shuffle-then-truncate form: %x is first widened to <4 x double> by a
; shufflevector whose upper two lanes come from zeroinitializer, then fptrunc'd
; to <4 x float>. The expected output is a single f32x4.demote_f64x2_zero.
209define <4 x float> @demote_zero_v4f32(<2 x double> %x) {
210; CHECK-LABEL: demote_zero_v4f32:
211; CHECK:         .functype demote_zero_v4f32 (v128) -> (v128)
212; CHECK-NEXT:  # %bb.0:
213; CHECK-NEXT:    local.get 0
214; CHECK-NEXT:    f32x4.demote_f64x2_zero
215; CHECK-NEXT:    # fallthrough-return
216  %v = shufflevector <2 x double> %x, <2 x double> zeroinitializer,
217         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
218  %a = fptrunc <4 x double> %v to <4 x float>
219  ret <4 x float> %a
220}
221
; Truncate-then-shuffle form: %x is first fptrunc'd to <2 x float>, then
; widened to <4 x float> by a shufflevector whose upper two lanes come from
; zeroinitializer. The expected output is the same single
; f32x4.demote_f64x2_zero as the shuffle-first variant.
222define <4 x float> @demote_zero_v4f32_2(<2 x double> %x) {
223; CHECK-LABEL: demote_zero_v4f32_2:
224; CHECK:         .functype demote_zero_v4f32_2 (v128) -> (v128)
225; CHECK-NEXT:  # %bb.0:
226; CHECK-NEXT:    local.get 0
227; CHECK-NEXT:    f32x4.demote_f64x2_zero
228; CHECK-NEXT:    # fallthrough-return
229  %v = fptrunc <2 x double> %x to <2 x float>
230  %a = shufflevector <2 x float> %v, <2 x float> zeroinitializer,
231         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
232  ret <4 x float> %a
233}
234
; Shuffle-then-convert form: the low two i32 lanes of %x are extracted with a
; shufflevector and then sitofp'd to <2 x double>. The expected output is a
; single f64x2.convert_low_i32x4_s.
235define <2 x double> @convert_low_s_v2f64(<4 x i32> %x) {
236; CHECK-LABEL: convert_low_s_v2f64:
237; CHECK:         .functype convert_low_s_v2f64 (v128) -> (v128)
238; CHECK-NEXT:  # %bb.0:
239; CHECK-NEXT:    local.get 0
240; CHECK-NEXT:    f64x2.convert_low_i32x4_s
241; CHECK-NEXT:    # fallthrough-return
242  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
243  %a = sitofp <2 x i32> %v to <2 x double>
244  ret <2 x double> %a
245}
246
; Shuffle-then-convert form: the low two i32 lanes of %x are extracted with a
; shufflevector and then uitofp'd to <2 x double>. The expected output is a
; single f64x2.convert_low_i32x4_u.
247define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) {
248; CHECK-LABEL: convert_low_u_v2f64:
249; CHECK:         .functype convert_low_u_v2f64 (v128) -> (v128)
250; CHECK-NEXT:  # %bb.0:
251; CHECK-NEXT:    local.get 0
252; CHECK-NEXT:    f64x2.convert_low_i32x4_u
253; CHECK-NEXT:    # fallthrough-return
254  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
255  %a = uitofp <2 x i32> %v to <2 x double>
256  ret <2 x double> %a
257}
258
259
; Convert-then-shuffle form: all four i32 lanes are sitofp'd to <4 x double>
; and the low two results are then selected with a shufflevector. The expected
; output is the same single f64x2.convert_low_i32x4_s as the shuffle-first
; variant.
260define <2 x double> @convert_low_s_v2f64_2(<4 x i32> %x) {
261; CHECK-LABEL: convert_low_s_v2f64_2:
262; CHECK:         .functype convert_low_s_v2f64_2 (v128) -> (v128)
263; CHECK-NEXT:  # %bb.0:
264; CHECK-NEXT:    local.get 0
265; CHECK-NEXT:    f64x2.convert_low_i32x4_s
266; CHECK-NEXT:    # fallthrough-return
267  %v = sitofp <4 x i32> %x to <4 x double>
268  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
269  ret <2 x double> %a
270}
271
; Convert-then-shuffle form: all four i32 lanes are uitofp'd to <4 x double>
; and the low two results are then selected with a shufflevector. The expected
; output is the same single f64x2.convert_low_i32x4_u as the shuffle-first
; variant.
272define <2 x double> @convert_low_u_v2f64_2(<4 x i32> %x) {
273; CHECK-LABEL: convert_low_u_v2f64_2:
274; CHECK:         .functype convert_low_u_v2f64_2 (v128) -> (v128)
275; CHECK-NEXT:  # %bb.0:
276; CHECK-NEXT:    local.get 0
277; CHECK-NEXT:    f64x2.convert_low_i32x4_u
278; CHECK-NEXT:    # fallthrough-return
279  %v = uitofp <4 x i32> %x to <4 x double>
280  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
281  ret <2 x double> %a
282}
283
; Shuffle-then-extend form: the low two float lanes of %x are extracted with a
; shufflevector and then fpext'd to <2 x double>. The expected output is a
; single f64x2.promote_low_f32x4.
284define <2 x double> @promote_low_v2f64(<4 x float> %x) {
285; CHECK-LABEL: promote_low_v2f64:
286; CHECK:         .functype promote_low_v2f64 (v128) -> (v128)
287; CHECK-NEXT:  # %bb.0:
288; CHECK-NEXT:    local.get 0
289; CHECK-NEXT:    f64x2.promote_low_f32x4
290; CHECK-NEXT:    # fallthrough-return
291  %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 0, i32 1>
292  %a = fpext <2 x float> %v to <2 x double>
293  ret <2 x double> %a
294}
295
; Extend-then-shuffle form: all four float lanes are fpext'd to <4 x double>
; and the low two results are then selected with a shufflevector. The expected
; output is the same single f64x2.promote_low_f32x4 as the shuffle-first
; variant.
296define <2 x double> @promote_low_v2f64_2(<4 x float> %x) {
297; CHECK-LABEL: promote_low_v2f64_2:
298; CHECK:         .functype promote_low_v2f64_2 (v128) -> (v128)
299; CHECK-NEXT:  # %bb.0:
300; CHECK-NEXT:    local.get 0
301; CHECK-NEXT:    f64x2.promote_low_f32x4
302; CHECK-NEXT:    # fallthrough-return
303  %v = fpext <4 x float> %x to <4 x double>
304  %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> <i32 0, i32 1>
305  ret <2 x double> %a
306}
307
308;; Also check with illegally wide vectors
309
; Wide variant, shuffle-then-convert: the low four i32 lanes of an <8 x i32>
; are selected and sitofp'd to <4 x double>. In the expected output the
; function takes (i32, v128, v128) and returns nothing; the result is written
; through local 0 with two v128.store ops. The half stored at offset 16 is
; produced by an i8x16.shuffle that moves bytes 8..15 of local 1 into the low
; half before f64x2.convert_low_i32x4_s; the half stored at offset 0 converts
; local 1 directly.
310define <4 x double> @convert_low_s_v4f64(<8 x i32> %x) {
311; CHECK-LABEL: convert_low_s_v4f64:
312; CHECK:         .functype convert_low_s_v4f64 (i32, v128, v128) -> ()
313; CHECK-NEXT:  # %bb.0:
314; CHECK-NEXT:    local.get 0
315; CHECK-NEXT:    local.get 1
316; CHECK-NEXT:    local.get 1
317; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
318; CHECK-NEXT:    f64x2.convert_low_i32x4_s
319; CHECK-NEXT:    v128.store 16
320; CHECK-NEXT:    local.get 0
321; CHECK-NEXT:    local.get 1
322; CHECK-NEXT:    f64x2.convert_low_i32x4_s
323; CHECK-NEXT:    v128.store 0
324; CHECK-NEXT:    # fallthrough-return
325  %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
326  %a = sitofp <4 x i32> %v to <4 x double>
327  ret <4 x double> %a
328}
329
; Wide variant, shuffle-then-convert: the low four i32 lanes of an <8 x i32>
; are selected and uitofp'd to <4 x double>. In the expected output the
; function takes (i32, v128, v128) and returns nothing; the result is written
; through local 0 with two v128.store ops. The half stored at offset 16 is
; produced by an i8x16.shuffle that moves bytes 8..15 of local 1 into the low
; half before f64x2.convert_low_i32x4_u; the half stored at offset 0 converts
; local 1 directly.
330define <4 x double> @convert_low_u_v4f64(<8 x i32> %x) {
331; CHECK-LABEL: convert_low_u_v4f64:
332; CHECK:         .functype convert_low_u_v4f64 (i32, v128, v128) -> ()
333; CHECK-NEXT:  # %bb.0:
334; CHECK-NEXT:    local.get 0
335; CHECK-NEXT:    local.get 1
336; CHECK-NEXT:    local.get 1
337; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
338; CHECK-NEXT:    f64x2.convert_low_i32x4_u
339; CHECK-NEXT:    v128.store 16
340; CHECK-NEXT:    local.get 0
341; CHECK-NEXT:    local.get 1
342; CHECK-NEXT:    f64x2.convert_low_i32x4_u
343; CHECK-NEXT:    v128.store 0
344; CHECK-NEXT:    # fallthrough-return
345  %v = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
346  %a = uitofp <4 x i32> %v to <4 x double>
347  ret <4 x double> %a
348}
349
350
; Wide variant, convert-then-shuffle: all eight i32 lanes are sitofp'd to
; <8 x double> and the low four results are then selected. The expected output
; matches the shuffle-first variant: the result is written through local 0
; with two v128.store ops (offsets 16 and 0), each fed by
; f64x2.convert_low_i32x4_s, with an i8x16.shuffle preparing the upper half.
351define <4 x double> @convert_low_s_v4f64_2(<8 x i32> %x) {
352; CHECK-LABEL: convert_low_s_v4f64_2:
353; CHECK:         .functype convert_low_s_v4f64_2 (i32, v128, v128) -> ()
354; CHECK-NEXT:  # %bb.0:
355; CHECK-NEXT:    local.get 0
356; CHECK-NEXT:    local.get 1
357; CHECK-NEXT:    local.get 1
358; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
359; CHECK-NEXT:    f64x2.convert_low_i32x4_s
360; CHECK-NEXT:    v128.store 16
361; CHECK-NEXT:    local.get 0
362; CHECK-NEXT:    local.get 1
363; CHECK-NEXT:    f64x2.convert_low_i32x4_s
364; CHECK-NEXT:    v128.store 0
365; CHECK-NEXT:    # fallthrough-return
366  %v = sitofp <8 x i32> %x to <8 x double>
367  %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
368  ret <4 x double> %a
369}
370
; Wide variant, convert-then-shuffle: all eight i32 lanes are uitofp'd to
; <8 x double> and the low four results are then selected. The expected output
; matches the shuffle-first variant: the result is written through local 0
; with two v128.store ops (offsets 16 and 0), each fed by
; f64x2.convert_low_i32x4_u, with an i8x16.shuffle preparing the upper half.
371define <4 x double> @convert_low_u_v4f64_2(<8 x i32> %x) {
372; CHECK-LABEL: convert_low_u_v4f64_2:
373; CHECK:         .functype convert_low_u_v4f64_2 (i32, v128, v128) -> ()
374; CHECK-NEXT:  # %bb.0:
375; CHECK-NEXT:    local.get 0
376; CHECK-NEXT:    local.get 1
377; CHECK-NEXT:    local.get 1
378; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
379; CHECK-NEXT:    f64x2.convert_low_i32x4_u
380; CHECK-NEXT:    v128.store 16
381; CHECK-NEXT:    local.get 0
382; CHECK-NEXT:    local.get 1
383; CHECK-NEXT:    f64x2.convert_low_i32x4_u
384; CHECK-NEXT:    v128.store 0
385; CHECK-NEXT:    # fallthrough-return
386  %v = uitofp <8 x i32> %x to <8 x double>
387  %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
388  ret <4 x double> %a
389}
390
; Wide variant, shuffle-then-extend: the low four float lanes of an
; <8 x float> are selected and fpext'd to <4 x double>. In the expected output
; the function takes (i32, v128, v128) and returns nothing; the result is
; written through local 0 with two v128.store ops. The half stored at offset
; 16 is produced by an i8x16.shuffle that moves bytes 8..15 of local 1 into
; the low half before f64x2.promote_low_f32x4; the half stored at offset 0
; promotes local 1 directly.
391define <4 x double> @promote_low_v4f64(<8 x float> %x) {
392; CHECK-LABEL: promote_low_v4f64:
393; CHECK:         .functype promote_low_v4f64 (i32, v128, v128) -> ()
394; CHECK-NEXT:  # %bb.0:
395; CHECK-NEXT:    local.get 0
396; CHECK-NEXT:    local.get 1
397; CHECK-NEXT:    local.get 1
398; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
399; CHECK-NEXT:    f64x2.promote_low_f32x4
400; CHECK-NEXT:    v128.store 16
401; CHECK-NEXT:    local.get 0
402; CHECK-NEXT:    local.get 1
403; CHECK-NEXT:    f64x2.promote_low_f32x4
404; CHECK-NEXT:    v128.store 0
405; CHECK-NEXT:    # fallthrough-return
406  %v = shufflevector <8 x float> %x, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
407  %a = fpext <4 x float> %v to <4 x double>
408  ret <4 x double> %a
409}
410
; Wide variant, extend-then-shuffle: all eight float lanes are fpext'd to
; <8 x double> and the low four results are then selected. The expected output
; matches the shuffle-first variant: the result is written through local 0
; with two v128.store ops (offsets 16 and 0), each fed by
; f64x2.promote_low_f32x4, with an i8x16.shuffle preparing the upper half.
411define <4 x double> @promote_low_v4f64_2(<8 x float> %x) {
412; CHECK-LABEL: promote_low_v4f64_2:
413; CHECK:         .functype promote_low_v4f64_2 (i32, v128, v128) -> ()
414; CHECK-NEXT:  # %bb.0:
415; CHECK-NEXT:    local.get 0
416; CHECK-NEXT:    local.get 1
417; CHECK-NEXT:    local.get 1
418; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3
419; CHECK-NEXT:    f64x2.promote_low_f32x4
420; CHECK-NEXT:    v128.store 16
421; CHECK-NEXT:    local.get 0
422; CHECK-NEXT:    local.get 1
423; CHECK-NEXT:    f64x2.promote_low_f32x4
424; CHECK-NEXT:    v128.store 0
425; CHECK-NEXT:    # fallthrough-return
426  %v = fpext <8 x float> %x to <8 x double>
427  %a = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
428  ret <4 x double> %a
429}
430
; Promotion of lanes that are not the low pair of one input: the shufflevector
; picks lane 2 of %x and lane 3 of %y (mask index 7), and the pair is fpext'd
; to <2 x double>. The expected output first gathers those two floats into the
; low 8 bytes with an i8x16.shuffle (bytes 8..11 and 28..31) and then applies
; f64x2.promote_low_f32x4.
431define <2 x double> @promote_mixed_v2f64(<4 x float> %x, <4 x float> %y) {
432; CHECK-LABEL: promote_mixed_v2f64:
433; CHECK:         .functype promote_mixed_v2f64 (v128, v128) -> (v128)
434; CHECK-NEXT:  # %bb.0:
435; CHECK-NEXT:    local.get 0
436; CHECK-NEXT:    local.get 1
437; CHECK-NEXT:    i8x16.shuffle 8, 9, 10, 11, 28, 29, 30, 31, 0, 1, 2, 3, 0, 1, 2, 3
438; CHECK-NEXT:    f64x2.promote_low_f32x4
439; CHECK-NEXT:    # fallthrough-return
440  %v = shufflevector <4 x float> %x, <4 x float> %y, <2 x i32> <i32 2, i32 7>
441  %a = fpext <2 x float> %v to <2 x double>
442  ret <2 x double> %a
443}
444