; Source: llvm/test/CodeGen/WebAssembly/simd-load-promote-wide.ll
; (revision 73856247eef35f5336e485dc009842a5b991c421)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test wide load+promote patterns, which after combines and legalization are
; represented differently than 128-bit load+promote patterns.
; All tests target wasm32; simd128 is enabled on the RUN line.
target triple = "wasm32-unknown-unknown"

; Baseline: <4 x float> load fpext'd to <4 x double>. The result is returned
; indirectly (functype (i32, i32) -> ()), and each half is produced by a
; v128.load64_zero + f64x2.promote_low_f32x4 pair.
define <4 x double> @load_promote_v2f64(ptr %p) {
; CHECK-LABEL: load_promote_v2f64:
; CHECK:         .functype load_promote_v2f64 (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %e = load <4 x float>, ptr %p
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
30
; As above, but the source address carries a 16-byte offset computed with an
; `add nuw` on the raw pointer bits (the no-unsigned-wrap flag is what permits
; treating it as a constant offset).
define <4 x double> @load_promote_v2f64_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
57
; Offset of one <4 x float> element (16 bytes) expressed as an inbounds GEP.
define <4 x double> @load_promote_v2f64_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_gep_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 1
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
82
; Negative GEP offset (-16 bytes): wasm load offsets are unsigned, so the
; address adjustment must stay a separate i32.add (note the local.tee reusing
; the adjusted base for the second half).
define <4 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 1
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
108
; Offset added with `nsw` only (no `nuw`): the unsigned-wrap guarantee needed
; to treat the add as a plain offset is absent, hence "unfolded".
define <4 x double> @load_promote_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
135
; Same GEP offset as the folded case but WITHOUT `inbounds`, so the offset is
; not provably non-wrapping ("unfolded").
define <4 x double> @load_promote_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, ptr %p, i32 1
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
160
; Load from a constant absolute address (inttoptr 32): the address appears as
; immediate i32.const operands.
define <4 x double> @load_promote_v2f64_from_numeric_address() {
; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
; CHECK:         .functype load_promote_v2f64_from_numeric_address (i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 40
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load <4 x float>, ptr %s
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}
181
; Load from a global: the base address is the symbolic `i32.const gv_v4f32`.
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
define <4 x double> @load_promote_v2f64_from_global_address() {
; CHECK-LABEL: load_promote_v2f64_from_global_address:
; CHECK:         .functype load_promote_v2f64_from_global_address (i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const gv_v4f32
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const gv_v4f32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %e = load <4 x float>, ptr @gv_v4f32
  %v = fpext <4 x float> %e to <4 x double>
  ret <4 x double> %v
}