; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm32-unknown-unknown | FileCheck %s
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 -mtriple=wasm64-unknown-unknown | FileCheck %s --check-prefix MEM64

; Test the SIMD v128.load{8,16,32,64}_lane and v128.store{8,16,32,64}_lane
; instructions.

; TODO: Use the offset field by supporting more patterns. Right now only the
; equivalents of LoadPatNoOffset/StorePatNoOffset are supported.
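;
; For example, a lane load from %p+24 currently materializes the address with
; an explicit add and a zero offset immediate:
;
;   local.get 0
;   i32.const 24
;   i32.add
;   v128.load8_lane 0, 0
;
; Folding the constant into the offset field would instead look like the
; following sketch (compare the already-folded store patterns below, e.g.
; "v128.store8_lane 24, 0"):
;
;   local.get 0
;   v128.load8_lane 24, 0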

;===----------------------------------------------------------------------------
; v128.load8_lane / v128.store8_lane
;===----------------------------------------------------------------------------

define <16 x i8> @load_lane_i8_no_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_no_offset:
; CHECK:         .functype load_lane_i8_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_no_offset:
; MEM64:         .functype load_lane_i8_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i8, ptr %p
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_folded_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_offset:
; CHECK:         .functype load_lane_i8_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_folded_offset:
; MEM64:         .functype load_lane_i8_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_folded_gep_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_folded_gep_offset:
; CHECK:         .functype load_lane_i8_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_folded_gep_offset:
; MEM64:         .functype load_lane_i8_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_negative_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i8_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_offset:
; MEM64:         .functype load_lane_i8_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_with_unfolded_gep_offset(ptr %p, <16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i8_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i8_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 6
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define <16 x i8> @load_lane_i8_from_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_numeric_address:
; CHECK:         .functype load_lane_i8_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_from_numeric_address:
; MEM64:         .functype load_lane_i8_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i8, ptr %s
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

@gv_i8 = global i8 0
define <16 x i8> @load_lane_i8_from_global_address(<16 x i8> %v) {
; CHECK-LABEL: load_lane_i8_from_global_address:
; CHECK:         .functype load_lane_i8_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i8
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i8_from_global_address:
; MEM64:         .functype load_lane_i8_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i8
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i8, ptr @gv_i8
  %t = insertelement <16 x i8> %v, i8 %x, i32 0
  ret <16 x i8> %t
}

define void @store_lane_i8_no_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_no_offset:
; CHECK:         .functype store_lane_i8_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_no_offset:
; MEM64:         .functype store_lane_i8_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %p
  ret void
}

define void @store_lane_i8_with_folded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_folded_offset:
; CHECK:         .functype store_lane_i8_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_folded_offset:
; MEM64:         .functype store_lane_i8_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_folded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_folded_gep_offset:
; CHECK:         .functype store_lane_i8_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 6, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_folded_gep_offset:
; MEM64:         .functype store_lane_i8_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 6, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_negative_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i8_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i8, ptr %p, i32 -6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_offset:
; MEM64:         .functype store_lane_i8_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_with_unfolded_gep_offset(<16 x i8> %v, ptr %p) {
; CHECK-LABEL: store_lane_i8_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i8_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 6
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i8_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 6
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i8, ptr %p, i32 6
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_to_numeric_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_to_numeric_address:
; CHECK:         .functype store_lane_i8_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_to_numeric_address:
; MEM64:         .functype store_lane_i8_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr %s
  ret void
}

define void @store_lane_i8_from_global_address(<16 x i8> %v) {
; CHECK-LABEL: store_lane_i8_from_global_address:
; CHECK:         .functype store_lane_i8_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store8_lane gv_i8, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i8_from_global_address:
; MEM64:         .functype store_lane_i8_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store8_lane gv_i8, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <16 x i8> %v, i32 0
  store i8 %x, ptr @gv_i8
  ret void
}

;===----------------------------------------------------------------------------
; v128.load16_lane / v128.store16_lane
;===----------------------------------------------------------------------------

define <8 x i16> @load_lane_i16_no_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_no_offset:
; CHECK:         .functype load_lane_i16_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_no_offset:
; MEM64:         .functype load_lane_i16_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i16, ptr %p
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_offset:
; CHECK:         .functype load_lane_i16_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_folded_offset:
; MEM64:         .functype load_lane_i16_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_folded_gep_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_folded_gep_offset:
; CHECK:         .functype load_lane_i16_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_folded_gep_offset:
; MEM64:         .functype load_lane_i16_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_negative_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i16_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_offset:
; MEM64:         .functype load_lane_i16_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_with_unfolded_gep_offset(ptr %p, <8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i16_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i16_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 6
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define <8 x i16> @load_lane_i16_from_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_numeric_address:
; CHECK:         .functype load_lane_i16_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_from_numeric_address:
; MEM64:         .functype load_lane_i16_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i16, ptr %s
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

@gv_i16 = global i16 0
define <8 x i16> @load_lane_i16_from_global_address(<8 x i16> %v) {
; CHECK-LABEL: load_lane_i16_from_global_address:
; CHECK:         .functype load_lane_i16_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i16_from_global_address:
; MEM64:         .functype load_lane_i16_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i16
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i16, ptr @gv_i16
  %t = insertelement <8 x i16> %v, i16 %x, i32 0
  ret <8 x i16> %t
}

define void @store_lane_i16_no_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_no_offset:
; CHECK:         .functype store_lane_i16_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_no_offset:
; MEM64:         .functype store_lane_i16_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %p
  ret void
}

define void @store_lane_i16_with_folded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_folded_offset:
; CHECK:         .functype store_lane_i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_folded_offset:
; MEM64:         .functype store_lane_i16_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_folded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_folded_gep_offset:
; CHECK:         .functype store_lane_i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 12, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_folded_gep_offset:
; MEM64:         .functype store_lane_i16_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 12, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_negative_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i16_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i16, ptr %p, i32 -6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_offset:
; MEM64:         .functype store_lane_i16_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_with_unfolded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_lane_i16_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 12
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i16_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 12
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i16, ptr %p, i32 6
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_to_numeric_address:
; CHECK:         .functype store_lane_i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_to_numeric_address:
; MEM64:         .functype store_lane_i16_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr %s
  ret void
}

define void @store_lane_i16_from_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_lane_i16_from_global_address:
; CHECK:         .functype store_lane_i16_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store16_lane gv_i16, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i16_from_global_address:
; MEM64:         .functype store_lane_i16_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store16_lane gv_i16, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <8 x i16> %v, i32 0
  store i16 %x, ptr @gv_i16
  ret void
}

;===----------------------------------------------------------------------------
; v128.load32_lane / v128.store32_lane
;===----------------------------------------------------------------------------

define <4 x i32> @load_lane_i32_no_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_no_offset:
; CHECK:         .functype load_lane_i32_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_no_offset:
; MEM64:         .functype load_lane_i32_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i32, ptr %p
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_offset:
; CHECK:         .functype load_lane_i32_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_folded_offset:
; MEM64:         .functype load_lane_i32_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_folded_gep_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_folded_gep_offset:
; CHECK:         .functype load_lane_i32_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_folded_gep_offset:
; MEM64:         .functype load_lane_i32_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_negative_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i32_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_offset:
; MEM64:         .functype load_lane_i32_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_with_unfolded_gep_offset(ptr %p, <4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i32_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i32_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i32, ptr %p, i32 6
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define <4 x i32> @load_lane_i32_from_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_numeric_address:
; CHECK:         .functype load_lane_i32_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_from_numeric_address:
; MEM64:         .functype load_lane_i32_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i32, ptr %s
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

@gv_i32 = global i32 0
define <4 x i32> @load_lane_i32_from_global_address(<4 x i32> %v) {
; CHECK-LABEL: load_lane_i32_from_global_address:
; CHECK:         .functype load_lane_i32_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i32
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i32_from_global_address:
; MEM64:         .functype load_lane_i32_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i32
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i32, ptr @gv_i32
  %t = insertelement <4 x i32> %v, i32 %x, i32 0
  ret <4 x i32> %t
}

define void @store_lane_i32_no_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_no_offset:
; CHECK:         .functype store_lane_i32_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_no_offset:
; MEM64:         .functype store_lane_i32_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %p
  ret void
}

define void @store_lane_i32_with_folded_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_folded_offset:
; CHECK:         .functype store_lane_i32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_folded_offset:
; MEM64:         .functype store_lane_i32_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_folded_gep_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_folded_gep_offset:
; CHECK:         .functype store_lane_i32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_folded_gep_offset:
; MEM64:         .functype store_lane_i32_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 24, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_negative_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i32_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_offset:
; MEM64:         .functype store_lane_i32_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_with_unfolded_gep_offset(<4 x i32> %v, ptr %p) {
; CHECK-LABEL: store_lane_i32_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i32_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 24
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i32, ptr %p, i32 6
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_to_numeric_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_to_numeric_address:
; CHECK:         .functype store_lane_i32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_to_numeric_address:
; MEM64:         .functype store_lane_i32_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr %s
  ret void
}

define void @store_lane_i32_from_global_address(<4 x i32> %v) {
; CHECK-LABEL: store_lane_i32_from_global_address:
; CHECK:         .functype store_lane_i32_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store32_lane gv_i32, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i32_from_global_address:
; MEM64:         .functype store_lane_i32_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store32_lane gv_i32, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <4 x i32> %v, i32 0
  store i32 %x, ptr @gv_i32
  ret void
}

;===----------------------------------------------------------------------------
; v128.load64_lane / v128.store64_lane
;===----------------------------------------------------------------------------

define <2 x i64> @load_lane_i64_no_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_no_offset:
; CHECK:         .functype load_lane_i64_no_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_no_offset:
; MEM64:         .functype load_lane_i64_no_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i64, ptr %p
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_offset:
; CHECK:         .functype load_lane_i64_with_folded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_folded_offset:
; MEM64:         .functype load_lane_i64_with_folded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_folded_gep_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_folded_gep_offset:
; CHECK:         .functype load_lane_i64_with_folded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_folded_gep_offset:
; MEM64:         .functype load_lane_i64_with_folded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_negative_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_gep_negative_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_gep_negative_offset:
; MEM64:         .functype load_lane_i64_with_unfolded_gep_negative_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const -48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 -6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_offset:
; MEM64:         .functype load_lane_i64_with_unfolded_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_with_unfolded_gep_offset(ptr %p, <2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_with_unfolded_gep_offset:
; CHECK:         .functype load_lane_i64_with_unfolded_gep_offset (i32, v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_with_unfolded_gep_offset:
; MEM64:         .functype load_lane_i64_with_unfolded_gep_offset (i64, v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    i64.const 48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i64, ptr %p, i32 6
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define <2 x i64> @load_lane_i64_from_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_numeric_address:
; CHECK:         .functype load_lane_i64_from_numeric_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 42
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_from_numeric_address:
; MEM64:         .functype load_lane_i64_from_numeric_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 42
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = load i64, ptr %s
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

@gv_i64 = global i64 0
define <2 x i64> @load_lane_i64_from_global_address(<2 x i64> %v) {
; CHECK-LABEL: load_lane_i64_from_global_address:
; CHECK:         .functype load_lane_i64_from_global_address (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_i64
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: load_lane_i64_from_global_address:
; MEM64:         .functype load_lane_i64_from_global_address (v128) -> (v128)
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const gv_i64
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.load64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = load i64, ptr @gv_i64
  %t = insertelement <2 x i64> %v, i64 %x, i32 0
  ret <2 x i64> %t
}

define void @store_lane_i64_no_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_no_offset:
; CHECK:         .functype store_lane_i64_no_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_no_offset:
; MEM64:         .functype store_lane_i64_no_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %p
  ret void
}

define void @store_lane_i64_with_folded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_folded_offset:
; CHECK:         .functype store_lane_i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 24, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_folded_offset:
; MEM64:         .functype store_lane_i64_with_folded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_folded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_folded_gep_offset:
; CHECK:         .functype store_lane_i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 48, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_folded_gep_offset:
; MEM64:         .functype store_lane_i64_with_folded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 48, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_negative_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_gep_negative_offset:
; MEM64:         .functype store_lane_i64_with_unfolded_gep_negative_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const -48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i64, ptr %p, i32 -6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 24
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_offset:
; MEM64:         .functype store_lane_i64_with_unfolded_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i32.wrap_i64
; MEM64-NEXT:    i32.const 24
; MEM64-NEXT:    i32.add
; MEM64-NEXT:    i64.extend_i32_u
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_with_unfolded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_lane_i64_with_unfolded_gep_offset:
; CHECK:         .functype store_lane_i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 48
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_with_unfolded_gep_offset:
; MEM64:         .functype store_lane_i64_with_unfolded_gep_offset (v128, i64) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    local.get 1
; MEM64-NEXT:    i64.const 48
; MEM64-NEXT:    i64.add
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 0, 0
; MEM64-NEXT:    # fallthrough-return
  %s = getelementptr i64, ptr %p, i32 6
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_to_numeric_address:
; CHECK:         .functype store_lane_i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 42, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_to_numeric_address:
; MEM64:         .functype store_lane_i64_to_numeric_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane 42, 0
; MEM64-NEXT:    # fallthrough-return
  %s = inttoptr i32 42 to ptr
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr %s
  ret void
}

define void @store_lane_i64_from_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_lane_i64_from_global_address:
; CHECK:         .functype store_lane_i64_from_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane gv_i64, 0
; CHECK-NEXT:    # fallthrough-return
;
; MEM64-LABEL: store_lane_i64_from_global_address:
; MEM64:         .functype store_lane_i64_from_global_address (v128) -> ()
; MEM64-NEXT:  # %bb.0:
; MEM64-NEXT:    i64.const 0
; MEM64-NEXT:    local.get 0
; MEM64-NEXT:    v128.store64_lane gv_i64, 0
; MEM64-NEXT:    # fallthrough-return
  %x = extractelement <2 x i64> %v, i32 0
  store i64 %x, ptr @gv_i64
  ret void
}