xref: /llvm-project/llvm/test/CodeGen/WebAssembly/simd-offset.ll (revision 18077e9fd688443ca111111541e7e3a71236efd5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
3
4; Test SIMD loads and stores
5
6target triple = "wasm32-unknown-unknown"
7
8; ==============================================================================
9; 16 x i8
10; ==============================================================================
; Baseline <16 x i8> load: selected as a plain v128.load with offset immediate 0.
11define <16 x i8> @load_v16i8(ptr %p) {
12; CHECK-LABEL: load_v16i8:
13; CHECK:         .functype load_v16i8 (i32) -> (v128)
14; CHECK-NEXT:  # %bb.0:
15; CHECK-NEXT:    local.get 0
16; CHECK-NEXT:    v128.load 0
17; CHECK-NEXT:    # fallthrough-return
18  %v = load <16 x i8>, ptr %p
19  ret <16 x i8> %v
20}
21
; Scalar i8 load + insert + zero-mask shuffle is matched to v128.load8_splat.
22define <16 x i8> @load_splat_v16i8(ptr %p) {
23; CHECK-LABEL: load_splat_v16i8:
24; CHECK:         .functype load_splat_v16i8 (i32) -> (v128)
25; CHECK-NEXT:  # %bb.0:
26; CHECK-NEXT:    local.get 0
27; CHECK-NEXT:    v128.load8_splat 0
28; CHECK-NEXT:    # fallthrough-return
29  %e = load i8, ptr %p
30  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
31  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
32  ret <16 x i8> %v2
33}
34
; `add nuw` guarantees no unsigned wrap, so the +16 folds into the load's
; unsigned offset immediate (v128.load 16) instead of an explicit i32.add.
35define <16 x i8> @load_v16i8_with_folded_offset(ptr %p) {
36; CHECK-LABEL: load_v16i8_with_folded_offset:
37; CHECK:         .functype load_v16i8_with_folded_offset (i32) -> (v128)
38; CHECK-NEXT:  # %bb.0:
39; CHECK-NEXT:    local.get 0
40; CHECK-NEXT:    v128.load 16
41; CHECK-NEXT:    # fallthrough-return
42  %q = ptrtoint ptr %p to i32
43  %r = add nuw i32 %q, 16
44  %s = inttoptr i32 %r to ptr
45  %v = load <16 x i8>, ptr %s
46  ret <16 x i8> %v
47}
48
; Same nuw-offset folding as above, applied to the splatting load form.
49define <16 x i8> @load_splat_v16i8_with_folded_offset(ptr %p) {
50; CHECK-LABEL: load_splat_v16i8_with_folded_offset:
51; CHECK:         .functype load_splat_v16i8_with_folded_offset (i32) -> (v128)
52; CHECK-NEXT:  # %bb.0:
53; CHECK-NEXT:    local.get 0
54; CHECK-NEXT:    v128.load8_splat 16
55; CHECK-NEXT:    # fallthrough-return
56  %q = ptrtoint ptr %p to i32
57  %r = add nuw i32 %q, 16
58  %s = inttoptr i32 %r to ptr
59  %e = load i8, ptr %s
60  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
61  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
62  ret <16 x i8> %v2
63}
64
; An `inbounds` gep of one whole vector (16 bytes) folds to offset immediate 16.
65define <16 x i8> @load_v16i8_with_folded_gep_offset(ptr %p) {
66; CHECK-LABEL: load_v16i8_with_folded_gep_offset:
67; CHECK:         .functype load_v16i8_with_folded_gep_offset (i32) -> (v128)
68; CHECK-NEXT:  # %bb.0:
69; CHECK-NEXT:    local.get 0
70; CHECK-NEXT:    v128.load 16
71; CHECK-NEXT:    # fallthrough-return
72  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 1
73  %v = load <16 x i8>, ptr %s
74  ret <16 x i8> %v
75}
76
; inbounds gep of one i8 folds to offset immediate 1 on the splat load.
77define <16 x i8> @load_splat_v16i8_with_folded_gep_offset(ptr %p) {
78; CHECK-LABEL: load_splat_v16i8_with_folded_gep_offset:
79; CHECK:         .functype load_splat_v16i8_with_folded_gep_offset (i32) -> (v128)
80; CHECK-NEXT:  # %bb.0:
81; CHECK-NEXT:    local.get 0
82; CHECK-NEXT:    v128.load8_splat 1
83; CHECK-NEXT:    # fallthrough-return
84  %s = getelementptr inbounds i8, ptr %p, i32 1
85  %e = load i8, ptr %s
86  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
87  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
88  ret <16 x i8> %v2
89}
90
; The memarg offset field is unsigned, so a negative gep offset cannot fold;
; an explicit i32.const -16 / i32.add is emitted instead.
91define <16 x i8> @load_v16i8_with_unfolded_gep_negative_offset(ptr %p) {
92; CHECK-LABEL: load_v16i8_with_unfolded_gep_negative_offset:
93; CHECK:         .functype load_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
94; CHECK-NEXT:  # %bb.0:
95; CHECK-NEXT:    local.get 0
96; CHECK-NEXT:    i32.const -16
97; CHECK-NEXT:    i32.add
98; CHECK-NEXT:    v128.load 0
99; CHECK-NEXT:    # fallthrough-return
100  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 -1
101  %v = load <16 x i8>, ptr %s
102  ret <16 x i8> %v
103}
104
; Negative offset again stays as explicit address math for the splat load.
105define <16 x i8> @load_splat_v16i8_with_unfolded_gep_negative_offset(ptr %p) {
106; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_negative_offset:
107; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_negative_offset (i32) -> (v128)
108; CHECK-NEXT:  # %bb.0:
109; CHECK-NEXT:    local.get 0
110; CHECK-NEXT:    i32.const -1
111; CHECK-NEXT:    i32.add
112; CHECK-NEXT:    v128.load8_splat 0
113; CHECK-NEXT:    # fallthrough-return
114  %s = getelementptr inbounds i8, ptr %p, i32 -1
115  %e = load i8, ptr %s
116  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
117  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
118  ret <16 x i8> %v2
119}
120
; Only `nsw` here (no `nuw`): the add may wrap unsigned, so the +16 is NOT
; folded into the offset immediate — contrast with load_v16i8_with_folded_offset.
121define <16 x i8> @load_v16i8_with_unfolded_offset(ptr %p) {
122; CHECK-LABEL: load_v16i8_with_unfolded_offset:
123; CHECK:         .functype load_v16i8_with_unfolded_offset (i32) -> (v128)
124; CHECK-NEXT:  # %bb.0:
125; CHECK-NEXT:    local.get 0
126; CHECK-NEXT:    i32.const 16
127; CHECK-NEXT:    i32.add
128; CHECK-NEXT:    v128.load 0
129; CHECK-NEXT:    # fallthrough-return
130  %q = ptrtoint ptr %p to i32
131  %r = add nsw i32 %q, 16
132  %s = inttoptr i32 %r to ptr
133  %v = load <16 x i8>, ptr %s
134  ret <16 x i8> %v
135}
136
; nsw-only add: offset stays unfolded for the splat load as well.
137define <16 x i8> @load_splat_v16i8_with_unfolded_offset(ptr %p) {
138; CHECK-LABEL: load_splat_v16i8_with_unfolded_offset:
139; CHECK:         .functype load_splat_v16i8_with_unfolded_offset (i32) -> (v128)
140; CHECK-NEXT:  # %bb.0:
141; CHECK-NEXT:    local.get 0
142; CHECK-NEXT:    i32.const 16
143; CHECK-NEXT:    i32.add
144; CHECK-NEXT:    v128.load8_splat 0
145; CHECK-NEXT:    # fallthrough-return
146  %q = ptrtoint ptr %p to i32
147  %r = add nsw i32 %q, 16
148  %s = inttoptr i32 %r to ptr
149  %e = load i8, ptr %s
150  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
151  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
152  ret <16 x i8> %v2
153}
154
; gep without `inbounds`: the offset is not folded into the load immediate.
155define <16 x i8> @load_v16i8_with_unfolded_gep_offset(ptr %p) {
156; CHECK-LABEL: load_v16i8_with_unfolded_gep_offset:
157; CHECK:         .functype load_v16i8_with_unfolded_gep_offset (i32) -> (v128)
158; CHECK-NEXT:  # %bb.0:
159; CHECK-NEXT:    local.get 0
160; CHECK-NEXT:    i32.const 16
161; CHECK-NEXT:    i32.add
162; CHECK-NEXT:    v128.load 0
163; CHECK-NEXT:    # fallthrough-return
164  %s = getelementptr <16 x i8>, ptr %p, i32 1
165  %v = load <16 x i8>, ptr %s
166  ret <16 x i8> %v
167}
168
; Non-inbounds gep keeps explicit address math on the splat load too.
169define <16 x i8> @load_splat_v16i8_with_unfolded_gep_offset(ptr %p) {
170; CHECK-LABEL: load_splat_v16i8_with_unfolded_gep_offset:
171; CHECK:         .functype load_splat_v16i8_with_unfolded_gep_offset (i32) -> (v128)
172; CHECK-NEXT:  # %bb.0:
173; CHECK-NEXT:    local.get 0
174; CHECK-NEXT:    i32.const 1
175; CHECK-NEXT:    i32.add
176; CHECK-NEXT:    v128.load8_splat 0
177; CHECK-NEXT:    # fallthrough-return
178  %s = getelementptr i8, ptr %p, i32 1
179  %e = load i8, ptr %s
180  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
181  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
182  ret <16 x i8> %v2
183}
184
; A constant address lowers to base i32.const 0 with the address (32) as the
; offset immediate.
185define <16 x i8> @load_v16i8_from_numeric_address() {
186; CHECK-LABEL: load_v16i8_from_numeric_address:
187; CHECK:         .functype load_v16i8_from_numeric_address () -> (v128)
188; CHECK-NEXT:  # %bb.0:
189; CHECK-NEXT:    i32.const 0
190; CHECK-NEXT:    v128.load 32
191; CHECK-NEXT:    # fallthrough-return
192  %s = inttoptr i32 32 to ptr
193  %v = load <16 x i8>, ptr %s
194  ret <16 x i8> %v
195}
196
; Constant address folded into the splat load's offset immediate.
197define <16 x i8> @load_splat_v16i8_from_numeric_address() {
198; CHECK-LABEL: load_splat_v16i8_from_numeric_address:
199; CHECK:         .functype load_splat_v16i8_from_numeric_address () -> (v128)
200; CHECK-NEXT:  # %bb.0:
201; CHECK-NEXT:    i32.const 0
202; CHECK-NEXT:    v128.load8_splat 32
203; CHECK-NEXT:    # fallthrough-return
204  %s = inttoptr i32 32 to ptr
205  %e = load i8, ptr %s
206  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
207  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
208  ret <16 x i8> %v2
209}
210
; A global's address is referenced symbolically in the load's offset field.
211@gv_v16i8 = global <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
212define <16 x i8> @load_v16i8_from_global_address() {
213; CHECK-LABEL: load_v16i8_from_global_address:
214; CHECK:         .functype load_v16i8_from_global_address () -> (v128)
215; CHECK-NEXT:  # %bb.0:
216; CHECK-NEXT:    i32.const 0
217; CHECK-NEXT:    v128.load gv_v16i8
218; CHECK-NEXT:    # fallthrough-return
219  %v = load <16 x i8>, ptr @gv_v16i8
220  ret <16 x i8> %v
221}
222
; Symbolic global offset on the splat load.
223@gv_i8 = global i8 42
224define <16 x i8> @load_splat_v16i8_from_global_address() {
225; CHECK-LABEL: load_splat_v16i8_from_global_address:
226; CHECK:         .functype load_splat_v16i8_from_global_address () -> (v128)
227; CHECK-NEXT:  # %bb.0:
228; CHECK-NEXT:    i32.const 0
229; CHECK-NEXT:    v128.load8_splat gv_i8
230; CHECK-NEXT:    # fallthrough-return
231  %e = load i8, ptr @gv_i8
232  %v1 = insertelement <16 x i8> undef, i8 %e, i32 0
233  %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> zeroinitializer
234  ret <16 x i8> %v2
235}
236
; Baseline v128.store with offset 0: address operand first, then the value.
237define void @store_v16i8(<16 x i8> %v, ptr %p) {
238; CHECK-LABEL: store_v16i8:
239; CHECK:         .functype store_v16i8 (v128, i32) -> ()
240; CHECK-NEXT:  # %bb.0:
241; CHECK-NEXT:    local.get 1
242; CHECK-NEXT:    local.get 0
243; CHECK-NEXT:    v128.store 0
244; CHECK-NEXT:    # fallthrough-return
245  store <16 x i8> %v , ptr %p
246  ret void
247}
248
; `add nuw` lets the +16 fold into the store's offset immediate.
249define void @store_v16i8_with_folded_offset(<16 x i8> %v, ptr %p) {
250; CHECK-LABEL: store_v16i8_with_folded_offset:
251; CHECK:         .functype store_v16i8_with_folded_offset (v128, i32) -> ()
252; CHECK-NEXT:  # %bb.0:
253; CHECK-NEXT:    local.get 1
254; CHECK-NEXT:    local.get 0
255; CHECK-NEXT:    v128.store 16
256; CHECK-NEXT:    # fallthrough-return
257  %q = ptrtoint ptr %p to i32
258  %r = add nuw i32 %q, 16
259  %s = inttoptr i32 %r to ptr
260  store <16 x i8> %v , ptr %s
261  ret void
262}
263
; inbounds gep of one vector folds to offset immediate 16 on the store.
264define void @store_v16i8_with_folded_gep_offset(<16 x i8> %v, ptr %p) {
265; CHECK-LABEL: store_v16i8_with_folded_gep_offset:
266; CHECK:         .functype store_v16i8_with_folded_gep_offset (v128, i32) -> ()
267; CHECK-NEXT:  # %bb.0:
268; CHECK-NEXT:    local.get 1
269; CHECK-NEXT:    local.get 0
270; CHECK-NEXT:    v128.store 16
271; CHECK-NEXT:    # fallthrough-return
272  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 1
273  store <16 x i8> %v , ptr %s
274  ret void
275}
276
; Negative offset cannot use the unsigned offset field: explicit i32.add emitted.
277define void @store_v16i8_with_unfolded_gep_negative_offset(<16 x i8> %v, ptr %p) {
278; CHECK-LABEL: store_v16i8_with_unfolded_gep_negative_offset:
279; CHECK:         .functype store_v16i8_with_unfolded_gep_negative_offset (v128, i32) -> ()
280; CHECK-NEXT:  # %bb.0:
281; CHECK-NEXT:    local.get 1
282; CHECK-NEXT:    i32.const -16
283; CHECK-NEXT:    i32.add
284; CHECK-NEXT:    local.get 0
285; CHECK-NEXT:    v128.store 0
286; CHECK-NEXT:    # fallthrough-return
287  %s = getelementptr inbounds <16 x i8>, ptr %p, i32 -1
288  store <16 x i8> %v , ptr %s
289  ret void
290}
291
; nsw-only add: the +16 is not folded into the store's offset immediate.
292define void @store_v16i8_with_unfolded_offset(<16 x i8> %v, ptr %p) {
293; CHECK-LABEL: store_v16i8_with_unfolded_offset:
294; CHECK:         .functype store_v16i8_with_unfolded_offset (v128, i32) -> ()
295; CHECK-NEXT:  # %bb.0:
296; CHECK-NEXT:    local.get 1
297; CHECK-NEXT:    i32.const 16
298; CHECK-NEXT:    i32.add
299; CHECK-NEXT:    local.get 0
300; CHECK-NEXT:    v128.store 0
301; CHECK-NEXT:    # fallthrough-return
302  %q = ptrtoint ptr %p to i32
303  %r = add nsw i32 %q, 16
304  %s = inttoptr i32 %r to ptr
305  store <16 x i8> %v , ptr %s
306  ret void
307}
308
; Non-inbounds gep: offset stays as explicit address arithmetic.
309define void @store_v16i8_with_unfolded_gep_offset(<16 x i8> %v, ptr %p) {
310; CHECK-LABEL: store_v16i8_with_unfolded_gep_offset:
311; CHECK:         .functype store_v16i8_with_unfolded_gep_offset (v128, i32) -> ()
312; CHECK-NEXT:  # %bb.0:
313; CHECK-NEXT:    local.get 1
314; CHECK-NEXT:    i32.const 16
315; CHECK-NEXT:    i32.add
316; CHECK-NEXT:    local.get 0
317; CHECK-NEXT:    v128.store 0
318; CHECK-NEXT:    # fallthrough-return
319  %s = getelementptr <16 x i8>, ptr %p, i32 1
320  store <16 x i8> %v , ptr %s
321  ret void
322}
323
; Constant destination address becomes i32.const 0 base + offset immediate 32.
324define void @store_v16i8_to_numeric_address(<16 x i8> %v) {
325; CHECK-LABEL: store_v16i8_to_numeric_address:
326; CHECK:         .functype store_v16i8_to_numeric_address (v128) -> ()
327; CHECK-NEXT:  # %bb.0:
328; CHECK-NEXT:    i32.const 0
329; CHECK-NEXT:    local.get 0
330; CHECK-NEXT:    v128.store 32
331; CHECK-NEXT:    # fallthrough-return
332  %s = inttoptr i32 32 to ptr
333  store <16 x i8> %v , ptr %s
334  ret void
335}
336
; Global destination is referenced symbolically in the store's offset field.
337define void @store_v16i8_to_global_address(<16 x i8> %v) {
338; CHECK-LABEL: store_v16i8_to_global_address:
339; CHECK:         .functype store_v16i8_to_global_address (v128) -> ()
340; CHECK-NEXT:  # %bb.0:
341; CHECK-NEXT:    i32.const 0
342; CHECK-NEXT:    local.get 0
343; CHECK-NEXT:    v128.store gv_v16i8
344; CHECK-NEXT:    # fallthrough-return
345  store <16 x i8> %v , ptr @gv_v16i8
346  ret void
347}
348
349; ==============================================================================
350; 8 x i16
351; ==============================================================================
; Baseline <8 x i16> load: plain v128.load with offset 0.
352define <8 x i16> @load_v8i16(ptr %p) {
353; CHECK-LABEL: load_v8i16:
354; CHECK:         .functype load_v8i16 (i32) -> (v128)
355; CHECK-NEXT:  # %bb.0:
356; CHECK-NEXT:    local.get 0
357; CHECK-NEXT:    v128.load 0
358; CHECK-NEXT:    # fallthrough-return
359  %v = load <8 x i16>, ptr %p
360  ret <8 x i16> %v
361}
362
; Scalar i16 load + splat shuffle selects v128.load16_splat.
363define <8 x i16> @load_splat_v8i16(ptr %p) {
364; CHECK-LABEL: load_splat_v8i16:
365; CHECK:         .functype load_splat_v8i16 (i32) -> (v128)
366; CHECK-NEXT:  # %bb.0:
367; CHECK-NEXT:    local.get 0
368; CHECK-NEXT:    v128.load16_splat 0
369; CHECK-NEXT:    # fallthrough-return
370  %e = load i16, ptr %p
371  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
372  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
373  ret <8 x i16> %v2
374}
375
; <8 x i8> load + sext is matched to the widening load i16x8.load8x8_s.
376define <8 x i16> @load_sext_v8i16(ptr %p) {
377; CHECK-LABEL: load_sext_v8i16:
378; CHECK:         .functype load_sext_v8i16 (i32) -> (v128)
379; CHECK-NEXT:  # %bb.0:
380; CHECK-NEXT:    local.get 0
381; CHECK-NEXT:    i16x8.load8x8_s 0
382; CHECK-NEXT:    # fallthrough-return
383  %v = load <8 x i8>, ptr %p
384  %v2 = sext <8 x i8> %v to <8 x i16>
385  ret <8 x i16> %v2
386}
387
; <8 x i8> load + zext is matched to the widening load i16x8.load8x8_u.
388define <8 x i16> @load_zext_v8i16(ptr %p) {
389; CHECK-LABEL: load_zext_v8i16:
390; CHECK:         .functype load_zext_v8i16 (i32) -> (v128)
391; CHECK-NEXT:  # %bb.0:
392; CHECK-NEXT:    local.get 0
393; CHECK-NEXT:    i16x8.load8x8_u 0
394; CHECK-NEXT:    # fallthrough-return
395  %v = load <8 x i8>, ptr %p
396  %v2 = zext <8 x i8> %v to <8 x i16>
397  ret <8 x i16> %v2
398}
399
; A raw <8 x i8> (half-width v128) load uses v128.load64_zero: only the low
; 8 bytes are loaded.
400define <8 x i8> @load_ext_v8i16(ptr %p) {
401; CHECK-LABEL: load_ext_v8i16:
402; CHECK:         .functype load_ext_v8i16 (i32) -> (v128)
403; CHECK-NEXT:  # %bb.0:
404; CHECK-NEXT:    local.get 0
405; CHECK-NEXT:    v128.load64_zero 0
406; CHECK-NEXT:    # fallthrough-return
407  %v = load <8 x i8>, ptr %p
408  ret <8 x i8> %v
409}
410
; `add nuw` folds the +16 into the offset immediate.
411define <8 x i16> @load_v8i16_with_folded_offset(ptr %p) {
412; CHECK-LABEL: load_v8i16_with_folded_offset:
413; CHECK:         .functype load_v8i16_with_folded_offset (i32) -> (v128)
414; CHECK-NEXT:  # %bb.0:
415; CHECK-NEXT:    local.get 0
416; CHECK-NEXT:    v128.load 16
417; CHECK-NEXT:    # fallthrough-return
418  %q = ptrtoint ptr %p to i32
419  %r = add nuw i32 %q, 16
420  %s = inttoptr i32 %r to ptr
421  %v = load <8 x i16>, ptr %s
422  ret <8 x i16> %v
423}
424
; nuw-offset folding on the 16-bit splat load.
425define <8 x i16> @load_splat_v8i16_with_folded_offset(ptr %p) {
426; CHECK-LABEL: load_splat_v8i16_with_folded_offset:
427; CHECK:         .functype load_splat_v8i16_with_folded_offset (i32) -> (v128)
428; CHECK-NEXT:  # %bb.0:
429; CHECK-NEXT:    local.get 0
430; CHECK-NEXT:    v128.load16_splat 16
431; CHECK-NEXT:    # fallthrough-return
432  %q = ptrtoint ptr %p to i32
433  %r = add nuw i32 %q, 16
434  %s = inttoptr i32 %r to ptr
435  %e = load i16, ptr %s
436  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
437  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
438  ret <8 x i16> %v2
439}
440
; nuw-offset folding on the sign-extending widening load.
441define <8 x i16> @load_sext_v8i16_with_folded_offset(ptr %p) {
442; CHECK-LABEL: load_sext_v8i16_with_folded_offset:
443; CHECK:         .functype load_sext_v8i16_with_folded_offset (i32) -> (v128)
444; CHECK-NEXT:  # %bb.0:
445; CHECK-NEXT:    local.get 0
446; CHECK-NEXT:    i16x8.load8x8_s 16
447; CHECK-NEXT:    # fallthrough-return
448  %q = ptrtoint ptr %p to i32
449  %r = add nuw i32 %q, 16
450  %s = inttoptr i32 %r to ptr
451  %v = load <8 x i8>, ptr %s
452  %v2 = sext <8 x i8> %v to <8 x i16>
453  ret <8 x i16> %v2
454}
455
; nuw-offset folding on the zero-extending widening load.
456define <8 x i16> @load_zext_v8i16_with_folded_offset(ptr %p) {
457; CHECK-LABEL: load_zext_v8i16_with_folded_offset:
458; CHECK:         .functype load_zext_v8i16_with_folded_offset (i32) -> (v128)
459; CHECK-NEXT:  # %bb.0:
460; CHECK-NEXT:    local.get 0
461; CHECK-NEXT:    i16x8.load8x8_u 16
462; CHECK-NEXT:    # fallthrough-return
463  %q = ptrtoint ptr %p to i32
464  %r = add nuw i32 %q, 16
465  %s = inttoptr i32 %r to ptr
466  %v = load <8 x i8>, ptr %s
467  %v2 = zext <8 x i8> %v to <8 x i16>
468  ret <8 x i16> %v2
469}
470
; nuw-offset folding on the half-width v128.load64_zero.
471define <8 x i8> @load_ext_v8i16_with_folded_offset(ptr %p) {
472; CHECK-LABEL: load_ext_v8i16_with_folded_offset:
473; CHECK:         .functype load_ext_v8i16_with_folded_offset (i32) -> (v128)
474; CHECK-NEXT:  # %bb.0:
475; CHECK-NEXT:    local.get 0
476; CHECK-NEXT:    v128.load64_zero 16
477; CHECK-NEXT:    # fallthrough-return
478  %q = ptrtoint ptr %p to i32
479  %r = add nuw i32 %q, 16
480  %s = inttoptr i32 %r to ptr
481  %v = load <8 x i8>, ptr %s
482  ret <8 x i8> %v
483}
484
; inbounds gep of one <8 x i16> (16 bytes) folds to offset immediate 16.
485define <8 x i16> @load_v8i16_with_folded_gep_offset(ptr %p) {
486; CHECK-LABEL: load_v8i16_with_folded_gep_offset:
487; CHECK:         .functype load_v8i16_with_folded_gep_offset (i32) -> (v128)
488; CHECK-NEXT:  # %bb.0:
489; CHECK-NEXT:    local.get 0
490; CHECK-NEXT:    v128.load 16
491; CHECK-NEXT:    # fallthrough-return
492  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 1
493  %v = load <8 x i16>, ptr %s
494  ret <8 x i16> %v
495}
496
; inbounds gep of one i16 (2 bytes) folds to offset immediate 2.
497define <8 x i16> @load_splat_v8i16_with_folded_gep_offset(ptr %p) {
498; CHECK-LABEL: load_splat_v8i16_with_folded_gep_offset:
499; CHECK:         .functype load_splat_v8i16_with_folded_gep_offset (i32) -> (v128)
500; CHECK-NEXT:  # %bb.0:
501; CHECK-NEXT:    local.get 0
502; CHECK-NEXT:    v128.load16_splat 2
503; CHECK-NEXT:    # fallthrough-return
504  %s = getelementptr inbounds i16, ptr %p, i32 1
505  %e = load i16, ptr %s
506  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
507  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
508  ret <8 x i16> %v2
509}
510
; inbounds gep of one <8 x i8> (8 bytes) folds to offset immediate 8.
511define <8 x i16> @load_sext_v8i16_with_folded_gep_offset(ptr %p) {
512; CHECK-LABEL: load_sext_v8i16_with_folded_gep_offset:
513; CHECK:         .functype load_sext_v8i16_with_folded_gep_offset (i32) -> (v128)
514; CHECK-NEXT:  # %bb.0:
515; CHECK-NEXT:    local.get 0
516; CHECK-NEXT:    i16x8.load8x8_s 8
517; CHECK-NEXT:    # fallthrough-return
518  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
519  %v = load <8 x i8>, ptr %s
520  %v2 = sext <8 x i8> %v to <8 x i16>
521  ret <8 x i16> %v2
522}
523
; Same 8-byte gep fold on the zero-extending widening load.
524define <8 x i16> @load_zext_v8i16_with_folded_gep_offset(ptr %p) {
525; CHECK-LABEL: load_zext_v8i16_with_folded_gep_offset:
526; CHECK:         .functype load_zext_v8i16_with_folded_gep_offset (i32) -> (v128)
527; CHECK-NEXT:  # %bb.0:
528; CHECK-NEXT:    local.get 0
529; CHECK-NEXT:    i16x8.load8x8_u 8
530; CHECK-NEXT:    # fallthrough-return
531  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
532  %v = load <8 x i8>, ptr %s
533  %v2 = zext <8 x i8> %v to <8 x i16>
534  ret <8 x i16> %v2
535}
536
; Same 8-byte gep fold on the half-width v128.load64_zero.
537define <8 x i8> @load_ext_v8i16_with_folded_gep_offset(ptr %p) {
538; CHECK-LABEL: load_ext_v8i16_with_folded_gep_offset:
539; CHECK:         .functype load_ext_v8i16_with_folded_gep_offset (i32) -> (v128)
540; CHECK-NEXT:  # %bb.0:
541; CHECK-NEXT:    local.get 0
542; CHECK-NEXT:    v128.load64_zero 8
543; CHECK-NEXT:    # fallthrough-return
544  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
545  %v = load <8 x i8>, ptr %s
546  ret <8 x i8> %v
547}
548
; Negative offsets cannot use the unsigned offset field; explicit i32.add.
549define <8 x i16> @load_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
550; CHECK-LABEL: load_v8i16_with_unfolded_gep_negative_offset:
551; CHECK:         .functype load_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
552; CHECK-NEXT:  # %bb.0:
553; CHECK-NEXT:    local.get 0
554; CHECK-NEXT:    i32.const -16
555; CHECK-NEXT:    i32.add
556; CHECK-NEXT:    v128.load 0
557; CHECK-NEXT:    # fallthrough-return
558  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 -1
559  %v = load <8 x i16>, ptr %s
560  ret <8 x i16> %v
561}
562
; Negative 2-byte offset kept as explicit address math on the splat load.
563define <8 x i16> @load_splat_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
564; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_negative_offset:
565; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
566; CHECK-NEXT:  # %bb.0:
567; CHECK-NEXT:    local.get 0
568; CHECK-NEXT:    i32.const -2
569; CHECK-NEXT:    i32.add
570; CHECK-NEXT:    v128.load16_splat 0
571; CHECK-NEXT:    # fallthrough-return
572  %s = getelementptr inbounds i16, ptr %p, i32 -1
573  %e = load i16, ptr %s
574  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
575  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
576  ret <8 x i16> %v2
577}
578
; Negative 8-byte offset kept as explicit address math on the sext load.
579define <8 x i16> @load_sext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
580; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_negative_offset:
581; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
582; CHECK-NEXT:  # %bb.0:
583; CHECK-NEXT:    local.get 0
584; CHECK-NEXT:    i32.const -8
585; CHECK-NEXT:    i32.add
586; CHECK-NEXT:    i16x8.load8x8_s 0
587; CHECK-NEXT:    # fallthrough-return
588  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
589  %v = load <8 x i8>, ptr %s
590  %v2 = sext <8 x i8> %v to <8 x i16>
591  ret <8 x i16> %v2
592}
593
; Negative 8-byte offset kept as explicit address math on the zext load.
594define <8 x i16> @load_zext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
595; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_negative_offset:
596; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
597; CHECK-NEXT:  # %bb.0:
598; CHECK-NEXT:    local.get 0
599; CHECK-NEXT:    i32.const -8
600; CHECK-NEXT:    i32.add
601; CHECK-NEXT:    i16x8.load8x8_u 0
602; CHECK-NEXT:    # fallthrough-return
603  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
604  %v = load <8 x i8>, ptr %s
605  %v2 = zext <8 x i8> %v to <8 x i16>
606  ret <8 x i16> %v2
607}
608
; Negative 8-byte offset kept as explicit address math on v128.load64_zero.
609define <8 x i8> @load_ext_v8i16_with_unfolded_gep_negative_offset(ptr %p) {
610; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_negative_offset:
611; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_negative_offset (i32) -> (v128)
612; CHECK-NEXT:  # %bb.0:
613; CHECK-NEXT:    local.get 0
614; CHECK-NEXT:    i32.const -8
615; CHECK-NEXT:    i32.add
616; CHECK-NEXT:    v128.load64_zero 0
617; CHECK-NEXT:    # fallthrough-return
618  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
619  %v = load <8 x i8>, ptr %s
620  ret <8 x i8> %v
621}
622
; nsw-only add (no nuw): the +16 is not folded into the offset immediate.
623define <8 x i16> @load_v8i16_with_unfolded_offset(ptr %p) {
624; CHECK-LABEL: load_v8i16_with_unfolded_offset:
625; CHECK:         .functype load_v8i16_with_unfolded_offset (i32) -> (v128)
626; CHECK-NEXT:  # %bb.0:
627; CHECK-NEXT:    local.get 0
628; CHECK-NEXT:    i32.const 16
629; CHECK-NEXT:    i32.add
630; CHECK-NEXT:    v128.load 0
631; CHECK-NEXT:    # fallthrough-return
632  %q = ptrtoint ptr %p to i32
633  %r = add nsw i32 %q, 16
634  %s = inttoptr i32 %r to ptr
635  %v = load <8 x i16>, ptr %s
636  ret <8 x i16> %v
637}
638
; nsw-only add: unfolded offset on the 16-bit splat load.
639define <8 x i16> @load_splat_v8i16_with_unfolded_offset(ptr %p) {
640; CHECK-LABEL: load_splat_v8i16_with_unfolded_offset:
641; CHECK:         .functype load_splat_v8i16_with_unfolded_offset (i32) -> (v128)
642; CHECK-NEXT:  # %bb.0:
643; CHECK-NEXT:    local.get 0
644; CHECK-NEXT:    i32.const 16
645; CHECK-NEXT:    i32.add
646; CHECK-NEXT:    v128.load16_splat 0
647; CHECK-NEXT:    # fallthrough-return
648  %q = ptrtoint ptr %p to i32
649  %r = add nsw i32 %q, 16
650  %s = inttoptr i32 %r to ptr
651  %e = load i16, ptr %s
652  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
653  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
654  ret <8 x i16> %v2
655}
656
; nsw-only add: unfolded offset on the sign-extending widening load.
657define <8 x i16> @load_sext_v8i16_with_unfolded_offset(ptr %p) {
658; CHECK-LABEL: load_sext_v8i16_with_unfolded_offset:
659; CHECK:         .functype load_sext_v8i16_with_unfolded_offset (i32) -> (v128)
660; CHECK-NEXT:  # %bb.0:
661; CHECK-NEXT:    local.get 0
662; CHECK-NEXT:    i32.const 16
663; CHECK-NEXT:    i32.add
664; CHECK-NEXT:    i16x8.load8x8_s 0
665; CHECK-NEXT:    # fallthrough-return
666  %q = ptrtoint ptr %p to i32
667  %r = add nsw i32 %q, 16
668  %s = inttoptr i32 %r to ptr
669  %v = load <8 x i8>, ptr %s
670  %v2 = sext <8 x i8> %v to <8 x i16>
671  ret <8 x i16> %v2
672}
673
; nsw-only add: unfolded offset on the zero-extending widening load.
674define <8 x i16> @load_zext_v8i16_with_unfolded_offset(ptr %p) {
675; CHECK-LABEL: load_zext_v8i16_with_unfolded_offset:
676; CHECK:         .functype load_zext_v8i16_with_unfolded_offset (i32) -> (v128)
677; CHECK-NEXT:  # %bb.0:
678; CHECK-NEXT:    local.get 0
679; CHECK-NEXT:    i32.const 16
680; CHECK-NEXT:    i32.add
681; CHECK-NEXT:    i16x8.load8x8_u 0
682; CHECK-NEXT:    # fallthrough-return
683  %q = ptrtoint ptr %p to i32
684  %r = add nsw i32 %q, 16
685  %s = inttoptr i32 %r to ptr
686  %v = load <8 x i8>, ptr %s
687  %v2 = zext <8 x i8> %v to <8 x i16>
688  ret <8 x i16> %v2
689}
690
; nsw-only add: unfolded offset on the half-width v128.load64_zero.
691define <8 x i8> @load_ext_v8i16_with_unfolded_offset(ptr %p) {
692; CHECK-LABEL: load_ext_v8i16_with_unfolded_offset:
693; CHECK:         .functype load_ext_v8i16_with_unfolded_offset (i32) -> (v128)
694; CHECK-NEXT:  # %bb.0:
695; CHECK-NEXT:    local.get 0
696; CHECK-NEXT:    i32.const 16
697; CHECK-NEXT:    i32.add
698; CHECK-NEXT:    v128.load64_zero 0
699; CHECK-NEXT:    # fallthrough-return
700  %q = ptrtoint ptr %p to i32
701  %r = add nsw i32 %q, 16
702  %s = inttoptr i32 %r to ptr
703  %v = load <8 x i8>, ptr %s
704  ret <8 x i8> %v
705}
706
; gep without `inbounds`: offset stays as explicit address arithmetic.
707define <8 x i16> @load_v8i16_with_unfolded_gep_offset(ptr %p) {
708; CHECK-LABEL: load_v8i16_with_unfolded_gep_offset:
709; CHECK:         .functype load_v8i16_with_unfolded_gep_offset (i32) -> (v128)
710; CHECK-NEXT:  # %bb.0:
711; CHECK-NEXT:    local.get 0
712; CHECK-NEXT:    i32.const 16
713; CHECK-NEXT:    i32.add
714; CHECK-NEXT:    v128.load 0
715; CHECK-NEXT:    # fallthrough-return
716  %s = getelementptr <8 x i16>, ptr %p, i32 1
717  %v = load <8 x i16>, ptr %s
718  ret <8 x i16> %v
719}
720
; Non-inbounds 2-byte gep kept unfolded on the splat load.
721define <8 x i16> @load_splat_v8i16_with_unfolded_gep_offset(ptr %p) {
722; CHECK-LABEL: load_splat_v8i16_with_unfolded_gep_offset:
723; CHECK:         .functype load_splat_v8i16_with_unfolded_gep_offset (i32) -> (v128)
724; CHECK-NEXT:  # %bb.0:
725; CHECK-NEXT:    local.get 0
726; CHECK-NEXT:    i32.const 2
727; CHECK-NEXT:    i32.add
728; CHECK-NEXT:    v128.load16_splat 0
729; CHECK-NEXT:    # fallthrough-return
730  %s = getelementptr i16, ptr %p, i32 1
731  %e = load i16, ptr %s
732  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
733  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
734  ret <8 x i16> %v2
735}
736
; Non-inbounds 8-byte gep kept unfolded on the sext widening load.
737define <8 x i16> @load_sext_v8i16_with_unfolded_gep_offset(ptr %p) {
738; CHECK-LABEL: load_sext_v8i16_with_unfolded_gep_offset:
739; CHECK:         .functype load_sext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
740; CHECK-NEXT:  # %bb.0:
741; CHECK-NEXT:    local.get 0
742; CHECK-NEXT:    i32.const 8
743; CHECK-NEXT:    i32.add
744; CHECK-NEXT:    i16x8.load8x8_s 0
745; CHECK-NEXT:    # fallthrough-return
746  %s = getelementptr <8 x i8>, ptr %p, i32 1
747  %v = load <8 x i8>, ptr %s
748  %v2 = sext <8 x i8> %v to <8 x i16>
749  ret <8 x i16> %v2
750}
751
; Non-inbounds 8-byte gep kept unfolded on the zext widening load.
752define <8 x i16> @load_zext_v8i16_with_unfolded_gep_offset(ptr %p) {
753; CHECK-LABEL: load_zext_v8i16_with_unfolded_gep_offset:
754; CHECK:         .functype load_zext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
755; CHECK-NEXT:  # %bb.0:
756; CHECK-NEXT:    local.get 0
757; CHECK-NEXT:    i32.const 8
758; CHECK-NEXT:    i32.add
759; CHECK-NEXT:    i16x8.load8x8_u 0
760; CHECK-NEXT:    # fallthrough-return
761  %s = getelementptr <8 x i8>, ptr %p, i32 1
762  %v = load <8 x i8>, ptr %s
763  %v2 = zext <8 x i8> %v to <8 x i16>
764  ret <8 x i16> %v2
765}
766
; Non-inbounds 8-byte gep kept unfolded on v128.load64_zero.
767define <8 x i8> @load_ext_v8i16_with_unfolded_gep_offset(ptr %p) {
768; CHECK-LABEL: load_ext_v8i16_with_unfolded_gep_offset:
769; CHECK:         .functype load_ext_v8i16_with_unfolded_gep_offset (i32) -> (v128)
770; CHECK-NEXT:  # %bb.0:
771; CHECK-NEXT:    local.get 0
772; CHECK-NEXT:    i32.const 8
773; CHECK-NEXT:    i32.add
774; CHECK-NEXT:    v128.load64_zero 0
775; CHECK-NEXT:    # fallthrough-return
776  %s = getelementptr <8 x i8>, ptr %p, i32 1
777  %v = load <8 x i8>, ptr %s
778  ret <8 x i8> %v
779}
780
; Constant address: i32.const 0 base with 32 as the offset immediate.
781define <8 x i16> @load_v8i16_from_numeric_address() {
782; CHECK-LABEL: load_v8i16_from_numeric_address:
783; CHECK:         .functype load_v8i16_from_numeric_address () -> (v128)
784; CHECK-NEXT:  # %bb.0:
785; CHECK-NEXT:    i32.const 0
786; CHECK-NEXT:    v128.load 32
787; CHECK-NEXT:    # fallthrough-return
788  %s = inttoptr i32 32 to ptr
789  %v = load <8 x i16>, ptr %s
790  ret <8 x i16> %v
791}
792
; Constant address folded into the splat load's offset immediate.
793define <8 x i16> @load_splat_v8i16_from_numeric_address() {
794; CHECK-LABEL: load_splat_v8i16_from_numeric_address:
795; CHECK:         .functype load_splat_v8i16_from_numeric_address () -> (v128)
796; CHECK-NEXT:  # %bb.0:
797; CHECK-NEXT:    i32.const 0
798; CHECK-NEXT:    v128.load16_splat 32
799; CHECK-NEXT:    # fallthrough-return
800  %s = inttoptr i32 32 to ptr
801  %e = load i16, ptr %s
802  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
803  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
804  ret <8 x i16> %v2
805}
806
; Constant address folded into the sext widening load's offset immediate.
807define <8 x i16> @load_sext_v8i16_from_numeric_address() {
808; CHECK-LABEL: load_sext_v8i16_from_numeric_address:
809; CHECK:         .functype load_sext_v8i16_from_numeric_address () -> (v128)
810; CHECK-NEXT:  # %bb.0:
811; CHECK-NEXT:    i32.const 0
812; CHECK-NEXT:    i16x8.load8x8_s 32
813; CHECK-NEXT:    # fallthrough-return
814  %s = inttoptr i32 32 to ptr
815  %v = load <8 x i8>, ptr %s
816  %v2 = sext <8 x i8> %v to <8 x i16>
817  ret <8 x i16> %v2
818}
819
; Constant address folded into the zext widening load's offset immediate.
820define <8 x i16> @load_zext_v8i16_from_numeric_address() {
821; CHECK-LABEL: load_zext_v8i16_from_numeric_address:
822; CHECK:         .functype load_zext_v8i16_from_numeric_address () -> (v128)
823; CHECK-NEXT:  # %bb.0:
824; CHECK-NEXT:    i32.const 0
825; CHECK-NEXT:    i16x8.load8x8_u 32
826; CHECK-NEXT:    # fallthrough-return
827  %s = inttoptr i32 32 to ptr
828  %v = load <8 x i8>, ptr %s
829  %v2 = zext <8 x i8> %v to <8 x i16>
830  ret <8 x i16> %v2
831}
832
; Constant address folded into v128.load64_zero's offset immediate.
833define <8 x i8> @load_ext_v8i16_from_numeric_address() {
834; CHECK-LABEL: load_ext_v8i16_from_numeric_address:
835; CHECK:         .functype load_ext_v8i16_from_numeric_address () -> (v128)
836; CHECK-NEXT:  # %bb.0:
837; CHECK-NEXT:    i32.const 0
838; CHECK-NEXT:    v128.load64_zero 32
839; CHECK-NEXT:    # fallthrough-return
840  %s = inttoptr i32 32 to ptr
841  %v = load <8 x i8>, ptr %s
842  ret <8 x i8> %v
843}
844
; Global address referenced symbolically in the v128.load offset field.
845@gv_v8i16 = global <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
846define <8 x i16> @load_v8i16_from_global_address() {
847; CHECK-LABEL: load_v8i16_from_global_address:
848; CHECK:         .functype load_v8i16_from_global_address () -> (v128)
849; CHECK-NEXT:  # %bb.0:
850; CHECK-NEXT:    i32.const 0
851; CHECK-NEXT:    v128.load gv_v8i16
852; CHECK-NEXT:    # fallthrough-return
853  %v = load <8 x i16>, ptr @gv_v8i16
854  ret <8 x i16> %v
855}
856
857@gv_i16 = global i16 42
858define <8 x i16> @load_splat_v8i16_from_global_address() {
859; CHECK-LABEL: load_splat_v8i16_from_global_address:
860; CHECK:         .functype load_splat_v8i16_from_global_address () -> (v128)
861; CHECK-NEXT:  # %bb.0:
862; CHECK-NEXT:    i32.const 0
863; CHECK-NEXT:    v128.load16_splat gv_i16
864; CHECK-NEXT:    # fallthrough-return
865  %e = load i16, ptr @gv_i16
866  %v1 = insertelement <8 x i16> undef, i16 %e, i32 0
867  %v2 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> zeroinitializer
868  ret <8 x i16> %v2
869}
870
871@gv_v8i8 = global <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
872define <8 x i16> @load_sext_v8i16_from_global_address() {
873; CHECK-LABEL: load_sext_v8i16_from_global_address:
874; CHECK:         .functype load_sext_v8i16_from_global_address () -> (v128)
875; CHECK-NEXT:  # %bb.0:
876; CHECK-NEXT:    i32.const 0
877; CHECK-NEXT:    i16x8.load8x8_s gv_v8i8
878; CHECK-NEXT:    # fallthrough-return
879  %v = load <8 x i8>, ptr @gv_v8i8
880  %v2 = sext <8 x i8> %v to <8 x i16>
881  ret <8 x i16> %v2
882}
883
884define <8 x i16> @load_zext_v8i16_from_global_address() {
885; CHECK-LABEL: load_zext_v8i16_from_global_address:
886; CHECK:         .functype load_zext_v8i16_from_global_address () -> (v128)
887; CHECK-NEXT:  # %bb.0:
888; CHECK-NEXT:    i32.const 0
889; CHECK-NEXT:    i16x8.load8x8_u gv_v8i8
890; CHECK-NEXT:    # fallthrough-return
891  %v = load <8 x i8>, ptr @gv_v8i8
892  %v2 = zext <8 x i8> %v to <8 x i16>
893  ret <8 x i16> %v2
894}
895
896define <8 x i8> @load_ext_v8i16_from_global_address() {
897; CHECK-LABEL: load_ext_v8i16_from_global_address:
898; CHECK:         .functype load_ext_v8i16_from_global_address () -> (v128)
899; CHECK-NEXT:  # %bb.0:
900; CHECK-NEXT:    i32.const 0
901; CHECK-NEXT:    v128.load64_zero gv_v8i8
902; CHECK-NEXT:    # fallthrough-return
903  %v = load <8 x i8>, ptr @gv_v8i8
904  ret <8 x i8> %v
905}
906
907
; Basic v8i16 stores: a full-width store uses v128.store; a narrowing
; <8 x i8> store keeps only the low 64 bits via v128.store64_lane (lane 0).

define void @store_v8i16(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16:
; CHECK:         .functype store_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , ptr %p
  ret void
}

define void @store_narrowing_v8i16(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16:
; CHECK:         .functype store_narrowing_v8i16 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v, ptr %p
  ret void
}
931
; Stores through ptrtoint + 'add nuw' + inttoptr: the nuw add proves no
; wraparound, so the constant folds into the store's offset immediate.

define void @store_v8i16_with_folded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_folded_offset:
; CHECK:         .functype store_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 16, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i8> %v , ptr %s
  ret void
}
961
; Stores through an inbounds gep: the element-sized positive offset
; (16 bytes for <8 x i16>, 8 bytes for <8 x i8>) folds into the immediate.

define void @store_v8i16_with_folded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 1
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_folded_gep_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_folded_gep_offset:
; CHECK:         .functype store_narrowing_v8i16_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 8, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 1
  store <8 x i8> %v , ptr %s
  ret void
}
987
; Negative gep offsets cannot go in the (unsigned) offset immediate, so an
; explicit i32.add of the negative constant is emitted instead.

define void @store_v8i16_with_unfolded_gep_negative_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i16>, ptr %p, i32 -1
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_negative_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <8 x i8>, ptr %p, i32 -1
  store <8 x i8> %v , ptr %s
  ret void
}
1017
; 'add nsw' (without nuw) does not rule out unsigned wraparound, so the
; offset may NOT be folded; an explicit i32.add must be emitted.

define void @store_v8i16_with_unfolded_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_offset:
; CHECK:         .functype store_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <8 x i8> %v , ptr %s
  ret void
}
1051
; A gep without 'inbounds' may wrap, so its offset likewise stays an
; explicit i32.add rather than folding into the store immediate.

define void @store_v8i16_with_unfolded_gep_offset(<8 x i16> %v, ptr %p) {
; CHECK-LABEL: store_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i16>, ptr %p, i32 1
  store <8 x i16> %v , ptr %s
  ret void
}

define void @store_narrowing_v8i16_with_unfolded_gep_offset(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_with_unfolded_gep_offset:
; CHECK:         .functype store_narrowing_v8i16_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 0, 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <8 x i8>, ptr %p, i32 1
  store <8 x i8> %v , ptr %s
  ret void
}
1081
; Stores to a fixed numeric address: the constant address folds into the
; store's offset immediate over an i32.const 0 base.

define void @store_v8i16_to_numeric_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_numeric_address:
; CHECK:         .functype store_v8i16_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <8 x i16> %v , ptr %s
  ret void
}

; Note: the %p parameter is unused here; the store targets the constant address.
define void @store_narrowing_v8i16_to_numeric_address(<8 x i8> %v, ptr %p) {
; CHECK-LABEL: store_narrowing_v8i16_to_numeric_address:
; CHECK:         .functype store_narrowing_v8i16_to_numeric_address (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane 32, 0
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <8 x i8> %v , ptr %s
  ret void
}
1107
; Stores to a global address: the symbol becomes the store's offset
; immediate (globals @gv_v8i16 / @gv_v8i8 are defined earlier in the file).

define void @store_v8i16_to_global_address(<8 x i16> %v) {
; CHECK-LABEL: store_v8i16_to_global_address:
; CHECK:         .functype store_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v8i16
; CHECK-NEXT:    # fallthrough-return
  store <8 x i16> %v , ptr @gv_v8i16
  ret void
}

define void @store_narrowing_v8i16_to_global_address(<8 x i8> %v) {
; CHECK-LABEL: store_narrowing_v8i16_to_global_address:
; CHECK:         .functype store_narrowing_v8i16_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store64_lane gv_v8i8, 0
; CHECK-NEXT:    # fallthrough-return
  store <8 x i8> %v , ptr @gv_v8i8
  ret void
}
1131
1132; ==============================================================================
1133; 4 x i32
1134; ==============================================================================
; Basic v4i32 loads: plain load, splat, extending loads from <4 x i16> and
; <4 x i8>, and an unextended narrow <4 x i16> load. Note <4 x i8> has no
; single extending-load instruction, so it lowers to v128.load32_zero plus
; two extend_low steps.

define <4 x i32> @load_v4i32(ptr %p) {
; CHECK-LABEL: load_v4i32:
; CHECK:         .functype load_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i32>, ptr %p
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32(ptr %addr) {
; CHECK-LABEL: load_splat_v4i32:
; CHECK:         .functype load_splat_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load i32, ptr %addr, align 4
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i16_to_v4i32(ptr %p) {
; CHECK-LABEL: load_sext_v4i16_to_v4i32:
; CHECK:         .functype load_sext_v4i16_to_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, ptr %p
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i16_to_v4i32(ptr %p) {
; CHECK-LABEL: load_zext_v4i16_to_v4i32:
; CHECK:         .functype load_zext_v4i16_to_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, ptr %p
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i8_to_v4i32(ptr %p) {
; CHECK-LABEL: load_sext_v4i8_to_v4i32:
; CHECK:         .functype load_sext_v4i8_to_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i8>, ptr %p
  %v2 = sext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i8_to_v4i32(ptr %p) {
; CHECK-LABEL: load_zext_v4i8_to_v4i32:
; CHECK:         .functype load_zext_v4i8_to_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i8>, ptr %p
  %v2 = zext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32(ptr %p) {
; CHECK-LABEL: load_ext_v4i32:
; CHECK:         .functype load_ext_v4i32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x i16>, ptr %p
  ret <4 x i16> %v
}
1221
; v4i32-family loads through ptrtoint + 'add nuw' + inttoptr: nuw proves no
; unsigned wraparound, so the constant 16 folds into each load's immediate.

define <4 x i32> @load_v4i32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_v4i32_with_folded_offset:
; CHECK:         .functype load_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i32>, ptr %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i32, ptr %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i16_to_v4i32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_folded_offset:
; CHECK:         .functype load_sext_v4i16_to_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i16>, ptr %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i16_to_v4i32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_folded_offset:
; CHECK:         .functype load_zext_v4i16_to_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i16>, ptr %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i8_to_v4i32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_folded_offset:
; CHECK:         .functype load_sext_v4i8_to_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 16
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i8>, ptr %s
  %v2 = sext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i8_to_v4i32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_offset:
; CHECK:         .functype load_zext_v4i8_to_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 16
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i8>, ptr %s
  %v2 = zext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i16>, ptr %s
  ret <4 x i16> %v
}
1329
; v4i32-family loads through an inbounds gep: the positive element-sized
; byte offset (16/4/8 depending on element type) folds into the immediate.

define <4 x i32> @load_v4i32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, ptr %p, i32 1
  %v = load <4 x i32>, ptr %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 1
  %e = load i32, ptr %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i16_to_v4i32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_sext_v4i16_to_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_s 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
  %v = load <4 x i16>, ptr %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i16_to_v4i32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_zext_v4i16_to_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.load16x4_u 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
  %v = load <4 x i16>, ptr %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i8_to_v4i32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_sext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 4
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i8>, ptr %p, i32 1
  %v = load <4 x i8>, ptr %s
  %v2 = sext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i8_to_v4i32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_zext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_zero 4
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i8>, ptr %p, i32 1
  %v = load <4 x i8>, ptr %s
  %v2 = zext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_ext_v4i32_with_folded_gep_offset:
; CHECK:         .functype load_ext_v4i32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
  %v = load <4 x i16>, ptr %s
  ret <4 x i16> %v
}
1423
; v4i32-family loads through a negative inbounds gep: negative offsets do
; not fit the unsigned offset immediate, so an explicit i32.add is emitted.

define <4 x i32> @load_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i32>, ptr %p, i32 -1
  %v = load <4 x i32>, ptr %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds i32, ptr %p, i32 -1
  %e = load i32, ptr %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v4i16_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
  %v = load <4 x i16>, ptr %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v4i16_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
  %v = load <4 x i16>, ptr %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_sext_v4i8_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i8>, ptr %p, i32 -1
  %v = load <4 x i8>, ptr %s
  %v2 = sext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i8>, ptr %p, i32 -1
  %v = load <4 x i8>, ptr %s
  %v2 = zext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
  %v = load <4 x i16>, ptr %s
  ret <4 x i16> %v
}
1531
; v4i32-family loads through 'add nsw' (no nuw): without nuw, unsigned
; wraparound is possible, so the offset may NOT fold into the immediate and
; an explicit i32.add is emitted instead.

define <4 x i32> @load_v4i32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_offset:
; CHECK:         .functype load_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i32>, ptr %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load i32, ptr %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_offset:
; CHECK:         .functype load_sext_v4i16_to_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i16>, ptr %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_offset:
; CHECK:         .functype load_zext_v4i16_to_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i16>, ptr %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_offset:
; CHECK:         .functype load_sext_v4i8_to_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i8>, ptr %s
  %v2 = sext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_offset:
; CHECK:         .functype load_zext_v4i8_to_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_zero 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i8>, ptr %s
  %v2 = zext <4 x i8> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i16> @load_ext_v4i32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_ext_v4i32_with_unfolded_offset:
; CHECK:         .functype load_ext_v4i32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x i16>, ptr %s
  ret <4 x i16> %v
}
1653
; v4i32-family loads through a gep WITHOUT 'inbounds': the address may
; wrap, so the offset stays an explicit i32.add rather than folding.

define <4 x i32> @load_v4i32_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i32>, ptr %p, i32 1
  %v = load <4 x i32>, ptr %s
  ret <4 x i32> %v
}

define <4 x i32> @load_splat_v4i32_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i32, ptr %p, i32 1
  %e = load i32, ptr %s
  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %v2
}

define <4 x i32> @load_sext_v4i16_to_v4i32_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v4i16_to_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v4i16_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, ptr %p, i32 1
  %v = load <4 x i16>, ptr %s
  %v2 = sext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}

define <4 x i32> @load_zext_v4i16_to_v4i32_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v4i16_to_v4i32_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v4i16_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i32x4.load16x4_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x i16>, ptr %p, i32 1
  %v = load <4 x i16>, ptr %s
  %v2 = zext <4 x i16> %v to <4 x i32>
  ret <4 x i32> %v2
}
1713
1714define <4 x i32> @load_sext_v4i8_to_v4i32_with_unfolded_gep_offset(ptr %p) {
1715; CHECK-LABEL: load_sext_v4i8_to_v4i32_with_unfolded_gep_offset:
1716; CHECK:         .functype load_sext_v4i8_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1717; CHECK-NEXT:  # %bb.0:
1718; CHECK-NEXT:    local.get 0
1719; CHECK-NEXT:    i32.const 4
1720; CHECK-NEXT:    i32.add
1721; CHECK-NEXT:    v128.load32_zero 0
1722; CHECK-NEXT:    i16x8.extend_low_i8x16_s
1723; CHECK-NEXT:    i32x4.extend_low_i16x8_s
1724; CHECK-NEXT:    # fallthrough-return
1725  %s = getelementptr <4 x i8>, ptr %p, i32 1
1726  %v = load <4 x i8>, ptr %s
1727  %v2 = sext <4 x i8> %v to <4 x i32>
1728  ret <4 x i32> %v2
1729}
1730
1731define <4 x i32> @load_zext_v4i8_to_v4i32_with_unfolded_gep_offset(ptr %p) {
1732; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_offset:
1733; CHECK:         .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1734; CHECK-NEXT:  # %bb.0:
1735; CHECK-NEXT:    local.get 0
1736; CHECK-NEXT:    i32.const 4
1737; CHECK-NEXT:    i32.add
1738; CHECK-NEXT:    v128.load32_zero 0
1739; CHECK-NEXT:    i16x8.extend_low_i8x16_u
1740; CHECK-NEXT:    i32x4.extend_low_i16x8_u
1741; CHECK-NEXT:    # fallthrough-return
1742  %s = getelementptr <4 x i8>, ptr %p, i32 1
1743  %v = load <4 x i8>, ptr %s
1744  %v2 = zext <4 x i8> %v to <4 x i32>
1745  ret <4 x i32> %v2
1746}
1747
1748define <4 x i16> @load_ext_v4i32_with_unfolded_gep_offset(ptr %p) {
1749; CHECK-LABEL: load_ext_v4i32_with_unfolded_gep_offset:
1750; CHECK:         .functype load_ext_v4i32_with_unfolded_gep_offset (i32) -> (v128)
1751; CHECK-NEXT:  # %bb.0:
1752; CHECK-NEXT:    local.get 0
1753; CHECK-NEXT:    i32.const 8
1754; CHECK-NEXT:    i32.add
1755; CHECK-NEXT:    v128.load64_zero 0
1756; CHECK-NEXT:    # fallthrough-return
1757  %s = getelementptr <4 x i16>, ptr %p, i32 1
1758  %v = load <4 x i16>, ptr %s
1759  ret <4 x i16> %v
1760}
1761
; A constant absolute address folds entirely into the load's immediate offset,
; leaving a zero base (i32.const 0).
1762define <4 x i32> @load_v4i32_from_numeric_address() {
1763; CHECK-LABEL: load_v4i32_from_numeric_address:
1764; CHECK:         .functype load_v4i32_from_numeric_address () -> (v128)
1765; CHECK-NEXT:  # %bb.0:
1766; CHECK-NEXT:    i32.const 0
1767; CHECK-NEXT:    v128.load 32
1768; CHECK-NEXT:    # fallthrough-return
1769  %s = inttoptr i32 32 to ptr
1770  %v = load <4 x i32>, ptr %s
1771  ret <4 x i32> %v
1772}
1773
; Splat load from a constant address: address folds into the offset as well.
1774define <4 x i32> @load_splat_v4i32_from_numeric_address() {
1775; CHECK-LABEL: load_splat_v4i32_from_numeric_address:
1776; CHECK:         .functype load_splat_v4i32_from_numeric_address () -> (v128)
1777; CHECK-NEXT:  # %bb.0:
1778; CHECK-NEXT:    i32.const 0
1779; CHECK-NEXT:    v128.load32_splat 32
1780; CHECK-NEXT:    # fallthrough-return
1781  %s = inttoptr i32 32 to ptr
1782  %e = load i32, ptr %s
1783  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1784  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1785  ret <4 x i32> %v2
1786}
1787
; Sign-extending i16x4 load from a constant address.
1788define <4 x i32> @load_sext_v4i16_to_v4i32_from_numeric_address() {
1789; CHECK-LABEL: load_sext_v4i16_to_v4i32_from_numeric_address:
1790; CHECK:         .functype load_sext_v4i16_to_v4i32_from_numeric_address () -> (v128)
1791; CHECK-NEXT:  # %bb.0:
1792; CHECK-NEXT:    i32.const 0
1793; CHECK-NEXT:    i32x4.load16x4_s 32
1794; CHECK-NEXT:    # fallthrough-return
1795  %s = inttoptr i32 32 to ptr
1796  %v = load <4 x i16>, ptr %s
1797  %v2 = sext <4 x i16> %v to <4 x i32>
1798  ret <4 x i32> %v2
1799}
1800
; Zero-extending i16x4 load from a constant address.
1801define <4 x i32> @load_zext_v4i16_to_v4i32_from_numeric_address() {
1802; CHECK-LABEL: load_zext_v4i16_to_v4i32_from_numeric_address:
1803; CHECK:         .functype load_zext_v4i16_to_v4i32_from_numeric_address () -> (v128)
1804; CHECK-NEXT:  # %bb.0:
1805; CHECK-NEXT:    i32.const 0
1806; CHECK-NEXT:    i32x4.load16x4_u 32
1807; CHECK-NEXT:    # fallthrough-return
1808  %s = inttoptr i32 32 to ptr
1809  %v = load <4 x i16>, ptr %s
1810  %v2 = zext <4 x i16> %v to <4 x i32>
1811  ret <4 x i32> %v2
1812}
1813
; <4 x i8> sext from constant address: load32_zero + two sign extensions.
1814define <4 x i32> @load_sext_v4i8_to_v4i32_from_numeric_address() {
1815; CHECK-LABEL: load_sext_v4i8_to_v4i32_from_numeric_address:
1816; CHECK:         .functype load_sext_v4i8_to_v4i32_from_numeric_address () -> (v128)
1817; CHECK-NEXT:  # %bb.0:
1818; CHECK-NEXT:    i32.const 0
1819; CHECK-NEXT:    v128.load32_zero 32
1820; CHECK-NEXT:    i16x8.extend_low_i8x16_s
1821; CHECK-NEXT:    i32x4.extend_low_i16x8_s
1822; CHECK-NEXT:    # fallthrough-return
1823  %s = inttoptr i32 32 to ptr
1824  %v = load <4 x i8>, ptr %s
1825  %v2 = sext <4 x i8> %v to <4 x i32>
1826  ret <4 x i32> %v2
1827}
1828
; <4 x i8> zext from constant address: load32_zero + two zero extensions.
1829define <4 x i32> @load_zext_v4i8_to_v4i32_from_numeric_address() {
1830; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_numeric_address:
1831; CHECK:         .functype load_zext_v4i8_to_v4i32_from_numeric_address () -> (v128)
1832; CHECK-NEXT:  # %bb.0:
1833; CHECK-NEXT:    i32.const 0
1834; CHECK-NEXT:    v128.load32_zero 32
1835; CHECK-NEXT:    i16x8.extend_low_i8x16_u
1836; CHECK-NEXT:    i32x4.extend_low_i16x8_u
1837; CHECK-NEXT:    # fallthrough-return
1838  %s = inttoptr i32 32 to ptr
1839  %v = load <4 x i8>, ptr %s
1840  %v2 = zext <4 x i8> %v to <4 x i32>
1841  ret <4 x i32> %v2
1842}
1843
; <4 x i16> zero-filling 64-bit load from a constant address.
1844define <4 x i16> @load_ext_v4i32_from_numeric_address() {
1845; CHECK-LABEL: load_ext_v4i32_from_numeric_address:
1846; CHECK:         .functype load_ext_v4i32_from_numeric_address () -> (v128)
1847; CHECK-NEXT:  # %bb.0:
1848; CHECK-NEXT:    i32.const 0
1849; CHECK-NEXT:    v128.load64_zero 32
1850; CHECK-NEXT:    # fallthrough-return
1851  %s = inttoptr i32 32 to ptr
1852  %v = load <4 x i16>, ptr %s
1853  ret <4 x i16> %v
1854}
1855
; A global's symbol is folded directly into the load's immediate-offset slot,
; with a zero (i32.const 0) base address.
1856@gv_v4i32 = global <4 x i32> <i32 42, i32 42, i32 42, i32 42>
1857define <4 x i32> @load_v4i32_from_global_address() {
1858; CHECK-LABEL: load_v4i32_from_global_address:
1859; CHECK:         .functype load_v4i32_from_global_address () -> (v128)
1860; CHECK-NEXT:  # %bb.0:
1861; CHECK-NEXT:    i32.const 0
1862; CHECK-NEXT:    v128.load gv_v4i32
1863; CHECK-NEXT:    # fallthrough-return
1864  %v = load <4 x i32>, ptr @gv_v4i32
1865  ret <4 x i32> %v
1866}
1867
; Splat load with the global symbol as the offset.
1868@gv_i32 = global i32 42
1869define <4 x i32> @load_splat_v4i32_from_global_address() {
1870; CHECK-LABEL: load_splat_v4i32_from_global_address:
1871; CHECK:         .functype load_splat_v4i32_from_global_address () -> (v128)
1872; CHECK-NEXT:  # %bb.0:
1873; CHECK-NEXT:    i32.const 0
1874; CHECK-NEXT:    v128.load32_splat gv_i32
1875; CHECK-NEXT:    # fallthrough-return
1876  %e = load i32, ptr @gv_i32
1877  %v1 = insertelement <4 x i32> undef, i32 %e, i32 0
1878  %v2 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> zeroinitializer
1879  ret <4 x i32> %v2
1880}
1881
; Sign-extending i16x4 load with the global symbol as the offset.
1882@gv_v4i16 = global <4 x i16> <i16 42, i16 42, i16 42, i16 42>
1883define <4 x i32> @load_sext_v4i16_to_v4i32_from_global_address() {
1884; CHECK-LABEL: load_sext_v4i16_to_v4i32_from_global_address:
1885; CHECK:         .functype load_sext_v4i16_to_v4i32_from_global_address () -> (v128)
1886; CHECK-NEXT:  # %bb.0:
1887; CHECK-NEXT:    i32.const 0
1888; CHECK-NEXT:    i32x4.load16x4_s gv_v4i16
1889; CHECK-NEXT:    # fallthrough-return
1890  %v = load <4 x i16>, ptr @gv_v4i16
1891  %v2 = sext <4 x i16> %v to <4 x i32>
1892  ret <4 x i32> %v2
1893}
1894
; Zero-extending variant of the test above.
1895define <4 x i32> @load_zext_v4i16_to_v4i32_from_global_address() {
1896; CHECK-LABEL: load_zext_v4i16_to_v4i32_from_global_address:
1897; CHECK:         .functype load_zext_v4i16_to_v4i32_from_global_address () -> (v128)
1898; CHECK-NEXT:  # %bb.0:
1899; CHECK-NEXT:    i32.const 0
1900; CHECK-NEXT:    i32x4.load16x4_u gv_v4i16
1901; CHECK-NEXT:    # fallthrough-return
1902  %v = load <4 x i16>, ptr @gv_v4i16
1903  %v2 = zext <4 x i16> %v to <4 x i32>
1904  ret <4 x i32> %v2
1905}
1906
; <4 x i8> sext from a global: load32_zero + two sign extensions.
1907@gv_v4i8 = global <4 x i8> <i8 42, i8 42, i8 42, i8 42>
1908define <4 x i32> @load_sext_v4i8_to_v4i32_from_global_address() {
1909; CHECK-LABEL: load_sext_v4i8_to_v4i32_from_global_address:
1910; CHECK:         .functype load_sext_v4i8_to_v4i32_from_global_address () -> (v128)
1911; CHECK-NEXT:  # %bb.0:
1912; CHECK-NEXT:    i32.const 0
1913; CHECK-NEXT:    v128.load32_zero gv_v4i8
1914; CHECK-NEXT:    i16x8.extend_low_i8x16_s
1915; CHECK-NEXT:    i32x4.extend_low_i16x8_s
1916; CHECK-NEXT:    # fallthrough-return
1917  %v = load <4 x i8>, ptr @gv_v4i8
1918  %v2 = sext <4 x i8> %v to <4 x i32>
1919  ret <4 x i32> %v2
1920}
1921
; <4 x i8> zext from a global: load32_zero + two zero extensions.
1922define <4 x i32> @load_zext_v4i8_to_v4i32_from_global_address() {
1923; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_global_address:
1924; CHECK:         .functype load_zext_v4i8_to_v4i32_from_global_address () -> (v128)
1925; CHECK-NEXT:  # %bb.0:
1926; CHECK-NEXT:    i32.const 0
1927; CHECK-NEXT:    v128.load32_zero gv_v4i8
1928; CHECK-NEXT:    i16x8.extend_low_i8x16_u
1929; CHECK-NEXT:    i32x4.extend_low_i16x8_u
1930; CHECK-NEXT:    # fallthrough-return
1931  %v = load <4 x i8>, ptr @gv_v4i8
1932  %v2 = zext <4 x i8> %v to <4 x i32>
1933  ret <4 x i32> %v2
1934}
1935
; <4 x i16> zero-filling 64-bit load from a global.
1936define <4 x i16> @load_ext_v4i32_from_global_address() {
1937; CHECK-LABEL: load_ext_v4i32_from_global_address:
1938; CHECK:         .functype load_ext_v4i32_from_global_address () -> (v128)
1939; CHECK-NEXT:  # %bb.0:
1940; CHECK-NEXT:    i32.const 0
1941; CHECK-NEXT:    v128.load64_zero gv_v4i16
1942; CHECK-NEXT:    # fallthrough-return
1943  %v = load <4 x i16>, ptr @gv_v4i16
1944  ret <4 x i16> %v
1945}
1946
; Basic v128 store: address operand first, value second, offset 0.
1947define void @store_v4i32(<4 x i32> %v, ptr %p) {
1948; CHECK-LABEL: store_v4i32:
1949; CHECK:         .functype store_v4i32 (v128, i32) -> ()
1950; CHECK-NEXT:  # %bb.0:
1951; CHECK-NEXT:    local.get 1
1952; CHECK-NEXT:    local.get 0
1953; CHECK-NEXT:    v128.store 0
1954; CHECK-NEXT:    # fallthrough-return
1955  store <4 x i32> %v , ptr %p
1956  ret void
1957}
1958
; A <4 x i16> store narrows to 64 bits using v128.store64_lane (lane 0).
1959define void @store_narrowing_v4i32(<4 x i16> %v, ptr %p) {
1960; CHECK-LABEL: store_narrowing_v4i32:
1961; CHECK:         .functype store_narrowing_v4i32 (v128, i32) -> ()
1962; CHECK-NEXT:  # %bb.0:
1963; CHECK-NEXT:    local.get 1
1964; CHECK-NEXT:    local.get 0
1965; CHECK-NEXT:    v128.store64_lane 0, 0
1966; CHECK-NEXT:    # fallthrough-return
1967  store <4 x i16> %v , ptr %p
1968  ret void
1969}
1970
; 'add nuw' cannot wrap, so the +16 folds into the store's immediate offset.
1971define void @store_v4i32_with_folded_offset(<4 x i32> %v, ptr %p) {
1972; CHECK-LABEL: store_v4i32_with_folded_offset:
1973; CHECK:         .functype store_v4i32_with_folded_offset (v128, i32) -> ()
1974; CHECK-NEXT:  # %bb.0:
1975; CHECK-NEXT:    local.get 1
1976; CHECK-NEXT:    local.get 0
1977; CHECK-NEXT:    v128.store 16
1978; CHECK-NEXT:    # fallthrough-return
1979  %q = ptrtoint ptr %p to i32
1980  %r = add nuw i32 %q, 16
1981  %s = inttoptr i32 %r to ptr
1982  store <4 x i32> %v , ptr %s
1983  ret void
1984}
1985
; Narrowing store with a foldable (nuw) +16 offset.
1986define void @store_narrowing_v4i32_with_folded_offset(<4 x i16> %v, ptr %p) {
1987; CHECK-LABEL: store_narrowing_v4i32_with_folded_offset:
1988; CHECK:         .functype store_narrowing_v4i32_with_folded_offset (v128, i32) -> ()
1989; CHECK-NEXT:  # %bb.0:
1990; CHECK-NEXT:    local.get 1
1991; CHECK-NEXT:    local.get 0
1992; CHECK-NEXT:    v128.store64_lane 16, 0
1993; CHECK-NEXT:    # fallthrough-return
1994  %q = ptrtoint ptr %p to i32
1995  %r = add nuw i32 %q, 16
1996  %s = inttoptr i32 %r to ptr
1997  store <4 x i16> %v , ptr %s
1998  ret void
1999}
2000
; An inbounds GEP offset folds into the store's immediate offset.
2001define void @store_v4i32_with_folded_gep_offset(<4 x i32> %v, ptr %p) {
2002; CHECK-LABEL: store_v4i32_with_folded_gep_offset:
2003; CHECK:         .functype store_v4i32_with_folded_gep_offset (v128, i32) -> ()
2004; CHECK-NEXT:  # %bb.0:
2005; CHECK-NEXT:    local.get 1
2006; CHECK-NEXT:    local.get 0
2007; CHECK-NEXT:    v128.store 16
2008; CHECK-NEXT:    # fallthrough-return
2009  %s = getelementptr inbounds <4 x i32>, ptr %p, i32 1
2010  store <4 x i32> %v , ptr %s
2011  ret void
2012}
2013
; Narrowing store with a foldable inbounds 8-byte GEP offset.
2014define void @store_narrowing_v4i32_with_folded_gep_offset(<4 x i16> %v, ptr %p) {
2015; CHECK-LABEL: store_narrowing_v4i32_with_folded_gep_offset:
2016; CHECK:         .functype store_narrowing_v4i32_with_folded_gep_offset (v128, i32) -> ()
2017; CHECK-NEXT:  # %bb.0:
2018; CHECK-NEXT:    local.get 1
2019; CHECK-NEXT:    local.get 0
2020; CHECK-NEXT:    v128.store64_lane 8, 0
2021; CHECK-NEXT:    # fallthrough-return
2022  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 1
2023  store <4 x i16> %v , ptr %s
2024  ret void
2025}
2026
; Negative offsets cannot be encoded in the unsigned immediate, so even an
; inbounds GEP of -16 requires an explicit i32.add.
2027define void @store_v4i32_with_unfolded_gep_negative_offset(<4 x i32> %v, ptr %p) {
2028; CHECK-LABEL: store_v4i32_with_unfolded_gep_negative_offset:
2029; CHECK:         .functype store_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
2030; CHECK-NEXT:  # %bb.0:
2031; CHECK-NEXT:    local.get 1
2032; CHECK-NEXT:    i32.const -16
2033; CHECK-NEXT:    i32.add
2034; CHECK-NEXT:    local.get 0
2035; CHECK-NEXT:    v128.store 0
2036; CHECK-NEXT:    # fallthrough-return
2037  %s = getelementptr inbounds <4 x i32>, ptr %p, i32 -1
2038  store <4 x i32> %v , ptr %s
2039  ret void
2040}
2041
; Narrowing store with a negative (unfoldable) -8 offset.
2042define void @store_narrowing_v4i32_with_unfolded_gep_negative_offset(<4 x i16> %v, ptr %p) {
2043; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_negative_offset:
2044; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_negative_offset (v128, i32) -> ()
2045; CHECK-NEXT:  # %bb.0:
2046; CHECK-NEXT:    local.get 1
2047; CHECK-NEXT:    i32.const -8
2048; CHECK-NEXT:    i32.add
2049; CHECK-NEXT:    local.get 0
2050; CHECK-NEXT:    v128.store64_lane 0, 0
2051; CHECK-NEXT:    # fallthrough-return
2052  %s = getelementptr inbounds <4 x i16>, ptr %p, i32 -1
2053  store <4 x i16> %v , ptr %s
2054  ret void
2055}
2056
; 'add nsw' (no nuw) may wrap unsigned, so the +16 stays an explicit add.
2057define void @store_v4i32_with_unfolded_offset(<4 x i32> %v, ptr %p) {
2058; CHECK-LABEL: store_v4i32_with_unfolded_offset:
2059; CHECK:         .functype store_v4i32_with_unfolded_offset (v128, i32) -> ()
2060; CHECK-NEXT:  # %bb.0:
2061; CHECK-NEXT:    local.get 1
2062; CHECK-NEXT:    i32.const 16
2063; CHECK-NEXT:    i32.add
2064; CHECK-NEXT:    local.get 0
2065; CHECK-NEXT:    v128.store 0
2066; CHECK-NEXT:    # fallthrough-return
2067  %q = ptrtoint ptr %p to i32
2068  %r = add nsw i32 %q, 16
2069  %s = inttoptr i32 %r to ptr
2070  store <4 x i32> %v , ptr %s
2071  ret void
2072}
2073
; Narrowing store with an unfoldable (nsw-only) +16 offset.
2074define void @store_narrowing_v4i32_with_unfolded_offset(<4 x i16> %v, ptr %p) {
2075; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_offset:
2076; CHECK:         .functype store_narrowing_v4i32_with_unfolded_offset (v128, i32) -> ()
2077; CHECK-NEXT:  # %bb.0:
2078; CHECK-NEXT:    local.get 1
2079; CHECK-NEXT:    i32.const 16
2080; CHECK-NEXT:    i32.add
2081; CHECK-NEXT:    local.get 0
2082; CHECK-NEXT:    v128.store64_lane 0, 0
2083; CHECK-NEXT:    # fallthrough-return
2084  %q = ptrtoint ptr %p to i32
2085  %r = add nsw i32 %q, 16
2086  %s = inttoptr i32 %r to ptr
2087  store <4 x i16> %v , ptr %s
2088  ret void
2089}
2090
; GEP without 'inbounds': the 16-byte offset is not folded into the store.
2091define void @store_v4i32_with_unfolded_gep_offset(<4 x i32> %v, ptr %p) {
2092; CHECK-LABEL: store_v4i32_with_unfolded_gep_offset:
2093; CHECK:         .functype store_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
2094; CHECK-NEXT:  # %bb.0:
2095; CHECK-NEXT:    local.get 1
2096; CHECK-NEXT:    i32.const 16
2097; CHECK-NEXT:    i32.add
2098; CHECK-NEXT:    local.get 0
2099; CHECK-NEXT:    v128.store 0
2100; CHECK-NEXT:    # fallthrough-return
2101  %s = getelementptr <4 x i32>, ptr %p, i32 1
2102  store <4 x i32> %v , ptr %s
2103  ret void
2104}
2105
; Narrowing store with a non-inbounds (unfoldable) 8-byte GEP offset.
2106define void @store_narrowing_v4i32_with_unfolded_gep_offset(<4 x i16> %v, ptr %p) {
2107; CHECK-LABEL: store_narrowing_v4i32_with_unfolded_gep_offset:
2108; CHECK:         .functype store_narrowing_v4i32_with_unfolded_gep_offset (v128, i32) -> ()
2109; CHECK-NEXT:  # %bb.0:
2110; CHECK-NEXT:    local.get 1
2111; CHECK-NEXT:    i32.const 8
2112; CHECK-NEXT:    i32.add
2113; CHECK-NEXT:    local.get 0
2114; CHECK-NEXT:    v128.store64_lane 0, 0
2115; CHECK-NEXT:    # fallthrough-return
2116  %s = getelementptr <4 x i16>, ptr %p, i32 1
2117  store <4 x i16> %v , ptr %s
2118  ret void
2119}
2120
; Constant destination address folds into the store's immediate offset.
2121define void @store_v4i32_to_numeric_address(<4 x i32> %v) {
2122; CHECK-LABEL: store_v4i32_to_numeric_address:
2123; CHECK:         .functype store_v4i32_to_numeric_address (v128) -> ()
2124; CHECK-NEXT:  # %bb.0:
2125; CHECK-NEXT:    i32.const 0
2126; CHECK-NEXT:    local.get 0
2127; CHECK-NEXT:    v128.store 32
2128; CHECK-NEXT:    # fallthrough-return
2129  %s = inttoptr i32 32 to ptr
2130  store <4 x i32> %v , ptr %s
2131  ret void
2132}
2133
; Narrowing store to a constant address.
2134define void @store_narrowing_v4i32_to_numeric_address(<4 x i16> %v) {
2135; CHECK-LABEL: store_narrowing_v4i32_to_numeric_address:
2136; CHECK:         .functype store_narrowing_v4i32_to_numeric_address (v128) -> ()
2137; CHECK-NEXT:  # %bb.0:
2138; CHECK-NEXT:    i32.const 0
2139; CHECK-NEXT:    local.get 0
2140; CHECK-NEXT:    v128.store64_lane 32, 0
2141; CHECK-NEXT:    # fallthrough-return
2142  %s = inttoptr i32 32 to ptr
2143  store <4 x i16> %v , ptr %s
2144  ret void
2145}
2146
; Global symbol used as the store's immediate offset with a zero base.
2147define void @store_v4i32_to_global_address(<4 x i32> %v) {
2148; CHECK-LABEL: store_v4i32_to_global_address:
2149; CHECK:         .functype store_v4i32_to_global_address (v128) -> ()
2150; CHECK-NEXT:  # %bb.0:
2151; CHECK-NEXT:    i32.const 0
2152; CHECK-NEXT:    local.get 0
2153; CHECK-NEXT:    v128.store gv_v4i32
2154; CHECK-NEXT:    # fallthrough-return
2155  store <4 x i32> %v , ptr @gv_v4i32
2156  ret void
2157}
2158
; Narrowing store to a global address.
2159define void @store_narrowing_v4i32_to_global_address(<4 x i16> %v) {
2160; CHECK-LABEL: store_narrowing_v4i32_to_global_address:
2161; CHECK:         .functype store_narrowing_v4i32_to_global_address (v128) -> ()
2162; CHECK-NEXT:  # %bb.0:
2163; CHECK-NEXT:    i32.const 0
2164; CHECK-NEXT:    local.get 0
2165; CHECK-NEXT:    v128.store64_lane gv_v4i16, 0
2166; CHECK-NEXT:    # fallthrough-return
2167  store <4 x i16> %v , ptr @gv_v4i16
2168  ret void
2169}
2170
2171; ==============================================================================
2172; 2 x i64
2173; ==============================================================================
; Plain <2 x i64> load lowers to v128.load with offset 0.
2174define <2 x i64> @load_v2i64(ptr %p) {
2175; CHECK-LABEL: load_v2i64:
2176; CHECK:         .functype load_v2i64 (i32) -> (v128)
2177; CHECK-NEXT:  # %bb.0:
2178; CHECK-NEXT:    local.get 0
2179; CHECK-NEXT:    v128.load 0
2180; CHECK-NEXT:    # fallthrough-return
2181  %v = load <2 x i64>, ptr %p
2182  ret <2 x i64> %v
2183}
2184
; Scalar i64 load + broadcast becomes a single v128.load64_splat.
2185define <2 x i64> @load_splat_v2i64(ptr %p) {
2186; CHECK-LABEL: load_splat_v2i64:
2187; CHECK:         .functype load_splat_v2i64 (i32) -> (v128)
2188; CHECK-NEXT:  # %bb.0:
2189; CHECK-NEXT:    local.get 0
2190; CHECK-NEXT:    v128.load64_splat 0
2191; CHECK-NEXT:    # fallthrough-return
2192  %e = load i64, ptr %p
2193  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2194  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2195  ret <2 x i64> %v2
2196}
2197
; <2 x i32> load + sext selects the widening load i64x2.load32x2_s.
2198define <2 x i64> @load_sext_v2i64(ptr %p) {
2199; CHECK-LABEL: load_sext_v2i64:
2200; CHECK:         .functype load_sext_v2i64 (i32) -> (v128)
2201; CHECK-NEXT:  # %bb.0:
2202; CHECK-NEXT:    local.get 0
2203; CHECK-NEXT:    i64x2.load32x2_s 0
2204; CHECK-NEXT:    # fallthrough-return
2205  %v = load <2 x i32>, ptr %p
2206  %v2 = sext <2 x i32> %v to <2 x i64>
2207  ret <2 x i64> %v2
2208}
2209
; Zero-extending counterpart: i64x2.load32x2_u.
2210define <2 x i64> @load_zext_v2i64(ptr %p) {
2211; CHECK-LABEL: load_zext_v2i64:
2212; CHECK:         .functype load_zext_v2i64 (i32) -> (v128)
2213; CHECK-NEXT:  # %bb.0:
2214; CHECK-NEXT:    local.get 0
2215; CHECK-NEXT:    i64x2.load32x2_u 0
2216; CHECK-NEXT:    # fallthrough-return
2217  %v = load <2 x i32>, ptr %p
2218  %v2 = zext <2 x i32> %v to <2 x i64>
2219  ret <2 x i64> %v2
2220}
2221
; Bare <2 x i32> load fills only the low half: v128.load64_zero.
2222define <2 x i32> @load_ext_v2i64(ptr %p) {
2223; CHECK-LABEL: load_ext_v2i64:
2224; CHECK:         .functype load_ext_v2i64 (i32) -> (v128)
2225; CHECK-NEXT:  # %bb.0:
2226; CHECK-NEXT:    local.get 0
2227; CHECK-NEXT:    v128.load64_zero 0
2228; CHECK-NEXT:    # fallthrough-return
2229  %v = load <2 x i32>, ptr %p
2230  ret <2 x i32> %v
2231}
2232
; 'add nuw' cannot wrap, so the +16 folds into the load's immediate offset.
2233define <2 x i64> @load_v2i64_with_folded_offset(ptr %p) {
2234; CHECK-LABEL: load_v2i64_with_folded_offset:
2235; CHECK:         .functype load_v2i64_with_folded_offset (i32) -> (v128)
2236; CHECK-NEXT:  # %bb.0:
2237; CHECK-NEXT:    local.get 0
2238; CHECK-NEXT:    v128.load 16
2239; CHECK-NEXT:    # fallthrough-return
2240  %q = ptrtoint ptr %p to i32
2241  %r = add nuw i32 %q, 16
2242  %s = inttoptr i32 %r to ptr
2243  %v = load <2 x i64>, ptr %s
2244  ret <2 x i64> %v
2245}
2246
; Splat load with a foldable (nuw) +16 offset.
2247define <2 x i64> @load_splat_v2i64_with_folded_offset(ptr %p) {
2248; CHECK-LABEL: load_splat_v2i64_with_folded_offset:
2249; CHECK:         .functype load_splat_v2i64_with_folded_offset (i32) -> (v128)
2250; CHECK-NEXT:  # %bb.0:
2251; CHECK-NEXT:    local.get 0
2252; CHECK-NEXT:    v128.load64_splat 16
2253; CHECK-NEXT:    # fallthrough-return
2254  %q = ptrtoint ptr %p to i32
2255  %r = add nuw i32 %q, 16
2256  %s = inttoptr i32 %r to ptr
2257  %e = load i64, ptr %s
2258  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2259  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2260  ret <2 x i64> %v2
2261}
2262
; Sign-extending widening load with a foldable offset.
2263define <2 x i64> @load_sext_v2i64_with_folded_offset(ptr %p) {
2264; CHECK-LABEL: load_sext_v2i64_with_folded_offset:
2265; CHECK:         .functype load_sext_v2i64_with_folded_offset (i32) -> (v128)
2266; CHECK-NEXT:  # %bb.0:
2267; CHECK-NEXT:    local.get 0
2268; CHECK-NEXT:    i64x2.load32x2_s 16
2269; CHECK-NEXT:    # fallthrough-return
2270  %q = ptrtoint ptr %p to i32
2271  %r = add nuw i32 %q, 16
2272  %s = inttoptr i32 %r to ptr
2273  %v = load <2 x i32>, ptr %s
2274  %v2 = sext <2 x i32> %v to <2 x i64>
2275  ret <2 x i64> %v2
2276}
2277
; Zero-extending widening load with a foldable offset.
2278define <2 x i64> @load_zext_v2i64_with_folded_offset(ptr %p) {
2279; CHECK-LABEL: load_zext_v2i64_with_folded_offset:
2280; CHECK:         .functype load_zext_v2i64_with_folded_offset (i32) -> (v128)
2281; CHECK-NEXT:  # %bb.0:
2282; CHECK-NEXT:    local.get 0
2283; CHECK-NEXT:    i64x2.load32x2_u 16
2284; CHECK-NEXT:    # fallthrough-return
2285  %q = ptrtoint ptr %p to i32
2286  %r = add nuw i32 %q, 16
2287  %s = inttoptr i32 %r to ptr
2288  %v = load <2 x i32>, ptr %s
2289  %v2 = zext <2 x i32> %v to <2 x i64>
2290  ret <2 x i64> %v2
2291}
2292
; Zero-filling 64-bit load with a foldable offset.
2293define <2 x i32> @load_ext_v2i64_with_folded_offset(ptr %p) {
2294; CHECK-LABEL: load_ext_v2i64_with_folded_offset:
2295; CHECK:         .functype load_ext_v2i64_with_folded_offset (i32) -> (v128)
2296; CHECK-NEXT:  # %bb.0:
2297; CHECK-NEXT:    local.get 0
2298; CHECK-NEXT:    v128.load64_zero 16
2299; CHECK-NEXT:    # fallthrough-return
2300  %q = ptrtoint ptr %p to i32
2301  %r = add nuw i32 %q, 16
2302  %s = inttoptr i32 %r to ptr
2303  %v = load <2 x i32>, ptr %s
2304  ret <2 x i32> %v
2305}
2306
; An inbounds GEP offset folds into the load's immediate offset.
2307define <2 x i64> @load_v2i64_with_folded_gep_offset(ptr %p) {
2308; CHECK-LABEL: load_v2i64_with_folded_gep_offset:
2309; CHECK:         .functype load_v2i64_with_folded_gep_offset (i32) -> (v128)
2310; CHECK-NEXT:  # %bb.0:
2311; CHECK-NEXT:    local.get 0
2312; CHECK-NEXT:    v128.load 16
2313; CHECK-NEXT:    # fallthrough-return
2314  %s = getelementptr inbounds <2 x i64>, ptr %p, i32 1
2315  %v = load <2 x i64>, ptr %s
2316  ret <2 x i64> %v
2317}
2318
; Splat load with a foldable inbounds 8-byte GEP offset.
2319define <2 x i64> @load_splat_v2i64_with_folded_gep_offset(ptr %p) {
2320; CHECK-LABEL: load_splat_v2i64_with_folded_gep_offset:
2321; CHECK:         .functype load_splat_v2i64_with_folded_gep_offset (i32) -> (v128)
2322; CHECK-NEXT:  # %bb.0:
2323; CHECK-NEXT:    local.get 0
2324; CHECK-NEXT:    v128.load64_splat 8
2325; CHECK-NEXT:    # fallthrough-return
2326  %s = getelementptr inbounds i64, ptr %p, i32 1
2327  %e = load i64, ptr %s
2328  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2329  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2330  ret <2 x i64> %v2
2331}
2332
; Sign-extending widening load with a foldable inbounds GEP offset.
2333define <2 x i64> @load_sext_v2i64_with_folded_gep_offset(ptr %p) {
2334; CHECK-LABEL: load_sext_v2i64_with_folded_gep_offset:
2335; CHECK:         .functype load_sext_v2i64_with_folded_gep_offset (i32) -> (v128)
2336; CHECK-NEXT:  # %bb.0:
2337; CHECK-NEXT:    local.get 0
2338; CHECK-NEXT:    i64x2.load32x2_s 8
2339; CHECK-NEXT:    # fallthrough-return
2340  %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1
2341  %v = load <2 x i32>, ptr %s
2342  %v2 = sext <2 x i32> %v to <2 x i64>
2343  ret <2 x i64> %v2
2344}
2345
; Zero-extending widening load with a foldable inbounds GEP offset.
2346define <2 x i64> @load_zext_v2i64_with_folded_gep_offset(ptr %p) {
2347; CHECK-LABEL: load_zext_v2i64_with_folded_gep_offset:
2348; CHECK:         .functype load_zext_v2i64_with_folded_gep_offset (i32) -> (v128)
2349; CHECK-NEXT:  # %bb.0:
2350; CHECK-NEXT:    local.get 0
2351; CHECK-NEXT:    i64x2.load32x2_u 8
2352; CHECK-NEXT:    # fallthrough-return
2353  %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1
2354  %v = load <2 x i32>, ptr %s
2355  %v2 = zext <2 x i32> %v to <2 x i64>
2356  ret <2 x i64> %v2
2357}
2358
; Zero-filling 64-bit load with a foldable inbounds GEP offset.
2359define <2 x i32> @load_ext_v2i64_with_folded_gep_offset(ptr %p) {
2360; CHECK-LABEL: load_ext_v2i64_with_folded_gep_offset:
2361; CHECK:         .functype load_ext_v2i64_with_folded_gep_offset (i32) -> (v128)
2362; CHECK-NEXT:  # %bb.0:
2363; CHECK-NEXT:    local.get 0
2364; CHECK-NEXT:    v128.load64_zero 8
2365; CHECK-NEXT:    # fallthrough-return
2366  %s = getelementptr inbounds <2 x i32>, ptr %p, i32 1
2367  %v = load <2 x i32>, ptr %s
2368  ret <2 x i32> %v
2369}
2370
; Negative offsets cannot be encoded in the unsigned immediate, so even an
; inbounds GEP of -16 requires an explicit i32.add.
2371define <2 x i64> @load_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2372; CHECK-LABEL: load_v2i64_with_unfolded_gep_negative_offset:
2373; CHECK:         .functype load_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2374; CHECK-NEXT:  # %bb.0:
2375; CHECK-NEXT:    local.get 0
2376; CHECK-NEXT:    i32.const -16
2377; CHECK-NEXT:    i32.add
2378; CHECK-NEXT:    v128.load 0
2379; CHECK-NEXT:    # fallthrough-return
2380  %s = getelementptr inbounds <2 x i64>, ptr %p, i32 -1
2381  %v = load <2 x i64>, ptr %s
2382  ret <2 x i64> %v
2383}
2384
; Splat load with an unfoldable -8 offset.
2385define <2 x i64> @load_splat_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2386; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_negative_offset:
2387; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2388; CHECK-NEXT:  # %bb.0:
2389; CHECK-NEXT:    local.get 0
2390; CHECK-NEXT:    i32.const -8
2391; CHECK-NEXT:    i32.add
2392; CHECK-NEXT:    v128.load64_splat 0
2393; CHECK-NEXT:    # fallthrough-return
2394  %s = getelementptr inbounds i64, ptr %p, i32 -1
2395  %e = load i64, ptr %s
2396  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2397  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2398  ret <2 x i64> %v2
2399}
2400
; Sign-extending widening load with an unfoldable -8 offset.
2401define <2 x i64> @load_sext_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2402; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_negative_offset:
2403; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2404; CHECK-NEXT:  # %bb.0:
2405; CHECK-NEXT:    local.get 0
2406; CHECK-NEXT:    i32.const -8
2407; CHECK-NEXT:    i32.add
2408; CHECK-NEXT:    i64x2.load32x2_s 0
2409; CHECK-NEXT:    # fallthrough-return
2410  %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1
2411  %v = load <2 x i32>, ptr %s
2412  %v2 = sext <2 x i32> %v to <2 x i64>
2413  ret <2 x i64> %v2
2414}
2415
; Zero-extending widening load with an unfoldable -8 offset.
2416define <2 x i64> @load_zext_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2417; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_negative_offset:
2418; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2419; CHECK-NEXT:  # %bb.0:
2420; CHECK-NEXT:    local.get 0
2421; CHECK-NEXT:    i32.const -8
2422; CHECK-NEXT:    i32.add
2423; CHECK-NEXT:    i64x2.load32x2_u 0
2424; CHECK-NEXT:    # fallthrough-return
2425  %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1
2426  %v = load <2 x i32>, ptr %s
2427  %v2 = zext <2 x i32> %v to <2 x i64>
2428  ret <2 x i64> %v2
2429}
2430
; Zero-filling 64-bit load with an unfoldable -8 offset.
2431define <2 x i32> @load_ext_v2i64_with_unfolded_gep_negative_offset(ptr %p) {
2432; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_negative_offset:
2433; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_negative_offset (i32) -> (v128)
2434; CHECK-NEXT:  # %bb.0:
2435; CHECK-NEXT:    local.get 0
2436; CHECK-NEXT:    i32.const -8
2437; CHECK-NEXT:    i32.add
2438; CHECK-NEXT:    v128.load64_zero 0
2439; CHECK-NEXT:    # fallthrough-return
2440  %s = getelementptr inbounds <2 x i32>, ptr %p, i32 -1
2441  %v = load <2 x i32>, ptr %s
2442  ret <2 x i32> %v
2443}
2444
2445define <2 x i64> @load_v2i64_with_unfolded_offset(ptr %p) {
2446; CHECK-LABEL: load_v2i64_with_unfolded_offset:
2447; CHECK:         .functype load_v2i64_with_unfolded_offset (i32) -> (v128)
2448; CHECK-NEXT:  # %bb.0:
2449; CHECK-NEXT:    local.get 0
2450; CHECK-NEXT:    i32.const 16
2451; CHECK-NEXT:    i32.add
2452; CHECK-NEXT:    v128.load 0
2453; CHECK-NEXT:    # fallthrough-return
2454  %q = ptrtoint ptr %p to i32
2455  %r = add nsw i32 %q, 16
2456  %s = inttoptr i32 %r to ptr
2457  %v = load <2 x i64>, ptr %s
2458  ret <2 x i64> %v
2459}
2460
2461define <2 x i64> @load_splat_v2i64_with_unfolded_offset(ptr %p) {
2462; CHECK-LABEL: load_splat_v2i64_with_unfolded_offset:
2463; CHECK:         .functype load_splat_v2i64_with_unfolded_offset (i32) -> (v128)
2464; CHECK-NEXT:  # %bb.0:
2465; CHECK-NEXT:    local.get 0
2466; CHECK-NEXT:    i32.const 16
2467; CHECK-NEXT:    i32.add
2468; CHECK-NEXT:    v128.load64_splat 0
2469; CHECK-NEXT:    # fallthrough-return
2470  %q = ptrtoint ptr %p to i32
2471  %r = add nsw i32 %q, 16
2472  %s = inttoptr i32 %r to ptr
2473  %e = load i64, ptr %s
2474  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
2475  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
2476  ret <2 x i64> %v2
2477}
2478
; load <2 x i32> + sext selects the extending load i64x2.load32x2_s; the 'nsw'
; offset is not folded, so an explicit i32.add is emitted.
define <2 x i64> @load_sext_v2i64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <2 x i32>, ptr %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2495
; load <2 x i32> + zext selects the extending load i64x2.load32x2_u; the 'nsw'
; offset is not folded, so an explicit i32.add is emitted.
define <2 x i64> @load_zext_v2i64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <2 x i32>, ptr %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2512
; A bare <2 x i32> load (widened to v128) selects v128.load64_zero; the 'nsw'
; offset is not folded, so an explicit i32.add is emitted.
define <2 x i32> @load_ext_v2i64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <2 x i32>, ptr %s
  ret <2 x i32> %v
}
2528
; A plain (non-inbounds) GEP offset may wrap, so it is not folded into the
; load's offset immediate; an explicit i32.add is emitted.
define <2 x i64> @load_v2i64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, ptr %p, i32 1
  %v = load <2 x i64>, ptr %s
  ret <2 x i64> %v
}
2542
; Splat load via v128.load64_splat; the non-inbounds GEP (+8) is not folded
; into the offset immediate, so an explicit i32.add is emitted.
define <2 x i64> @load_splat_v2i64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr i64, ptr %p, i32 1
  %e = load i64, ptr %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2558
; Sign-extending load i64x2.load32x2_s; the non-inbounds GEP (+8) is not
; folded into the offset immediate, so an explicit i32.add is emitted.
define <2 x i64> @load_sext_v2i64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_sext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_sext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_s 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, ptr %p, i32 1
  %v = load <2 x i32>, ptr %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2573
; Zero-extending load i64x2.load32x2_u; the non-inbounds GEP (+8) is not
; folded into the offset immediate, so an explicit i32.add is emitted.
define <2 x i64> @load_zext_v2i64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_zext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_zext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    i64x2.load32x2_u 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, ptr %p, i32 1
  %v = load <2 x i32>, ptr %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2588
; Widened <2 x i32> load selects v128.load64_zero; the non-inbounds GEP (+8)
; is not folded into the offset immediate, so an explicit i32.add is emitted.
define <2 x i32> @load_ext_v2i64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_ext_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype load_ext_v2i64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i32>, ptr %p, i32 1
  %v = load <2 x i32>, ptr %s
  ret <2 x i32> %v
}
2602
; A constant address is selected as the offset immediate over a constant 0 base.
define <2 x i64> @load_v2i64_from_numeric_address() {
; CHECK-LABEL: load_v2i64_from_numeric_address:
; CHECK:         .functype load_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <2 x i64>, ptr %s
  ret <2 x i64> %v
}
2614
; Splat load from a constant address: the address becomes the offset immediate
; of v128.load64_splat over a constant 0 base.
define <2 x i64> @load_splat_v2i64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2i64_from_numeric_address:
; CHECK:         .functype load_splat_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load i64, ptr %s
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2628
; Sign-extending load from a constant address: the address becomes the offset
; immediate of i64x2.load32x2_s over a constant 0 base.
define <2 x i64> @load_sext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_sext_v2i64_from_numeric_address:
; CHECK:         .functype load_sext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <2 x i32>, ptr %s
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2641
; Zero-extending load from a constant address: the address becomes the offset
; immediate of i64x2.load32x2_u over a constant 0 base.
define <2 x i64> @load_zext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_zext_v2i64_from_numeric_address:
; CHECK:         .functype load_zext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <2 x i32>, ptr %s
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2654
; Widened <2 x i32> load from a constant address: the address becomes the
; offset immediate of v128.load64_zero over a constant 0 base.
define <2 x i32> @load_ext_v2i64_from_numeric_address() {
; CHECK-LABEL: load_ext_v2i64_from_numeric_address:
; CHECK:         .functype load_ext_v2i64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <2 x i32>, ptr %s
  ret <2 x i32> %v
}
2666
@gv_v2i64 = global <2 x i64> <i64 42, i64 42>
; A global's address is folded in as the symbolic offset immediate of the load.
define <2 x i64> @load_v2i64_from_global_address() {
; CHECK-LABEL: load_v2i64_from_global_address:
; CHECK:         .functype load_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i64>, ptr @gv_v2i64
  ret <2 x i64> %v
}
2678
@gv_i64 = global i64 42
; Splat load from a global: the symbol is the offset immediate of
; v128.load64_splat over a constant 0 base.
define <2 x i64> @load_splat_v2i64_from_global_address() {
; CHECK-LABEL: load_splat_v2i64_from_global_address:
; CHECK:         .functype load_splat_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_i64
; CHECK-NEXT:    # fallthrough-return
  %e = load i64, ptr @gv_i64
  %v1 = insertelement <2 x i64> undef, i64 %e, i32 0
  %v2 = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %v2
}
2692
@gv_v2i32 = global <2 x i32> <i32 42, i32 42>
; Sign-extending load from a global: the symbol is the offset immediate of
; i64x2.load32x2_s over a constant 0 base.
define <2 x i64> @load_sext_v2i64_from_global_address() {
; CHECK-LABEL: load_sext_v2i64_from_global_address:
; CHECK:         .functype load_sext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_s gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, ptr @gv_v2i32
  %v2 = sext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2705
; Zero-extending load from a global: the symbol is the offset immediate of
; i64x2.load32x2_u over a constant 0 base.
define <2 x i64> @load_zext_v2i64_from_global_address() {
; CHECK-LABEL: load_zext_v2i64_from_global_address:
; CHECK:         .functype load_zext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    i64x2.load32x2_u gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, ptr @gv_v2i32
  %v2 = zext <2 x i32> %v to <2 x i64>
  ret <2 x i64> %v2
}
2717
; Widened <2 x i32> load from a global: the symbol is the offset immediate of
; v128.load64_zero over a constant 0 base.
define <2 x i32> @load_ext_v2i64_from_global_address() {
; CHECK-LABEL: load_ext_v2i64_from_global_address:
; CHECK:         .functype load_ext_v2i64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_zero gv_v2i32
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x i32>, ptr @gv_v2i32
  ret <2 x i32> %v
}
2728
; Basic v128 store of a <2 x i64> with no offset.
define void @store_v2i64(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_v2i64:
; CHECK:         .functype store_v2i64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , ptr %p
  ret void
}
2740
; The 'nuw' add cannot wrap, so the +16 folds into the store's offset immediate.
define void @store_v2i64_with_folded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_v2i64_with_folded_offset:
; CHECK:         .functype store_v2i64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <2 x i64> %v , ptr %s
  ret void
}
2755
; An inbounds GEP offset folds into the store's offset immediate.
define void @store_v2i64_with_folded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_v2i64_with_folded_gep_offset:
; CHECK:         .functype store_v2i64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, ptr %p, i32 1
  store <2 x i64> %v , ptr %s
  ret void
}
2768
; Negative offsets cannot be encoded in the unsigned offset immediate, even for
; an inbounds GEP; an explicit i32.add with -16 is emitted.
define void @store_v2i64_with_unfolded_gep_negative_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x i64>, ptr %p, i32 -1
  store <2 x i64> %v , ptr %s
  ret void
}
2783
; The add is only 'nsw', so the +16 may wrap unsigned and is not folded into
; the store's offset immediate; an explicit i32.add is emitted.
define void @store_v2i64_with_unfolded_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_offset:
; CHECK:         .functype store_v2i64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <2 x i64> %v , ptr %s
  ret void
}
2800
; A plain (non-inbounds) GEP offset may wrap and is not folded into the store's
; offset immediate; an explicit i32.add is emitted.
define void @store_v2i64_with_unfolded_gep_offset(<2 x i64> %v, ptr %p) {
; CHECK-LABEL: store_v2i64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2i64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x i64>, ptr %p, i32 1
  store <2 x i64> %v , ptr %s
  ret void
}
2815
; A constant store address becomes the offset immediate over a constant 0 base.
define void @store_v2i64_to_numeric_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_numeric_address:
; CHECK:         .functype store_v2i64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <2 x i64> %v , ptr %s
  ret void
}
2828
; A global's symbol is used directly as the store's offset immediate.
define void @store_v2i64_to_global_address(<2 x i64> %v) {
; CHECK-LABEL: store_v2i64_to_global_address:
; CHECK:         .functype store_v2i64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2i64
; CHECK-NEXT:    # fallthrough-return
  store <2 x i64> %v , ptr @gv_v2i64
  ret void
}
2840
2841; ==============================================================================
2842; 4 x float
2843; ==============================================================================
; Basic v128 load of a <4 x float> with no offset.
define <4 x float> @load_v4f32(ptr %p) {
; CHECK-LABEL: load_v4f32:
; CHECK:         .functype load_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, ptr %p
  ret <4 x float> %v
}
2854
; A scalar float load splatted to all lanes selects v128.load32_splat.
define <4 x float> @load_splat_v4f32(ptr %p) {
; CHECK-LABEL: load_splat_v4f32:
; CHECK:         .functype load_splat_v4f32 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load float, ptr %p
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2867
; The 'nuw' add cannot wrap, so the +16 folds into the load's offset immediate.
define <4 x float> @load_v4f32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_v4f32_with_folded_offset:
; CHECK:         .functype load_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x float>, ptr %s
  ret <4 x float> %v
}
2881
; Splat load: the 'nuw' +16 folds into v128.load32_splat's offset immediate.
define <4 x float> @load_splat_v4f32_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load float, ptr %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2897
; An inbounds GEP offset folds into the load's offset immediate.
define <4 x float> @load_v4f32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 1
  %v = load <4 x float>, ptr %s
  ret <4 x float> %v
}
2909
; Splat load: the inbounds GEP (+4) folds into load32_splat's offset immediate.
define <4 x float> @load_splat_v4f32_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4f32_with_folded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load32_splat 4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, ptr %p, i32 1
  %e = load float, ptr %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2923
; Negative offsets cannot be encoded in the unsigned offset immediate, even for
; an inbounds GEP; an explicit i32.add with -16 is emitted.
define <4 x float> @load_v4f32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1
  %v = load <4 x float>, ptr %s
  ret <4 x float> %v
}
2937
; Splat load with a negative GEP offset: -4 cannot be encoded in the unsigned
; offset immediate, so an explicit i32.add is emitted.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds float, ptr %p, i32 -1
  %e = load float, ptr %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2953
; The add is only 'nsw', so the +16 may wrap unsigned and is not folded into
; the load's offset immediate; an explicit i32.add is emitted.
define <4 x float> @load_v4f32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_offset:
; CHECK:         .functype load_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <4 x float>, ptr %s
  ret <4 x float> %v
}
2969
; Splat load: the 'nsw' +16 is not folded into load32_splat's offset immediate,
; so an explicit i32.add is emitted.
define <4 x float> @load_splat_v4f32_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load float, ptr %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
2987
; A plain (non-inbounds) GEP offset may wrap and is not folded into the load's
; offset immediate; an explicit i32.add is emitted.
define <4 x float> @load_v4f32_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, ptr %p, i32 1
  %v = load <4 x float>, ptr %s
  ret <4 x float> %v
}
3001
; Splat load: the non-inbounds GEP (+4) is not folded into load32_splat's
; offset immediate, so an explicit i32.add is emitted.
define <4 x float> @load_splat_v4f32_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v4f32_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 4
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load32_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr float, ptr %p, i32 1
  %e = load float, ptr %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
3017
; A constant address is selected as the offset immediate over a constant 0 base.
define <4 x float> @load_v4f32_from_numeric_address() {
; CHECK-LABEL: load_v4f32_from_numeric_address:
; CHECK:         .functype load_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <4 x float>, ptr %s
  ret <4 x float> %v
}
3029
; Splat load from a constant address: the address becomes the offset immediate
; of v128.load32_splat over a constant 0 base.
define <4 x float> @load_splat_v4f32_from_numeric_address() {
; CHECK-LABEL: load_splat_v4f32_from_numeric_address:
; CHECK:         .functype load_splat_v4f32_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load float, ptr %s
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
3043
@gv_v4f32 = global <4 x float> <float 42., float 42., float 42., float 42.>
; A global's address is folded in as the symbolic offset immediate of the load.
define <4 x float> @load_v4f32_from_global_address() {
; CHECK-LABEL: load_v4f32_from_global_address:
; CHECK:         .functype load_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  %v = load <4 x float>, ptr @gv_v4f32
  ret <4 x float> %v
}
3055
@gv_f32 = global float 42.
; Splat load from a global: the symbol is the offset immediate of
; v128.load32_splat over a constant 0 base.
define <4 x float> @load_splat_v4f32_from_global_address() {
; CHECK-LABEL: load_splat_v4f32_from_global_address:
; CHECK:         .functype load_splat_v4f32_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load32_splat gv_f32
; CHECK-NEXT:    # fallthrough-return
  %e = load float, ptr @gv_f32
  %v1 = insertelement <4 x float> undef, float %e, i32 0
  %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %v2
}
3069
; Basic v128 store of a <4 x float> with no offset.
define void @store_v4f32(<4 x float> %v, ptr %p) {
; CHECK-LABEL: store_v4f32:
; CHECK:         .functype store_v4f32 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , ptr %p
  ret void
}
3081
; The 'nuw' add cannot wrap, so the +16 folds into the store's offset immediate.
define void @store_v4f32_with_folded_offset(<4 x float> %v, ptr %p) {
; CHECK-LABEL: store_v4f32_with_folded_offset:
; CHECK:         .functype store_v4f32_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <4 x float> %v , ptr %s
  ret void
}
3096
; An inbounds GEP offset folds into the store's offset immediate.
define void @store_v4f32_with_folded_gep_offset(<4 x float> %v, ptr %p) {
; CHECK-LABEL: store_v4f32_with_folded_gep_offset:
; CHECK:         .functype store_v4f32_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 1
  store <4 x float> %v , ptr %s
  ret void
}
3109
; Negative offsets cannot be encoded in the unsigned offset immediate, even for
; an inbounds GEP; an explicit i32.add with -16 is emitted.
define void @store_v4f32_with_unfolded_gep_negative_offset(<4 x float> %v, ptr %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <4 x float>, ptr %p, i32 -1
  store <4 x float> %v , ptr %s
  ret void
}
3124
; The add is only 'nsw', so the +16 may wrap unsigned and is not folded into
; the store's offset immediate; an explicit i32.add is emitted.
define void @store_v4f32_with_unfolded_offset(<4 x float> %v, ptr %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_offset:
; CHECK:         .functype store_v4f32_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <4 x float> %v , ptr %s
  ret void
}
3141
; A plain (non-inbounds) GEP offset may wrap and is not folded into the store's
; offset immediate; an explicit i32.add is emitted.
define void @store_v4f32_with_unfolded_gep_offset(<4 x float> %v, ptr %p) {
; CHECK-LABEL: store_v4f32_with_unfolded_gep_offset:
; CHECK:         .functype store_v4f32_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <4 x float>, ptr %p, i32 1
  store <4 x float> %v , ptr %s
  ret void
}
3156
; A constant store address becomes the offset immediate over a constant 0 base.
define void @store_v4f32_to_numeric_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_numeric_address:
; CHECK:         .functype store_v4f32_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <4 x float> %v , ptr %s
  ret void
}
3169
; A global's symbol is used directly as the store's offset immediate.
define void @store_v4f32_to_global_address(<4 x float> %v) {
; CHECK-LABEL: store_v4f32_to_global_address:
; CHECK:         .functype store_v4f32_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v4f32
; CHECK-NEXT:    # fallthrough-return
  store <4 x float> %v , ptr @gv_v4f32
  ret void
}
3181
3182; ==============================================================================
3183; 2 x double
3184; ==============================================================================
; Basic v128 load of a <2 x double> with no offset.
define <2 x double> @load_v2f64(ptr %p) {
; CHECK-LABEL: load_v2f64:
; CHECK:         .functype load_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, ptr %p
  ret <2 x double> %v
}
3195
; A scalar double load splatted to both lanes selects v128.load64_splat.
define <2 x double> @load_splat_v2f64(ptr %p) {
; CHECK-LABEL: load_splat_v2f64:
; CHECK:         .functype load_splat_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %e = load double, ptr %p
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3208
; load <2 x float> + fpext selects v128.load64_zero followed by
; f64x2.promote_low_f32x4.
define <2 x double> @load_promote_v2f64(ptr %p) {
; CHECK-LABEL: load_promote_v2f64:
; CHECK:         .functype load_promote_v2f64 (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %e = load <2 x float>, ptr %p
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3221
; The 'nuw' add cannot wrap, so the +16 folds into the load's offset immediate.
define <2 x double> @load_v2f64_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_v2f64_with_folded_offset:
; CHECK:         .functype load_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}
3235
; Splat load: the 'nuw' +16 folds into v128.load64_splat's offset immediate.
define <2 x double> @load_splat_v2f64_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3251
; Promoting load: even though the add is 'nuw', the offset is not folded into
; v128.load64_zero here (an explicit i32.add is emitted) before the promote.
define <2 x double> @load_promote_v2f64_with_folded_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3269
; An inbounds GEP offset folds into the load's offset immediate.
define <2 x double> @load_v2f64_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, ptr %p, i32 1
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}
3281
; Splat load: the inbounds GEP (+8) folds into load64_splat's offset immediate.
define <2 x double> @load_splat_v2f64_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load64_splat 8
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, ptr %p, i32 1
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3295
; Promoting load: even with an inbounds GEP, the +8 is not folded into
; v128.load64_zero here (an explicit i32.add is emitted) before the promote.
define <2 x double> @load_promote_v2f64_with_folded_gep_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_folded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_folded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x float>, ptr %p, i32 1
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3311
; Negative offsets cannot be encoded in the unsigned offset immediate, even for
; an inbounds GEP; an explicit i32.add with -16 is emitted.
define <2 x double> @load_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, ptr %p, i32 -1
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}
3325
; Same as above but for a splat load: the negative offset (-8) stays as an
; explicit add and `v128.load64_splat` keeps a 0 immediate.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds double, ptr %p, i32 -1
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3341
; Negative-offset variant of the promoting load: explicit `i32.const -8` add,
; then `v128.load64_zero 0` + `f64x2.promote_low_f32x4`.
define <2 x double> @load_promote_v2f64_with_unfolded_gep_negative_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_negative_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const -8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x float>, ptr %p, i32 -1
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3357
; The address add is only `nsw` (not `nuw`, unlike the folded-offset tests at
; the top of the file), so the backend cannot prove the unsigned address does
; not wrap and must emit an explicit add instead of folding the offset.
define <2 x double> @load_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_offset:
; CHECK:         .functype load_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}
3373
; Splat-load variant of the `nsw`-only (unfoldable) offset: explicit i32.add,
; `v128.load64_splat` with a 0 immediate.
define <2 x double> @load_splat_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3391
; Promoting-load variant of the `nsw`-only (unfoldable) offset: explicit
; i32.add, then `v128.load64_zero 0` + `f64x2.promote_low_f32x4`.
define <2 x double> @load_promote_v2f64_with_unfolded_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3409
; The gep here lacks `inbounds` (contrast with the folded-gep test above), so
; the positive offset is not folded into the load immediate.
define <2 x double> @load_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, ptr %p, i32 1
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}
3423
; Splat-load variant of the non-`inbounds` gep: offset stays as an explicit
; `i32.const 8` / `i32.add`, splat load keeps a 0 immediate.
define <2 x double> @load_splat_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_splat_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_splat_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_splat 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr double, ptr %p, i32 1
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3439
; Promoting-load variant of the non-`inbounds` gep: explicit `i32.const 8` add,
; then `v128.load64_zero 0` + `f64x2.promote_low_f32x4`.
define <2 x double> @load_promote_v2f64_with_unfolded_gep_offset(ptr %p) {
; CHECK-LABEL: load_promote_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype load_promote_v2f64_with_unfolded_gep_offset (i32) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x float>, ptr %p, i32 1
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3455
; A constant absolute address is lowered as base `i32.const 0` with the whole
; address (32) folded into the load's immediate offset.
define <2 x double> @load_v2f64_from_numeric_address() {
; CHECK-LABEL: load_v2f64_from_numeric_address:
; CHECK:         .functype load_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %v = load <2 x double>, ptr %s
  ret <2 x double> %v
}
3467
; Splat load from a constant absolute address: base `i32.const 0`, address 32
; folded into the `v128.load64_splat` immediate.
define <2 x double> @load_splat_v2f64_from_numeric_address() {
; CHECK-LABEL: load_splat_v2f64_from_numeric_address:
; CHECK:         .functype load_splat_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load double, ptr %s
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3481
; Promoting load from a constant absolute address: unlike the plain and splat
; variants above, the address is materialized as `i32.const 32` and the
; `v128.load64_zero` keeps a 0 immediate (no folding for this pattern).
define <2 x double> @load_promote_v2f64_from_numeric_address() {
; CHECK-LABEL: load_promote_v2f64_from_numeric_address:
; CHECK:         .functype load_promote_v2f64_from_numeric_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  %e = load <2 x float>, ptr %s
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3495
; A global's address is folded into the load as a symbolic immediate offset
; (`v128.load gv_v2f64`) over a `i32.const 0` base.
@gv_v2f64 = global <2 x double> <double 42., double 42.>
define <2 x double> @load_v2f64_from_global_address() {
; CHECK-LABEL: load_v2f64_from_global_address:
; CHECK:         .functype load_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  %v = load <2 x double>, ptr @gv_v2f64
  ret <2 x double> %v
}
3507
; Splat load from a global: the symbol is folded into the splat load's
; immediate (`v128.load64_splat gv_f64`) over a `i32.const 0` base.
@gv_f64 = global double 42.
define <2 x double> @load_splat_v2f64_from_global_address() {
; CHECK-LABEL: load_splat_v2f64_from_global_address:
; CHECK:         .functype load_splat_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    v128.load64_splat gv_f64
; CHECK-NEXT:    # fallthrough-return
  %e = load double, ptr @gv_f64
  %v1 = insertelement <2 x double> undef, double %e, i32 0
  %v2 = shufflevector <2 x double> %v1, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %v2
}
3521
; Promoting load from a global: the symbol is materialized with
; `i32.const gv_v2f32` rather than folded into the load immediate (contrast
; with the plain/splat global-address tests above).
@gv_v2f32 = global <2 x float> <float 42., float 42.>
define <2 x double> @load_promote_v2f64_from_global_address() {
; CHECK-LABEL: load_promote_v2f64_from_global_address:
; CHECK:         .functype load_promote_v2f64_from_global_address () -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const gv_v2f32
; CHECK-NEXT:    v128.load64_zero 0
; CHECK-NEXT:    f64x2.promote_low_f32x4
; CHECK-NEXT:    # fallthrough-return
  %e = load <2 x float>, ptr @gv_v2f32
  %v = fpext <2 x float> %e to <2 x double>
  ret <2 x double> %v
}
3535
; Basic v128 store: address operand first, value second, zero immediate offset.
define void @store_v2f64(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64:
; CHECK:         .functype store_v2f64 (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , ptr %p
  ret void
}
3547
; An `add nuw` constant address offset is folded into the store's immediate
; (`v128.store 16`) — `nuw` guarantees the unsigned address doesn't wrap.
define void @store_v2f64_with_folded_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_folded_offset:
; CHECK:         .functype store_v2f64_with_folded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <2 x double> %v , ptr %s
  ret void
}
3562
; An `inbounds` gep with a constant positive offset is folded into the store's
; immediate (`v128.store 16`).
define void @store_v2f64_with_folded_gep_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_folded_gep_offset:
; CHECK:         .functype store_v2f64_with_folded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, ptr %p, i32 1
  store <2 x double> %v , ptr %s
  ret void
}
3575
; Negative gep offsets are never folded into the (unsigned) store immediate:
; an explicit `i32.const -16` / `i32.add` is emitted.
define void @store_v2f64_with_unfolded_gep_negative_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_negative_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_negative_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const -16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr inbounds <2 x double>, ptr %p, i32 -1
  store <2 x double> %v , ptr %s
  ret void
}
3590
; The address add is only `nsw` (not `nuw`), so the offset cannot be folded
; into the store immediate and an explicit add is emitted.
define void @store_v2f64_with_unfolded_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_offset:
; CHECK:         .functype store_v2f64_with_unfolded_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 16
  %s = inttoptr i32 %r to ptr
  store <2 x double> %v , ptr %s
  ret void
}
3607
; The gep lacks `inbounds`, so the positive offset is not folded into the
; store immediate — explicit `i32.const 16` / `i32.add` instead.
define void @store_v2f64_with_unfolded_gep_offset(<2 x double> %v, ptr %p) {
; CHECK-LABEL: store_v2f64_with_unfolded_gep_offset:
; CHECK:         .functype store_v2f64_with_unfolded_gep_offset (v128, i32) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 0
; CHECK-NEXT:    # fallthrough-return
  %s = getelementptr <2 x double>, ptr %p, i32 1
  store <2 x double> %v , ptr %s
  ret void
}
3622
; Store to a constant absolute address: base `i32.const 0`, the address (32)
; folded into the store's immediate offset.
define void @store_v2f64_to_numeric_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_numeric_address:
; CHECK:         .functype store_v2f64_to_numeric_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store 32
; CHECK-NEXT:    # fallthrough-return
  %s = inttoptr i32 32 to ptr
  store <2 x double> %v , ptr %s
  ret void
}
3635
; Store to a global: the symbol is folded into the store as a symbolic
; immediate (`v128.store gv_v2f64`) over a `i32.const 0` base.
define void @store_v2f64_to_global_address(<2 x double> %v) {
; CHECK-LABEL: store_v2f64_to_global_address:
; CHECK:         .functype store_v2f64_to_global_address (v128) -> ()
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    i32.const 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.store gv_v2f64
; CHECK-NEXT:    # fallthrough-return
  store <2 x double> %v , ptr @gv_v2f64
  ret void
}
3647