; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=instcombine < %s | FileCheck %s --check-prefixes=CHECK,LITTLE
; RUN: opt -S -passes=instcombine -data-layout="E" < %s | FileCheck %s --check-prefixes=CHECK,BIG

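; Forward a store of i16 258 (0x0102) to a narrower i8 load of the first byte;
; the extracted byte depends on endianness.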
define i8 @load_smaller_int(ptr %p) {
; LITTLE-LABEL: @load_smaller_int(
; LITTLE-NEXT:    store i16 258, ptr [[P:%.*]], align 2
; LITTLE-NEXT:    ret i8 2
;
; BIG-LABEL: @load_smaller_int(
; BIG-NEXT:    store i16 258, ptr [[P:%.*]], align 2
; BIG-NEXT:    ret i8 1
;
  store i16 258, ptr %p
  %load = load i8, ptr %p
  ret i8 %load
}

; This case can *not* be forwarded, as we only see part of the stored value.
define i32 @load_larger_int(ptr %p) {
; CHECK-LABEL: @load_larger_int(
; CHECK-NEXT:    store i16 258, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store i16 258, ptr %p
  %load = load i32, ptr %p
  ret i32 %load
}

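; A load of the first element of a stored constant vector folds to that element.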
define i32 @vec_store_load_first(ptr %p) {
; CHECK-LABEL: @vec_store_load_first(
; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    ret i32 1
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %load = load i32, ptr %p
  ret i32 %load
}

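; Not folded: the i17 elements are not byte-sized, so the in-memory layout of
; the vector is not a simple concatenation of its elements.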
define i17 @vec_store_load_first_odd_size(ptr %p) {
; CHECK-LABEL: @vec_store_load_first_odd_size(
; CHECK-NEXT:    store <2 x i17> <i17 1, i17 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[LOAD:%.*]] = load i17, ptr [[P]], align 4
; CHECK-NEXT:    ret i17 [[LOAD]]
;
  store <2 x i17> <i17 1, i17 2>, ptr %p
  %load = load i17, ptr %p
  ret i17 %load
}

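; The stored value is a vector constant expression; the element load is not folded.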
define i32 @vec_store_load_first_constexpr(ptr %p) {
; CHECK-LABEL: @vec_store_load_first_constexpr(
; CHECK-NEXT:    store <2 x i32> bitcast (i64 ptrtoint (ptr @vec_store_load_first to i64) to <2 x i32>), ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store <2 x i32> bitcast (i64 ptrtoint (ptr @vec_store_load_first to i64) to <2 x i32>), ptr %p, align 8
  %load = load i32, ptr %p, align 4
  ret i32 %load
}

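; Load of the second vector element: the GEP is canonicalized to an i8 GEP,
; but the load itself is not folded.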
define i32 @vec_store_load_second(ptr %p) {
; CHECK-LABEL: @vec_store_load_second(
; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i64 4
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P3]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %p3 = getelementptr i32, ptr %p, i64 1
  %load = load i32, ptr %p3
  ret i32 %load
}

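; A load covering the whole stored vector folds to the equivalent integer
; constant; the value depends on endianness.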
define i64 @vec_store_load_whole(ptr %p) {
; LITTLE-LABEL: @vec_store_load_whole(
; LITTLE-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; LITTLE-NEXT:    ret i64 8589934593
;
; BIG-LABEL: @vec_store_load_whole(
; BIG-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; BIG-NEXT:    ret i64 4294967298
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %load = load i64, ptr %p
  ret i64 %load
}

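; The load only partially overlaps the stored vector and is not folded.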
define i32 @vec_store_load_overlap(ptr %p) {
; CHECK-LABEL: @vec_store_load_overlap(
; CHECK-NEXT:    store <2 x i32> <i32 1, i32 2>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[P4:%.*]] = getelementptr i8, ptr [[P]], i64 2
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[P4]], align 2
; CHECK-NEXT:    ret i32 [[LOAD]]
;
  store <2 x i32> <i32 1, i32 2>, ptr %p
  %p4 = getelementptr i8, ptr %p, i64 2
  %load = load i32, ptr %p4, align 2
  ret i32 %load
}

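; Loads following stores of scalable vector splats; none of these are folded.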
define i32 @load_i32_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_i32_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load i32, ptr %a, align 4
  ret i32 %0
}

define i64 @load_i64_store_nxv8i8(ptr %a) {
; CHECK-LABEL: @load_i64_store_nxv8i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 8 x i8> splat (i8 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[LOAD]]
;
entry:
  store <vscale x 8 x i8> splat (i8 1), ptr %a, align 16
  %load = load i64, ptr %a, align 8
  ret i64 %load
}

define i64 @load_i64_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_i64_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[LOAD]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %load = load i64, ptr %a, align 8
  ret i64 %load
}

define i8 @load_i8_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_i8_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[A]], align 1
; CHECK-NEXT:    ret i8 [[LOAD]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %load = load i8, ptr %a, align 1
  ret i8 %load
}

define float @load_f32_store_nxv4f32(ptr %a) {
; CHECK-LABEL: @load_f32_store_nxv4f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x float> splat (float 1.000000e+00), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  store <vscale x 4 x float> splat (float 1.0), ptr %a, align 16
  %0 = load float, ptr %a, align 4
  ret float %0
}

define i32 @load_i32_store_nxv4f32(ptr %a) {
; CHECK-LABEL: @load_i32_store_nxv4f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x float> splat (float 1.000000e+00), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[LOAD]]
;
entry:
  store <vscale x 4 x float> splat (float 1.0), ptr %a, align 16
  %load = load i32, ptr %a, align 4
  ret i32 %load
}

define <4 x i32> @load_v4i32_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_v4i32_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    ret <4 x i32> [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load <4 x i32>, ptr %a, align 16
  ret <4 x i32> %0
}

define <4 x i16> @load_v4i16_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_v4i16_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    ret <4 x i16> [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load <4 x i16>, ptr %a, align 16
  ret <4 x i16> %0
}

; Loaded data type exceeds the known minimum size of the store.
define i64 @load_i64_store_nxv4i8(ptr %a) {
; CHECK-LABEL: @load_i64_store_nxv4i8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i8> splat (i8 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[LOAD:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    ret i64 [[LOAD]]
;
entry:
  store <vscale x 4 x i8> splat (i8 1), ptr %a, align 16
  %load = load i64, ptr %a, align 8
  ret i64 %load
}

; Loaded data size is unknown - we cannot guarantee it won't
; exceed the store size.
define <vscale x 4 x i8> @load_nxv4i8_store_nxv4i32(ptr %a) {
; CHECK-LABEL: @load_nxv4i8_store_nxv4i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <vscale x 4 x i32> splat (i32 1), ptr [[A:%.*]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = load <vscale x 4 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    ret <vscale x 4 x i8> [[TMP0]]
;
entry:
  store <vscale x 4 x i32> splat (i32 1), ptr %a, align 16
  %0 = load <vscale x 4 x i8>, ptr %a, align 16
  ret <vscale x 4 x i8> %0
}

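; The i8 load is wider than the stored i1 and is not folded.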
define i8 @load_i8_store_i1(ptr %a) {
; CHECK-LABEL: @load_i8_store_i1(
; CHECK-NEXT:    store i1 true, ptr [[A:%.*]], align 1
; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[A]], align 1
; CHECK-NEXT:    ret i8 [[V]]
;
  store i1 true, ptr %a
  %v = load i8, ptr %a
  ret i8 %v
}

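; An i1 load from a stored i8 1 folds to true.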
define i1 @load_i1_store_i8(ptr %a) {
; CHECK-LABEL: @load_i1_store_i8(
; CHECK-NEXT:    store i8 1, ptr [[A:%.*]], align 1
; CHECK-NEXT:    ret i1 true
;
  store i8 1, ptr %a
  %v = load i1, ptr %a
  ret i1 %v
}

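; Loads from memory written by a memset of a known constant byte can fold to
; the corresponding repeated-byte constant.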
define i32 @load_after_memset_0(ptr %a) {
; CHECK-LABEL: @load_after_memset_0(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i32 0
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

define float @load_after_memset_0_float(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_float(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret float 0.000000e+00
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load float, ptr %a
  ret float %v
}

define i27 @load_after_memset_0_non_byte_sized(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_non_byte_sized(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i27 0
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i27, ptr %a
  ret i27 %v
}

define i1 @load_after_memset_0_i1(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_i1(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret i1 false
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i1, ptr %a
  ret i1 %v
}

define <4 x i8> @load_after_memset_0_vec(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_vec(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    ret <4 x i8> zeroinitializer
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load <4 x i8>, ptr %a
  ret <4 x i8> %v
}

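; memset with byte 1: folded loads produce the repeated 0x01 pattern
; (e.g. i32 0x01010101 = 16843009).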
define i32 @load_after_memset_1(ptr %a) {
; CHECK-LABEL: @load_after_memset_1(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret i32 16843009
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

define float @load_after_memset_1_float(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_float(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret float 0x3820202020000000
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load float, ptr %a
  ret float %v
}

define i27 @load_after_memset_1_non_byte_sized(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_non_byte_sized(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret i27 16843009
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load i27, ptr %a
  ret i27 %v
}

define i1 @load_after_memset_1_i1(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_i1(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret i1 true
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load i1, ptr %a
  ret i1 %v
}

define <4 x i8> @load_after_memset_1_vec(ptr %a) {
; CHECK-LABEL: @load_after_memset_1_vec(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
; CHECK-NEXT:    ret <4 x i8> splat (i8 1)
;
  call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
  %v = load <4 x i8>, ptr %a
  ret <4 x i8> %v
}

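; The memset byte is not a constant, so the load is not folded.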
define i32 @load_after_memset_unknown(ptr %a, i8 %byte) {
; CHECK-LABEL: @load_after_memset_unknown(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 [[BYTE:%.*]], i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 %byte, i64 16, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

; TODO: Handle load at offset.
define i32 @load_after_memset_0_offset(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 4
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %gep = getelementptr i8, ptr %a, i64 4
  %v = load i32, ptr %gep
  ret i32 %v
}

define i32 @load_after_memset_0_offset_too_large(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset_too_large(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 13
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %gep = getelementptr i8, ptr %a, i64 13
  %v = load i32, ptr %gep
  ret i32 %v
}

define i32 @load_after_memset_0_offset_negative(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_offset_negative(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[A]], i64 -1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %gep = getelementptr i8, ptr %a, i64 -1
  %v = load i32, ptr %gep
  ret i32 %v
}

define i32 @load_after_memset_0_clobber(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_clobber(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    store i8 1, ptr [[A]], align 1
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  store i8 1, ptr %a
  %v = load i32, ptr %a
  ret i32 %v
}

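; The i256 load reads more than the 16 bytes written by the memset and is not folded.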
define i256 @load_after_memset_0_too_small(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_too_small(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i256, ptr [[A]], align 4
; CHECK-NEXT:    ret i256 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i256, ptr %a
  ret i256 %v
}

define i129 @load_after_memset_0_too_small_by_one_bit(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_too_small_by_one_bit(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i129, ptr [[A]], align 4
; CHECK-NEXT:    ret i129 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load i129, ptr %a
  ret i129 %v
}

define i32 @load_after_memset_0_unknown_length(ptr %a, i64 %len) {
; CHECK-LABEL: @load_after_memset_0_unknown_length(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[A:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[A]], align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 %len, i1 false)
  %v = load i32, ptr %a
  ret i32 %v
}

define i32 @load_after_memset_0_atomic(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_atomic(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[A]] seq_cst, align 4
; CHECK-NEXT:    ret i32 [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load atomic i32, ptr %a seq_cst, align 4
  ret i32 %v
}

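; The loaded size is unknown (scalable), so we cannot guarantee it fits within
; the 16 bytes written by the memset.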
define <vscale x 1 x i32> @load_after_memset_0_scalable(ptr %a) {
; CHECK-LABEL: @load_after_memset_0_scalable(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[V:%.*]] = load <vscale x 1 x i32>, ptr [[A]], align 4
; CHECK-NEXT:    ret <vscale x 1 x i32> [[V]]
;
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 16, i1 false)
  %v = load <vscale x 1 x i32>, ptr %a
  ret <vscale x 1 x i32> %v
}

declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)
