xref: /llvm-project/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll (revision 055fb7795aa219a3d274d280ec9129784f169f56)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=loop-idiom < %s -S | FileCheck %s
3target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
4target triple = "x86_64-unknown-linux-gnu"
5
6;; memcpy.atomic formation (atomic load & store)
7define void @test1(i64 %Size) nounwind ssp {
8; CHECK-LABEL: @test1(
9; CHECK-NEXT:  bb.nph:
10; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
11; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
12; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[BASE]], i64 [[SIZE:%.*]], i32 1)
13; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
14; CHECK:       for.body:
15; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
16; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
17; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
18; CHECK-NEXT:    [[V:%.*]] = load atomic i8, ptr [[I_0_014]] unordered, align 1
19; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
20; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
21; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
22; CHECK:       for.end:
23; CHECK-NEXT:    ret void
24;
25bb.nph:
26  %Base = alloca i8, i32 10000
27  %Dest = alloca i8, i32 10000
28  br label %for.body
29
30for.body:                                         ; preds = %bb.nph, %for.body
31  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
32  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
33  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  ; Byte-wise copy in which both the load and the store are unordered atomic
  ; (align 1). The assertions above expect the store to be replaced by one
  ; element-wise atomic memcpy (element size 1, length %Size) in bb.nph,
  ; while the load itself is still present in the loop body.
34  %V = load atomic i8, ptr %I.0.014 unordered, align 1
35  store atomic i8 %V, ptr %DestI unordered, align 1
36  %indvar.next = add i64 %indvar, 1
37  %exitcond = icmp eq i64 %indvar.next, %Size
38  br i1 %exitcond, label %for.end, label %for.body
39
40for.end:                                          ; preds = %for.body
41  ret void
42}
43
44;; memcpy.atomic formation (atomic store, normal load)
45define void @test2(i64 %Size) nounwind ssp {
46; CHECK-LABEL: @test2(
47; CHECK-NEXT:  bb.nph:
48; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
49; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
50; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[BASE]], i64 [[SIZE:%.*]], i32 1)
51; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
52; CHECK:       for.body:
53; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
54; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
55; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
56; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[I_0_014]], align 1
57; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
58; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
59; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
60; CHECK:       for.end:
61; CHECK-NEXT:    ret void
62;
63bb.nph:
64  %Base = alloca i8, i32 10000
65  %Dest = alloca i8, i32 10000
66  br label %for.body
67
68for.body:                                         ; preds = %bb.nph, %for.body
69  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
70  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
71  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  ; Only the store is atomic here; the load is a plain i8 load with align 1.
  ; The assertions above still expect an element-wise atomic memcpy
  ; (element size 1) to be emitted in bb.nph in place of the store.
72  %V = load i8, ptr %I.0.014, align 1
73  store atomic i8 %V, ptr %DestI unordered, align 1
74  %indvar.next = add i64 %indvar, 1
75  %exitcond = icmp eq i64 %indvar.next, %Size
76  br i1 %exitcond, label %for.end, label %for.body
77
78for.end:                                          ; preds = %for.body
79  ret void
80}
81
82;; memcpy.atomic formation (atomic store, normal load w/ no align)
83define void @test2b(i64 %Size) nounwind ssp {
84; CHECK-LABEL: @test2b(
85; CHECK-NEXT:  bb.nph:
86; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
87; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
88; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[BASE]], i64 [[SIZE:%.*]], i32 1)
89; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
90; CHECK:       for.body:
91; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
92; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
93; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
94; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[I_0_014]], align 1
95; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
96; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
97; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
98; CHECK:       for.end:
99; CHECK-NEXT:    ret void
100;
101bb.nph:
102  %Base = alloca i8, i32 10000
103  %Dest = alloca i8, i32 10000
104  br label %for.body
105
106for.body:                                         ; preds = %bb.nph, %for.body
107  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
108  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
109  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  ; The plain load deliberately carries no explicit alignment. In the
  ; expected output it is printed with align 1, and the element-wise atomic
  ; memcpy (element size 1) is still expected to be emitted in bb.nph.
110  %V = load i8, ptr %I.0.014
111  store atomic i8 %V, ptr %DestI unordered, align 1
112  %indvar.next = add i64 %indvar, 1
113  %exitcond = icmp eq i64 %indvar.next, %Size
114  br i1 %exitcond, label %for.end, label %for.body
115
116for.end:                                          ; preds = %for.body
117  ret void
118}
119
120;; memcpy.atomic formation rejection (atomic store, normal load w/ bad align)
121define void @test2c(i64 %Size) nounwind ssp {
122; CHECK-LABEL: @test2c(
123; CHECK-NEXT:  bb.nph:
124; CHECK-NEXT:    [[BASE:%.*]] = alloca i32, i32 10000, align 4
125; CHECK-NEXT:    [[DEST:%.*]] = alloca i32, i32 10000, align 4
126; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
127; CHECK:       for.body:
128; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
129; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
130; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
131; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[I_0_014]], align 2
132; CHECK-NEXT:    store atomic i32 [[V]], ptr [[DESTI]] unordered, align 4
133; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
134; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
135; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
136; CHECK:       for.end:
137; CHECK-NEXT:    ret void
138;
139bb.nph:
140  %Base = alloca i32, i32 10000
141  %Dest = alloca i32, i32 10000
142  br label %for.body
143
144for.body:                                         ; preds = %bb.nph, %for.body
145  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
146  %I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
147  %DestI = getelementptr i32, ptr %Dest, i64 %indvar
  ; Negative test: the plain i32 load is only align 2, i.e. less than the
  ; 4-byte element, while the atomic store is align 4. The assertions above
  ; expect the loop to come out unchanged, with no memcpy call emitted.
148  %V = load i32, ptr %I.0.014, align 2
149  store atomic i32 %V, ptr %DestI unordered, align 4
150  %indvar.next = add i64 %indvar, 1
151  %exitcond = icmp eq i64 %indvar.next, %Size
152  br i1 %exitcond, label %for.end, label %for.body
153
154for.end:                                          ; preds = %for.body
155  ret void
156}
157
158;; memcpy.atomic formation rejection (atomic store w/ bad align, normal load)
159define void @test2d(i64 %Size) nounwind ssp {
160; CHECK-LABEL: @test2d(
161; CHECK-NEXT:  bb.nph:
162; CHECK-NEXT:    [[BASE:%.*]] = alloca i32, i32 10000, align 4
163; CHECK-NEXT:    [[DEST:%.*]] = alloca i32, i32 10000, align 4
164; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
165; CHECK:       for.body:
166; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
167; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
168; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
169; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[I_0_014]], align 4
170; CHECK-NEXT:    store atomic i32 [[V]], ptr [[DESTI]] unordered, align 2
171; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
172; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
173; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
174; CHECK:       for.end:
175; CHECK-NEXT:    ret void
176;
177bb.nph:
178  %Base = alloca i32, i32 10000
179  %Dest = alloca i32, i32 10000
180  br label %for.body
181
182for.body:                                         ; preds = %bb.nph, %for.body
183  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
184  %I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
185  %DestI = getelementptr i32, ptr %Dest, i64 %indvar
  ; Negative test, mirror image of the under-aligned-load case: here the
  ; atomic i32 store is the access that is only align 2, while the plain
  ; load is align 4. The assertions above expect the loop to come out
  ; unchanged, with no memcpy call emitted.
186  %V = load i32, ptr %I.0.014, align 4
187  store atomic i32 %V, ptr %DestI unordered, align 2
188  %indvar.next = add i64 %indvar, 1
189  %exitcond = icmp eq i64 %indvar.next, %Size
190  br i1 %exitcond, label %for.end, label %for.body
191
192for.end:                                          ; preds = %for.body
193  ret void
194}
195
196
197;; memcpy.atomic formation (normal store, atomic load)
198define void @test3(i64 %Size) nounwind ssp {
199; CHECK-LABEL: @test3(
200; CHECK-NEXT:  bb.nph:
201; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
202; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
203; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[BASE]], i64 [[SIZE:%.*]], i32 1)
204; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
205; CHECK:       for.body:
206; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
207; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
208; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
209; CHECK-NEXT:    [[V:%.*]] = load atomic i8, ptr [[I_0_014]] unordered, align 1
210; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
211; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
212; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
213; CHECK:       for.end:
214; CHECK-NEXT:    ret void
215;
216bb.nph:
217  %Base = alloca i8, i32 10000
218  %Dest = alloca i8, i32 10000
219  br label %for.body
220
221for.body:                                         ; preds = %bb.nph, %for.body
222  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
223  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
224  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  ; Only the load is atomic here; the store is a plain i8 store with
  ; align 1. The assertions above still expect an element-wise atomic
  ; memcpy (element size 1) to be emitted in bb.nph in place of the store.
225  %V = load atomic i8, ptr %I.0.014 unordered, align 1
226  store i8 %V, ptr %DestI, align 1
227  %indvar.next = add i64 %indvar, 1
228  %exitcond = icmp eq i64 %indvar.next, %Size
229  br i1 %exitcond, label %for.end, label %for.body
230
231for.end:                                          ; preds = %for.body
232  ret void
233}
234
235;; memcpy.atomic formation (normal store w/ no align, atomic load)
236define void @test3b(i64 %Size) nounwind ssp {
237; CHECK-LABEL: @test3b(
238; CHECK-NEXT:  bb.nph:
239; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
240; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
241; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[DEST]], ptr align 1 [[BASE]], i64 [[SIZE:%.*]], i32 1)
242; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
243; CHECK:       for.body:
244; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
245; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
246; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
247; CHECK-NEXT:    [[V:%.*]] = load atomic i8, ptr [[I_0_014]] unordered, align 1
248; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
249; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
250; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
251; CHECK:       for.end:
252; CHECK-NEXT:    ret void
253;
254bb.nph:
255  %Base = alloca i8, i32 10000
256  %Dest = alloca i8, i32 10000
257  br label %for.body
258
259for.body:                                         ; preds = %bb.nph, %for.body
260  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
261  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
262  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  ; The plain store deliberately carries no explicit alignment. This is a
  ; positive test: the assertions above expect the element-wise atomic
  ; memcpy (element size 1, both pointers align 1) to be emitted in bb.nph.
263  %V = load atomic i8, ptr %I.0.014 unordered, align 1
264  store i8 %V, ptr %DestI
265  %indvar.next = add i64 %indvar, 1
266  %exitcond = icmp eq i64 %indvar.next, %Size
267  br i1 %exitcond, label %for.end, label %for.body
268
269for.end:                                          ; preds = %for.body
270  ret void
271}
272
273;; memcpy.atomic formation rejection (normal store, atomic load w/ bad align)
274define void @test3c(i64 %Size) nounwind ssp {
275; CHECK-LABEL: @test3c(
276; CHECK-NEXT:  bb.nph:
277; CHECK-NEXT:    [[BASE:%.*]] = alloca i32, i32 10000, align 4
278; CHECK-NEXT:    [[DEST:%.*]] = alloca i32, i32 10000, align 4
279; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
280; CHECK:       for.body:
281; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
282; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
283; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
284; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[I_0_014]] unordered, align 2
285; CHECK-NEXT:    store i32 [[V]], ptr [[DESTI]], align 4
286; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
287; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
288; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
289; CHECK:       for.end:
290; CHECK-NEXT:    ret void
291;
292bb.nph:
293  %Base = alloca i32, i32 10000
294  %Dest = alloca i32, i32 10000
295  br label %for.body
296
297for.body:                                         ; preds = %bb.nph, %for.body
298  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
299  %I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
300  %DestI = getelementptr i32, ptr %Dest, i64 %indvar
  ; Negative test: the atomic i32 load is only align 2, i.e. less than the
  ; 4-byte element, while the plain store is align 4. The assertions above
  ; expect the loop to come out unchanged, with no memcpy call emitted.
301  %V = load atomic i32, ptr %I.0.014 unordered, align 2
302  store i32 %V, ptr %DestI, align 4
303  %indvar.next = add i64 %indvar, 1
304  %exitcond = icmp eq i64 %indvar.next, %Size
305  br i1 %exitcond, label %for.end, label %for.body
306
307for.end:                                          ; preds = %for.body
308  ret void
309}
310
311;; memcpy.atomic formation rejection (normal store w/ bad align, atomic load)
312define void @test3d(i64 %Size) nounwind ssp {
313; CHECK-LABEL: @test3d(
314; CHECK-NEXT:  bb.nph:
315; CHECK-NEXT:    [[BASE:%.*]] = alloca i32, i32 10000, align 4
316; CHECK-NEXT:    [[DEST:%.*]] = alloca i32, i32 10000, align 4
317; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
318; CHECK:       for.body:
319; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
320; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
321; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
322; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[I_0_014]] unordered, align 4
323; CHECK-NEXT:    store i32 [[V]], ptr [[DESTI]], align 2
324; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
325; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
326; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
327; CHECK:       for.end:
328; CHECK-NEXT:    ret void
329;
330bb.nph:
331  %Base = alloca i32, i32 10000
332  %Dest = alloca i32, i32 10000
333  br label %for.body
334
335for.body:                                         ; preds = %bb.nph, %for.body
336  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
337  %I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
338  %DestI = getelementptr i32, ptr %Dest, i64 %indvar
  ; Negative test: the plain i32 store is only align 2, i.e. less than the
  ; 4-byte element, while the atomic load is align 4. The assertions above
  ; expect the loop to come out unchanged, with no memcpy call emitted.
339  %V = load atomic i32, ptr %I.0.014 unordered, align 4
340  store i32 %V, ptr %DestI, align 2
341  %indvar.next = add i64 %indvar, 1
342  %exitcond = icmp eq i64 %indvar.next, %Size
343  br i1 %exitcond, label %for.end, label %for.body
344
345for.end:                                          ; preds = %for.body
346  ret void
347}
348
349
350;; memcpy.atomic formation rejection (atomic load, ordered-atomic store)
351define void @test4(i64 %Size) nounwind ssp {
352; CHECK-LABEL: @test4(
353; CHECK-NEXT:  bb.nph:
354; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
355; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
356; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
357; CHECK:       for.body:
358; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
359; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
360; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
361; CHECK-NEXT:    [[V:%.*]] = load atomic i8, ptr [[I_0_014]] unordered, align 1
362; CHECK-NEXT:    store atomic i8 [[V]], ptr [[DESTI]] monotonic, align 1
363; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
364; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
365; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
366; CHECK:       for.end:
367; CHECK-NEXT:    ret void
368;
369bb.nph:
370  %Base = alloca i8, i32 10000
371  %Dest = alloca i8, i32 10000
372  br label %for.body
373
374for.body:                                         ; preds = %bb.nph, %for.body
375  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
376  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
377  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  ; Negative test: the store uses monotonic ordering rather than unordered
  ; (the load is still unordered). The assertions above expect the loop to
  ; come out unchanged, with no memcpy call emitted.
378  %V = load atomic i8, ptr %I.0.014 unordered, align 1
379  store atomic i8 %V, ptr %DestI monotonic, align 1
380  %indvar.next = add i64 %indvar, 1
381  %exitcond = icmp eq i64 %indvar.next, %Size
382  br i1 %exitcond, label %for.end, label %for.body
383
384for.end:                                          ; preds = %for.body
385  ret void
386}
387
388;; memcpy.atomic formation rejection (ordered-atomic load, unordered-atomic store)
389define void @test5(i64 %Size) nounwind ssp {
390; CHECK-LABEL: @test5(
391; CHECK-NEXT:  bb.nph:
392; CHECK-NEXT:    [[BASE:%.*]] = alloca i8, i32 10000, align 1
393; CHECK-NEXT:    [[DEST:%.*]] = alloca i8, i32 10000, align 1
394; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
395; CHECK:       for.body:
396; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
397; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVAR]]
398; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[INDVAR]]
399; CHECK-NEXT:    [[V:%.*]] = load atomic i8, ptr [[I_0_014]] monotonic, align 1
400; CHECK-NEXT:    store atomic i8 [[V]], ptr [[DESTI]] unordered, align 1
401; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
402; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
403; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
404; CHECK:       for.end:
405; CHECK-NEXT:    ret void
406;
407bb.nph:
408  %Base = alloca i8, i32 10000
409  %Dest = alloca i8, i32 10000
410  br label %for.body
411
412for.body:                                         ; preds = %bb.nph, %for.body
413  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
414  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
415  %DestI = getelementptr i8, ptr %Dest, i64 %indvar
  ; Negative test: the load uses monotonic ordering rather than unordered
  ; (the store is still unordered). The assertions above expect the loop to
  ; come out unchanged, with no memcpy call emitted.
416  %V = load atomic i8, ptr %I.0.014 monotonic, align 1
417  store atomic i8 %V, ptr %DestI unordered, align 1
418  %indvar.next = add i64 %indvar, 1
419  %exitcond = icmp eq i64 %indvar.next, %Size
420  br i1 %exitcond, label %for.end, label %for.body
421
422for.end:                                          ; preds = %for.body
423  ret void
424}
425
426;; memcpy.atomic formation (atomic load & store) -- element size 2
427define void @test6(i64 %Size) nounwind ssp {
428; CHECK-LABEL: @test6(
429; CHECK-NEXT:  bb.nph:
430; CHECK-NEXT:    [[BASE:%.*]] = alloca i16, i32 10000, align 2
431; CHECK-NEXT:    [[DEST:%.*]] = alloca i16, i32 10000, align 2
432; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 1
433; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 2 [[DEST]], ptr align 2 [[BASE]], i64 [[TMP0]], i32 2)
434; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
435; CHECK:       for.body:
436; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
437; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i16, ptr [[BASE]], i64 [[INDVAR]]
438; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i16, ptr [[DEST]], i64 [[INDVAR]]
439; CHECK-NEXT:    [[V:%.*]] = load atomic i16, ptr [[I_0_014]] unordered, align 2
440; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
441; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
442; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
443; CHECK:       for.end:
444; CHECK-NEXT:    ret void
445;
446bb.nph:
447  %Base = alloca i16, i32 10000
448  %Dest = alloca i16, i32 10000
449  br label %for.body
450
451for.body:                                         ; preds = %bb.nph, %for.body
452  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
453  %I.0.014 = getelementptr i16, ptr %Base, i64 %indvar
454  %DestI = getelementptr i16, ptr %Dest, i64 %indvar
  ; 2-byte elements, both accesses unordered atomic with align 2. The
  ; assertions above expect a memcpy with element size 2 whose byte length
  ; is %Size shifted left by 1 (i.e. %Size * 2).
455  %V = load atomic i16, ptr %I.0.014 unordered, align 2
456  store atomic i16 %V, ptr %DestI unordered, align 2
457  %indvar.next = add i64 %indvar, 1
458  %exitcond = icmp eq i64 %indvar.next, %Size
459  br i1 %exitcond, label %for.end, label %for.body
460
461for.end:                                          ; preds = %for.body
462  ret void
463}
464
465;; memcpy.atomic formation (atomic load & store) -- element size 4
466define void @test7(i64 %Size) nounwind ssp {
467; CHECK-LABEL: @test7(
468; CHECK-NEXT:  bb.nph:
469; CHECK-NEXT:    [[BASE:%.*]] = alloca i32, i32 10000, align 4
470; CHECK-NEXT:    [[DEST:%.*]] = alloca i32, i32 10000, align 4
471; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 2
472; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[BASE]], i64 [[TMP0]], i32 4)
473; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
474; CHECK:       for.body:
475; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
476; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]]
477; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]]
478; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[I_0_014]] unordered, align 4
479; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
480; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
481; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
482; CHECK:       for.end:
483; CHECK-NEXT:    ret void
484;
485bb.nph:
486  %Base = alloca i32, i32 10000
487  %Dest = alloca i32, i32 10000
488  br label %for.body
489
490for.body:                                         ; preds = %bb.nph, %for.body
491  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
492  %I.0.014 = getelementptr i32, ptr %Base, i64 %indvar
493  %DestI = getelementptr i32, ptr %Dest, i64 %indvar
  ; 4-byte elements, both accesses unordered atomic with align 4. The
  ; assertions above expect a memcpy with element size 4 whose byte length
  ; is %Size shifted left by 2 (i.e. %Size * 4).
494  %V = load atomic i32, ptr %I.0.014 unordered, align 4
495  store atomic i32 %V, ptr %DestI unordered, align 4
496  %indvar.next = add i64 %indvar, 1
497  %exitcond = icmp eq i64 %indvar.next, %Size
498  br i1 %exitcond, label %for.end, label %for.body
499
500for.end:                                          ; preds = %for.body
501  ret void
502}
503
504;; memcpy.atomic formation (atomic load & store) -- element size 8
505define void @test8(i64 %Size) nounwind ssp {
506; CHECK-LABEL: @test8(
507; CHECK-NEXT:  bb.nph:
508; CHECK-NEXT:    [[BASE:%.*]] = alloca i64, i32 10000, align 8
509; CHECK-NEXT:    [[DEST:%.*]] = alloca i64, i32 10000, align 8
510; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 3
511; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 8 [[DEST]], ptr align 8 [[BASE]], i64 [[TMP0]], i32 8)
512; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
513; CHECK:       for.body:
514; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
515; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i64, ptr [[BASE]], i64 [[INDVAR]]
516; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i64, ptr [[DEST]], i64 [[INDVAR]]
517; CHECK-NEXT:    [[V:%.*]] = load atomic i64, ptr [[I_0_014]] unordered, align 8
518; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
519; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
520; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
521; CHECK:       for.end:
522; CHECK-NEXT:    ret void
523;
524bb.nph:
525  %Base = alloca i64, i32 10000
526  %Dest = alloca i64, i32 10000
527  br label %for.body
528
529for.body:                                         ; preds = %bb.nph, %for.body
530  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
531  %I.0.014 = getelementptr i64, ptr %Base, i64 %indvar
532  %DestI = getelementptr i64, ptr %Dest, i64 %indvar
  ; 8-byte elements, both accesses unordered atomic with align 8. The
  ; assertions above expect a memcpy with element size 8 whose byte length
  ; is %Size shifted left by 3 (i.e. %Size * 8).
533  %V = load atomic i64, ptr %I.0.014 unordered, align 8
534  store atomic i64 %V, ptr %DestI unordered, align 8
535  %indvar.next = add i64 %indvar, 1
536  %exitcond = icmp eq i64 %indvar.next, %Size
537  br i1 %exitcond, label %for.end, label %for.body
538
539for.end:                                          ; preds = %for.body
540  ret void
541}
542
543;; memcpy.atomic formation (atomic load & store) -- element size 16
544define void @test9(i64 %Size) nounwind ssp {
545; CHECK-LABEL: @test9(
546; CHECK-NEXT:  bb.nph:
547; CHECK-NEXT:    [[BASE:%.*]] = alloca i128, i32 10000, align 8
548; CHECK-NEXT:    [[DEST:%.*]] = alloca i128, i32 10000, align 8
549; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i64 [[SIZE:%.*]], 4
550; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 16 [[DEST]], ptr align 16 [[BASE]], i64 [[TMP0]], i32 16)
551; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
552; CHECK:       for.body:
553; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
554; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i128, ptr [[BASE]], i64 [[INDVAR]]
555; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i128, ptr [[DEST]], i64 [[INDVAR]]
556; CHECK-NEXT:    [[V:%.*]] = load atomic i128, ptr [[I_0_014]] unordered, align 16
557; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
558; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]]
559; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
560; CHECK:       for.end:
561; CHECK-NEXT:    ret void
562;
563bb.nph:
564  %Base = alloca i128, i32 10000
565  %Dest = alloca i128, i32 10000
566  br label %for.body
567
568for.body:                                         ; preds = %bb.nph, %for.body
569  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
570  %I.0.014 = getelementptr i128, ptr %Base, i64 %indvar
571  %DestI = getelementptr i128, ptr %Dest, i64 %indvar
  ; 16-byte elements, both accesses unordered atomic with align 16. This is
  ; a positive test: the assertions above expect a memcpy with element size
  ; 16 whose byte length is %Size shifted left by 4 (i.e. %Size * 16).
572  %V = load atomic i128, ptr %I.0.014 unordered, align 16
573  store atomic i128 %V, ptr %DestI unordered, align 16
574  %indvar.next = add i64 %indvar, 1
575  %exitcond = icmp eq i64 %indvar.next, %Size
576  br i1 %exitcond, label %for.end, label %for.body
577
578for.end:                                          ; preds = %for.body
579  ret void
580}
581
582;; memcpy.atomic formation rejection (atomic load & store) -- element size 32
583define void @test10(i64 %Size) nounwind ssp {
584; CHECK-LABEL: @test10(
585; CHECK-NEXT:  bb.nph:
586; CHECK-NEXT:    [[BASE:%.*]] = alloca i256, i32 10000, align 8
587; CHECK-NEXT:    [[DEST:%.*]] = alloca i256, i32 10000, align 8
588; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
589; CHECK:       for.body:
590; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
591; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i256, ptr [[BASE]], i64 [[INDVAR]]
592; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i256, ptr [[DEST]], i64 [[INDVAR]]
593; CHECK-NEXT:    [[V:%.*]] = load atomic i256, ptr [[I_0_014]] unordered, align 32
594; CHECK-NEXT:    store atomic i256 [[V]], ptr [[DESTI]] unordered, align 32
595; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
596; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
597; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
598; CHECK:       for.end:
599; CHECK-NEXT:    ret void
600;
601bb.nph:
602  %Base = alloca i256, i32 10000
603  %Dest = alloca i256, i32 10000
604  br label %for.body
605
606for.body:                                         ; preds = %bb.nph, %for.body
607  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
608  %I.0.014 = getelementptr i256, ptr %Base, i64 %indvar
609  %DestI = getelementptr i256, ptr %Dest, i64 %indvar
  ; Negative test: 32-byte elements (i256), both accesses unordered atomic
  ; with align 32. The assertions above expect the loop to come out
  ; unchanged, with no memcpy call emitted for this element size.
610  %V = load atomic i256, ptr %I.0.014 unordered, align 32
611  store atomic i256 %V, ptr %DestI unordered, align 32
612  %indvar.next = add i64 %indvar, 1
613  %exitcond = icmp eq i64 %indvar.next, %Size
614  br i1 %exitcond, label %for.end, label %for.body
615
616for.end:                                          ; preds = %for.body
617  ret void
618}
619
620
621
622; Make sure that atomic memset doesn't get recognized by mistake
623define void @test_nomemset(ptr %Base, i64 %Size) nounwind ssp {
624; CHECK-LABEL: @test_nomemset(
625; CHECK-NEXT:  bb.nph:
626; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
627; CHECK:       for.body:
628; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
629; CHECK-NEXT:    [[I_0_014:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[INDVAR]]
630; CHECK-NEXT:    store atomic i8 0, ptr [[I_0_014]] unordered, align 1
631; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
632; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
633; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
634; CHECK:       for.end:
635; CHECK-NEXT:    ret void
636;
637bb.nph:                                           ; entry block, no predecessors
638  br label %for.body
639
640for.body:                                         ; preds = %bb.nph, %for.body
641  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
642  %I.0.014 = getelementptr i8, ptr %Base, i64 %indvar
  ; Memset-shaped loop: a constant zero byte is stored to consecutive
  ; addresses, but the store is unordered atomic. The assertions above
  ; expect the loop to come out unchanged (no memset of any kind).
643  store atomic i8 0, ptr %I.0.014 unordered, align 1
644  %indvar.next = add i64 %indvar, 1
645  %exitcond = icmp eq i64 %indvar.next, %Size
646  br i1 %exitcond, label %for.end, label %for.body
647
648for.end:                                          ; preds = %for.body
649  ret void
650}
651
652; Verify that unordered memset_pattern isn't recognized.
653; This is a replica of test11_pattern from basic.ll
654define void @test_nomemset_pattern(ptr nocapture %P) nounwind ssp {
655; CHECK-LABEL: @test_nomemset_pattern(
656; CHECK-NEXT:  entry:
657; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
658; CHECK:       for.body:
659; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
660; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[INDVAR]]
661; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX]] unordered, align 4
662; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
663; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 10000
664; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
665; CHECK:       for.end:
666; CHECK-NEXT:    ret void
667;
668entry:
669  br label %for.body
670
671for.body:                                         ; preds = %entry, %for.body
672  %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
673  %arrayidx = getelementptr i32, ptr %P, i64 %indvar
  ; Pattern-fill-shaped loop: the constant i32 1 is stored to 10000
  ; consecutive elements, but the store is unordered atomic. The assertions
  ; above expect the loop to come out unchanged (no pattern memset).
674  store atomic i32 1, ptr %arrayidx unordered, align 4
675  %indvar.next = add i64 %indvar, 1
676  %exitcond = icmp eq i64 %indvar.next, 10000
677  br i1 %exitcond, label %for.end, label %for.body
678
679for.end:                                          ; preds = %for.body
680  ret void
681}
682
683; Make sure that neither an atomic memcpy nor an atomic memmove gets
684; recognized by mistake when looping with positive stride.
; Source and destination are both derived from %Src: each iteration reads the
; byte at %Src[%indvar + 1] with a plain (non-atomic) load and writes it to
; %Src[%indvar] with an unordered atomic store, so the two ranges overlap.
; The assertions expect the load/store pair to remain inside for.body.
685define void @test_no_memcpy_memmove1(ptr %Src, i64 %Size) {
686; CHECK-LABEL: @test_no_memcpy_memmove1(
687; CHECK-NEXT:  bb.nph:
688; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
689; CHECK:       for.body:
690; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
691; CHECK-NEXT:    [[STEP:%.*]] = add nuw nsw i64 [[INDVAR]], 1
692; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[STEP]]
693; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVAR]]
694; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[SRCI]], align 1
695; CHECK-NEXT:    store atomic i8 [[V]], ptr [[DESTI]] unordered, align 1
696; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
697; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE:%.*]]
698; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
699; CHECK:       for.end:
700; CHECK-NEXT:    ret void
701;
702bb.nph:
703  br label %for.body
704
705for.body:                                         ; preds = %bb.nph, %for.body
706  %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
; %Step is the read index, always one element ahead of the write index %indvar.
707  %Step = add nuw nsw i64 %indvar, 1
708  %SrcI = getelementptr i8, ptr %Src, i64 %Step
709  %DestI = getelementptr i8, ptr %Src, i64 %indvar
; Non-atomic load feeding an unordered atomic store into the same buffer.
710  %V = load i8, ptr %SrcI, align 1
711  store atomic i8 %V, ptr %DestI unordered, align 1
712  %indvar.next = add i64 %indvar, 1
713  %exitcond = icmp eq i64 %indvar.next, %Size
714  br i1 %exitcond, label %for.end, label %for.body
715
716for.end:                                          ; preds = %for.body
717  ret void
718}
719
720; Make sure that neither an atomic memcpy nor an atomic memmove gets
721; recognized by mistake when looping with negative stride.
; The entry block skips the loop unless %Size > 0. The loop then walks %indvar
; downward from %Size, reading %Src[%indvar - 1] with a plain (non-atomic) load
; and writing it to %Src[%indvar] with an unordered atomic store, and repeats
; while %indvar > 1. Source and destination share the base pointer %Src.
; The expected output contains for.body.preheader and for.end.loopexit blocks
; that are absent from the input, while the load/store pair stays in for.body.
722define void @test_no_memcpy_memmove2(ptr %Src, i64 %Size) {
723; CHECK-LABEL: @test_no_memcpy_memmove2(
724; CHECK-NEXT:  bb.nph:
725; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i64 [[SIZE:%.*]], 0
726; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
727; CHECK:       for.body.preheader:
728; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
729; CHECK:       for.body:
730; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[STEP:%.*]], [[FOR_BODY]] ], [ [[SIZE]], [[FOR_BODY_PREHEADER]] ]
731; CHECK-NEXT:    [[STEP]] = add nsw i64 [[INDVAR]], -1
732; CHECK-NEXT:    [[SRCI:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[STEP]]
733; CHECK-NEXT:    [[V:%.*]] = load i8, ptr [[SRCI]], align 1
734; CHECK-NEXT:    [[DESTI:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDVAR]]
735; CHECK-NEXT:    store atomic i8 [[V]], ptr [[DESTI]] unordered, align 1
736; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp sgt i64 [[INDVAR]], 1
737; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
738; CHECK:       for.end.loopexit:
739; CHECK-NEXT:    br label [[FOR_END]]
740; CHECK:       for.end:
741; CHECK-NEXT:    ret void
742;
743bb.nph:
; Guard: only enter the loop for a strictly positive %Size.
744  %cmp1 = icmp sgt i64 %Size, 0
745  br i1 %cmp1, label %for.body, label %for.end
746
747for.body:                                           ; preds = %bb.nph, %for.body
748  %indvar = phi i64 [ %Step, %for.body ], [ %Size, %bb.nph ]
; %Step is both the read index for this iteration and the next value of %indvar.
749  %Step = add nsw i64 %indvar, -1
750  %SrcI = getelementptr inbounds i8, ptr %Src, i64 %Step
751  %V = load i8, ptr %SrcI, align 1
752  %DestI = getelementptr inbounds i8, ptr %Src, i64 %indvar
753  store atomic i8 %V, ptr %DestI unordered, align 1
754  %exitcond = icmp sgt i64 %indvar, 1
755  br i1 %exitcond, label %for.body, label %for.end
756
757for.end:                                          ; preds = %for.body, %bb.nph
758  ret void
759}
760