xref: /llvm-project/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll (revision d69033d245d4e129142b3908b8d406cdd897c9c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc --arm-memtransfer-tploop=allow -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
3
; test_memcpy: a loop of %n iterations (entered only when %n > 0) that calls
; llvm.memcpy with a run-time length of %m bytes and 4-byte-aligned pointers.
; After each call both pointers advance by %m i32 elements (the expected
; output scales %m by 4 with "lsl.w r12, r3, #2").
; The expected output contains no library call: the copy is emitted inline as
; a wlstp.8 / vldrb.u8 / vstrb.8 / letp loop nested inside the outer loop.
4define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
5; CHECK-LABEL: test_memcpy:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    cmp r2, #1
8; CHECK-NEXT:    it lt
9; CHECK-NEXT:    bxlt lr
10; CHECK-NEXT:  .LBB0_1: @ %for.body.preheader
11; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
12; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
13; CHECK-NEXT:    lsl.w r12, r3, #2
14; CHECK-NEXT:    movs r7, #0
15; CHECK-NEXT:    b .LBB0_2
16; CHECK-NEXT:  .LBB0_2: @ %for.body
17; CHECK-NEXT:    @ =>This Loop Header: Depth=1
18; CHECK-NEXT:    @ Child Loop BB0_4 Depth 2
19; CHECK-NEXT:    adds r4, r1, r7
20; CHECK-NEXT:    adds r5, r0, r7
21; CHECK-NEXT:    wlstp.8 lr, r3, .LBB0_3
22; CHECK-NEXT:    b .LBB0_4
23; CHECK-NEXT:  .LBB0_3: @ %for.body
24; CHECK-NEXT:    @ in Loop: Header=BB0_2 Depth=1
25; CHECK-NEXT:    add r7, r12
26; CHECK-NEXT:    subs r2, #1
27; CHECK-NEXT:    beq .LBB0_5
28; CHECK-NEXT:    b .LBB0_2
29; CHECK-NEXT:  .LBB0_4: @ Parent Loop BB0_2 Depth=1
30; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
31; CHECK-NEXT:    vldrb.u8 q0, [r4], #16
32; CHECK-NEXT:    vstrb.8 q0, [r5], #16
33; CHECK-NEXT:    letp lr, .LBB0_4
34; CHECK-NEXT:    b .LBB0_3
35; CHECK-NEXT:  .LBB0_5:
36; CHECK-NEXT:    pop.w {r4, r5, r6, r7, lr}
37; CHECK-NEXT:    bx lr
38entry:
39  %cmp8 = icmp sgt i32 %n, 0
40  br i1 %cmp8, label %for.body, label %for.cond.cleanup
41
42for.cond.cleanup:                                 ; preds = %for.body, %entry
43  ret void
44
45for.body:                                         ; preds = %entry, %for.body
46  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
47  %x.addr.010 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
48  %y.addr.09 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
49  tail call void @llvm.memcpy.p0.p0.i32(ptr align 4 %x.addr.010, ptr align 4 %y.addr.09, i32 %m, i1 false)
50  %add.ptr = getelementptr inbounds i32, ptr %x.addr.010, i32 %m
51  %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.09, i32 %m
52  %inc = add nuw nsw i32 %i.011, 1
53  %exitcond.not = icmp eq i32 %inc, %n
54  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
55}
56
; test_memset: a loop of %n iterations (entered only when %n > 0) that calls
; llvm.memset to write %m zero bytes through a 4-byte-aligned pointer, then
; advances the pointer by %m i32 elements.
; The expected output materialises the zero vector once in the preheader
; ("vmov.i32 q0, #0x0") and emits the fill inline as a
; wlstp.8 / vstrb.8 / letp loop nested inside the outer loop.
57define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) {
58; CHECK-LABEL: test_memset:
59; CHECK:       @ %bb.0: @ %entry
60; CHECK-NEXT:    cmp r1, #1
61; CHECK-NEXT:    it lt
62; CHECK-NEXT:    bxlt lr
63; CHECK-NEXT:  .LBB1_1: @ %for.body.preheader
64; CHECK-NEXT:    .save {r4, lr}
65; CHECK-NEXT:    push {r4, lr}
66; CHECK-NEXT:    lsl.w r12, r2, #2
67; CHECK-NEXT:    vmov.i32 q0, #0x0
68; CHECK-NEXT:    b .LBB1_2
69; CHECK-NEXT:  .LBB1_2: @ %for.body
70; CHECK-NEXT:    @ =>This Loop Header: Depth=1
71; CHECK-NEXT:    @ Child Loop BB1_4 Depth 2
72; CHECK-NEXT:    mov r4, r0
73; CHECK-NEXT:    wlstp.8 lr, r2, .LBB1_3
74; CHECK-NEXT:    b .LBB1_4
75; CHECK-NEXT:  .LBB1_3: @ %for.body
76; CHECK-NEXT:    @ in Loop: Header=BB1_2 Depth=1
77; CHECK-NEXT:    add r0, r12
78; CHECK-NEXT:    subs r1, #1
79; CHECK-NEXT:    beq .LBB1_5
80; CHECK-NEXT:    b .LBB1_2
81; CHECK-NEXT:  .LBB1_4: @ Parent Loop BB1_2 Depth=1
82; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
83; CHECK-NEXT:    vstrb.8 q0, [r4], #16
84; CHECK-NEXT:    letp lr, .LBB1_4
85; CHECK-NEXT:    b .LBB1_3
86; CHECK-NEXT:  .LBB1_5:
87; CHECK-NEXT:    pop.w {r4, lr}
88; CHECK-NEXT:    bx lr
89entry:
90  %cmp5 = icmp sgt i32 %n, 0
91  br i1 %cmp5, label %for.body, label %for.cond.cleanup
92
93for.cond.cleanup:                                 ; preds = %for.body, %entry
94  ret void
95
96for.body:                                         ; preds = %entry, %for.body
97  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
98  %x.addr.06 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
99  tail call void @llvm.memset.p0.i32(ptr align 4 %x.addr.06, i8 0, i32 %m, i1 false)
100  %add.ptr = getelementptr inbounds i32, ptr %x.addr.06, i32 %m
101  %inc = add nuw nsw i32 %i.07, 1
102  %exitcond.not = icmp eq i32 %inc, %n
103  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
104}
105
; test_memmove: same loop shape as the variable-length memcpy test, but the
; body calls llvm.memmove with a run-time length of %m bytes.
; Unlike the memcpy/memset cases, the expected output keeps a library call
; ("bl __aeabi_memmove4") inside a plain counted loop; no wlstp/letp loop is
; expected here.
106define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) {
107; CHECK-LABEL: test_memmove:
108; CHECK:       @ %bb.0: @ %entry
109; CHECK-NEXT:    cmp r2, #1
110; CHECK-NEXT:    it lt
111; CHECK-NEXT:    bxlt lr
112; CHECK-NEXT:  .LBB2_1: @ %for.body.preheader
113; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
114; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
115; CHECK-NEXT:    .pad #4
116; CHECK-NEXT:    sub sp, #4
117; CHECK-NEXT:    mov r8, r3
118; CHECK-NEXT:    mov r5, r2
119; CHECK-NEXT:    mov r9, r1
120; CHECK-NEXT:    mov r7, r0
121; CHECK-NEXT:    lsls r4, r3, #2
122; CHECK-NEXT:    movs r6, #0
123; CHECK-NEXT:  .LBB2_2: @ %for.body
124; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
125; CHECK-NEXT:    adds r0, r7, r6
126; CHECK-NEXT:    add.w r1, r9, r6
127; CHECK-NEXT:    mov r2, r8
128; CHECK-NEXT:    bl __aeabi_memmove4
129; CHECK-NEXT:    add r6, r4
130; CHECK-NEXT:    subs r5, #1
131; CHECK-NEXT:    bne .LBB2_2
132; CHECK-NEXT:  @ %bb.3:
133; CHECK-NEXT:    add sp, #4
134; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, lr}
135; CHECK-NEXT:    bx lr
136entry:
137  %cmp8 = icmp sgt i32 %n, 0
138  br i1 %cmp8, label %for.body, label %for.cond.cleanup
139
140for.cond.cleanup:                                 ; preds = %for.body, %entry
141  ret void
142
143for.body:                                         ; preds = %entry, %for.body
144  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
145  %x.addr.010 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
146  %y.addr.09 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
147  tail call void @llvm.memmove.p0.p0.i32(ptr align 4 %x.addr.010, ptr align 4 %y.addr.09, i32 %m, i1 false)
148  %add.ptr = getelementptr inbounds i32, ptr %x.addr.010, i32 %m
149  %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.09, i32 %m
150  %inc = add nuw nsw i32 %i.011, 1
151  %exitcond.not = icmp eq i32 %inc, %n
152  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
153}
154
155
; test_memcpy16: a loop of %n iterations that calls llvm.memcpy with a
; constant length of 16 bytes, then advances both pointers by 16 i32 elements
; (the expected output steps each pointer by #64).
; With a constant size the expected output uses scalar ldm/ldr + stm/str
; inside a dls/le low-overhead loop; no wlstp/letp vector loop is expected.
156define void @test_memcpy16(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
157; CHECK-LABEL: test_memcpy16:
158; CHECK:       @ %bb.0: @ %entry
159; CHECK-NEXT:    .save {r4, lr}
160; CHECK-NEXT:    push {r4, lr}
161; CHECK-NEXT:    cmp r2, #1
162; CHECK-NEXT:    it lt
163; CHECK-NEXT:    poplt {r4, pc}
164; CHECK-NEXT:  .LBB3_1: @ %for.body.preheader
165; CHECK-NEXT:    dls lr, r2
166; CHECK-NEXT:  .LBB3_2: @ %for.body
167; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
168; CHECK-NEXT:    ldm.w r1, {r2, r3, r12}
169; CHECK-NEXT:    ldr r4, [r1, #12]
170; CHECK-NEXT:    adds r1, #64
171; CHECK-NEXT:    stm.w r0, {r2, r3, r12}
172; CHECK-NEXT:    str r4, [r0, #12]
173; CHECK-NEXT:    adds r0, #64
174; CHECK-NEXT:    le lr, .LBB3_2
175; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
176; CHECK-NEXT:    pop {r4, pc}
177entry:
178  %cmp6 = icmp sgt i32 %n, 0
179  br i1 %cmp6, label %for.body, label %for.cond.cleanup
180
181for.cond.cleanup:                                 ; preds = %for.body, %entry
182  ret void
183
184for.body:                                         ; preds = %entry, %for.body
185  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
186  %x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
187  %y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
188  tail call void @llvm.memcpy.p0.p0.i32(ptr nonnull align 4 dereferenceable(16) %x.addr.08, ptr nonnull align 4 dereferenceable(16) %y.addr.07, i32 16, i1 false)
189  %add.ptr = getelementptr inbounds i32, ptr %x.addr.08, i32 16
190  %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.07, i32 16
191  %inc = add nuw nsw i32 %i.09, 1
192  %exitcond.not = icmp eq i32 %inc, %n
193  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
194}
195
; test_memset16: a loop of %n iterations that calls llvm.memset to write a
; constant 16 zero bytes, then advances the pointer by 16 i32 elements
; (the expected output steps the pointer by #64).
; With a constant size the expected output is two scalar strd stores of a
; zeroed register inside a dls/le low-overhead loop; no wlstp/letp vector
; loop is expected.
196define void @test_memset16(ptr nocapture %x, i32 %n) {
197; CHECK-LABEL: test_memset16:
198; CHECK:       @ %bb.0: @ %entry
199; CHECK-NEXT:    .save {r7, lr}
200; CHECK-NEXT:    push {r7, lr}
201; CHECK-NEXT:    cmp r1, #1
202; CHECK-NEXT:    it lt
203; CHECK-NEXT:    poplt {r7, pc}
204; CHECK-NEXT:  .LBB4_1: @ %for.body.preheader
205; CHECK-NEXT:    dls lr, r1
206; CHECK-NEXT:    movs r1, #0
207; CHECK-NEXT:  .LBB4_2: @ %for.body
208; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
209; CHECK-NEXT:    strd r1, r1, [r0]
210; CHECK-NEXT:    strd r1, r1, [r0, #8]
211; CHECK-NEXT:    adds r0, #64
212; CHECK-NEXT:    le lr, .LBB4_2
213; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
214; CHECK-NEXT:    pop {r7, pc}
215entry:
216  %cmp4 = icmp sgt i32 %n, 0
217  br i1 %cmp4, label %for.body, label %for.cond.cleanup
218
219for.cond.cleanup:                                 ; preds = %for.body, %entry
220  ret void
221
222for.body:                                         ; preds = %entry, %for.body
223  %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
224  %x.addr.05 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
225  tail call void @llvm.memset.p0.i32(ptr nonnull align 4 dereferenceable(16) %x.addr.05, i8 0, i32 16, i1 false)
226  %add.ptr = getelementptr inbounds i32, ptr %x.addr.05, i32 16
227  %inc = add nuw nsw i32 %i.06, 1
228  %exitcond.not = icmp eq i32 %inc, %n
229  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
230}
231
; test_memmove16: a loop of %n iterations that calls llvm.memmove with a
; constant length of 16 bytes, then advances both pointers by 16 i32 elements
; (the expected output steps each pointer by #64).
; The expected output matches the constant-size memcpy case: all 16 bytes are
; loaded (ldm/ldr) before any are stored (stm/str), inside a dls/le
; low-overhead loop, with no library call.
232define void @test_memmove16(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) {
233; CHECK-LABEL: test_memmove16:
234; CHECK:       @ %bb.0: @ %entry
235; CHECK-NEXT:    .save {r4, lr}
236; CHECK-NEXT:    push {r4, lr}
237; CHECK-NEXT:    cmp r2, #1
238; CHECK-NEXT:    it lt
239; CHECK-NEXT:    poplt {r4, pc}
240; CHECK-NEXT:  .LBB5_1: @ %for.body.preheader
241; CHECK-NEXT:    dls lr, r2
242; CHECK-NEXT:  .LBB5_2: @ %for.body
243; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
244; CHECK-NEXT:    ldm.w r1, {r2, r3, r12}
245; CHECK-NEXT:    ldr r4, [r1, #12]
246; CHECK-NEXT:    adds r1, #64
247; CHECK-NEXT:    stm.w r0, {r2, r3, r12}
248; CHECK-NEXT:    str r4, [r0, #12]
249; CHECK-NEXT:    adds r0, #64
250; CHECK-NEXT:    le lr, .LBB5_2
251; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
252; CHECK-NEXT:    pop {r4, pc}
253entry:
254  %cmp6 = icmp sgt i32 %n, 0
255  br i1 %cmp6, label %for.body, label %for.cond.cleanup
256
257for.cond.cleanup:                                 ; preds = %for.body, %entry
258  ret void
259
260for.body:                                         ; preds = %entry, %for.body
261  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
262  %x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %entry ]
263  %y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %entry ]
264  tail call void @llvm.memmove.p0.p0.i32(ptr nonnull align 4 dereferenceable(16) %x.addr.08, ptr nonnull align 4 dereferenceable(16) %y.addr.07, i32 16, i1 false)
265  %add.ptr = getelementptr inbounds i32, ptr %x.addr.08, i32 16
266  %add.ptr1 = getelementptr inbounds i32, ptr %y.addr.07, i32 16
267  %inc = add nuw nsw i32 %i.09, 1
268  %exitcond.not = icmp eq i32 %inc, %n
269  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
270}
271
; test_memset_preheader: variable-length memset calls that sit outside the
; loop body. When %n != 0, block %prehead zero-fills %n bytes at %x, then
; %for.body copies %n bytes one at a time from %x to %y. Block
; %for.cond.cleanup zero-fills %n bytes at %x again on every path (it is also
; reached directly from %entry when %n == 0).
; The expected output turns each memset into its own wlstp.8 / vstrb.8 / letp
; loop and the byte-copy loop into a dls/le low-overhead loop.
272define void @test_memset_preheader(ptr %x, ptr %y, i32 %n) {
273; CHECK-LABEL: test_memset_preheader:
274; CHECK:       @ %bb.0: @ %entry
275; CHECK-NEXT:    .save {r4, lr}
276; CHECK-NEXT:    push {r4, lr}
277; CHECK-NEXT:    cbz r2, .LBB6_5
278; CHECK-NEXT:  @ %bb.1: @ %prehead
279; CHECK-NEXT:    vmov.i32 q0, #0x0
280; CHECK-NEXT:    mov r4, r0
281; CHECK-NEXT:    wlstp.8 lr, r2, .LBB6_3
282; CHECK-NEXT:  .LBB6_2: @ =>This Inner Loop Header: Depth=1
283; CHECK-NEXT:    vstrb.8 q0, [r4], #16
284; CHECK-NEXT:    letp lr, .LBB6_2
285; CHECK-NEXT:  .LBB6_3: @ %prehead
286; CHECK-NEXT:    dls lr, r2
287; CHECK-NEXT:    mov r3, r0
288; CHECK-NEXT:  .LBB6_4: @ %for.body
289; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
290; CHECK-NEXT:    ldrb r4, [r3], #1
291; CHECK-NEXT:    strb r4, [r1], #1
292; CHECK-NEXT:    le lr, .LBB6_4
293; CHECK-NEXT:  .LBB6_5: @ %for.cond.cleanup
294; CHECK-NEXT:    vmov.i32 q0, #0x0
295; CHECK-NEXT:    wlstp.8 lr, r2, .LBB6_7
296; CHECK-NEXT:  .LBB6_6: @ =>This Inner Loop Header: Depth=1
297; CHECK-NEXT:    vstrb.8 q0, [r0], #16
298; CHECK-NEXT:    letp lr, .LBB6_6
299; CHECK-NEXT:  .LBB6_7: @ %for.cond.cleanup
300; CHECK-NEXT:    pop {r4, pc}
301entry:
302  %cmp6 = icmp ne i32 %n, 0
303  br i1 %cmp6, label %prehead, label %for.cond.cleanup
304
305prehead:
306  call void @llvm.memset.p0.i32(ptr %x, i8 0, i32 %n, i1 false)
307  br label %for.body
308
309for.body:                                         ; preds = %prehead, %for.body
310  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %prehead ]
311  %x.addr.08 = phi ptr [ %add.ptr, %for.body ], [ %x, %prehead ]
312  %y.addr.07 = phi ptr [ %add.ptr1, %for.body ], [ %y, %prehead ]
313  %add.ptr = getelementptr inbounds i8, ptr %x.addr.08, i32 1
314  %add.ptr1 = getelementptr inbounds i8, ptr %y.addr.07, i32 1
315  %l = load i8, ptr %x.addr.08
316  store i8 %l, ptr %y.addr.07
317  %inc = add nuw nsw i32 %i.09, 1
318  %exitcond.not = icmp eq i32 %inc, %n
319  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
320
321for.cond.cleanup:                                 ; preds = %for.body, %entry
322  call void @llvm.memset.p0.i32(ptr %x, i8 0, i32 %n, i1 false)
323  ret void
324}
325
326
327
328declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg)
329declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg)
330declare void @llvm.memmove.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1 immarg)
331