xref: /llvm-project/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-chain-store.mir (revision 59c6bd156cc8b42758ce90909615748e21c6eee2)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
3
4--- |
5  define hidden arm_aapcs_vfpcc void @it_block_store_count_before_start(ptr %pSrc, ptr %pDst, i32 %blockSize, ptr %iter.addr) #0 {
  ; IR counterpart of the machine function with the same name further down.
  ; In this variant the iteration count %4 is written to %iter.addr BEFORE the
  ; call to llvm.start.loop.iterations.
6  entry:
    ; Iteration count: %mul = %blockSize << 1, %smin = signed min(%mul, 4),
    ; %4 = ((%mul + 3 - %smin) >> 2) + 1.
7    %mul = shl i32 %blockSize, 1
8    %0 = add i32 %mul, 3
9    %1 = icmp slt i32 %mul, 4
10    %smin = select i1 %1, i32 %mul, i32 4
11    %2 = sub i32 %0, %smin
12    %3 = lshr i32 %2, 2
13    %4 = add nuw nsw i32 %3, 1
14    store i32 %4, ptr %iter.addr, align 4
15    %start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
16    br label %do.body
17
18  do.body:                                          ; preds = %do.body, %entry
19    %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ]
20    %blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
21    %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr4, %do.body ]
22    %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ]
23    %pDst.addr.01 = bitcast ptr %pDst.addr.0 to ptr
24    %pSrc.addr.02 = bitcast ptr %pSrc.addr.0 to ptr
    ; %5 is the <4 x i1> mask returned by vctp32(%blkCnt.0); it guards both the
    ; masked load and the masked store. %blkCnt.0 starts at %mul and drops by 4
    ; on every iteration (%sub).
25    %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
26    %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef)
    ; Square each loaded element.
27    %7 = fmul <4 x float> %6, %6
28    tail call void @llvm.masked.store.v4f32.p0(<4 x float> %7, ptr %pDst.addr.01, i32 4, <4 x i1> %5)
    ; Advance both pointers by four floats.
29    %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
30    %add.ptr4 = getelementptr inbounds float, ptr %pDst.addr.0, i32 4
31    %sub = add nsw i32 %blkCnt.0, -4
    ; %lsr.iv is the loop counter: it starts at %start, loop.decrement.reg is
    ; called with a step of 1, and the loop repeats while %8 is non-zero.
32    %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
33    %9 = icmp ne i32 %8, 0
34    %lsr.iv.next = add nsw i32 %lsr.iv, -1
35    br i1 %9, label %do.body, label %do.end
36
37  do.end:                                           ; preds = %do.body
38    ret void
39  }
40
41  define hidden arm_aapcs_vfpcc void @it_block_store_count_after_start(ptr %pSrc, ptr %pDst, i32 %blockSize, ptr %iter.addr) #0 {
  ; IR counterpart of the machine function with the same name further down.
  ; Same computation as @it_block_store_count_before_start, except that the
  ; iteration count %4 is written to %iter.addr AFTER the call to
  ; llvm.start.loop.iterations.
42  entry:
    ; Iteration count: %mul = %blockSize << 1, %smin = signed min(%mul, 4),
    ; %4 = ((%mul + 3 - %smin) >> 2) + 1.
43    %mul = shl i32 %blockSize, 1
44    %0 = add i32 %mul, 3
45    %1 = icmp slt i32 %mul, 4
46    %smin = select i1 %1, i32 %mul, i32 4
47    %2 = sub i32 %0, %smin
48    %3 = lshr i32 %2, 2
49    %4 = add nuw nsw i32 %3, 1
50    %start = call i32 @llvm.start.loop.iterations.i32(i32 %4)
51    store i32 %4, ptr %iter.addr, align 4
52    br label %do.body
53
54  do.body:                                          ; preds = %do.body, %entry
55    %lsr.iv = phi i32 [ %lsr.iv.next, %do.body ], [ %start, %entry ]
56    %blkCnt.0 = phi i32 [ %mul, %entry ], [ %sub, %do.body ]
57    %pDst.addr.0 = phi ptr [ %pDst, %entry ], [ %add.ptr4, %do.body ]
58    %pSrc.addr.0 = phi ptr [ %pSrc, %entry ], [ %add.ptr, %do.body ]
59    %pDst.addr.01 = bitcast ptr %pDst.addr.0 to ptr
60    %pSrc.addr.02 = bitcast ptr %pSrc.addr.0 to ptr
    ; %5 is the <4 x i1> mask returned by vctp32(%blkCnt.0); it guards both the
    ; masked load and the masked store. %blkCnt.0 starts at %mul and drops by 4
    ; on every iteration (%sub).
61    %5 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %blkCnt.0)
62    %6 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %pSrc.addr.02, i32 4, <4 x i1> %5, <4 x float> undef)
    ; Square each loaded element.
63    %7 = fmul <4 x float> %6, %6
64    tail call void @llvm.masked.store.v4f32.p0(<4 x float> %7, ptr %pDst.addr.01, i32 4, <4 x i1> %5)
    ; Advance both pointers by four floats.
65    %add.ptr = getelementptr inbounds float, ptr %pSrc.addr.0, i32 4
66    %add.ptr4 = getelementptr inbounds float, ptr %pDst.addr.0, i32 4
67    %sub = add nsw i32 %blkCnt.0, -4
    ; %lsr.iv is the loop counter: it starts at %start, loop.decrement.reg is
    ; called with a step of 1, and the loop repeats while %8 is non-zero.
68    %8 = call i32 @llvm.loop.decrement.reg.i32(i32 %lsr.iv, i32 1)
69    %9 = icmp ne i32 %8, 0
70    %lsr.iv.next = add nsw i32 %lsr.iv, -1
71    br i1 %9, label %do.body, label %do.end
72
73  do.end:                                           ; preds = %do.body
74    ret void
75  }
76
77  ; Function Attrs: nounwind readnone
78  declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
  ; ^ Returns the <4 x i1> mask that both functions above pass to the masked
  ;   load and masked store declared below.
79
80  ; Function Attrs: argmemonly nounwind readonly willreturn
81  declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) #2
82
83  ; Function Attrs: argmemonly nounwind willreturn writeonly
84  declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>) #3
85
  ; Loop-counter intrinsics: start.loop.iterations is called once in each entry
  ; block, loop.decrement.reg once per do.body iteration.
86  ; Function Attrs: noduplicate nounwind
87  declare i32 @llvm.start.loop.iterations.i32(i32) #4
88
89  ; Function Attrs: noduplicate nounwind
90  declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4
91
  ; Attribute groups referenced above; every group carries
  ; "target-features"="+mve.fp".
92  attributes #0 = { "target-features"="+mve.fp" }
93  attributes #1 = { nounwind readnone "target-features"="+mve.fp" }
94  attributes #2 = { argmemonly nounwind readonly willreturn "target-features"="+mve.fp" }
95  attributes #3 = { argmemonly nounwind willreturn writeonly "target-features"="+mve.fp" }
96  attributes #4 = { noduplicate nounwind "target-features"="+mve.fp" }
97
98...
99---
# Machine-IR input for it_block_store_count_before_start.
# The body first lists the autogenerated FileCheck expectations for the
# arm-low-overhead-loops pass, followed by the input basic blocks.
100name:            it_block_store_count_before_start
101alignment:       2
102tracksRegLiveness: true
103registers:       []
104liveins:
105  - { reg: '$r0', virtual-reg: '' }
106  - { reg: '$r1', virtual-reg: '' }
107  - { reg: '$r2', virtual-reg: '' }
108  - { reg: '$r3', virtual-reg: '' }
109frameInfo:
110  stackSize:       8
111  offsetAdjustment: 0
112  maxAlignment:    4
113  localFrameSize:  0
114  savePoint:       ''
115  restorePoint:    ''
116fixedStack:      []
117stack:
118  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
119      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
120      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
121  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
122      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
123      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
124callSites:       []
125constants:       []
126machineFunctionInfo: {}
127body:             |
128  ; CHECK-LABEL: name: it_block_store_count_before_start
129  ; CHECK: bb.0.entry:
130  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
131  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r2, $r3, $r7
132  ; CHECK-NEXT: {{  $}}
133  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
134  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
135  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
136  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r7, -8
137  ; CHECK-NEXT:   renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg
138  ; CHECK-NEXT:   renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg
139  ; CHECK-NEXT:   t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
140  ; CHECK-NEXT:   t2IT 11, 8, implicit-def $itstate
141  ; CHECK-NEXT:   $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate
142  ; CHECK-NEXT:   renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg
143  ; CHECK-NEXT:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
144  ; CHECK-NEXT:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
145  ; CHECK-NEXT:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
146  ; CHECK-NEXT:   t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr)
147  ; CHECK-NEXT:   $lr = MVE_DLSTP_32 killed renamable $r12
148  ; CHECK-NEXT:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
149  ; CHECK-NEXT: {{  $}}
150  ; CHECK-NEXT: bb.1.do.body:
151  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
152  ; CHECK-NEXT:   liveins: $r0, $r1, $r2
153  ; CHECK-NEXT: {{  $}}
154  ; CHECK-NEXT:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
155  ; CHECK-NEXT:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
156  ; CHECK-NEXT:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, $noreg :: (load (s128) from %ir.pSrc.addr.02, align 4)
157  ; CHECK-NEXT:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, $noreg, undef renamable $q0
158  ; CHECK-NEXT:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg, $noreg :: (store (s128) into %ir.pDst.addr.01, align 4)
159  ; CHECK-NEXT:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
160  ; CHECK-NEXT: {{  $}}
161  ; CHECK-NEXT: bb.2.do.end:
162  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
163  bb.0.entry:
164    successors: %bb.1(0x80000000)
165    liveins: $r0, $r1, $r2, $r3, $r7, $lr
166
    ; Prologue: push $r7 and $lr, then describe the 8-byte frame with CFI
    ; directives.
167    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
168    frame-setup CFI_INSTRUCTION def_cfa_offset 8
169    frame-setup CFI_INSTRUCTION offset $lr, -4
170    frame-setup CFI_INSTRUCTION offset $r7, -8
    ; Iteration-count computation. $r12 = $r2 shifted left by 1 and is later the
    ; operand of MVE_VCTP32 in the loop; the IT block conditionally (lt)
    ; overwrites $lr; the sequence ending at the t2ADDrs leaves the count in $lr.
171    renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg
172    renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg
173    t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
174    t2IT 11, 8, implicit-def $itstate
175    $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate
176    renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg
177    renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
178    renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
179    renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
    ; The count in $lr is stored through $r3 before the loop-start pseudo. In the
    ; expected output above, the store kills $lr and t2DoLoopStart is replaced
    ; by MVE_DLSTP_32 taking $r12.
180    t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr)
181    $lr = t2DoLoopStart renamable $lr
182    $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
183
184  bb.1.do.body:
185    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
186    liveins: $r0, $r1, $r2, $r12
187
    ; Loop body. $lr is re-copied from $r2 each iteration and $r2 is then
    ; decremented by 1. $vpr (from MVE_VCTP32 on $r12) predicates the load and
    ; the store through the two MVE_VPST blocks; $r12 drops by 4 per iteration.
    ; The expected output above keeps the $lr/$r2 copy and decrement but has no
    ; VCTP32, VPST, $r12 subtraction, t2LoopDec, t2LoopEnd or tB: the load and
    ; store take $noreg as predicate and the block ends in a single MVE_LETP.
188    $lr = tMOVr $r2, 14 /* CC::al */, $noreg
189    renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg, $noreg
190    renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
191    renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
192    MVE_VPST 8, implicit $vpr
193    renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.pSrc.addr.02, align 4)
194    renamable $lr = t2LoopDec killed renamable $lr, 1
195    renamable $q0 = MVE_VMULf32 killed renamable $q0, renamable $q0, 0, $noreg, $noreg, undef renamable $q0
196    MVE_VPST 8, implicit $vpr
197    renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.pDst.addr.01, align 4)
198    t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
199    tB %bb.2, 14 /* CC::al */, $noreg
200
201  bb.2.do.end:
202    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
203
204...
205---
# Machine-IR input for it_block_store_count_after_start.
# The body first lists the autogenerated FileCheck expectations for the
# arm-low-overhead-loops pass, followed by the input basic blocks.
206name:            it_block_store_count_after_start
207alignment:       2
208tracksRegLiveness: true
209registers:       []
210liveins:
211  - { reg: '$r0', virtual-reg: '' }
212  - { reg: '$r1', virtual-reg: '' }
213  - { reg: '$r2', virtual-reg: '' }
214  - { reg: '$r3', virtual-reg: '' }
215frameInfo:
216  stackSize:       8
217  offsetAdjustment: 0
218  maxAlignment:    4
  # NOTE(review): unlike it_block_store_count_before_start, there is no
  # localFrameSize entry here; presumably it is defaulted -- confirm.
219  savePoint:       ''
220  restorePoint:    ''
221fixedStack:      []
222stack:
223  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
224      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
225      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
226  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
227      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
228      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
229callSites:       []
230constants:       []
231machineFunctionInfo: {}
232body:             |
233  ; CHECK-LABEL: name: it_block_store_count_after_start
234  ; CHECK: bb.0.entry:
235  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
236  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r2, $r3, $r7
237  ; CHECK-NEXT: {{  $}}
238  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
239  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
240  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
241  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r7, -8
242  ; CHECK-NEXT:   renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg
243  ; CHECK-NEXT:   renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg
244  ; CHECK-NEXT:   t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
245  ; CHECK-NEXT:   t2IT 11, 8, implicit-def $itstate
246  ; CHECK-NEXT:   $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate
247  ; CHECK-NEXT:   renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg
248  ; CHECK-NEXT:   renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
249  ; CHECK-NEXT:   renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
250  ; CHECK-NEXT:   renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
251  ; CHECK-NEXT:   t2STRi12 killed renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr)
252  ; CHECK-NEXT:   $lr = MVE_DLSTP_32 killed renamable $r12
253  ; CHECK-NEXT:   $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
254  ; CHECK-NEXT: {{  $}}
255  ; CHECK-NEXT: bb.1.do.body:
256  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
257  ; CHECK-NEXT:   liveins: $r0, $r1, $r2
258  ; CHECK-NEXT: {{  $}}
259  ; CHECK-NEXT:   $lr = tMOVr $r2, 14 /* CC::al */, $noreg
260  ; CHECK-NEXT:   renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
261  ; CHECK-NEXT:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 0, $noreg, $noreg :: (load (s128) from %ir.pSrc.addr.02, align 4)
262  ; CHECK-NEXT:   renamable $q0 = MVE_VMULf32 killed renamable $q0, killed renamable $q0, 0, $noreg, $noreg, undef renamable $q0
263  ; CHECK-NEXT:   renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 0, killed $noreg, $noreg :: (store (s128) into %ir.pDst.addr.01, align 4)
264  ; CHECK-NEXT:   dead $lr = MVE_LETP killed renamable $lr, %bb.1
265  ; CHECK-NEXT: {{  $}}
266  ; CHECK-NEXT: bb.2.do.end:
267  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
268  bb.0.entry:
269    successors: %bb.1(0x80000000)
270    liveins: $r0, $r1, $r2, $r3, $r7, $lr
271
    ; Prologue: push $r7 and $lr, then describe the 8-byte frame with CFI
    ; directives.
272    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
273    frame-setup CFI_INSTRUCTION def_cfa_offset 8
274    frame-setup CFI_INSTRUCTION offset $lr, -4
275    frame-setup CFI_INSTRUCTION offset $r7, -8
    ; Iteration-count computation. $r12 = $r2 shifted left by 1 and is later the
    ; operand of MVE_VCTP32 in the loop; the IT block conditionally (lt)
    ; overwrites $lr; the sequence ending at the t2ADDrs leaves the count in $lr.
276    renamable $lr = t2MOVi 4, 14 /* CC::al */, $noreg, $noreg
277    renamable $r12 = t2LSLri renamable $r2, 1, 14 /* CC::al */, $noreg, $noreg
278    t2CMPri renamable $r12, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
279    t2IT 11, 8, implicit-def $itstate
280    $lr = t2LSLri renamable $r2, 1, 11 /* CC::lt */, killed $cpsr, $noreg, implicit killed renamable $lr, implicit killed $itstate
281    renamable $r2 = t2RSBrs killed renamable $lr, killed renamable $r2, 10, 14 /* CC::al */, $noreg, $noreg
282    renamable $lr = t2ADDri killed renamable $r2, 3, 14 /* CC::al */, $noreg, $noreg
283    renamable $r2, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
284    renamable $lr = nuw nsw t2ADDrs killed renamable $r2, killed renamable $lr, 19, 14 /* CC::al */, $noreg, $noreg
    ; NOTE(review): the embedded IR for this function calls
    ; llvm.start.loop.iterations BEFORE the store to %iter.addr, but here the
    ; t2STRi12 precedes t2DoLoopStart -- the same order as in
    ; it_block_store_count_before_start. Confirm whether this ordering is
    ; intended; any reorder requires regenerating the expectations above.
285    t2STRi12 renamable $lr, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.iter.addr)
286    $lr = t2DoLoopStart renamable $lr
287    $r2 = tMOVr killed $lr, 14 /* CC::al */, $noreg
288
289  bb.1.do.body:
290    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
291    liveins: $r0, $r1, $r2, $r12
292
    ; Loop body. $lr is re-copied from $r2 each iteration and $r2 is then
    ; decremented by 1. $vpr (from MVE_VCTP32 on $r12) predicates the load and
    ; the store through the two MVE_VPST blocks; $r12 drops by 4 per iteration.
    ; The expected output above keeps the $lr/$r2 copy and decrement but has no
    ; VCTP32, VPST, $r12 subtraction, t2LoopDec, t2LoopEnd or tB: the load and
    ; store take $noreg as predicate and the block ends in a single MVE_LETP.
293    $lr = tMOVr $r2, 14 /* CC::al */, $noreg
294    renamable $vpr = MVE_VCTP32 renamable $r12, 0, $noreg, $noreg
295    renamable $r2, dead $cpsr = nsw tSUBi8 killed $r2, 1, 14 /* CC::al */, $noreg
296    renamable $r12 = nsw t2SUBri killed renamable $r12, 4, 14 /* CC::al */, $noreg, $noreg
297    MVE_VPST 8, implicit $vpr
298    renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.pSrc.addr.02, align 4)
299    renamable $lr = t2LoopDec killed renamable $lr, 1
300    renamable $q0 = MVE_VMULf32 killed renamable $q0, renamable $q0, 0, $noreg, $noreg, undef renamable $q0
301    MVE_VPST 8, implicit $vpr
302    renamable $r1 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r1, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.pDst.addr.01, align 4)
303    t2LoopEnd killed renamable $lr, %bb.1, implicit-def dead $cpsr
304    tB %bb.2, 14 /* CC::al */, $noreg
305
306  bb.2.do.end:
307    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
308
309...
310