xref: /llvm-project/llvm/test/CodeGen/Thumb2/LowOverheadLoops/ctlz-non-zeros.mir (revision 59c6bd156cc8b42758ce90909615748e21c6eee2)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s
3
4# CHECK-NOT: LETP
5
6--- |
7  define arm_aapcs_vfpcc void @test_ctlz_i8(ptr %a, ptr %b, ptr %c, i32 %elts, i32 %iters) #0 {
  ; Reference IR for the MIR function of the same name later in this file.
  ; Each iteration loads 8 x i16 from %addr.a and %addr.b under a vctp16
  ; predicate, runs ctlz on the %addr.a data viewed as 16 x i8, combines the
  ; result with the %addr.b data through the MVE vshrn intrinsic, and stores
  ; the outcome to %addr.c under the same predicate.
8  entry:
  ; Bypass the loop when %elts is less than 1.
9    %cmp = icmp slt i32 %elts, 1
10    br i1 %cmp, label %exit, label %loop.ph
11
12  loop.ph:                                          ; preds = %entry
  ; The loop counter is seeded from %iters.
13    %start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
14    br label %loop.body
15
16  loop.body:                                        ; preds = %loop.body, %loop.ph
17    %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
18    %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
19    %addr.a = phi ptr [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
20    %addr.b = phi ptr [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
21    %addr.c = phi ptr [ %c, %loop.ph ], [ %addr.c.next, %loop.body ]
  ; The predicate is derived from the remaining element count, which drops by
  ; 8 per iteration.
22    %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %count)
23    %elts.rem = sub i32 %count, 8
24    %masked.load.a = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %addr.a, i32 2, <8 x i1> %pred, <8 x i16> undef)
25    %masked.load.b = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %addr.b, i32 2, <8 x i1> %pred, <8 x i16> undef)
  ; ctlz works on i8 lanes here, i.e. lanes narrower than the i16 lanes that
  ; the predicate and the loads are expressed in.
26    %bitcast.a = bitcast <8 x i16> %masked.load.a to <16 x i8>
27    %ctlz = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %bitcast.a, i1 false)
28    %shrn = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %ctlz, <8 x i16> %masked.load.b, i32 1, i32 1, i32 0, i32 1, i32 0, i32 1)
29    %bitcast = bitcast <16 x i8> %shrn to <8 x i16>
30    call void @llvm.masked.store.v8i16.p0(<8 x i16> %bitcast, ptr %addr.c, i32 2, <8 x i1> %pred)
  ; NOTE(review): %addr.a.next is derived from %addr.b, whereas the i16 and
  ; i32 variants in this file step %addr.a. The MIR for this function mirrors
  ; it (`$r0 = tMOVr $r1`), so it looks intentional; confirm before changing.
31    %addr.a.next = getelementptr <8 x i16>, ptr %addr.b, i32 1
32    %addr.b.next = getelementptr <8 x i16>, ptr %addr.b, i32 1
33    %addr.c.next = getelementptr <8 x i16>, ptr %addr.c, i32 1
  ; Decrement the loop counter by 1 and keep looping while it is non-zero.
34    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
35    %end = icmp ne i32 %loop.dec, 0
36    %lsr.iv.next = add i32 %lsr.iv, -1
37    br i1 %end, label %loop.body, label %exit
38
39  exit:                                             ; preds = %loop.body, %entry
40    ret void
41  }
42
43  define arm_aapcs_vfpcc void @test_ctlz_i16(ptr %a, ptr %b, ptr %c, i32 %elts, i32 %iters) #0 {
  ; Reference IR for the MIR function of the same name later in this file.
  ; Each iteration loads 4 x i32 from %addr.a and %addr.b under a vctp32
  ; predicate, runs ctlz on the %addr.a data viewed as 8 x i16, combines the
  ; result with the %addr.b data through the MVE vshrn intrinsic, and stores
  ; the outcome to %addr.c under the same predicate.
44  entry:
  ; Bypass the loop when %elts is less than 1.
45    %cmp = icmp slt i32 %elts, 1
46    br i1 %cmp, label %exit, label %loop.ph
47
48  loop.ph:                                          ; preds = %entry
  ; The loop counter is seeded from %iters.
49    %start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
50    br label %loop.body
51
52  loop.body:                                        ; preds = %loop.body, %loop.ph
53    %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
54    %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
55    %addr.a = phi ptr [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
56    %addr.b = phi ptr [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
57    %addr.c = phi ptr [ %c, %loop.ph ], [ %addr.c.next, %loop.body ]
  ; The predicate is derived from the remaining element count, which drops by
  ; 4 per iteration.
58    %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %count)
59    %elts.rem = sub i32 %count, 4
60    %masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr.a, i32 4, <4 x i1> %pred, <4 x i32> undef)
61    %masked.load.b = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr.b, i32 4, <4 x i1> %pred, <4 x i32> undef)
  ; ctlz works on i16 lanes here, i.e. lanes narrower than the i32 lanes that
  ; the predicate and the loads are expressed in.
62    %bitcast.a = bitcast <4 x i32> %masked.load.a to <8 x i16>
63    %ctlz = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %bitcast.a, i1 false)
64    %shrn = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %ctlz, <4 x i32> %masked.load.b, i32 3, i32 1, i32 0, i32 1, i32 0, i32 1)
65    %bitcast = bitcast <8 x i16> %shrn to <4 x i32>
66    call void @llvm.masked.store.v4i32.p0(<4 x i32> %bitcast, ptr %addr.c, i32 4, <4 x i1> %pred)
  ; Step all three pointers by one 4 x i32 vector.
67    %addr.a.next = getelementptr <4 x i32>, ptr %addr.a, i32 1
68    %addr.b.next = getelementptr <4 x i32>, ptr %addr.b, i32 1
69    %addr.c.next = getelementptr <4 x i32>, ptr %addr.c, i32 1
  ; Decrement the loop counter by 1 and keep looping while it is non-zero.
70    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
71    %end = icmp ne i32 %loop.dec, 0
72    %lsr.iv.next = add i32 %lsr.iv, -1
73    br i1 %end, label %loop.body, label %exit
74
75  exit:                                             ; preds = %loop.body, %entry
76    ret void
77  }
78
79  define arm_aapcs_vfpcc void @test_ctlz_i32(ptr %a, ptr %b, ptr %c, i32 %elts, i32 %iters) #0 {
  ; Reference IR for the MIR function of the same name later in this file.
  ; Each iteration loads 4 x i32 from %addr.a and %addr.b under a vctp32
  ; predicate. Unlike the i8 and i16 variants, ctlz is applied directly to the
  ; 4 x i32 data from %addr.b and becomes the second vshrn operand; the
  ; %addr.a data, viewed as 8 x i16, is the first operand.
80  entry:
  ; Bypass the loop when %elts is less than 1.
81    %cmp = icmp slt i32 %elts, 1
82    br i1 %cmp, label %exit, label %loop.ph
83
84  loop.ph:                                          ; preds = %entry
  ; The loop counter is seeded from %iters.
85    %start = call i32 @llvm.start.loop.iterations.i32(i32 %iters)
86    br label %loop.body
87
88  loop.body:                                        ; preds = %loop.body, %loop.ph
89    %lsr.iv = phi i32 [ %lsr.iv.next, %loop.body ], [ %start, %loop.ph ]
90    %count = phi i32 [ %elts, %loop.ph ], [ %elts.rem, %loop.body ]
91    %addr.a = phi ptr [ %a, %loop.ph ], [ %addr.a.next, %loop.body ]
92    %addr.b = phi ptr [ %b, %loop.ph ], [ %addr.b.next, %loop.body ]
93    %addr.c = phi ptr [ %c, %loop.ph ], [ %addr.c.next, %loop.body ]
  ; The predicate is derived from the remaining element count, which drops by
  ; 4 per iteration.
94    %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %count)
95    %elts.rem = sub i32 %count, 4
96    %masked.load.a = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr.a, i32 4, <4 x i1> %pred, <4 x i32> undef)
97    %masked.load.b = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr.b, i32 4, <4 x i1> %pred, <4 x i32> undef)
  ; ctlz keeps the i32 lane width of the load; its result is consumed by vshrn.
98    %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %masked.load.b, i1 false)
99    %bitcast.a = bitcast <4 x i32> %masked.load.a to <8 x i16>
100    %shrn = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %bitcast.a, <4 x i32> %ctlz, i32 3, i32 1, i32 0, i32 1, i32 0, i32 1)
101    %bitcast = bitcast <8 x i16> %shrn to <4 x i32>
102    call void @llvm.masked.store.v4i32.p0(<4 x i32> %bitcast, ptr %addr.c, i32 4, <4 x i1> %pred)
  ; Step all three pointers by one 4 x i32 vector.
103    %addr.a.next = getelementptr <4 x i32>, ptr %addr.a, i32 1
104    %addr.b.next = getelementptr <4 x i32>, ptr %addr.b, i32 1
105    %addr.c.next = getelementptr <4 x i32>, ptr %addr.c, i32 1
  ; Decrement the loop counter by 1 and keep looping while it is non-zero.
106    %loop.dec = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %lsr.iv, i32 1)
107    %end = icmp ne i32 %loop.dec, 0
108    %lsr.iv.next = add i32 %lsr.iv, -1
109    br i1 %end, label %loop.body, label %exit
110
111  exit:                                             ; preds = %loop.body, %entry
112    ret void
113  }
114
115  declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1 immarg)
116  declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1 immarg)
117  declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1 immarg)
  ; Loop-counter intrinsics shared by all three functions above.
118  declare i32 @llvm.start.loop.iterations.i32(i32)
119  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
  ; Predicate, masked memory and vshrn declarations used by the i16 and i32
  ; functions (4 x i32 memory accesses).
120  declare <4 x i1> @llvm.arm.mve.vctp32(i32)
121  declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
122  declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
123  declare <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32)
  ; Predicate, masked memory and vshrn declarations used by the i8 function
  ; (8 x i16 memory accesses).
124  declare <8 x i1> @llvm.arm.mve.vctp16(i32)
125  declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>)
126  declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
127  declare <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32)
128
129...
130---
131name:            test_ctlz_i8
# Machine function fed to the arm-low-overhead-loops pass (see the llc command
# at the top of this file). The autogenerated expectations in the body keep
# MVE_VCTP16 and both MVE_VPST instructions and close the loop with
# t2LEUpdate; the t2DoLoopStart and t2LoopDec pseudos of the input are absent
# from the expected output.
# NOTE(review): presumably the loop is left un-tail-predicated because VCLZ on
# lanes zeroed by predication yields non-zero values - confirm against the pass.
132alignment:       2
133tracksRegLiveness: true
134registers:       []
135liveins:
136  - { reg: '$r0', virtual-reg: '' }
137  - { reg: '$r1', virtual-reg: '' }
138  - { reg: '$r2', virtual-reg: '' }
139  - { reg: '$r3', virtual-reg: '' }
140frameInfo:
141  stackSize:       8
142  offsetAdjustment: 0
143  maxAlignment:    4
144fixedStack:
145  - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default,
146      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
147      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
148stack:
149  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
150      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
151      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
152  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
153      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
154      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
155callSites:       []
156constants:       []
157machineFunctionInfo: {}
158body:             |
159  ; CHECK-LABEL: name: test_ctlz_i8
160  ; CHECK: bb.0.entry:
161  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
162  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r2, $r3, $r4
163  ; CHECK-NEXT: {{  $}}
164  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
165  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
166  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
167  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r4, -8
168  ; CHECK-NEXT:   tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
169  ; CHECK-NEXT:   t2IT 11, 8, implicit-def $itstate
170  ; CHECK-NEXT:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
171  ; CHECK-NEXT: {{  $}}
172  ; CHECK-NEXT: bb.1.loop.ph:
173  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
174  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r3
175  ; CHECK-NEXT: {{  $}}
176  ; CHECK-NEXT:   renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8)
177  ; CHECK-NEXT:   $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg
178  ; CHECK-NEXT: {{  $}}
179  ; CHECK-NEXT: bb.2.loop.body:
180  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
181  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r3, $r4
182  ; CHECK-NEXT: {{  $}}
183  ; CHECK-NEXT:   $lr = tMOVr $r4, 14 /* CC::al */, $noreg
184  ; CHECK-NEXT:   renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg, $noreg
185  ; CHECK-NEXT:   MVE_VPST 4, implicit $vpr
186  ; CHECK-NEXT:   renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.b, align 2)
187  ; CHECK-NEXT:   renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.a, align 2)
188  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
189  ; CHECK-NEXT:   renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
190  ; CHECK-NEXT:   renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, $noreg, undef renamable $q1
191  ; CHECK-NEXT:   $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
192  ; CHECK-NEXT:   renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg, $noreg
193  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
194  ; CHECK-NEXT:   renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.addr.c, align 2)
195  ; CHECK-NEXT:   dead $lr = t2LEUpdate killed renamable $lr, %bb.2
196  ; CHECK-NEXT: {{  $}}
197  ; CHECK-NEXT: bb.3.exit:
198  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
  ; The pass input starts here; the comment lines above it in this body are the
  ; autogenerated expectations for the pass output.
199  bb.0.entry:
200    successors: %bb.1(0x80000000)
201    liveins: $r0, $r1, $r2, $r3, $r4, $lr
202
  ; Save $r4 and $lr, then return early through the predicated pop when
  ; $r3 is less than 1 (signed).
203    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r4, killed $lr, implicit-def $sp, implicit $sp
204    frame-setup CFI_INSTRUCTION def_cfa_offset 8
205    frame-setup CFI_INSTRUCTION offset $lr, -4
206    frame-setup CFI_INSTRUCTION offset $r4, -8
207    tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
208    t2IT 11, 8, implicit-def $itstate
209    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r4, def $pc, implicit killed $itstate
210
211  bb.1.loop.ph:
212    successors: %bb.2(0x80000000)
213    liveins: $r0, $r1, $r2, $r3
214
  ; Load the iteration count from %fixed-stack.0, pass it through the
  ; loop-start pseudo, and copy it into $r4 for the loop body.
215    renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8)
216    renamable $lr = t2DoLoopStart killed renamable $lr
217    $r4 = tMOVr killed $lr, 14 /* CC::al */, $noreg
218
219  bb.2.loop.body:
220    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
221    liveins: $r0, $r1, $r2, $r3, $r4
222
  ; $lr is re-seeded from $r4 on every iteration. $r3 is the remaining element
  ; count: it feeds MVE_VCTP16 and is reduced by 8 below.
223    $lr = tMOVr $r4, 14 /* CC::al */, $noreg
224    renamable $vpr = MVE_VCTP16 renamable $r3, 0, $noreg, $noreg
225    MVE_VPST 4, implicit $vpr
226    renamable $r1, renamable $q0 = MVE_VLDRHU16_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.b, align 2)
227    renamable $q1 = MVE_VLDRHU16 killed renamable $r0, 0, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.a, align 2)
228    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14 /* CC::al */, $noreg
229    renamable $r4, dead $cpsr = tSUBi8 killed $r4, 1, 14 /* CC::al */, $noreg
  ; The VCLZ and VQSHRUN below carry no predicate operand ($noreg); only the
  ; loads above and the store at the end sit in VPT blocks.
230    renamable $q1 = MVE_VCLZs8 killed renamable $q1, 0, $noreg, $noreg, undef renamable $q1
231    renamable $lr = t2LoopDec killed renamable $lr, 1
  ; $r0 takes the post-incremented $r1, matching %addr.a.next being derived
  ; from %addr.b in the IR for this function.
232    $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
233    renamable $q1 = MVE_VQSHRUNs16th killed renamable $q1, killed renamable $q0, 1, 0, $noreg, $noreg
234    MVE_VPST 8, implicit $vpr
235    renamable $r2 = MVE_VSTRHU16_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.addr.c, align 2)
236    t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
237    tB %bb.3, 14 /* CC::al */, $noreg
238
239  bb.3.exit:
240    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
241
242...
243---
244name:            test_ctlz_i16
# Machine function fed to the arm-low-overhead-loops pass (see the llc command
# at the top of this file). The autogenerated expectations in the body keep
# MVE_VCTP32 and both MVE_VPST instructions and close the loop with
# t2LEUpdate; the t2DoLoopStart and t2LoopDec pseudos of the input are absent
# from the expected output.
# NOTE(review): presumably the loop is left un-tail-predicated because VCLZ on
# lanes zeroed by predication yields non-zero values - confirm against the pass.
245alignment:       2
246tracksRegLiveness: true
247registers:       []
248liveins:
249  - { reg: '$r0', virtual-reg: '' }
250  - { reg: '$r1', virtual-reg: '' }
251  - { reg: '$r2', virtual-reg: '' }
252  - { reg: '$r3', virtual-reg: '' }
253frameInfo:
254  stackSize:       8
255  offsetAdjustment: 0
256  maxAlignment:    4
257fixedStack:
258  - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default,
259      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
260      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
261stack:
262  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
263      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
264      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
# NOTE(review): the next slot names $r4, but the prologue in the body pushes
# $r7 and its CFI refers to $r7. The autogenerated expectations list $r4 as a
# live-in, which may be related - confirm before editing either side.
265  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
266      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
267      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
268callSites:       []
269constants:       []
270machineFunctionInfo: {}
271body:             |
272  ; CHECK-LABEL: name: test_ctlz_i16
273  ; CHECK: bb.0.entry:
274  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
275  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7
276  ; CHECK-NEXT: {{  $}}
277  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
278  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
279  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
280  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r7, -8
281  ; CHECK-NEXT:   tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
282  ; CHECK-NEXT:   t2IT 11, 8, implicit-def $itstate
283  ; CHECK-NEXT:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def dead $r7, def $pc, implicit killed $itstate
284  ; CHECK-NEXT: {{  $}}
285  ; CHECK-NEXT: bb.1.loop.ph:
286  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
287  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r3, $r4
288  ; CHECK-NEXT: {{  $}}
289  ; CHECK-NEXT:   renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8)
290  ; CHECK-NEXT:   $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
291  ; CHECK-NEXT: {{  $}}
292  ; CHECK-NEXT: bb.2.loop.body:
293  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
294  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r3, $r4, $r12
295  ; CHECK-NEXT: {{  $}}
296  ; CHECK-NEXT:   $lr = tMOVr $r12, 14 /* CC::al */, $noreg
297  ; CHECK-NEXT:   renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg, $noreg
298  ; CHECK-NEXT:   MVE_VPST 4, implicit $vpr
299  ; CHECK-NEXT:   renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.b, align 4)
300  ; CHECK-NEXT:   renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.a, align 4)
301  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
302  ; CHECK-NEXT:   renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
303  ; CHECK-NEXT:   renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, $noreg, undef renamable $q1
304  ; CHECK-NEXT:   renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg, $noreg
305  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
306  ; CHECK-NEXT:   renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.addr.c, align 4)
307  ; CHECK-NEXT:   dead $lr = t2LEUpdate killed renamable $lr, %bb.2
308  ; CHECK-NEXT: {{  $}}
309  ; CHECK-NEXT: bb.3.exit:
310  ; CHECK-NEXT:   liveins: $r4
311  ; CHECK-NEXT: {{  $}}
312  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def dead $r7, def $pc
  ; The pass input starts here; the comment lines above it in this body are the
  ; autogenerated expectations for the pass output.
313  bb.0.entry:
314    successors: %bb.1(0x80000000)
315    liveins: $r0, $r1, $r2, $r3, $r7, $lr
316
  ; Save $r7 and $lr, then return early through the predicated pop when
  ; $r3 is less than 1 (signed).
317    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
318    frame-setup CFI_INSTRUCTION def_cfa_offset 8
319    frame-setup CFI_INSTRUCTION offset $lr, -4
320    frame-setup CFI_INSTRUCTION offset $r7, -8
321    tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
322    t2IT 11, 8, implicit-def $itstate
323    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
324
325  bb.1.loop.ph:
326    successors: %bb.2(0x80000000)
327    liveins: $r0, $r1, $r2, $r3
328
  ; Load the iteration count from %fixed-stack.0, pass it through the
  ; loop-start pseudo, and copy it into $r12 for the loop body.
329    renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8)
330    renamable $lr = t2DoLoopStart killed renamable $lr
331    $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
332
333  bb.2.loop.body:
334    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
335    liveins: $r0, $r1, $r2, $r3, $r12
336
  ; $lr is re-seeded from $r12 on every iteration. $r3 is the remaining element
  ; count: it feeds MVE_VCTP32 and is reduced by 4 below.
337    $lr = tMOVr $r12, 14 /* CC::al */, $noreg
338    renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg, $noreg
339    MVE_VPST 4, implicit $vpr
340    renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.b, align 4)
341    renamable $r0, renamable $q1 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.a, align 4)
342    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
343    renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
  ; The VCLZ and VQSHRUN below carry no predicate operand ($noreg); only the
  ; loads above and the store at the end sit in VPT blocks.
344    renamable $q1 = MVE_VCLZs16 killed renamable $q1, 0, $noreg, $noreg, undef renamable $q1
345    renamable $lr = t2LoopDec killed renamable $lr, 1
346    renamable $q1 = MVE_VQSHRUNs32th killed renamable $q1, killed renamable $q0, 3, 0, $noreg, $noreg
347    MVE_VPST 8, implicit $vpr
348    renamable $r2 = MVE_VSTRWU32_post killed renamable $q1, killed renamable $r2, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.addr.c, align 4)
349    t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
350    tB %bb.3, 14 /* CC::al */, $noreg
351
352  bb.3.exit:
353    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
354
355...
356---
357name:            test_ctlz_i32
# Machine function fed to the arm-low-overhead-loops pass (see the llc command
# at the top of this file). The autogenerated expectations in the body keep
# MVE_VCTP32 and both MVE_VPST instructions and close the loop with
# t2LEUpdate; the t2DoLoopStart and t2LoopDec pseudos of the input are absent
# from the expected output.
# NOTE(review): presumably the loop is left un-tail-predicated because VCLZ on
# lanes zeroed by predication yields non-zero values - confirm against the pass.
358alignment:       2
359tracksRegLiveness: true
360registers:       []
361liveins:
362  - { reg: '$r0', virtual-reg: '' }
363  - { reg: '$r1', virtual-reg: '' }
364  - { reg: '$r2', virtual-reg: '' }
365  - { reg: '$r3', virtual-reg: '' }
366frameInfo:
367  stackSize:       8
368  offsetAdjustment: 0
369  maxAlignment:    4
370fixedStack:
371  - { id: 0, type: default, offset: 0, size: 4, alignment: 8, stack-id: default,
372      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
373      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
374stack:
375  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
376      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
377      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
# NOTE(review): the next slot names $r4, but the prologue in the body pushes
# $r7 and its CFI refers to $r7. The autogenerated expectations list $r4 as a
# live-in, which may be related - confirm before editing either side.
378  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
379      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
380      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
381callSites:       []
382constants:       []
383machineFunctionInfo: {}
384body:             |
385  ; CHECK-LABEL: name: test_ctlz_i32
386  ; CHECK: bb.0.entry:
387  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
388  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r2, $r3, $r4, $r7
389  ; CHECK-NEXT: {{  $}}
390  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
391  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
392  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
393  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r7, -8
394  ; CHECK-NEXT:   tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
395  ; CHECK-NEXT:   t2IT 11, 8, implicit-def $itstate
396  ; CHECK-NEXT:   frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def dead $r7, def $pc, implicit killed $itstate
397  ; CHECK-NEXT: {{  $}}
398  ; CHECK-NEXT: bb.1.loop.ph:
399  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
400  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r3, $r4
401  ; CHECK-NEXT: {{  $}}
402  ; CHECK-NEXT:   renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8)
403  ; CHECK-NEXT:   $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
404  ; CHECK-NEXT: {{  $}}
405  ; CHECK-NEXT: bb.2.loop.body:
406  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
407  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r3, $r4, $r12
408  ; CHECK-NEXT: {{  $}}
409  ; CHECK-NEXT:   $lr = tMOVr $r12, 14 /* CC::al */, $noreg
410  ; CHECK-NEXT:   renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg, $noreg
411  ; CHECK-NEXT:   MVE_VPST 4, implicit $vpr
412  ; CHECK-NEXT:   renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.a, align 4)
413  ; CHECK-NEXT:   renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.b, align 4)
414  ; CHECK-NEXT:   renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
415  ; CHECK-NEXT:   renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
416  ; CHECK-NEXT:   renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, $noreg, undef renamable $q1
417  ; CHECK-NEXT:   renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg, $noreg
418  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
419  ; CHECK-NEXT:   renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.addr.c, align 4)
420  ; CHECK-NEXT:   dead $lr = t2LEUpdate killed renamable $lr, %bb.2
421  ; CHECK-NEXT: {{  $}}
422  ; CHECK-NEXT: bb.3.exit:
423  ; CHECK-NEXT:   liveins: $r4
424  ; CHECK-NEXT: {{  $}}
425  ; CHECK-NEXT:   frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def dead $r7, def $pc
  ; The pass input starts here; the comment lines above it in this body are the
  ; autogenerated expectations for the pass output.
426  bb.0.entry:
427    successors: %bb.1(0x80000000)
428    liveins: $r0, $r1, $r2, $r3, $r7, $lr
429
  ; Save $r7 and $lr, then return early through the predicated pop when
  ; $r3 is less than 1 (signed).
430    frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp
431    frame-setup CFI_INSTRUCTION def_cfa_offset 8
432    frame-setup CFI_INSTRUCTION offset $lr, -4
433    frame-setup CFI_INSTRUCTION offset $r7, -8
434    tCMPi8 renamable $r3, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
435    t2IT 11, 8, implicit-def $itstate
436    frame-destroy tPOP_RET 11 /* CC::lt */, killed $cpsr, def $r7, def $pc, implicit killed $itstate
437
438  bb.1.loop.ph:
439    successors: %bb.2(0x80000000)
440    liveins: $r0, $r1, $r2, $r3
441
  ; Load the iteration count from %fixed-stack.0, pass it through the
  ; loop-start pseudo, and copy it into $r12 for the loop body.
442    renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %fixed-stack.0, align 8)
443    renamable $lr = t2DoLoopStart killed renamable $lr
444    $r12 = tMOVr killed $lr, 14 /* CC::al */, $noreg
445
446  bb.2.loop.body:
447    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
448    liveins: $r0, $r1, $r2, $r3, $r12
449
  ; $lr is re-seeded from $r12 on every iteration. $r3 is the remaining element
  ; count: it feeds MVE_VCTP32 and is reduced by 4 below.
450    $lr = tMOVr $r12, 14 /* CC::al */, $noreg
451    renamable $vpr = MVE_VCTP32 renamable $r3, 0, $noreg, $noreg
452    MVE_VPST 4, implicit $vpr
453    renamable $r0, renamable $q0 = MVE_VLDRWU32_post killed renamable $r0, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.a, align 4)
454    renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.addr.b, align 4)
455    renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 4, 14 /* CC::al */, $noreg
456    renamable $r12 = t2SUBri killed $r12, 1, 14 /* CC::al */, $noreg, $noreg
  ; Here VCLZ runs on the data loaded from %ir.addr.b ($q1) and its result is
  ; the second VQSHRUN source. Neither instruction carries a predicate
  ; operand ($noreg).
457    renamable $q1 = MVE_VCLZs32 killed renamable $q1, 0, $noreg, $noreg, undef renamable $q1
458    renamable $lr = t2LoopDec killed renamable $lr, 1
459    renamable $q0 = MVE_VQSHRUNs32th killed renamable $q0, killed renamable $q1, 3, 0, $noreg, $noreg
460    MVE_VPST 8, implicit $vpr
461    renamable $r2 = MVE_VSTRWU32_post killed renamable $q0, killed renamable $r2, 16, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.addr.c, align 4)
462    t2LoopEnd killed renamable $lr, %bb.2, implicit-def dead $cpsr
463    tB %bb.3, 14 /* CC::al */, $noreg
464
465  bb.3.exit:
466    frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
467
468...
469