xref: /llvm-project/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vctp-in-vpt-2.mir (revision 59c6bd156cc8b42758ce90909615748e21c6eee2)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops -verify-machineinstrs %s -o - | FileCheck %s
3
4--- |
5  define dso_local void @test(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 %arg2, i16 zeroext %mask) local_unnamed_addr #0 {
  ; IR that the MIR memory operands (%ir.lsr.iv24, %ir.lsr.iv1) refer to.
  ; Per loop iteration: load <4 x i32> through the %arg1 pointer, use its
  ; non-zero lanes to predicate a load through the %arg pointer, multiply the
  ; two vectors and store the product back through the %arg pointer.
6  bb:
    ; %tmp guards the loop: the final branch of this block goes straight to
    ; bb27 when %arg2 == 0.
7    %tmp = icmp eq i32 %arg2, 0
    ; %tmp1..%tmp6 compute the value passed to start.loop.iterations in bb3:
    ;   (((((%arg2 + 3) >> 2) << 2) - 4) >> 2) + 1
    ; which is ceil(%arg2 / 4) as long as none of the steps wraps.
8    %tmp1 = add i32 %arg2, 3
9    %tmp2 = lshr i32 %tmp1, 2
10    %tmp3 = shl nuw i32 %tmp2, 2
11    %tmp4 = add i32 %tmp3, -4
12    %tmp5 = lshr i32 %tmp4, 2
13    %tmp6 = add nuw nsw i32 %tmp5, 1
    ; Loop-invariant predicate built once from the i16 %mask argument.
14    %conv.mask = zext i16 %mask to i32
15    %invariant.mask = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %conv.mask)
16    br i1 %tmp, label %bb27, label %bb3
17
18  bb3:                                              ; preds = %bb
19    %start = call i32 @llvm.start.loop.iterations.i32(i32 %tmp6)
20    br label %bb9
21
22  bb9:                                              ; preds = %bb9, %bb3
    ; %lsr.iv2 / %lsr.iv walk from %arg1 / %arg, advancing 4 i32 elements per
    ; iteration. %tmp7 is the loop counter seeded by %start; %tmp8 starts at
    ; %arg2 and drops by 4 per iteration.
23    %lsr.iv2 = phi ptr [ %scevgep3, %bb9 ], [ %arg1, %bb3 ]
24    %lsr.iv = phi ptr [ %scevgep, %bb9 ], [ %arg, %bb3 ]
25    %tmp7 = phi i32 [ %start, %bb3 ], [ %tmp12, %bb9 ]
26    %tmp8 = phi i32 [ %arg2, %bb3 ], [ %tmp11, %bb9 ]
27    %lsr.iv24 = bitcast ptr %lsr.iv2 to ptr
28    %lsr.iv1 = bitcast ptr %lsr.iv to ptr
    ; The vctp32 predicate is derived from %tmp8. Only the first load combines
    ; it with %invariant.mask (%and).
29    %vctp = call <4 x i1> @llvm.arm.mve.vctp32(i32 %tmp8)
30    %and = and <4 x i1> %vctp, %invariant.mask
31    %tmp11 = sub i32 %tmp8, 4
32    %tmp17 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv24, i32 4, <4 x i1> %and, <4 x i32> undef)
    ; Second predicate: lanes of the first load that are non-zero, and'ed with
    ; the plain vctp32 result (not with %invariant.mask). It governs both the
    ; second load and the store.
33    %tmp18 = icmp ne <4 x i32> %tmp17, zeroinitializer
34    %tmp20 = and <4 x i1> %tmp18, %vctp
35    %tmp22 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %lsr.iv1, i32 4, <4 x i1> %tmp20, <4 x i32> undef)
36    %tmp23 = mul nsw <4 x i32> %tmp22, %tmp17
37    call void @llvm.masked.store.v4i32.p0(<4 x i32> %tmp23, ptr %lsr.iv1, i32 4, <4 x i1> %tmp20)
    ; Decrement the loop counter by 1 and keep looping while it is non-zero.
38    %tmp12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %tmp7, i32 1)
39    %tmp13 = icmp ne i32 %tmp12, 0
40    %scevgep = getelementptr i32, ptr %lsr.iv, i32 4
41    %scevgep3 = getelementptr i32, ptr %lsr.iv2, i32 4
42    br i1 %tmp13, label %bb9, label %bb27
43
44  bb27:                                             ; preds = %bb9, %bb
45    ret void
46  }
  ; Intrinsics called by @test above.
47  declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>)
48  declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
49  declare i32 @llvm.start.loop.iterations.i32(i32)
50  declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
51  declare <4 x i1> @llvm.arm.mve.vctp32(i32)
52  declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
53
54...
55---
56name:            test
# Machine-function properties for the MIR body of 'test'. Register liveness is
# tracked, and $r0-$r3 are listed as live on entry.
57alignment:       2
58exposesReturnsTwice: false
59legalized:       false
60regBankSelected: false
61selected:        false
62failedISel:      false
63tracksRegLiveness: true
64hasWinCFI:       false
65registers:       []
66liveins:
67  - { reg: '$r0', virtual-reg: '' }
68  - { reg: '$r1', virtual-reg: '' }
69  - { reg: '$r2', virtual-reg: '' }
70  - { reg: '$r3', virtual-reg: '' }
71frameInfo:
72  isFrameAddressTaken: false
73  isReturnAddressTaken: false
74  hasStackMap:     false
75  hasPatchPoint:   false
  # 12 bytes = the three 4-byte spill slots listed under 'stack' below.
76  stackSize:       12
77  offsetAdjustment: -4
78  maxAlignment:    4
79  adjustsStack:    false
80  hasCalls:        false
81  stackProtector:  ''
82  maxCallFrameSize: 0
83  cvBytesOfCalleeSavedRegisters: 0
84  hasOpaqueSPAdjustment: false
85  hasVAStart:      false
86  hasMustTailInVarArgFunc: false
87  localFrameSize:  0
88  savePoint:       ''
89  restorePoint:    ''
90fixedStack:      []
91stack:
  # Slot 0 is the %stack.0 that the body's VSTR_P0_off / VLDR_P0_off memory
  # operands name. Slots 1 and 2 are recorded as the callee-saved slots for
  # $lr and $r7 respectively.
92  - { id: 0, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
93      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
94      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
95  - { id: 1, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
96      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
97      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
98  - { id: 2, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
99      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
100      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
101callSites:       []
102constants:       []
103machineFunctionInfo: {}
104body:             |
  ; The autogenerated assertions on the pass output come first; the input MIR
  ; is the bb.0.bb .. bb.3.bb27 sequence that follows them. Compared with that
  ; input, the expected output:
  ;   - bb.0: marks the $r7 definition as dead;
  ;   - bb.1: no longer has the t2ADDri / t2MOVi / t2BICri / t2SUBri / t2ADDrs
  ;     sequence, and has MVE_DLSTP_32 on $r2 where the input has
  ;     t2DoLoopStart on $lr;
  ;   - bb.2: has no MVE_VCTP32 and no tSUBi8 on $r2, and $r2 is absent from
  ;     the block's liveins;
  ;   - bb.2: the first VPST has mask 8 instead of 4, and a single
  ;     MVE_VPTv4i32r stands where the input has VPST + MVE_VCMPi32r;
  ;   - bb.2: ends with MVE_LETP instead of t2LoopDec / t2LoopEnd / tB.
105  ; CHECK-LABEL: name: test
106  ; CHECK: bb.0.bb:
107  ; CHECK-NEXT:   successors: %bb.3(0x30000000), %bb.1(0x50000000)
108  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r2, $r3
109  ; CHECK-NEXT: {{  $}}
110  ; CHECK-NEXT:   frame-setup tPUSH 14 /* CC::al */, $noreg, killed $lr, implicit-def $sp, implicit $sp
111  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
112  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $lr, -4
113  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $r7, -8
114  ; CHECK-NEXT:   dead $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg
115  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_register $r7
116  ; CHECK-NEXT:   $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
117  ; CHECK-NEXT:   tCBZ $r2, %bb.3
118  ; CHECK-NEXT: {{  $}}
119  ; CHECK-NEXT: bb.1.bb3:
120  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
121  ; CHECK-NEXT:   liveins: $r0, $r1, $r2, $r3
122  ; CHECK-NEXT: {{  $}}
123  ; CHECK-NEXT:   $vpr = VMSR_P0 killed $r3, 14 /* CC::al */, $noreg
124  ; CHECK-NEXT:   VSTR_P0_off killed renamable $vpr, $sp, 0, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0)
125  ; CHECK-NEXT:   $r3 = tMOVr $r0, 14 /* CC::al */, $noreg
126  ; CHECK-NEXT:   $lr = MVE_DLSTP_32 killed renamable $r2
127  ; CHECK-NEXT: {{  $}}
128  ; CHECK-NEXT: bb.2.bb9:
129  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
130  ; CHECK-NEXT:   liveins: $lr, $r0, $r1, $r3
131  ; CHECK-NEXT: {{  $}}
132  ; CHECK-NEXT:   renamable $vpr = VLDR_P0_off $sp, 0, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0)
133  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
134  ; CHECK-NEXT:   renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr, $noreg :: (load (s128) from %ir.lsr.iv24, align 4)
135  ; CHECK-NEXT:   MVE_VPTv4i32r 8, renamable $q0, $zr, 1, implicit-def $vpr
136  ; CHECK-NEXT:   renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.lsr.iv1, align 4)
137  ; CHECK-NEXT:   renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, $noreg, undef renamable $q0
138  ; CHECK-NEXT:   MVE_VPST 8, implicit $vpr
139  ; CHECK-NEXT:   MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.lsr.iv1, align 4)
140  ; CHECK-NEXT:   $r0 = tMOVr $r3, 14 /* CC::al */, $noreg
141  ; CHECK-NEXT:   $lr = MVE_LETP killed renamable $lr, %bb.2
142  ; CHECK-NEXT: {{  $}}
143  ; CHECK-NEXT: bb.3.bb27:
144  ; CHECK-NEXT:   $sp = tADDspi $sp, 1, 14 /* CC::al */, $noreg
145  ; CHECK-NEXT:   tPOP_RET 14 /* CC::al */, $noreg, def $r7, def $pc
146  bb.0.bb:
147    successors: %bb.3(0x30000000), %bb.1(0x50000000)
148    liveins: $r0, $r1, $r2, $r3, $lr
149
    ; Input entry block: push $lr, emit the CFI records, copy $sp into $r7 and
    ; adjust $sp with tSUBspi. The closing tCBZ on $r2 targets bb.3;
    ; presumably $r2 holds the IR's %arg2, making this the zero-count guard
    ; (confirm against the calling convention).
    ; NOTE(review): only $lr is pushed here, while the CFI records an $r7 slot
    ; at -8 and bb.3 pops $r7 as well as $pc. The assertions show the same
    ; thing, so this looks intentional for the test.
150    frame-setup tPUSH 14, $noreg, killed $lr, implicit-def $sp, implicit $sp
151    frame-setup CFI_INSTRUCTION def_cfa_offset 8
152    frame-setup CFI_INSTRUCTION offset $lr, -4
153    frame-setup CFI_INSTRUCTION offset $r7, -8
154    $r7 = frame-setup tMOVr $sp, 14, $noreg
155    frame-setup CFI_INSTRUCTION def_cfa_register $r7
156    $sp = frame-setup tSUBspi $sp, 1, 14, $noreg
157    tCBZ $r2, %bb.3
158
159  bb.1.bb3:
160    successors: %bb.2(0x80000000)
161    liveins: $r0, $r1, $r2, $r3
162
    ; Input preheader. Two interleaved jobs:
    ;   1. Build the value given to t2DoLoopStart in $lr from $r2, using
    ;      $r12 as scratch: add 3, t2BICri with 3, subtract 4, then t2ADDrs
    ;      combines it with the constant 1 held in $lr (operand 19 presumably
    ;      encodes the shift matching the IR's 'lshr ..., 2' - TODO confirm).
    ;   2. Move $r3 into $vpr with VMSR_P0 and store it to %stack.0, then copy
    ;      $r0 into the now-free $r3.
163    renamable $r12 = t2ADDri renamable $r2, 3, 14, $noreg, $noreg
164    renamable $lr = t2MOVi 1, 14, $noreg, $noreg
165    renamable $r12 = t2BICri killed renamable $r12, 3, 14, $noreg, $noreg
166    $vpr = VMSR_P0 killed $r3, 14, $noreg
167    renamable $r12 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg
168    VSTR_P0_off killed renamable $vpr, $sp, 0, 14, $noreg :: (store (s32) into %stack.0)
169    $r3 = tMOVr $r0, 14, $noreg
170    renamable $lr = nuw nsw t2ADDrs killed renamable $lr, killed renamable $r12, 19, 14, $noreg, $noreg
171    $lr = t2DoLoopStart renamable $lr
172
173  bb.2.bb9:
174    successors: %bb.2(0x7c000000), %bb.3(0x04000000)
175    liveins: $lr, $r0, $r1, $r2, $r3
176
    ; Input loop body. $vpr is reloaded from %stack.0 each iteration, and the
    ; first MVE_VCTP32 on $r2 takes that $vpr as its predicate operand inside
    ; a VPST block. The resulting predicate governs the post-incrementing load
    ; through $r1 (memory operand %ir.lsr.iv24).
177    renamable $vpr = VLDR_P0_off $sp, 0, 14, $noreg :: (load (s32) from %stack.0)
178    MVE_VPST 4, implicit $vpr
179    renamable $vpr = MVE_VCTP32 renamable $r2, 1, killed renamable $vpr, $noreg
180    renamable $r1, renamable $q0 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, killed renamable $vpr, $noreg :: (load (s128) from %ir.lsr.iv24, align 4)
    ; Second MVE_VCTP32 on the same $r2 value, this time with $noreg in the
    ; predicate position; $r2 is reduced by 4 only afterwards.
181    renamable $vpr = MVE_VCTP32 renamable $r2, 0, $noreg, $noreg
182    renamable $r2, dead $cpsr = tSUBi8 killed renamable $r2, 4, 14, $noreg
    ; Compare $q0 against $zr under that predicate. The new $vpr governs the
    ; post-incrementing load through $r3 and, after the multiply, the store
    ; through $r0 (both carry memory operand %ir.lsr.iv1).
183    MVE_VPST 4, implicit $vpr
184    renamable $vpr = MVE_VCMPi32r renamable $q0, $zr, 1, 1, killed renamable $vpr, $noreg
185    renamable $r3, renamable $q1 = MVE_VLDRWU32_post killed renamable $r3, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.lsr.iv1, align 4)
186    renamable $q0 = nsw MVE_VMULi32 killed renamable $q1, killed renamable $q0, 0, $noreg, $noreg, undef renamable $q0
187    MVE_VPST 8, implicit $vpr
188    MVE_VSTRWU32 killed renamable $q0, killed renamable $r0, 0, 1, killed renamable $vpr, $noreg :: (store (s128) into %ir.lsr.iv1, align 4)
    ; Loop control: decrement $lr by 1, refresh $r0 from the advanced $r3,
    ; branch back to bb.2 via t2LoopEnd, otherwise fall to the tB to bb.3.
189    renamable $lr = t2LoopDec killed renamable $lr, 1
190    $r0 = tMOVr $r3, 14, $noreg
191    t2LoopEnd renamable $lr, %bb.2, implicit-def dead $cpsr
192    tB %bb.3, 14, $noreg
193
194  bb.3.bb27:
    ; Exit block, reached from the tCBZ in bb.0 and from the loop in bb.2:
    ; tADDspi mirrors the tSUBspi of bb.0, then tPOP_RET defines $r7 and $pc.
195    $sp = tADDspi $sp, 1, 14, $noreg
196    tPOP_RET 14, $noreg, def $r7, def $pc
197
198...
199