xref: /llvm-project/llvm/test/CodeGen/AMDGPU/split-liverange-overlapping-copies.mir (revision e7900e695e7dfb36be8651d914a31f42a5d6c634)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
3
4# Check that liverange splitting does not create copies that overlap within a bundle.
5# By overlap, we mean that they write to the same subregisters.
6# e.g. the following bundle is desirable
7#     %0.sub1_sub2 = COPY ... {
8#       %0.sub3 = COPY ...
9#     }
10# but the following bundle is undesirable: both copies write sub2, and such an overlap
11# can make VirtRegRewriter fail due to cycles in the copy bundle.
12#     %0.sub1_sub2 = COPY ... {
13#       %0.sub2_sub3 = COPY ...
14#     }
15---
# Test case: %2 (vreg_1024_align2) is a full copy of %1 made in bb.0. bb.2
# redefines only %2.sub0, and bb.3 then reads all of %2.
# Expected output (autogenerated expectations below): the subregisters of the
# split register that bb.2 does not redefine are filled by a single bundle of
# three COPYs covering sub1..sub16, sub17..sub28 (spelled in lo16/hi16 halves)
# and sub29_sub30_sub31, so no two copies in the bundle write the same
# subregister.
# NOTE(review): the "_31" suffix presumably refers to the 31 copied
# subregisters (sub1..sub31) -- confirm.
16name: split_liverange_copy_overlap_31
17tracksRegLiveness: true
18machineFunctionInfo:
19  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
20  stackPtrOffsetReg: '$sgpr32'
21  occupancy:       7
# The body holds the autogenerated FileCheck expectations first, followed by
# the input MIR (bb.0 .. bb.5) that the RUN line feeds to -run-pass=greedy.
22body:             |
23  ; CHECK-LABEL: name: split_liverange_copy_overlap_31
24  ; CHECK: bb.0:
25  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
26  ; CHECK-NEXT: {{  $}}
27  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
28  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF
29  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:av_1024_align2 = COPY [[DEF1]]
30  ; CHECK-NEXT: {{  $}}
31  ; CHECK-NEXT: bb.1:
32  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
33  ; CHECK-NEXT: {{  $}}
34  ; CHECK-NEXT:   dead [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
35  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
36  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
37  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF
38  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
39  ; CHECK-NEXT: {{  $}}
40  ; CHECK-NEXT: bb.2:
41  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
42  ; CHECK-NEXT: {{  $}}
43  ; CHECK-NEXT:   undef [[COPY1:%[0-9]+]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 {
44  ; CHECK-NEXT:     internal [[COPY1]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16
45  ; CHECK-NEXT:     internal [[COPY1]].sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub29_sub30_sub31
46  ; CHECK-NEXT:   }
47  ; CHECK-NEXT:   [[COPY1:%[0-9]+]].sub0:av_1024_align2 = IMPLICIT_DEF
48  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY1]].sub0
49  ; CHECK-NEXT: {{  $}}
50  ; CHECK-NEXT: bb.3:
51  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
52  ; CHECK-NEXT: {{  $}}
53  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY1]]
54  ; CHECK-NEXT: {{  $}}
55  ; CHECK-NEXT: bb.4:
56  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
57  ; CHECK-NEXT: {{  $}}
58  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:av_1024_align2 = IMPLICIT_DEF
59  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
60  ; CHECK-NEXT: {{  $}}
61  ; CHECK-NEXT: bb.5:
62  ; CHECK-NEXT:   undef [[COPY2:%[0-9]+]].sub0:vreg_1024_align2 = COPY [[DEF]]
63  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY2]]
64  bb.0:
65    %0:vgpr_32 = IMPLICIT_DEF
66    %1:vreg_1024_align2 = IMPLICIT_DEF
67    %2:vreg_1024_align2 = COPY %1
68
69  bb.1:
70    %5:vreg_64 = IMPLICIT_DEF
71    S_NOP 0, implicit %1
72    S_NOP 0, implicit %1
73    %1:vreg_1024_align2 = IMPLICIT_DEF
74    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
75
76  bb.2:
77    %2.sub0:vreg_1024_align2 = IMPLICIT_DEF
78    S_NOP 0, implicit %2.sub0
79
80  bb.3:
81    S_NOP 0, implicit %2
82
83  bb.4:
84    %2:vreg_1024_align2 = IMPLICIT_DEF
85    S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
86
87  bb.5:
88    undef %4.sub0:vreg_1024_align2 = COPY %0
89    S_NOP 0, implicit %4
90...
91---
# Test case: %2 (vreg_1024, i.e. the class without the _align2 suffix) is a
# full copy of %1 made in bb.0. bb.2 redefines both %2.sub0 and %2.sub31, and
# bb.3 then reads all of %2.
# Expected output (autogenerated expectations below): only sub1..sub30 need to
# be carried over, and they are filled by a single bundle of three COPYs
# covering sub1..sub16, sub17..sub28 (spelled in lo16/hi16 halves) and
# sub29_sub30, so no two copies in the bundle write the same subregister.
# NOTE(review): the "_30" suffix presumably refers to the 30 copied
# subregisters (sub1..sub30) -- confirm.
92name: split_liverange_copy_overlap_30
93tracksRegLiveness: true
94machineFunctionInfo:
95  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
96  stackPtrOffsetReg: '$sgpr32'
97  occupancy:       7
# The body holds the autogenerated FileCheck expectations first, followed by
# the input MIR (bb.0 .. bb.5) that the RUN line feeds to -run-pass=greedy.
98body:             |
99  ; CHECK-LABEL: name: split_liverange_copy_overlap_30
100  ; CHECK: bb.0:
101  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
102  ; CHECK-NEXT: {{  $}}
103  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
104  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
105  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:av_1024 = COPY [[DEF1]]
106  ; CHECK-NEXT: {{  $}}
107  ; CHECK-NEXT: bb.1:
108  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
109  ; CHECK-NEXT: {{  $}}
110  ; CHECK-NEXT:   dead [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
111  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
112  ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
113  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
114  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
115  ; CHECK-NEXT: {{  $}}
116  ; CHECK-NEXT: bb.2:
117  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
118  ; CHECK-NEXT: {{  $}}
119  ; CHECK-NEXT:   undef [[COPY1:%[0-9]+]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 {
120  ; CHECK-NEXT:     internal [[COPY1]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16
121  ; CHECK-NEXT:     internal [[COPY1]].sub29_sub30:av_1024 = COPY [[COPY]].sub29_sub30
122  ; CHECK-NEXT:   }
123  ; CHECK-NEXT:   [[COPY1:%[0-9]+]].sub0:av_1024 = IMPLICIT_DEF
124  ; CHECK-NEXT:   [[COPY1:%[0-9]+]].sub31:av_1024 = IMPLICIT_DEF
125  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY1]].sub0, implicit [[COPY1]].sub31
126  ; CHECK-NEXT: {{  $}}
127  ; CHECK-NEXT: bb.3:
128  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
129  ; CHECK-NEXT: {{  $}}
130  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY1]]
131  ; CHECK-NEXT: {{  $}}
132  ; CHECK-NEXT: bb.4:
133  ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
134  ; CHECK-NEXT: {{  $}}
135  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:av_1024 = IMPLICIT_DEF
136  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
137  ; CHECK-NEXT: {{  $}}
138  ; CHECK-NEXT: bb.5:
139  ; CHECK-NEXT:   undef [[COPY2:%[0-9]+]].sub0:vreg_1024 = COPY [[DEF]]
140  ; CHECK-NEXT:   S_NOP 0, implicit [[COPY2]]
141  bb.0:
142    %0:vgpr_32 = IMPLICIT_DEF
143    %1:vreg_1024 = IMPLICIT_DEF
144    %2:vreg_1024 = COPY %1
145
146  bb.1:
147    %5:vreg_64 = IMPLICIT_DEF
148    S_NOP 0, implicit %1
149    S_NOP 0, implicit %1
150    %1:vreg_1024 = IMPLICIT_DEF
151    S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
152
153  bb.2:
154    %2.sub0:vreg_1024 = IMPLICIT_DEF
155    %2.sub31:vreg_1024 = IMPLICIT_DEF
156    S_NOP 0, implicit %2.sub0, implicit %2.sub31
157
158  bb.3:
159    S_NOP 0, implicit %2
160
161  bb.4:
162    %2:vreg_1024 = IMPLICIT_DEF
163    S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
164
165  bb.5:
166    undef %4.sub0:vreg_1024 = COPY %0
167    S_NOP 0, implicit %4
168...
169