xref: /llvm-project/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (revision 7b4c8b35d43c0a17f222722487d7a2b4ceee0a26)
1# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
2# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
3# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
4
5---
6
# vop3: folding a V_MOV_B32_dpp into VOP3 (_e64) users.
#  - V_SUBBREV_U32_e64 (which also defines a carry-out register) and
#    V_CVT_PK_U8_F32_e64 are expected to become their _e64_dpp forms under
#    every run line.
#  - V_MED3_F32_e64 with the literal 12345678 in src2 is expected to stay as is.
#  - V_MED3_F32_e64 with the immediate 2 in src1 is expected to stay as is under
#    the GFX1100 prefix and to be combined under the GFX1150 prefix.
7# GCN-LABEL: name: vop3
8# GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
9# GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
10# GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
11# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
12# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
13name: vop3
14tracksRegLiveness: true
15body:             |
16  bb.0:
17    liveins: $vgpr0, $vgpr1, $vgpr2
18
19    %0:vgpr_32 = COPY $vgpr0
20    %1:vgpr_32 = COPY $vgpr1
21    %2:vgpr_32 = COPY $vgpr2
22    %3:vgpr_32 = IMPLICIT_DEF
23    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
24
25    %5:sreg_32_xm0_xexec = IMPLICIT_DEF
26    %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
27
28    %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
29
30    ; should not be combined because src2 literal is illegal
31    %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
32    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
33
34    ; should not be combined on subtargets where src1 imm is illegal
35    %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
36    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
37...
38---
39
# vop3_sgpr_src1: V_MED3_F32_e64 users of a DPP mov with SGPR, small-constant
# and literal operands in src1/src2. Per the checks below:
#  - an SGPR in src2 only is combined under every run line;
#  - an SGPR in src1 (with a VGPR or another SGPR in src2) and the constant 42
#    in src1 are combined only under the GFX1150 prefix;
#  - the literal 4242 in src1 is never combined.
40# GCN-LABEL: name: vop3_sgpr_src1
41# GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
42# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
43# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
44# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
45# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
46# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
47# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
48# GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
49name: vop3_sgpr_src1
50tracksRegLiveness: true
51body:             |
52  bb.0:
53    liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
54
55    %0:vgpr_32 = COPY $vgpr0
56    %1:vgpr_32 = COPY $vgpr1
57    %2:sgpr_32 = COPY $sgpr0
58    %3:sgpr_32 = COPY $sgpr1
59    %4:vgpr_32 = IMPLICIT_DEF
60
61    ; should be combined because src2 allows sgpr
62    %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
63    %6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
64
65    ; should be combined only on subtargets that allow sgpr for src1
66    %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
67    %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
68
69    ; should be combined only on subtargets that allow sgpr for src1
70    %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
71    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
72
73    ; should be combined only on subtargets that allow inlinable constants for src1
74    %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
75    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
76
77    ; should not be combined when literal constants are used
78    %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
79    %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
80...
81---
82
# vop3_u16: three chained V_MOV_B32_dpp + V_ADD_NC_U16_fake16_e64 pairs whose
# two leading modifier immediates are 0/0, 1/2 and 4/8 respectively. The checks
# expect the first two pairs to be folded into the _e64_dpp form (modifiers
# preserved) and the 4/8 pair to be left uncombined.
83# Regression test for src_modifiers on base u16 opcode
84# GCN-LABEL: name: vop3_u16
85# GCN: %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
86# GCN: %7:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
87# GCN: %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
88name: vop3_u16
89tracksRegLiveness: true
90body:             |
91  bb.0:
92    liveins: $vgpr0, $vgpr1, $vgpr2
93
94    %0:vgpr_32 = COPY $vgpr0
95    %1:vgpr_32 = COPY $vgpr1
96    %2:vgpr_32 = COPY $vgpr2
97    %3:vgpr_32 = IMPLICIT_DEF
98    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
99    %5:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, %4, 0, %3, 0, 0, implicit $exec
100    %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
101    %7:vgpr_32 = V_ADD_NC_U16_fake16_e64 1, %6, 2, %5, 0, 0, implicit $exec
102    %8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
103    %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
104...
105
# vop3p: users of a DPP mov among V_DOT2_F32_F16, V_FMA_MIX_F32,
# V_FMA_MIXLO_F16 and V_FMA_MIXHI_F16. The first two V_DOT2_F32_F16 users are
# expected to stay uncombined (reasons are given by the in-body comments); the
# remaining four users are expected to be rewritten to their _dpp forms.
# Unlike the other tests in this file, the checks here live inside the body and
# use FileCheck variables instead of fixed vreg numbers.
106name: vop3p
107tracksRegLiveness: true
108body:             |
109  bb.0:
110    liveins: $vgpr0, $vgpr1, $vgpr2
111
112    ; GCN-LABEL: name: vop3p
113    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
114    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
115    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
116    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
117    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
118    ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
119    ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
120    ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
121    ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
122    ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 9, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
123    ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
124    ; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
125    ; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
126    %0:vgpr_32 = COPY $vgpr0
127    %1:vgpr_32 = COPY $vgpr1
128    %2:vgpr_32 = COPY $vgpr2
129    %3:vgpr_32 = IMPLICIT_DEF
130
131    ; this should not be combined because op_sel is not zero
132    %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
133    %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
134
135    ; this should not be combined because op_sel_hi is not all set
136    %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
137    %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
138
139    %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
140    %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 9, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
141
142    %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
143    %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
144
145    %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
146    %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
147
148    %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
149    %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
150
151...
152
# fmac_e64: a V_FMAC_F32_e64 whose first source is a DPP mov result is expected
# to be rewritten to V_FMAC_F32_e64_dpp, keeping the immediate operands of the
# original instruction. The body names the result %6 while the check expects
# %5 - presumably the vregs are renumbered when the MIR is read back in; confirm
# against the MIR parser documentation.
153# GCN-LABEL: name: fmac_e64
154# GCN: %5:vgpr_32 = V_FMAC_F32_e64_dpp %3, 2, %0, 2, %1, 2, %2, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
155name: fmac_e64
156tracksRegLiveness: true
157body: |
158  bb.0:
159    liveins: $vgpr0, $vgpr1, $vgpr2
160
161    %0:vgpr_32 = COPY $vgpr0
162    %1:vgpr_32 = COPY $vgpr1
163    %2:vgpr_32 = COPY $vgpr2
164    %3:vgpr_32 = IMPLICIT_DEF
165    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
166    %6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
167...
168
# dpp_commute_shrink: every _e64 user below takes the DPP mov result as its
# second source. The checks expect the sources to be swapped and the 32-bit
# _dpp opcode to be emitted (V_SUB_U32_e64 turns into V_SUBREV_U32_dpp), with
# %1 taking the place of the mov's old operand.
169# when the DPP source isn't a src0 operand the operation should be commuted if possible
170# GCN-LABEL: name: dpp_commute_shrink
171# GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
172# GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
173# GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
174# GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
175# GCN: %16:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
176name: dpp_commute_shrink
177tracksRegLiveness: true
178body:             |
179  bb.0:
180    liveins: $vgpr0, $vgpr1
181
182    %0:vgpr_32 = COPY $vgpr0
183    %1:vgpr_32 = COPY $vgpr1
184
185    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
186    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
187    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
188
189    %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
190    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
191    %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
192
193    %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
194    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
195    %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
196
197    %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
198    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
199    %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
200
201    %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
202    %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
203    %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
204
205...
206
# dpp_arg_twice: each V_FMA_F32_e64 below reads the same DPP mov result in two
# of its three source operands (src1+src2, src0+src2, src0+src1). The checks
# expect all three instructions to be left unchanged.
207# do not combine, dpp arg used twice
208# GCN-LABEL: name: dpp_arg_twice
209# GCN: %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
210# GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
211# GCN: %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
212name: dpp_arg_twice
213tracksRegLiveness: true
214body:             |
215  bb.0:
216    liveins: $vgpr0, $vgpr1
217
218    %0:vgpr_32 = COPY $vgpr0
219    %1:vgpr_32 = COPY $vgpr1
220    %2:vgpr_32 = IMPLICIT_DEF
221
222    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
223    %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
224
225    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
226    %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
227
228    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
229    %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
230
231...
232
# dpp_commute_e64: the DPP mov result is the second source of every user. The
# checks expect the first four users to be commuted into _e64_dpp forms
# (V_SUB_U32_e64 turns into V_SUBREV_U32_e64_dpp; V_FMA_F32_e64 swaps its first
# two sources together with their modifier immediates) and the last user, whose
# first source is the immediate 5, to stay uncombined.
233# when the dpp source isn't a src0 operand the operation should be commuted if possible
234# GCN-LABEL: name: dpp_commute_e64
235# GCN: %4:vgpr_32  = V_MUL_U32_U24_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
236# GCN: %7:vgpr_32 = V_FMA_F32_e64_dpp %5, 2, %0, 1, %1, 2, %1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
237# GCN: %10:vgpr_32 = V_SUBREV_U32_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
238# GCN: %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64_dpp %1, %0, %1, 0, 1, 14, 15, 0, implicit $exec
239# GCN: %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
240name: dpp_commute_e64
241tracksRegLiveness: true
242body:             |
243  bb.0:
244    liveins: $vgpr0, $vgpr1
245
246    %0:vgpr_32 = COPY $vgpr0
247    %1:vgpr_32 = COPY $vgpr1
248
249    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
250    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
251    %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
252
253    %5:vgpr_32 = IMPLICIT_DEF
254    %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
255    %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
256
257    %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
258    %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
259    %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
260
261    %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
262    %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
263    %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
264
265    ; this cannot be combined because immediate as src0 isn't commutable
266    %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
267    %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
268    %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
269...
270
271---
272
# add_f32_e64: four V_ADD_F32_e64 users that differ only in their modifier
# immediates. The checks expect the shrunk V_ADD_F32_dpp when the two trailing
# immediates are 0 and the source modifiers are 0/0 or 1/2, and the
# V_ADD_F32_e64_dpp form when the last immediate is 1 or the source modifiers
# are 4/8.
273# check for floating point modifiers
274# GCN-LABEL: name: add_f32_e64
275# GCN:   %4:vgpr_32 = V_ADD_F32_e64_dpp %2, 0, %1, 0, %0, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
276# GCN:   %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
277# GCN:   %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
278# GCN:   %10:vgpr_32 = V_ADD_F32_e64_dpp %2, 4, %1, 8, %0, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
279
280name: add_f32_e64
281tracksRegLiveness: true
282body:             |
283  bb.0:
284    liveins: $vgpr0, $vgpr1
285
286    %0:vgpr_32 = COPY $vgpr0
287    %1:vgpr_32 = COPY $vgpr1
288    %2:vgpr_32 = IMPLICIT_DEF
289
290    ; this should be combined as e64
291    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
292    %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
293
294    ; this should be combined and shrunk as all modifiers are default
295    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
296    %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
297
298    ; this should be combined and shrunk as modifiers other than abs|neg are default
299    %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
300    %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
301
302    ; this should be combined as e64
303    %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
304    %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
305...
306
# add_u32_e64: V_ADD_U32_e64 with a trailing immediate of 0 is expected to
# become the shrunk V_ADD_U32_dpp; with a trailing immediate of 1 it is
# expected to become V_ADD_U32_e64_dpp with that immediate preserved.
307# check for e64 modifiers
308# GCN-LABEL: name: add_u32_e64
309# GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
310# GCN: %6:vgpr_32 = V_ADD_U32_e64_dpp %2, %0, %1, 1, 1, 15, 15, 1, implicit $exec
311
312name: add_u32_e64
313tracksRegLiveness: true
314body:             |
315  bb.0:
316    liveins: $vgpr0, $vgpr1
317
318    %0:vgpr_32 = COPY $vgpr0
319    %1:vgpr_32 = COPY $vgpr1
320    %2:vgpr_32 = IMPLICIT_DEF
321
322    ; this should be combined and shrunk as all modifiers are default
323    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
324    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
325
326    ; this should be combined as _e64
327    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
328    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
329...
330
# dpp_seq: one DPP mov feeding several users. The first mov (%3) has three
# users (add, sub with the mov as second source, or); all are expected to be
# combined, the sub becoming V_SUBREV_U32_dpp. The second mov (%7) additionally
# feeds "V_SUB_U32_e32 5, %7"; the last check expects that mov to remain.
331# tests on sequences of dpp consumers
332# GCN-LABEL: name: dpp_seq
333# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0,  implicit $exec
334# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0,  implicit $exec
335# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
336# broken sequence:
337# GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
338
339name: dpp_seq
340tracksRegLiveness: true
341body: |
342  bb.0:
343    liveins: $vgpr0, $vgpr1
344    %0:vgpr_32 = COPY $vgpr0
345    %1:vgpr_32 = COPY $vgpr1
346    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
347
348    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
349    %4:vgpr_32 = V_ADD_U32_e32 %3, %1,  implicit $exec
350    %5:vgpr_32 = V_SUB_U32_e32 %1, %3,  implicit $exec
351    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
352
353    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
354    %8:vgpr_32 = V_ADD_U32_e32 %7, %1,  implicit $exec
355    ; this breaks the sequence
356    %9:vgpr_32 = V_SUB_U32_e32 5, %7,  implicit $exec
357...
358
# dpp_seq_cf: the same three-user sequence as the first half of dpp_seq, but
# followed in bb.0 by V_CMP_EQ_U32_e64, SI_IF and S_BRANCH, with bb.1/bb.2 as
# successors. All three users are still expected to be combined.
359# tests on sequences of dpp consumers followed by control flow
360# GCN-LABEL: name: dpp_seq_cf
361# GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
362# GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
363# GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
364
365name: dpp_seq_cf
366tracksRegLiveness: true
367body: |
368  bb.0:
369    successors: %bb.1, %bb.2
370    liveins: $vgpr0, $vgpr1
371    %0:vgpr_32 = COPY $vgpr0
372    %1:vgpr_32 = COPY $vgpr1
373    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
374
375    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
376    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
377    %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
378    %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
379
380    %7:sreg_32 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
381    %8:sreg_32 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
382    S_BRANCH %bb.1
383
384  bb.1:
385    successors: %bb.2
386
387  bb.2:
388    SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
389...
390
# old_in_diff_bb: the old operand of the DPP mov (%2, a V_MOV_B32_e32 0) is
# defined in bb.0 while the mov and its V_ADD_U32_e32 user live in bb.1. The
# check expects the pair to be combined into a V_ADD_U32_dpp whose old operand
# is %0.
391# GCN-LABEL: name: old_in_diff_bb
392# GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec
393
394name: old_in_diff_bb
395tracksRegLiveness: true
396body: |
397  bb.0:
398    successors: %bb.1
399    liveins: $vgpr0, $vgpr1
400
401    %0:vgpr_32 = COPY $vgpr0
402    %1:vgpr_32 = COPY $vgpr1
403    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
404    S_BRANCH %bb.1
405
406  bb.1:
407    %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
408    %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
409...
410
# old_in_diff_bb_bctrl_zero: same two-block layout as old_in_diff_bb, but the
# DPP mov ends in 15, 15, 1. The check accepts any single-digit vreg as the old
# operand of the resulting V_ADD_U32_dpp.
411# old reg def is in diff BB but bound_ctrl:1 - can combine
412# GCN-LABEL: name: old_in_diff_bb_bctrl_zero
413# GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec
414
415name: old_in_diff_bb_bctrl_zero
416tracksRegLiveness: true
417body: |
418  bb.0:
419    successors: %bb.1
420    liveins: $vgpr0, $vgpr1
421
422    %0:vgpr_32 = COPY $vgpr0
423    %1:vgpr_32 = COPY $vgpr1
424    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
425    S_BRANCH %bb.1
426
427  bb.1:
428    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
429    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
430...
431
# exec_changed: the DPP mov result %3 is read once before and once after a COPY
# that carries "implicit-def $exec". The check expects the V_MOV_B32_dpp to
# remain in the output.
432# EXEC mask changed between def and use - cannot combine
433# GCN-LABEL: name: exec_changed
434# GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
435
436name: exec_changed
437tracksRegLiveness: true
438body: |
439  bb.0:
440    liveins: $vgpr0, $vgpr1
441
442    %0:vgpr_32 = COPY $vgpr0
443    %1:vgpr_32 = COPY $vgpr1
444    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
445    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
446    %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
447    %5:sreg_64 = COPY $exec, implicit-def $exec
448    %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
449...
450
# mul_old_subreg: the old operand of the DPP mov is %5.sub0, where %5 is an
# INSERT_SUBREG (into sub1) over a REG_SEQUENCE whose sub0 comes from
# V_MOV_B32_e32 1. The check expects a V_MUL_I32_I24_dpp whose old operand is
# %0.sub1.
451# test if $old definition is correctly tracked through subreg manipulation pseudos
452
453# GCN-LABEL: name: mul_old_subreg
454# GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
455
456name:            mul_old_subreg
457tracksRegLiveness: true
458body: |
459  bb.0:
460    liveins: $vgpr0, $vgpr1
461
462    %0:vreg_64 = COPY $vgpr0
463    %1:vgpr_32 = COPY $vgpr1
464    %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
465    %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
466    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
467    %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
468    %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
469    %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
470...
471
# add_old_subreg: the old operand of the DPP mov is %3.sub1, the subregister
# that INSERT_SUBREG fills from V_MOV_B32_e32 0. The check expects a
# V_ADD_U32_dpp whose old operand is %0.sub1.
472# GCN-LABEL: name: add_old_subreg
473# GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
474
475name:            add_old_subreg
476tracksRegLiveness: true
477body: |
478  bb.0:
479    liveins: $vgpr0, $vgpr1
480
481    %0:vreg_64 = COPY $vgpr0
482    %1:vgpr_32 = COPY $vgpr1
483    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
484    %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
485    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
486    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
487...
488
# add_old_subreg_undef: the old operand of the DPP mov is %3.sub1, which the
# REG_SEQUENCE defining %3 never assigns (only sub0 is provided). The check
# expects a V_ADD_U32_dpp with "undef %3.sub1" as its old operand.
489# GCN-LABEL: name: add_old_subreg_undef
490# GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec
491
492name:            add_old_subreg_undef
493tracksRegLiveness: true
494body: |
495  bb.0:
496    liveins: $vgpr0, $vgpr1
497
498    %0:vreg_64 = COPY $vgpr0
499    %1:vgpr_32 = COPY $vgpr1
500    %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
501    %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
502    %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
503    %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
504...
505
# dpp_vop1: V_CEIL_F32_e32 fed by a DPP mov. The expected V_CEIL_F32_dpp has an
# extra 0 immediate in front of its source operand that the _e32 input form
# does not have.
506# Test instruction which does not have modifiers in VOP1 form but does in DPP form.
507# GCN-LABEL: name: dpp_vop1
508# GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
509name: dpp_vop1
510tracksRegLiveness: true
511body: |
512  bb.0:
513    %1:vgpr_32 = IMPLICIT_DEF
514    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
515    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
516...
517
# dpp_min: V_MIN_F32_e32 fed by a DPP mov. The expected V_MIN_F32_dpp has a 0
# immediate in front of each of its two source operands that the _e32 input
# form does not have.
518# Test instruction which does not have modifiers in VOP2 form but does in DPP form.
519# GCN-LABEL: name: dpp_min
520# GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
521name: dpp_min
522tracksRegLiveness: true
523body: |
524  bb.0:
525    %1:vgpr_32 = IMPLICIT_DEF
526    %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
527    %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
528...
529
# dpp_undef_old: both the old and the source operand of the DPP mov are undef.
# The check expects the pair to be combined into a V_CEIL_F32_dpp that keeps an
# undef old operand.
530# Test an undef old operand
531# GCN-LABEL: name: dpp_undef_old
532# GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
533name: dpp_undef_old
534tracksRegLiveness: true
535body: |
536  bb.0:
537    %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
538    %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
539...
540
# phys_dpp_mov_dst: the DPP mov defines the physical register $vgpr0 directly
# and V_CEIL_F32_e32 reads $vgpr0. Both instructions are expected to be left in
# place.
541# Do not combine a dpp mov which writes a physreg.
542# GCN-LABEL: name: phys_dpp_mov_dst
543# GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
544# GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
545name: phys_dpp_mov_dst
546tracksRegLiveness: true
547body: |
548  bb.0:
549    $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
550    %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
551...
552
# phys_dpp_mov_old_src: the physical register ($vgpr0) appears as the first
# (old) operand of the DPP mov. Both instructions are expected to be left in
# place.
553# Do not combine a dpp mov which reads a physreg.
554# GCN-LABEL: name: phys_dpp_mov_old_src
555# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
556# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
557name: phys_dpp_mov_old_src
558tracksRegLiveness: true
559body: |
560  bb.0:
561    %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
562    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
563...
564
# phys_dpp_mov_src: the physical register ($vgpr0) appears as the second
# (source) operand of the DPP mov. Both instructions are expected to be left in
# place.
565# Do not combine a dpp mov which reads a physreg.
566# GCN-LABEL: name: phys_dpp_mov_src
567# GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
568# GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
569name: phys_dpp_mov_src
570tracksRegLiveness: true
571body: |
572  bb.0:
573    %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
574    %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
575...
576
# dpp_reg_sequence_both_combined: two DPP movs (both ending in 15, 15, 1) are
# joined by a REG_SEQUENCE and read back through .sub0 / .sub1 by V_ADD_U32_e32
# and V_ADDC_U32_e32. The checks expect both users to become _dpp instructions
# that read %1.sub0 / %1.sub1 directly, each with a new IMPLICIT_DEF as its old
# operand.
577# GCN-LABEL: name: dpp_reg_sequence_both_combined
578# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
579# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
580# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
581# GCN: %9:vgpr_32 = IMPLICIT_DEF
582# GCN: %8:vgpr_32 = IMPLICIT_DEF
583# GCN: %6:vgpr_32 = V_ADD_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
584# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
585name: dpp_reg_sequence_both_combined
586tracksRegLiveness: true
587body: |
588  bb.0:
589    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
590
591    %0:vreg_64 = COPY $vgpr0_vgpr1
592    %1:vreg_64 = COPY $vgpr2_vgpr3
593    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
594    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
595    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
596    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
597    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
598    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
599...
600
# dpp_reg_sequence_first_combined: same shape as dpp_reg_sequence_both_combined,
# but the mov feeding sub1 ends in 1, 1, 1 instead of 15, 15, 1. The checks
# expect only the sub0 user (V_ADD_U32_e32) to be combined; the sub1 mov is
# kept, the REG_SEQUENCE gets an undef sub0 input, and V_ADDC_U32_e32 still
# reads the REG_SEQUENCE result.
601# GCN-LABEL: name: dpp_reg_sequence_first_combined
602# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
603# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
604# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
605# GCN: %8:vgpr_32 = IMPLICIT_DEF
606# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
607# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
608# GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
609# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
610name: dpp_reg_sequence_first_combined
611tracksRegLiveness: true
612body: |
613  bb.0:
614    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
615
616    %0:vreg_64 = COPY $vgpr0_vgpr1
617    %1:vreg_64 = COPY $vgpr2_vgpr3
618    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
619    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
620    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
621    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
622    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
623    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
624...
625
# dpp_reg_sequence_second_combined: mirror image of
# dpp_reg_sequence_first_combined - here the mov feeding sub0 ends in 1, 1, 1.
# The checks expect only the sub1 user (V_ADDC_U32_e32) to be combined; the
# sub0 mov is kept, the REG_SEQUENCE gets an undef sub1 input, and
# V_ADD_U32_e32 still reads the REG_SEQUENCE result.
626# GCN-LABEL: name: dpp_reg_sequence_second_combined
627# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
628# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
629# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
630# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
631# GCN: %8:vgpr_32 = IMPLICIT_DEF
632# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
633# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
634# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
635name: dpp_reg_sequence_second_combined
636tracksRegLiveness: true
637body: |
638  bb.0:
639    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
640
641    %0:vreg_64 = COPY $vgpr0_vgpr1
642    %1:vreg_64 = COPY $vgpr2_vgpr3
643    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
644    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
645    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
646    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
647    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
648    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
649...
650
# dpp_reg_sequence_none_combined: both DPP movs feeding the REG_SEQUENCE end in
# 1, 1, 1. The checks expect both movs, the REG_SEQUENCE and both _e32 users to
# remain.
651# GCN-LABEL: name: dpp_reg_sequence_none_combined
652# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
653# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
654# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
655# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
656# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
657# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
658# GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
659# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
660name: dpp_reg_sequence_none_combined
661tracksRegLiveness: true
662body: |
663  bb.0:
664    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
665
666    %0:vreg_64 = COPY $vgpr0_vgpr1
667    %1:vreg_64 = COPY $vgpr2_vgpr3
668    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
669    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
670    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
671    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
672    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
673    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
674...
675
# dpp_reg_sequence_exec_changed: the two DPP movs and the REG_SEQUENCE are in
# bb.0, while the users of %4.sub0 / %4.sub1 are in bb.1 after an S_BRANCH. The
# checks expect the movs, the REG_SEQUENCE and both _e32 users to remain.
676# GCN-LABEL: name: dpp_reg_sequence_exec_changed
677# GCN:   %0:vreg_64 = COPY $vgpr0_vgpr1
678# GCN:   %1:vreg_64 = COPY $vgpr2_vgpr3
679# GCN:   %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
680# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
681# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
682# GCN:   %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
683# GCN:   S_BRANCH %bb.1
684# GCN: bb.1:
685# GCN:   %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
686# GCN:   %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
687name: dpp_reg_sequence_exec_changed
688tracksRegLiveness: true
689body: |
690  bb.0:
691    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
692
693    %0:vreg_64 = COPY $vgpr0_vgpr1
694    %1:vreg_64 = COPY $vgpr2_vgpr3
695    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
696    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
697    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
698    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
699    S_BRANCH %bb.1
700
701  bb.1:
702    liveins: $vcc_lo
703    %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
704    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
705...
706
# dpp_reg_sequence_subreg: a second REG_SEQUENCE, built from %4.sub0 / %4.sub1,
# sits between the first REG_SEQUENCE and the users. The checks expect both
# movs, both REG_SEQUENCEs and both _e32 users to remain.
707# GCN-LABEL: name: dpp_reg_sequence_subreg
708# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
709# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
710# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
711# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
712# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
713# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
714# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
715# GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
716# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
717name: dpp_reg_sequence_subreg
718tracksRegLiveness: true
719body: |
720  bb.0:
721    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
722
723    %0:vreg_64 = COPY $vgpr0_vgpr1
724    %1:vreg_64 = COPY $vgpr2_vgpr3
725    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
726    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
727    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
728    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
729    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
730    %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
731    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
732...
733
# Both V_FMA_F32_e64 users read the DPP-fed REG_SEQUENCE %4 in src0 and
# again in src2. The checks expect the movs, the REG_SEQUENCE and both FMAs
# to be left exactly as written.
734# GCN-LABEL: name: dpp_reg_sequence_src2_reject
735#GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
736#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
737#GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
738#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
739#GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
740#GCN: %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
741name: dpp_reg_sequence_src2_reject
742tracksRegLiveness: true
743body: |
744  bb.0:
745    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
746
747    %0:vreg_64 = COPY $vgpr0_vgpr1
748    %1:vreg_64 = COPY $vgpr2_vgpr3
749    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
750    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
751    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
752    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
753    ; use of dpp arg as src2, reject
754    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
755    ; cannot commute src0 and src2, and %4.sub0 already rejected, reject
756    %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
757...
758
# A single V_FMA_F32_e64 reads %4.sub0 as src0 and %4.sub1 as src2. The
# checks expect it to become V_FMA_F32_e64_dpp with %1.sub0 as src0 and
# %4.sub1 kept as src2. The sub1 mov is expected to remain, and the
# REG_SEQUENCE is expected to take an undef %2 for its sub0 input. The
# expected old operand, %8, is a register that does not appear in the input.
759# GCN-LABEL: name: dpp_reg_sequence_src2
760#GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
761#GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
762#GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
763#GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
764name: dpp_reg_sequence_src2
765tracksRegLiveness: true
766body: |
767  bb.0:
768    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
769
770    %0:vreg_64 = COPY $vgpr0_vgpr1
771    %1:vreg_64 = COPY $vgpr2_vgpr3
772    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
773    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
774    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
775    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
    ; only user of %4: sub0 in src0, sub1 in src2
776    %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
777...
778
# A 64-bit V_MOV_B64_DPP_PSEUDO whose old and source operands are both
# IMPLICIT_DEF registers feeds an add / add-with-carry pair through sub0 and
# sub1. The checks expect both users to become their _dpp forms, reading the
# matching subregisters of %1 (old) and %0 (source) directly.
779# GCN-LABEL: name: dpp64_add64_impdef
780# GCN: %3:vgpr_32 = V_ADD_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
781# GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
782name: dpp64_add64_impdef
783tracksRegLiveness: true
784body: |
785  bb.0:
786    liveins: $vcc_lo
787    %0:vreg_64 = IMPLICIT_DEF
788    %1:vreg_64 = IMPLICIT_DEF
789    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
    ; low and high halves of the 64-bit DPP result are consumed separately
790    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
791    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
792...
793
# Same add / add-with-carry pair as dpp64_add64_impdef, but the 64-bit DPP
# mov takes undef operands that have no defining instruction. The checks
# expect both users to become _dpp forms with the undef flag carried onto the
# subregister operands.
794# GCN-LABEL: name:  dpp64_add64_undef
795# GCN: %3:vgpr_32 = V_ADD_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
796# GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
797name: dpp64_add64_undef
798tracksRegLiveness: true
799body: |
800  bb.0:
801    liveins: $vcc_lo
    ; old (%1) and source (%0) operands are both undef uses
802    %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
803    %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
804    %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
805...
806
807
# Two V_CNDMASK_B32_e64 users, each fed by its own DPP mov and an
# IMPLICIT_DEF condition register. The checks expect the first (src0
# modifiers 0) to stay a plain V_CNDMASK_B32_e64 and the second (src0
# modifiers 4) to become V_CNDMASK_B32_e64_dpp.
808# GCN-LABEL: name: cndmask_with_src2
809# GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
810# GCN: %8:vgpr_32 = V_CNDMASK_B32_e64_dpp %2, 4, %1, 0, %1, %7, 1, 15, 15, 1, implicit $exec
811name: cndmask_with_src2
812tracksRegLiveness: true
813body: |
814  bb.0:
815    liveins: $vgpr0, $vgpr1
816    %0:vgpr_32 = COPY $vgpr0
817    %1:vgpr_32 = COPY $vgpr1
818    %2:vgpr_32 = IMPLICIT_DEF
819
    ; expected to be left uncombined
    ; NOTE(review): the reason is not visible in this test - confirm
820    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
821    %4:sreg_32_xm0_xexec = IMPLICIT_DEF
822    %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
823
824    ; src2 is legal for _e64
825    %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
826    %7:sreg_32_xm0_xexec = IMPLICIT_DEF
827    %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
828...
829
830---
831
832# Make sure flags aren't dropped
# The e64 add carries nnan and nofpexcept; the check expects both flags on
# the combined V_ADD_F32_dpp (printed as "nnan nofpexcept", the reverse of
# the order written in the input).
833# GCN-LABEL: name: flags_add_f32_e64
834# GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
835name: flags_add_f32_e64
836tracksRegLiveness: true
837body:             |
838  bb.0:
839    liveins: $vgpr0, $vgpr1
840
841    %0:vgpr_32 = COPY $vgpr0
842    %1:vgpr_32 = COPY $vgpr1
843    %2:vgpr_32 = IMPLICIT_DEF
844
845    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
    ; instruction flags under test
846    %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
    ; the result is used here by S_ENDPGM
847    S_ENDPGM 0, implicit %4
848
849...
850
# The DPP mov result %2 is used as both src0 and src1 of the same
# V_MAX_F32_e64. The check expects that instruction to stay unchanged.
851# GCN-LABEL: name: dont_combine_more_than_one_operand
852# GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
853name: dont_combine_more_than_one_operand
854tracksRegLiveness: true
855body: |
856  bb.0:
857    liveins: $vgpr0, $vgpr1
858    %0:vgpr_32 = COPY $vgpr0
859    %1:vgpr_32 = COPY $vgpr1
860    %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
    ; %2 appears in two source operands of the same user
861    %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
862...
863
# REG_SEQUENCE variant of dont_combine_more_than_one_operand: each add reads
# the same subregister of the DPP-fed %4 in both source operands. The checks
# expect both adds to stay unchanged (with $vcc_lo in place of the input's
# $vcc).
864# GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
865# GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
866# GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
867name: dont_combine_more_than_one_operand_dpp_reg_sequence
868tracksRegLiveness: true
869body: |
870  bb.0:
871    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
872    %0:vreg_64 = COPY $vgpr0_vgpr1
873    %1:vreg_64 = COPY $vgpr2_vgpr3
874    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
875    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
876    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
    ; each user reads the same %4 subregister in both source operands
877    %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
878    %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
879...
880
881# Check op_sel is all 0s when combining
# Five mov/add pairs differing only in the modifier operands of
# V_ADD_I16_fake16_e64. The checks expect only the first pair (both modifier
# operands 0) to become V_ADD_I16_fake16_e64_dpp; the other four adds are
# expected unchanged.
882# GCN-LABEL: name: opsel_vop3
883# GCN: %4:vgpr_32 = V_ADD_I16_fake16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
884# GCN: %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
885# GCN: %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
886# GCN: %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
887# GCN: %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
888name:            opsel_vop3
889tracksRegLiveness: true
890body:             |
891  bb.0:
892    liveins: $vgpr0, $vgpr1
893
894    %0:vgpr_32 = COPY $vgpr0
895    %1:vgpr_32 = COPY $vgpr1
896    %2:vgpr_32 = IMPLICIT_DEF
897
898    ; Combine for op_sel:[0,0,0]
899    %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
900    %4:vgpr_32 = V_ADD_I16_fake16_e64 0, %3, 0, %1, 0, 0, implicit $exec
901
902    ; Do not combine for op_sel:[1,0,0]
903    %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
904    %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
905
906    ; Do not combine for op_sel:[0,1,0]
907    %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
908    %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
909
910    ; Do not combine for op_sel:[1,1,0]
911    %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
912    %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
913
914    ; Do not combine for op_sel:[0,0,1] (dst_op_sel only)
915    %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
916    %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
917...
918
919# Check op_sel is all 0s and op_sel_hi is all 1s when combining
# Four mov/FMA pairs differing only in the source-modifier value (0, 4, 8,
# 12) applied to all three V_FMA_MIX_F32 sources. The checks expect only the
# pair using 8 to become V_FMA_MIX_F32_dpp; the other three are expected
# unchanged.
920# GCN-LABEL: name: opsel_vop3p
921# GCN: %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
922# GCN: %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
923# GCN: %9:vgpr_32 = V_FMA_MIX_F32_dpp %3, 8, %0, 8, %1, 8, %2, 0, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
924# GCN: %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
925
926name: opsel_vop3p
927tracksRegLiveness: true
928body:             |
929  bb.0:
930    liveins: $vgpr0, $vgpr1, $vgpr2
931
932    %0:vgpr_32 = COPY $vgpr0
933    %1:vgpr_32 = COPY $vgpr1
934    %2:vgpr_32 = COPY $vgpr2
935    %3:vgpr_32 = IMPLICIT_DEF
936
937    ; Do not combine for op_sel:[0,0,0] op_sel_hi:[0,0,0]
938    %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
939    %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
940
941    ; Do not combine for op_sel:[1,1,1] op_sel_hi:[0,0,0]
942    %6:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
943    %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
944
945    ; Combine for op_sel:[0,0,0] op_sel_hi:[1,1,1]
946    %8:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
947    %9:vgpr_32 = V_FMA_MIX_F32 8, %8, 8, %1, 8, %2, 0, 0, 0, implicit $mode, implicit $exec
948
949    ; Do not combine for op_sel:[1,1,1] op_sel_hi:[1,1,1]
950    %10:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
951    %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
952...
953