xref: /llvm-project/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6,GFX67-SDAG,GFX6-SDAG %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6,GFX67-GISEL,GFX6-GISEL %s
4
5; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7,GFX67-SDAG,GFX7-SDAG %s
6; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7,GFX67-GISEL,GFX7-GISEL %s
7
8; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
9; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
10
11; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG,GFX900-SDAG,GFX900 %s
12; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL,GFX900-GISEL,GFX900 %s
13
14; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-SDAG,GFX90A-SDAG %s
15; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-GISEL,GFX90A-GISEL %s
16
17; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
18; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
19
20; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
21; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
22
23; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-SDAG %s
24; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1200,GFX1200-GISEL %s
25
26; Test for integer mad formation for patterns used in clpeak
27
; Scalar i32 multiply-add chain. With a = (x + 1) * y and
; b = (a + (x + 1)) * y, the function returns b * (a + 1) * (b + 1).
; In the expected output below, the SelectionDAG runs for gfx900, gfx90a,
; gfx1030, gfx1100 and gfx1200 end with two 64-bit mad instructions
; (v_mad_u64_u32, spelled v_mad_co_u64_u32 on gfx1200); the gfx600/gfx700/gfx803
; SelectionDAG runs and all GlobalISel runs use separate add and v_mul_lo_u32
; instructions.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them rather than editing them by hand.
28define i32 @clpeak_imad_pat_i32(i32 %x, i32 %y) {
29; GFX67-SDAG-LABEL: clpeak_imad_pat_i32:
30; GFX67-SDAG:       ; %bb.0: ; %entry
31; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
33; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
34; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
35; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
36; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
37; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v0
38; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
39; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
40; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX67-GISEL-LABEL: clpeak_imad_pat_i32:
43; GFX67-GISEL:       ; %bb.0: ; %entry
44; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
46; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
47; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
48; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
49; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
50; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
51; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
52; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
53; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
54;
55; GFX8-SDAG-LABEL: clpeak_imad_pat_i32:
56; GFX8-SDAG:       ; %bb.0: ; %entry
57; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
59; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
60; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
61; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
62; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
63; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v0
64; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
65; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
66; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
67;
68; GFX8-GISEL-LABEL: clpeak_imad_pat_i32:
69; GFX8-GISEL:       ; %bb.0: ; %entry
70; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
72; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
73; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
74; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
75; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
76; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
77; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
78; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
79; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
80;
81; GFX900-SDAG-LABEL: clpeak_imad_pat_i32:
82; GFX900-SDAG:       ; %bb.0: ; %entry
83; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
85; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
86; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
87; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
88; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
89; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
90; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX900-GISEL-LABEL: clpeak_imad_pat_i32:
93; GFX900-GISEL:       ; %bb.0: ; %entry
94; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
96; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
97; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
98; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
99; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
100; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
101; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
102; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
103; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
104;
105; GFX90A-SDAG-LABEL: clpeak_imad_pat_i32:
106; GFX90A-SDAG:       ; %bb.0: ; %entry
107; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
109; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
110; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
111; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
112; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
113; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
114; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
115;
116; GFX90A-GISEL-LABEL: clpeak_imad_pat_i32:
117; GFX90A-GISEL:       ; %bb.0: ; %entry
118; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
119; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
120; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
121; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
122; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
123; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
124; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
125; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
126; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
127; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
128;
129; GFX10-SDAG-LABEL: clpeak_imad_pat_i32:
130; GFX10-SDAG:       ; %bb.0: ; %entry
131; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
133; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
134; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
135; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
136; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
137; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
138; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
139;
140; GFX10-GISEL-LABEL: clpeak_imad_pat_i32:
141; GFX10-GISEL:       ; %bb.0: ; %entry
142; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
144; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
145; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
146; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
147; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
148; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
149; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
150; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
151; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX11-SDAG-LABEL: clpeak_imad_pat_i32:
154; GFX11-SDAG:       ; %bb.0: ; %entry
155; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
157; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
158; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v1
159; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v5, v0
160; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
161; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
162; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3]
163; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
164; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4]
165; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX11-GISEL-LABEL: clpeak_imad_pat_i32:
168; GFX11-GISEL:       ; %bb.0: ; %entry
169; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
171; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
172; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
173; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
174; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
175; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
176; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
177; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
178; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
179; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
180; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
181; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
182;
183; GFX1200-SDAG-LABEL: clpeak_imad_pat_i32:
184; GFX1200-SDAG:       ; %bb.0: ; %entry
185; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
186; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
187; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
188; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
189; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
190; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
191; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
192; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
193; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
194; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
195; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
196; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1]
197; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
198; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2]
199; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
200;
201; GFX1200-GISEL-LABEL: clpeak_imad_pat_i32:
202; GFX1200-GISEL:       ; %bb.0: ; %entry
203; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
204; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
205; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
206; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
207; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
208; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
209; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
210; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
211; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
212; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
213; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
214; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
215; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
216; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
217; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
218; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
219; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
220entry:
221  %y18 = add i32 %x, 1                  ; x + 1
222  %add = mul i32 %y18, %y               ; a = (x + 1) * y
223  %mul119 = add i32 %add, %y18          ; a + (x + 1)
224  %add2 = mul i32 %mul119, %y           ; b = (a + (x + 1)) * y
225  %add220 = add i32 %add, 1             ; a + 1
226  %add422 = add i32 %add2, 1            ; b + 1
227  %mul521 = mul i32 %add2, %add220      ; b * (a + 1)
228  %add6 = mul i32 %mul521, %add422      ; b * (a + 1) * (b + 1)
229  ret i32 %add6
230}
231
; Scalar i16 multiply-add chain with sign-extended arguments and return value.
; With a = (x + 1) * y and c = (y + 1) * a, the function returns
; (a + 1) * c * (c + 1).
; Every expected sequence below ends with v_bfe_i32 v0, v0, 0, 16 ahead of the
; return (presumably the sign extension required by the signext return
; attribute). The gfx600/gfx700 runs use 24-bit mul/mad instructions on
; 0xffff-masked operands; the SelectionDAG runs for later subtargets use four
; 16-bit mad instructions (v_mad_u16, or v_mad_legacy_u16 on gfx900/gfx90a);
; the GlobalISel runs mix 16-bit multiplies with mads whose addend is the
; constant 1.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them rather than editing them by hand.
232define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
233; GFX67-LABEL: clpeak_imad_pat_i16:
234; GFX67:       ; %bb.0: ; %entry
235; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
237; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
238; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v1
239; GFX67-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
240; GFX67-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
241; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
242; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
243; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
244; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
245; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
246; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
247; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
248; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
249; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
250; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
251; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
252; GFX67-NEXT:    v_bfe_i32 v0, v0, 0, 16
253; GFX67-NEXT:    s_setpc_b64 s[30:31]
254;
255; GFX8-SDAG-LABEL: clpeak_imad_pat_i16:
256; GFX8-SDAG:       ; %bb.0: ; %entry
257; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
258; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
259; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
260; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
261; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
262; GFX8-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
263; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
264;
265; GFX8-GISEL-LABEL: clpeak_imad_pat_i16:
266; GFX8-GISEL:       ; %bb.0: ; %entry
267; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268; GFX8-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
269; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
270; GFX8-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
271; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
272; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
273; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v4
274; GFX8-GISEL-NEXT:    v_mad_u16 v1, v3, v2, 1
275; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
276; GFX8-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
277; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
278;
279; GFX9-SDAG-LABEL: clpeak_imad_pat_i16:
280; GFX9-SDAG:       ; %bb.0: ; %entry
281; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
283; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
284; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
285; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v0, v1, v0
286; GFX9-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
287; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
288;
289; GFX9-GISEL-LABEL: clpeak_imad_pat_i16:
290; GFX9-GISEL:       ; %bb.0: ; %entry
291; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292; GFX9-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
293; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
294; GFX9-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
295; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
296; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v1, 1
297; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v4
298; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v3, v2, 1
299; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
300; GFX9-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
301; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
302;
303; GFX10-SDAG-LABEL: clpeak_imad_pat_i16:
304; GFX10-SDAG:       ; %bb.0: ; %entry
305; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
307; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
308; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
309; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
310; GFX10-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
311; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
312;
313; GFX10-GISEL-LABEL: clpeak_imad_pat_i16:
314; GFX10-GISEL:       ; %bb.0: ; %entry
315; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
317; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
318; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
319; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
320; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
321; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
322; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
323; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
324; GFX10-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
325; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
326;
327; GFX11-SDAG-LABEL: clpeak_imad_pat_i16:
328; GFX11-SDAG:       ; %bb.0: ; %entry
329; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
331; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
332; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
333; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
334; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
335; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
336; GFX11-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
337; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
338;
339; GFX11-GISEL-LABEL: clpeak_imad_pat_i16:
340; GFX11-GISEL:       ; %bb.0: ; %entry
341; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
343; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
344; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
345; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
346; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
347; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
348; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
349; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
350; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
351; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
352; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
353; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
354; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
355;
356; GFX1200-SDAG-LABEL: clpeak_imad_pat_i16:
357; GFX1200-SDAG:       ; %bb.0: ; %entry
358; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
359; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
360; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
361; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
362; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
363; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
364; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
365; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
366; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
367; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
368; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
369; GFX1200-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
370; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
371;
372; GFX1200-GISEL-LABEL: clpeak_imad_pat_i16:
373; GFX1200-GISEL:       ; %bb.0: ; %entry
374; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
375; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
376; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
377; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
378; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
379; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
380; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
381; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
382; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
383; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
384; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
385; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
386; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
387; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
388; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
389; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
390; GFX1200-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
391; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
392entry:
393  %conv33 = add i16 %x, 1               ; x + 1
394  %add = mul i16 %conv33, %y            ; a = (x + 1) * y
395  %conv434 = add i16 %y, 1              ; y + 1
396  %add8 = mul i16 %conv434, %add        ; c = (y + 1) * a
397  %conv1035 = add i16 %add, 1           ; a + 1
398  %add14 = mul i16 %conv1035, %add8     ; (a + 1) * c
399  %conv1636 = add i16 %add8, 1          ; c + 1
400  %add20 = mul i16 %add14, %conv1636    ; (a + 1) * c * (c + 1)
401  ret i16 %add20
402}
403
; <2 x i16> version of the multiply-add chain, applied per element. With
; a = (x + 1) * y and b = (a + (x + 1)) * y, the function returns
; b * (a + 1) * (b + 1).
; In the expected output below, gfx900 and later share one sequence between
; SelectionDAG and GlobalISel, built from packed 16-bit instructions
; (v_pk_add_u16, v_pk_mad_u16, v_pk_mul_lo_u16). The gfx803 runs use unpacked
; 16-bit mul/mad with SDWA operand selection, and the gfx600/gfx700 runs use
; 24-bit mul/mad on 0xffff-masked operands.
; The assertion lines are autogenerated (see the note at the top of the file);
; regenerate them rather than editing them by hand.
404define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
405; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i16:
406; GFX67-SDAG:       ; %bb.0: ; %entry
407; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
409; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v0
410; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
411; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
412; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v1
413; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
414; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
415; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
416; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
417; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
418; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
419; GFX67-SDAG-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
420; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
421; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
422; GFX67-SDAG-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
423; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
424; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v6
425; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
426; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v5
427; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
428; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v7
429; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
430; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
431; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
432; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
433; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
434; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
435; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
436; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
437;
438; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i16:
439; GFX67-GISEL:       ; %bb.0: ; %entry
440; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
442; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
443; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
444; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
445; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
446; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
447; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
448; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
449; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
450; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
451; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
452; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
453; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
454; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
455; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
456; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
457; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
458; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
459; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
460; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
461; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
462; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
463; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
464; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
465; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
466; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
467; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
468; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
469; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
470; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
471; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
472; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
473; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
474; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
475; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
476; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
477; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
478; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
479; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
480; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
481; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
482; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
483; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
484; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
485; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
486;
487; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16:
488; GFX8-SDAG:       ; %bb.0: ; %entry
489; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490; GFX8-SDAG-NEXT:    v_mov_b32_e32 v3, 1
491; GFX8-SDAG-NEXT:    v_add_u16_e32 v2, 1, v0
492; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
493; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
494; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v0, v3
495; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v3, v0
496; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v2, v1
497; GFX8-SDAG-NEXT:    v_mad_u16 v2, v2, v1, v2
498; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v3
499; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v2, v1
500; GFX8-SDAG-NEXT:    v_mad_u16 v2, v0, v4, v0
501; GFX8-SDAG-NEXT:    v_mad_u16 v3, v1, v5, v1
502; GFX8-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
503; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
504; GFX8-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
505; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
506; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
507;
508; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i16:
509; GFX8-GISEL:       ; %bb.0: ; %entry
510; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511; GFX8-GISEL-NEXT:    v_mov_b32_e32 v3, 1
512; GFX8-GISEL-NEXT:    v_add_u16_e32 v2, 1, v0
513; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
514; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
515; GFX8-GISEL-NEXT:    v_mad_u16 v4, v2, v1, v2
516; GFX8-GISEL-NEXT:    v_mad_u16 v5, v0, v3, v0
517; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v4, v1
518; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v7, v5, v3
519; GFX8-GISEL-NEXT:    v_mad_u16 v2, v2, v1, 1
520; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v3, 1
521; GFX8-GISEL-NEXT:    v_mad_u16 v1, v4, v1, 1
522; GFX8-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
523; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v6, v2
524; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v7, v0
525; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v2, v1
526; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
527; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
528; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
529;
530; GFX9-LABEL: clpeak_imad_pat_v2i16:
531; GFX9:       ; %bb.0: ; %entry
532; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533; GFX9-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
534; GFX9-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
535; GFX9-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
536; GFX9-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
537; GFX9-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
538; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
539; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
540; GFX9-NEXT:    s_setpc_b64 s[30:31]
541;
542; GFX10-LABEL: clpeak_imad_pat_v2i16:
543; GFX10:       ; %bb.0: ; %entry
544; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545; GFX10-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
546; GFX10-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
547; GFX10-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
548; GFX10-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
549; GFX10-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
550; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
551; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
552; GFX10-NEXT:    s_setpc_b64 s[30:31]
553;
554; GFX11-LABEL: clpeak_imad_pat_v2i16:
555; GFX11:       ; %bb.0: ; %entry
556; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
557; GFX11-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
558; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
559; GFX11-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
560; GFX11-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
561; GFX11-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
562; GFX11-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
563; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
564; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
565; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
566; GFX11-NEXT:    s_setpc_b64 s[30:31]
567;
568; GFX1200-LABEL: clpeak_imad_pat_v2i16:
569; GFX1200:       ; %bb.0: ; %entry
570; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
571; GFX1200-NEXT:    s_wait_expcnt 0x0
572; GFX1200-NEXT:    s_wait_samplecnt 0x0
573; GFX1200-NEXT:    s_wait_bvhcnt 0x0
574; GFX1200-NEXT:    s_wait_kmcnt 0x0
575; GFX1200-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
576; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
577; GFX1200-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
578; GFX1200-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
579; GFX1200-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
580; GFX1200-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
581; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
582; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
583; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
584; GFX1200-NEXT:    s_setpc_b64 s[30:31]
585entry:
586  %y18 = add <2 x i16> %x, <i16 1, i16 1>            ; x + 1 (per element)
587  %add = mul <2 x i16> %y18, %y                      ; a = (x + 1) * y
588  %mul119 = add <2 x i16> %add, %y18                 ; a + (x + 1)
589  %add2 = mul <2 x i16> %mul119, %y                  ; b = (a + (x + 1)) * y
590  %add220 = add <2 x i16> %add, <i16 1, i16 1>       ; a + 1
591  %add422 = add <2 x i16> %add2, <i16 1, i16 1>      ; b + 1
592  %mul521 = mul <2 x i16> %add2, %add220             ; b * (a + 1)
593  %add6 = mul <2 x i16> %mul521, %add422             ; b * (a + 1) * (b + 1)
594  ret <2 x i16> %add6
595}
596
; Test function clpeak_imad_pat_v3i16 - a per-lane <3 x i16> integer
; multiply/add chain. With t = x + 1, a = t * y and b = (a + t) * y, the
; function returns b * (a + 1) * (b + 1). None of the add/mul instructions
; carry nsw/nuw flags, so all arithmetic wraps at 16 bits.
;
; The assertions below are autogenerated (see the note at the top of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. They record v_mad_u32_u24 on 0xffff-masked lanes for
; the gfx6/gfx7 prefixes, v_mad_u16 for the gfx8 prefixes, and packed
; v_pk_mad_u16 / v_pk_mul_lo_u16 for the gfx9 and later prefixes.
597define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
598; GFX67-SDAG-LABEL: clpeak_imad_pat_v3i16:
599; GFX67-SDAG:       ; %bb.0: ; %entry
600; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
602; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
603; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v0
604; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v1
605; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
606; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
607; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
608; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v9, v8, v4
609; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v6, v3, v0
610; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v8, v4, v1
611; GFX67-SDAG-NEXT:    v_mad_u32_u24 v6, v6, v3, 1
612; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v2
613; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v5
614; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
615; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
616; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v6
617; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
618; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v7, v5, v2
619; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v8, v0, v3
620; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
621; GFX67-SDAG-NEXT:    v_or_b32_e32 v6, v9, v6
622; GFX67-SDAG-NEXT:    s_mov_b32 s4, 0x10000
623; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v3, 1
624; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
625; GFX67-SDAG-NEXT:    v_add_i32_e32 v6, vcc, s4, v6
626; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
627; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
628; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v2, v5
629; GFX67-SDAG-NEXT:    v_mad_u32_u24 v7, v7, v5, 1
630; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v3, v0
631; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v6
632; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
633; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v7
634; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
635; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v8
636; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v6
637; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
638; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v4
639; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
640; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
641; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
642; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
643; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
644; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v5
645; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
646; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
647; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
648; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v5, v0
649; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
650; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
651; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
652;
653; GFX67-GISEL-LABEL: clpeak_imad_pat_v3i16:
654; GFX67-GISEL:       ; %bb.0: ; %entry
655; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
656; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
657; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
658; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v0
659; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
660; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
661; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v1
662; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
663; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v6, v3, v0
664; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v2
665; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
666; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v7, v4, v1
667; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
668; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v8, v5, v2
669; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v9, v0, v3
670; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
671; GFX67-GISEL-NEXT:    v_mad_u32_u24 v6, v6, v3, 1
672; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v10, v1, v4
673; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
674; GFX67-GISEL-NEXT:    v_mad_u32_u24 v7, v7, v4, 1
675; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v3, 1
676; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v4, 1
677; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v9
678; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v6
679; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v11, v2, v5
680; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v5, 1
681; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
682; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
683; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v10
684; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v7
685; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
686; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v11
687; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v8
688; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
689; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
690; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
691; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
692; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
693; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
694; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
695; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v5
696; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
697; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
698; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
699;
700; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i16:
701; GFX8-SDAG:       ; %bb.0: ; %entry
702; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703; GFX8-SDAG-NEXT:    v_mov_b32_e32 v5, 1
704; GFX8-SDAG-NEXT:    v_add_u16_e32 v4, 1, v0
705; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
706; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
707; GFX8-SDAG-NEXT:    v_add_u16_e32 v1, 1, v1
708; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v6, v0, v5
709; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v5, v0
710; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v7, v1, v3
711; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v8, v4, v2
712; GFX8-SDAG-NEXT:    v_mad_u16 v4, v4, v2, v4
713; GFX8-SDAG-NEXT:    v_mad_u16 v1, v1, v3, v1
714; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v5
715; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
716; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v2, v4, v2
717; GFX8-SDAG-NEXT:    v_mad_u16 v3, v0, v6, v0
718; GFX8-SDAG-NEXT:    v_mad_u16 v4, v2, v8, v2
719; GFX8-SDAG-NEXT:    v_mad_u16 v0, v3, v0, v3
720; GFX8-SDAG-NEXT:    v_mad_u16 v5, v1, v7, v1
721; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
722; GFX8-SDAG-NEXT:    v_mad_u16 v2, v4, v2, v4
723; GFX8-SDAG-NEXT:    v_mad_u16 v1, v5, v1, v5
724; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
725; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
726;
727; GFX8-GISEL-LABEL: clpeak_imad_pat_v3i16:
728; GFX8-GISEL:       ; %bb.0: ; %entry
729; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730; GFX8-GISEL-NEXT:    v_mov_b32_e32 v6, 1
731; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
732; GFX8-GISEL-NEXT:    v_add_u16_e32 v5, 1, v0
733; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
734; GFX8-GISEL-NEXT:    v_add_u16_e32 v1, 1, v1
735; GFX8-GISEL-NEXT:    v_mad_u16 v6, v5, v2, v5
736; GFX8-GISEL-NEXT:    v_mad_u16 v7, v0, v4, v0
737; GFX8-GISEL-NEXT:    v_mad_u16 v8, v1, v3, v1
738; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v9, v6, v2
739; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v10, v7, v4
740; GFX8-GISEL-NEXT:    v_mad_u16 v5, v5, v2, 1
741; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v4, 1
742; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v11, v8, v3
743; GFX8-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
744; GFX8-GISEL-NEXT:    v_mad_u16 v2, v6, v2, 1
745; GFX8-GISEL-NEXT:    v_mad_u16 v4, v7, v4, 1
746; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v5, v9, v5
747; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v10, v0
748; GFX8-GISEL-NEXT:    v_mad_u16 v3, v8, v3, 1
749; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v11, v1
750; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v5, v2
751; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
752; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
753; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
754; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
755;
756; GFX9-SDAG-LABEL: clpeak_imad_pat_v3i16:
757; GFX9-SDAG:       ; %bb.0: ; %entry
758; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759; GFX9-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
760; GFX9-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
761; GFX9-SDAG-NEXT:    v_pk_mad_u16 v4, v1, v3, v1
762; GFX9-SDAG-NEXT:    v_pk_mad_u16 v5, v0, v2, v0
763; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v5, v2
764; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v4, v3
765; GFX9-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
766; GFX9-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
767; GFX9-SDAG-NEXT:    v_pk_mad_u16 v3, v4, v3, 1
768; GFX9-SDAG-NEXT:    v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0]
769; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
770; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
771; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
772; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
773; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
774;
775; GFX9-GISEL-LABEL: clpeak_imad_pat_v3i16:
776; GFX9-GISEL:       ; %bb.0: ; %entry
777; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
778; GFX9-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
779; GFX9-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
780; GFX9-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
781; GFX9-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
782; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
783; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
784; GFX9-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
785; GFX9-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
786; GFX9-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
787; GFX9-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
788; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
789; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
790; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
791; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
792; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
793;
794; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i16:
795; GFX10-SDAG:       ; %bb.0: ; %entry
796; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
797; GFX10-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
798; GFX10-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
799; GFX10-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
800; GFX10-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
801; GFX10-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
802; GFX10-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
803; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
804; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
805; GFX10-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
806; GFX10-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
807; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
808; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
809; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
810; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
811; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
812;
813; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i16:
814; GFX10-GISEL:       ; %bb.0: ; %entry
815; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816; GFX10-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
817; GFX10-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
818; GFX10-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
819; GFX10-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
820; GFX10-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
821; GFX10-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
822; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
823; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
824; GFX10-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
825; GFX10-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
826; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
827; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
828; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
829; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
830; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
831;
832; GFX11-SDAG-LABEL: clpeak_imad_pat_v3i16:
833; GFX11-SDAG:       ; %bb.0: ; %entry
834; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835; GFX11-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
836; GFX11-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
837; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
838; GFX11-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
839; GFX11-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
840; GFX11-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
841; GFX11-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
842; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
843; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
844; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
845; GFX11-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
846; GFX11-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
847; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
848; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
849; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
850; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
851; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
852; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
853; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
854;
855; GFX11-GISEL-LABEL: clpeak_imad_pat_v3i16:
856; GFX11-GISEL:       ; %bb.0: ; %entry
857; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
858; GFX11-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
859; GFX11-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
860; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
861; GFX11-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
862; GFX11-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
863; GFX11-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
864; GFX11-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
865; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
866; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
867; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
868; GFX11-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
869; GFX11-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
870; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
871; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
872; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
873; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
874; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
875; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
876; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
877;
878; GFX1200-SDAG-LABEL: clpeak_imad_pat_v3i16:
879; GFX1200-SDAG:       ; %bb.0: ; %entry
880; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
881; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
882; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
883; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
884; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
885; GFX1200-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
886; GFX1200-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
887; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
888; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
889; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
890; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
891; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
892; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
893; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
894; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
895; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
896; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
897; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
898; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
899; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
900; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
901; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
902; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
903; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
904;
905; GFX1200-GISEL-LABEL: clpeak_imad_pat_v3i16:
906; GFX1200-GISEL:       ; %bb.0: ; %entry
907; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
908; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
909; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
910; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
911; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
912; GFX1200-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
913; GFX1200-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
914; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
915; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
916; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
917; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
918; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
919; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
920; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
921; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
922; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
923; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
924; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
925; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
926; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
927; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
928; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
929; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
930; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
931entry:
  ; Value names do not track opcodes in this body: %add, %add15 and %add33
  ; are multiplies, while %mul1249 is an add. Read the opcode, not the name.
932  %y48 = add <3 x i16> %x, <i16 1, i16 1, i16 1>
933  %add = mul <3 x i16> %y48, %y
934  %mul1249 = add <3 x i16> %add, %y48
935  %add15 = mul <3 x i16> %mul1249, %y
936  %add1550 = add <3 x i16> %add, <i16 1, i16 1, i16 1>
937  %add2452 = add <3 x i16> %add15, <i16 1, i16 1, i16 1>
938  %mul3051 = mul <3 x i16> %add15, %add1550
939  %add33 = mul <3 x i16> %mul3051, %add2452
940  ret <3 x i16> %add33
941}
942
943define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
944; GFX67-SDAG-LABEL: clpeak_imad_pat_v4i16:
945; GFX67-SDAG:       ; %bb.0: ; %entry
946; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
947; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, 1, v3
948; GFX67-SDAG-NEXT:    v_and_b32_e32 v11, 0xffff, v3
949; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v7
950; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
951; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v11, v7, v3
952; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
953; GFX67-SDAG-NEXT:    v_and_b32_e32 v9, 0xffff, v2
954; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
955; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v6
956; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
957; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v0
958; GFX67-SDAG-NEXT:    v_and_b32_e32 v10, 0xffff, v1
959; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
960; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v5
961; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v13, v11, v7
962; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
963; GFX67-SDAG-NEXT:    v_mad_u32_u24 v7, v9, v6, 1
964; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v12, v10, v5
965; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v9, v6, v2
966; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v8, v4, v0
967; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v10, v5, v1
968; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v7
969; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v9, 16, v13
970; GFX67-SDAG-NEXT:    v_mad_u32_u24 v8, v8, v4, 1
971; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
972; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
973; GFX67-SDAG-NEXT:    v_or_b32_e32 v7, v9, v7
974; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v8
975; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v9, 16, v12
976; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
977; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v10, v0, v4
978; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v5
979; GFX67-SDAG-NEXT:    s_mov_b32 s4, 0x10000
980; GFX67-SDAG-NEXT:    v_or_b32_e32 v8, v9, v8
981; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
982; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v5, v2, v6
983; GFX67-SDAG-NEXT:    v_add_i32_e32 v8, vcc, s4, v8
984; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v2, v6, 1
985; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
986; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 16, v1
987; GFX67-SDAG-NEXT:    v_add_i32_e32 v7, vcc, s4, v7
988; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
989; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 16, v3
990; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v4, v0
991; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v8
992; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
993; GFX67-SDAG-NEXT:    v_or_b32_e32 v2, v6, v2
994; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v7
995; GFX67-SDAG-NEXT:    v_and_b32_e32 v9, 0xffff, v10
996; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v8
997; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
998; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v5
999; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v7
1000; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1001; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, s4, v2
1002; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
1003; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v8, v9, v8
1004; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
1005; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v6
1006; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1007; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
1008; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v8
1009; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1010; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1011; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1012; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1013; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1014; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v7, v0
1015; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v5
1016; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
1017; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v6
1018; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
1019;
1020; GFX67-GISEL-LABEL: clpeak_imad_pat_v4i16:
1021; GFX67-GISEL:       ; %bb.0: ; %entry
1022; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
1024; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
1025; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v3
1026; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v1
1027; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
1028; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v0
1029; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
1030; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v3
1031; GFX67-GISEL-NEXT:    v_or_b32_e32 v8, v8, v9
1032; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v2
1033; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
1034; GFX67-GISEL-NEXT:    v_or_b32_e32 v9, v9, v10
1035; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
1036; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1037; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1038; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1039; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v10, v5, v1
1040; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
1041; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
1042; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
1043; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
1044; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v8, v4, v0
1045; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1046; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v9, v6, v2
1047; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v11, v7, v3
1048; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1049; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1050; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
1051; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
1052; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
1053; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1054; GFX67-GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
1055; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1056; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1057; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v13, v2, v5
1058; GFX67-GISEL-NEXT:    v_mad_u32_u24 v10, v10, v5, 1
1059; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
1060; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
1061; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v12, v0, v4
1062; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v4, 1
1063; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v10
1064; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
1065; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1066; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1067; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v15, v3, v7
1068; GFX67-GISEL-NEXT:    v_mad_u32_u24 v11, v11, v7, 1
1069; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1070; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
1071; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v3, v7, 1
1072; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1073; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1074; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v14, v1, v6
1075; GFX67-GISEL-NEXT:    v_mad_u32_u24 v9, v9, v6, 1
1076; GFX67-GISEL-NEXT:    v_or_b32_e32 v8, v8, v10
1077; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v11
1078; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v6, 1
1079; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
1080; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
1081; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
1082; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
1083; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1084; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1085; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v12
1086; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v8
1087; GFX67-GISEL-NEXT:    v_or_b32_e32 v9, v9, v10
1088; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
1089; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
1090; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
1091; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v13
1092; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v5, v1
1093; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v14
1094; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v9
1095; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
1096; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
1097; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v15
1098; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v6, v3
1099; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1100; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1101; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1102; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
1103; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v4, v0
1104; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1105; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v5
1106; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1107; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1108; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v6
1109; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
1110; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
1111; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
1112;
1113; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i16:
1114; GFX8-SDAG:       ; %bb.0: ; %entry
1115; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1116; GFX8-SDAG-NEXT:    v_mov_b32_e32 v6, 1
1117; GFX8-SDAG-NEXT:    v_add_u16_e32 v5, 1, v0
1118; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1119; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
1120; GFX8-SDAG-NEXT:    v_add_u16_e32 v4, 1, v1
1121; GFX8-SDAG-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1122; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
1123; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v9, v0, v8
1124; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v8, v0
1125; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v7, v1, v6
1126; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v10, v5, v2
1127; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v11, v4, v3
1128; GFX8-SDAG-NEXT:    v_mad_u16 v1, v1, v6, v1
1129; GFX8-SDAG-NEXT:    v_mad_u16 v4, v4, v3, v4
1130; GFX8-SDAG-NEXT:    v_mad_u16 v5, v5, v2, v5
1131; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v8
1132; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v6
1133; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v2, v5, v2
1134; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v3, v4, v3
1135; GFX8-SDAG-NEXT:    v_mad_u16 v4, v0, v9, v0
1136; GFX8-SDAG-NEXT:    v_mad_u16 v5, v1, v7, v1
1137; GFX8-SDAG-NEXT:    v_mad_u16 v7, v2, v10, v2
1138; GFX8-SDAG-NEXT:    v_mad_u16 v0, v4, v0, v4
1139; GFX8-SDAG-NEXT:    v_mad_u16 v6, v3, v11, v3
1140; GFX8-SDAG-NEXT:    v_mad_u16 v1, v5, v1, v5
1141; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1142; GFX8-SDAG-NEXT:    v_mad_u16 v2, v7, v2, v7
1143; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
1144; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1145; GFX8-SDAG-NEXT:    v_mad_u16 v2, v6, v3, v6
1146; GFX8-SDAG-NEXT:    v_or_b32_e32 v1, v2, v1
1147; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
1148;
1149; GFX8-GISEL-LABEL: clpeak_imad_pat_v4i16:
1150; GFX8-GISEL:       ; %bb.0: ; %entry
1151; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152; GFX8-GISEL-NEXT:    v_mov_b32_e32 v5, 1
1153; GFX8-GISEL-NEXT:    v_add_u16_e32 v4, 1, v0
1154; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1155; GFX8-GISEL-NEXT:    v_add_u16_e32 v6, 1, v1
1156; GFX8-GISEL-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1157; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1158; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
1159; GFX8-GISEL-NEXT:    v_mad_u16 v8, v4, v2, v4
1160; GFX8-GISEL-NEXT:    v_mad_u16 v9, v0, v5, v0
1161; GFX8-GISEL-NEXT:    v_mad_u16 v10, v6, v3, v6
1162; GFX8-GISEL-NEXT:    v_mad_u16 v11, v1, v7, v1
1163; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v12, v8, v2
1164; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v13, v9, v5
1165; GFX8-GISEL-NEXT:    v_mad_u16 v4, v4, v2, 1
1166; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v5, 1
1167; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v14, v10, v3
1168; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v15, v11, v7
1169; GFX8-GISEL-NEXT:    v_mad_u16 v6, v6, v3, 1
1170; GFX8-GISEL-NEXT:    v_mad_u16 v1, v1, v7, 1
1171; GFX8-GISEL-NEXT:    v_mad_u16 v2, v8, v2, 1
1172; GFX8-GISEL-NEXT:    v_mad_u16 v5, v9, v5, 1
1173; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v12, v4
1174; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v13, v0
1175; GFX8-GISEL-NEXT:    v_mad_u16 v3, v10, v3, 1
1176; GFX8-GISEL-NEXT:    v_mad_u16 v7, v11, v7, 1
1177; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v14, v6
1178; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v15, v1
1179; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v4, v2
1180; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1181; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
1182; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v6, v3
1183; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1184; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v2, v1
1185; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
1186;
1187; GFX9-SDAG-LABEL: clpeak_imad_pat_v4i16:
1188; GFX9-SDAG:       ; %bb.0: ; %entry
1189; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1190; GFX9-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1191; GFX9-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1192; GFX9-SDAG-NEXT:    v_pk_mad_u16 v4, v1, v3, v1
1193; GFX9-SDAG-NEXT:    v_pk_mad_u16 v5, v0, v2, v0
1194; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v5, v2
1195; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v4, v3
1196; GFX9-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1197; GFX9-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1198; GFX9-SDAG-NEXT:    v_pk_mad_u16 v3, v4, v3, 1 op_sel_hi:[1,1,0]
1199; GFX9-SDAG-NEXT:    v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0]
1200; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1201; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1202; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1203; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1204; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1205;
1206; GFX9-GISEL-LABEL: clpeak_imad_pat_v4i16:
1207; GFX9-GISEL:       ; %bb.0: ; %entry
1208; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209; GFX9-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1210; GFX9-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1211; GFX9-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1212; GFX9-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1213; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1214; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1215; GFX9-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1216; GFX9-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1217; GFX9-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1218; GFX9-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
1219; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1220; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1221; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1222; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1223; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1224;
1225; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i16:
1226; GFX10-SDAG:       ; %bb.0: ; %entry
1227; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1228; GFX10-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1229; GFX10-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1230; GFX10-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1231; GFX10-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1232; GFX10-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1233; GFX10-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1234; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1235; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1236; GFX10-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
1237; GFX10-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1238; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1239; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1240; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1241; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1242; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
1243;
1244; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i16:
1245; GFX10-GISEL:       ; %bb.0: ; %entry
1246; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1247; GFX10-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1248; GFX10-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1249; GFX10-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1250; GFX10-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1251; GFX10-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1252; GFX10-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1253; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1254; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1255; GFX10-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1256; GFX10-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
1257; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1258; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1259; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1260; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1261; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
1262;
1263; GFX11-SDAG-LABEL: clpeak_imad_pat_v4i16:
1264; GFX11-SDAG:       ; %bb.0: ; %entry
1265; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1266; GFX11-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1267; GFX11-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1268; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1269; GFX11-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1270; GFX11-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1271; GFX11-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1272; GFX11-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1273; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1274; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1275; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1276; GFX11-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
1277; GFX11-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1278; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1279; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1280; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1281; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1282; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1283; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1284; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1285;
1286; GFX11-GISEL-LABEL: clpeak_imad_pat_v4i16:
1287; GFX11-GISEL:       ; %bb.0: ; %entry
1288; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1289; GFX11-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1290; GFX11-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1291; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1292; GFX11-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1293; GFX11-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1294; GFX11-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1295; GFX11-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1296; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1297; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1298; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1299; GFX11-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1300; GFX11-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
1301; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1302; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1303; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1304; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1305; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1306; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1307; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1308;
1309; GFX1200-SDAG-LABEL: clpeak_imad_pat_v4i16:
1310; GFX1200-SDAG:       ; %bb.0: ; %entry
1311; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1312; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
1313; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
1314; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
1315; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
1316; GFX1200-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1317; GFX1200-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1318; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1319; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1320; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1321; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1322; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1323; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1324; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1325; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1326; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
1327; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1328; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1329; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1330; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1331; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1332; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1333; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1334; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
1335;
1336; GFX1200-GISEL-LABEL: clpeak_imad_pat_v4i16:
1337; GFX1200-GISEL:       ; %bb.0: ; %entry
1338; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1339; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
1340; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
1341; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
1342; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
1343; GFX1200-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1344; GFX1200-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1345; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1346; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1347; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1348; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1349; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
1350; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1351; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1352; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1353; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1354; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
1355; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1356; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1357; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1358; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1359; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1360; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1361; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
1362entry:
1363  %y18 = add <4 x i16> %x, <i16 1, i16 1, i16 1, i16 1>
1364  %add = mul <4 x i16> %y18, %y
1365  %mul119 = add <4 x i16> %add, %y18
1366  %add2 = mul <4 x i16> %mul119, %y
1367  %add220 = add <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
1368  %add422 = add <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
1369  %mul521 = mul <4 x i16> %add2, %add220
1370  %add6 = mul <4 x i16> %mul521, %add422
1371  ret <4 x i16> %add6
1372}
1373
; Scalar i16 multiply/add chain with zeroext arguments and a zeroext return.
; In terms of the IR at the bottom of this function:
;   add   = (x + 1) * y
;   add8  = (y + 1) * add
;   add14 = (add + 1) * add8
;   add20 = add14 * (add8 + 1)        <- returned
; What the checks below show:
;   * SelectionDAG, gfx8 and later: every step becomes one 16-bit
;     multiply-add of the form a*b + b (four mads in total).
;   * GlobalISel, gfx8 and later: only the "product + 1" values are formed
;     with a mad whose addend is the constant 1; the rest are separate
;     16-bit add/mul instructions.
;   * gfx6/gfx7 (one check body shared by both selectors): 24-bit
;     mul/mad instructions with explicit 0xffff masking.
;   * gfx10 and later end with an explicit 0xffff mask of the result;
;     the gfx8/gfx9 checks do not.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1374define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
1375; GFX67-LABEL: clpeak_umad_pat_i16:
1376; GFX67:       ; %bb.0: ; %entry
1377; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1378; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
1379; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1380; GFX67-NEXT:    v_mul_u32_u24_e32 v2, v0, v1
1381; GFX67-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
1382; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1383; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1384; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v3, v2
1385; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
1386; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1387; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
1388; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
1389; GFX67-NEXT:    v_mad_u32_u24 v1, v3, v2, 1
1390; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1391; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1392; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
1393; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1394; GFX67-NEXT:    s_setpc_b64 s[30:31]
1395;
1396; GFX8-SDAG-LABEL: clpeak_umad_pat_i16:
1397; GFX8-SDAG:       ; %bb.0: ; %entry
1398; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1399; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1400; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
1401; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1402; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
1403; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
1404;
1405; GFX8-GISEL-LABEL: clpeak_umad_pat_i16:
1406; GFX8-GISEL:       ; %bb.0: ; %entry
1407; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1408; GFX8-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
1409; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
1410; GFX8-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
1411; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
1412; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
1413; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v4
1414; GFX8-GISEL-NEXT:    v_mad_u16 v1, v3, v2, 1
1415; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
1416; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
1417;
1418; GFX9-SDAG-LABEL: clpeak_umad_pat_i16:
1419; GFX9-SDAG:       ; %bb.0: ; %entry
1420; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1421; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
1422; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
1423; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
1424; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v0, v1, v0
1425; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1426;
1427; GFX9-GISEL-LABEL: clpeak_umad_pat_i16:
1428; GFX9-GISEL:       ; %bb.0: ; %entry
1429; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430; GFX9-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
1431; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
1432; GFX9-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
1433; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
1434; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v1, 1
1435; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v4
1436; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v3, v2, 1
1437; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
1438; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1439;
1440; GFX10-SDAG-LABEL: clpeak_umad_pat_i16:
1441; GFX10-SDAG:       ; %bb.0: ; %entry
1442; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1444; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
1445; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1446; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
1447; GFX10-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1448; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
1449;
1450; GFX10-GISEL-LABEL: clpeak_umad_pat_i16:
1451; GFX10-GISEL:       ; %bb.0: ; %entry
1452; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1453; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
1454; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
1455; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
1456; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
1457; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
1458; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
1459; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
1460; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
1461; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1462; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
1463;
1464; GFX11-SDAG-LABEL: clpeak_umad_pat_i16:
1465; GFX11-SDAG:       ; %bb.0: ; %entry
1466; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1468; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1469; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
1470; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1471; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1472; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
1473; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1474; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1475;
1476; GFX11-GISEL-LABEL: clpeak_umad_pat_i16:
1477; GFX11-GISEL:       ; %bb.0: ; %entry
1478; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1479; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
1480; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
1481; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1482; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
1483; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
1484; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
1485; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1486; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
1487; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
1488; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
1489; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1490; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1491; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1492;
1493; GFX1200-SDAG-LABEL: clpeak_umad_pat_i16:
1494; GFX1200-SDAG:       ; %bb.0: ; %entry
1495; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
1496; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
1497; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
1498; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
1499; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
1500; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1501; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1502; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
1503; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
1504; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1505; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
1506; GFX1200-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1507; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
1508;
1509; GFX1200-GISEL-LABEL: clpeak_umad_pat_i16:
1510; GFX1200-GISEL:       ; %bb.0: ; %entry
1511; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
1512; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
1513; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
1514; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
1515; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
1516; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
1517; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
1518; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1519; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
1520; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
1521; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
1522; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1523; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
1524; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
1525; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
1526; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1527; GFX1200-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1528; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
1529entry:
1530  %conv33 = add i16 %x, 1
1531  %add = mul i16 %conv33, %y
1532  %conv434 = add i16 %y, 1
1533  %add8 = mul i16 %conv434, %add
1534  %conv1035 = add i16 %add, 1
1535  %add14 = mul i16 %conv1035, %add8
1536  %conv1636 = add i16 %add8, 1
1537  %add20 = mul i16 %add14, %conv1636
1538  ret i16 %add20
1539}
1540
; <2 x i16> multiply/add chain. In terms of the IR at the bottom of this
; function (all operations are element-wise):
;   y18  = x + 1
;   add  = y18 * y
;   add2 = (add + y18) * y
;   add6 = (add2 * (add + 1)) * (add2 + 1)        <- returned
; What the checks below show:
;   * gfx9 and later: a single check body per target is shared by both
;     instruction selectors, and the whole chain is done with packed
;     v_pk_add_u16 / v_pk_mad_u16 / v_pk_mul_lo_u16 instructions.
;   * gfx8: separate SelectionDAG and GlobalISel bodies; the two halves are
;     processed with scalar 16-bit ops and recombined with shift/sdwa + or.
;   * gfx6/gfx7: separate SelectionDAG and GlobalISel bodies built from
;     24-bit mul/mad instructions with explicit 0xffff masking.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing by hand.
1541define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
1542; GFX67-SDAG-LABEL: clpeak_umad_pat_v2i16:
1543; GFX67-SDAG:       ; %bb.0: ; %entry
1544; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1545; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
1546; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v0
1547; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
1548; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1549; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v1
1550; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1551; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
1552; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1553; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
1554; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1555; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
1556; GFX67-SDAG-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
1557; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
1558; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1559; GFX67-SDAG-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
1560; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
1561; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v6
1562; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
1563; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v5
1564; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
1565; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v7
1566; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
1567; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1568; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1569; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1570; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1571; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
1572; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
1573; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
1574;
1575; GFX67-GISEL-LABEL: clpeak_umad_pat_v2i16:
1576; GFX67-GISEL:       ; %bb.0: ; %entry
1577; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1578; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
1579; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
1580; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
1581; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
1582; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1583; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
1584; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
1585; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1586; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1587; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1588; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
1589; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
1590; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1591; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1592; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1593; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
1594; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
1595; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1596; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
1597; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1598; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1599; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
1600; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1601; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1602; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
1603; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
1604; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
1605; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
1606; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1607; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1608; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1609; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
1610; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
1611; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
1612; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
1613; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
1614; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
1615; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
1616; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
1617; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1618; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1619; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1620; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
1621; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
1622; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
1623;
1624; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16:
1625; GFX8-SDAG:       ; %bb.0: ; %entry
1626; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1627; GFX8-SDAG-NEXT:    v_mov_b32_e32 v3, 1
1628; GFX8-SDAG-NEXT:    v_add_u16_e32 v2, 1, v0
1629; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1630; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
1631; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v0, v3
1632; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v3, v0
1633; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v2, v1
1634; GFX8-SDAG-NEXT:    v_mad_u16 v2, v2, v1, v2
1635; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v3
1636; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v2, v1
1637; GFX8-SDAG-NEXT:    v_mad_u16 v2, v0, v4, v0
1638; GFX8-SDAG-NEXT:    v_mad_u16 v3, v1, v5, v1
1639; GFX8-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
1640; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1641; GFX8-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
1642; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
1643; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
1644;
1645; GFX8-GISEL-LABEL: clpeak_umad_pat_v2i16:
1646; GFX8-GISEL:       ; %bb.0: ; %entry
1647; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648; GFX8-GISEL-NEXT:    v_mov_b32_e32 v3, 1
1649; GFX8-GISEL-NEXT:    v_add_u16_e32 v2, 1, v0
1650; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1651; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
1652; GFX8-GISEL-NEXT:    v_mad_u16 v4, v2, v1, v2
1653; GFX8-GISEL-NEXT:    v_mad_u16 v5, v0, v3, v0
1654; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v4, v1
1655; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v7, v5, v3
1656; GFX8-GISEL-NEXT:    v_mad_u16 v2, v2, v1, 1
1657; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v3, 1
1658; GFX8-GISEL-NEXT:    v_mad_u16 v1, v4, v1, 1
1659; GFX8-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
1660; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v6, v2
1661; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v7, v0
1662; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v2, v1
1663; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1664; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
1665; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
1666;
1667; GFX9-LABEL: clpeak_umad_pat_v2i16:
1668; GFX9:       ; %bb.0: ; %entry
1669; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670; GFX9-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1671; GFX9-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
1672; GFX9-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
1673; GFX9-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
1674; GFX9-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
1675; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
1676; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
1677; GFX9-NEXT:    s_setpc_b64 s[30:31]
1678;
1679; GFX10-LABEL: clpeak_umad_pat_v2i16:
1680; GFX10:       ; %bb.0: ; %entry
1681; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682; GFX10-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1683; GFX10-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
1684; GFX10-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
1685; GFX10-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
1686; GFX10-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
1687; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
1688; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
1689; GFX10-NEXT:    s_setpc_b64 s[30:31]
1690;
1691; GFX11-LABEL: clpeak_umad_pat_v2i16:
1692; GFX11:       ; %bb.0: ; %entry
1693; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694; GFX11-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1695; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1696; GFX11-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
1697; GFX11-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
1698; GFX11-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
1699; GFX11-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
1700; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1701; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
1702; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
1703; GFX11-NEXT:    s_setpc_b64 s[30:31]
1704;
1705; GFX1200-LABEL: clpeak_umad_pat_v2i16:
1706; GFX1200:       ; %bb.0: ; %entry
1707; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
1708; GFX1200-NEXT:    s_wait_expcnt 0x0
1709; GFX1200-NEXT:    s_wait_samplecnt 0x0
1710; GFX1200-NEXT:    s_wait_bvhcnt 0x0
1711; GFX1200-NEXT:    s_wait_kmcnt 0x0
1712; GFX1200-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1713; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1714; GFX1200-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
1715; GFX1200-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
1716; GFX1200-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
1717; GFX1200-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
1718; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1719; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
1720; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
1721; GFX1200-NEXT:    s_setpc_b64 s[30:31]
1722entry:
1723  %y18 = add <2 x i16> %x, <i16 1, i16 1>
1724  %add = mul <2 x i16> %y18, %y
1725  %mul119 = add <2 x i16> %add, %y18
1726  %add2 = mul <2 x i16> %mul119, %y
1727  %add220 = add <2 x i16> %add, <i16 1, i16 1>
1728  %add422 = add <2 x i16> %add2, <i16 1, i16 1>
1729  %mul521 = mul <2 x i16> %add2, %add220
1730  %add6 = mul <2 x i16> %mul521, %add422
1731  ret <2 x i16> %add6
1732}
1733
1734define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
1735; GFX67-SDAG-LABEL: clpeak_umad_pat_v3i16:
1736; GFX67-SDAG:       ; %bb.0: ; %entry
1737; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1738; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
1739; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
1740; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v0
1741; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v1
1742; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1743; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1744; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
1745; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v9, v8, v4
1746; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v6, v3, v0
1747; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v8, v4, v1
1748; GFX67-SDAG-NEXT:    v_mad_u32_u24 v6, v6, v3, 1
1749; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v2
1750; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1751; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1752; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1753; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v6
1754; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
1755; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v7, v5, v2
1756; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v8, v0, v3
1757; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
1758; GFX67-SDAG-NEXT:    v_or_b32_e32 v6, v9, v6
1759; GFX67-SDAG-NEXT:    s_mov_b32 s4, 0x10000
1760; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v3, 1
1761; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1762; GFX67-SDAG-NEXT:    v_add_i32_e32 v6, vcc, s4, v6
1763; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1764; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
1765; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v2, v5
1766; GFX67-SDAG-NEXT:    v_mad_u32_u24 v7, v7, v5, 1
1767; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v3, v0
1768; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v6
1769; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1770; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v7
1771; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
1772; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v8
1773; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v6
1774; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
1775; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v4
1776; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
1777; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
1778; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
1779; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1780; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
1781; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1782; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1783; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1784; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1785; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v5, v0
1786; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
1787; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
1788; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
1789;
1790; GFX67-GISEL-LABEL: clpeak_umad_pat_v3i16:
1791; GFX67-GISEL:       ; %bb.0: ; %entry
1792; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1793; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
1794; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
1795; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v0
1796; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1797; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
1798; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v1
1799; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1800; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v6, v3, v0
1801; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v2
1802; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1803; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v7, v4, v1
1804; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1805; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v8, v5, v2
1806; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v9, v0, v3
1807; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1808; GFX67-GISEL-NEXT:    v_mad_u32_u24 v6, v6, v3, 1
1809; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v10, v1, v4
1810; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1811; GFX67-GISEL-NEXT:    v_mad_u32_u24 v7, v7, v4, 1
1812; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v3, 1
1813; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v4, 1
1814; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v9
1815; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v6
1816; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v11, v2, v5
1817; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v5, 1
1818; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
1819; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
1820; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v10
1821; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v7
1822; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
1823; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v11
1824; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v8
1825; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
1826; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1827; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
1828; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
1829; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
1830; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
1831; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
1832; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v5
1833; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1834; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
1835; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
1836;
1837; GFX8-SDAG-LABEL: clpeak_umad_pat_v3i16:
1838; GFX8-SDAG:       ; %bb.0: ; %entry
1839; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1840; GFX8-SDAG-NEXT:    v_mov_b32_e32 v5, 1
1841; GFX8-SDAG-NEXT:    v_add_u16_e32 v4, 1, v0
1842; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1843; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
1844; GFX8-SDAG-NEXT:    v_add_u16_e32 v1, 1, v1
1845; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v6, v0, v5
1846; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v5, v0
1847; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v7, v1, v3
1848; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v8, v4, v2
1849; GFX8-SDAG-NEXT:    v_mad_u16 v4, v4, v2, v4
1850; GFX8-SDAG-NEXT:    v_mad_u16 v1, v1, v3, v1
1851; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v5
1852; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
1853; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v2, v4, v2
1854; GFX8-SDAG-NEXT:    v_mad_u16 v3, v0, v6, v0
1855; GFX8-SDAG-NEXT:    v_mad_u16 v4, v2, v8, v2
1856; GFX8-SDAG-NEXT:    v_mad_u16 v0, v3, v0, v3
1857; GFX8-SDAG-NEXT:    v_mad_u16 v5, v1, v7, v1
1858; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1859; GFX8-SDAG-NEXT:    v_mad_u16 v2, v4, v2, v4
1860; GFX8-SDAG-NEXT:    v_mad_u16 v1, v5, v1, v5
1861; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
1862; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
1863;
1864; GFX8-GISEL-LABEL: clpeak_umad_pat_v3i16:
1865; GFX8-GISEL:       ; %bb.0: ; %entry
1866; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1867; GFX8-GISEL-NEXT:    v_mov_b32_e32 v6, 1
1868; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
1869; GFX8-GISEL-NEXT:    v_add_u16_e32 v5, 1, v0
1870; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1871; GFX8-GISEL-NEXT:    v_add_u16_e32 v1, 1, v1
1872; GFX8-GISEL-NEXT:    v_mad_u16 v6, v5, v2, v5
1873; GFX8-GISEL-NEXT:    v_mad_u16 v7, v0, v4, v0
1874; GFX8-GISEL-NEXT:    v_mad_u16 v8, v1, v3, v1
1875; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v9, v6, v2
1876; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v10, v7, v4
1877; GFX8-GISEL-NEXT:    v_mad_u16 v5, v5, v2, 1
1878; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v4, 1
1879; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v11, v8, v3
1880; GFX8-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
1881; GFX8-GISEL-NEXT:    v_mad_u16 v2, v6, v2, 1
1882; GFX8-GISEL-NEXT:    v_mad_u16 v4, v7, v4, 1
1883; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v5, v9, v5
1884; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v10, v0
1885; GFX8-GISEL-NEXT:    v_mad_u16 v3, v8, v3, 1
1886; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v11, v1
1887; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v5, v2
1888; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1889; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
1890; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
1891; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
1892;
1893; GFX9-SDAG-LABEL: clpeak_umad_pat_v3i16:
1894; GFX9-SDAG:       ; %bb.0: ; %entry
1895; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1896; GFX9-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1897; GFX9-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
1898; GFX9-SDAG-NEXT:    v_pk_mad_u16 v4, v1, v3, v1
1899; GFX9-SDAG-NEXT:    v_pk_mad_u16 v5, v0, v2, v0
1900; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v5, v2
1901; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v4, v3
1902; GFX9-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1903; GFX9-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
1904; GFX9-SDAG-NEXT:    v_pk_mad_u16 v3, v4, v3, 1
1905; GFX9-SDAG-NEXT:    v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0]
1906; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1907; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1908; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1909; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1910; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1911;
1912; GFX9-GISEL-LABEL: clpeak_umad_pat_v3i16:
1913; GFX9-GISEL:       ; %bb.0: ; %entry
1914; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1915; GFX9-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1916; GFX9-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
1917; GFX9-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1918; GFX9-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1919; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1920; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1921; GFX9-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1922; GFX9-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
1923; GFX9-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1924; GFX9-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
1925; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1926; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1927; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1928; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1929; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1930;
1931; GFX10-SDAG-LABEL: clpeak_umad_pat_v3i16:
1932; GFX10-SDAG:       ; %bb.0: ; %entry
1933; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1934; GFX10-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1935; GFX10-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
1936; GFX10-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1937; GFX10-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1938; GFX10-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1939; GFX10-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
1940; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1941; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1942; GFX10-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
1943; GFX10-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1944; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1945; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1946; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1947; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1948; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
1949;
1950; GFX10-GISEL-LABEL: clpeak_umad_pat_v3i16:
1951; GFX10-GISEL:       ; %bb.0: ; %entry
1952; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1953; GFX10-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1954; GFX10-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
1955; GFX10-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1956; GFX10-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1957; GFX10-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1958; GFX10-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
1959; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1960; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1961; GFX10-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1962; GFX10-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
1963; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1964; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1965; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1966; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1967; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
1968;
1969; GFX11-SDAG-LABEL: clpeak_umad_pat_v3i16:
1970; GFX11-SDAG:       ; %bb.0: ; %entry
1971; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1972; GFX11-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1973; GFX11-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
1974; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1975; GFX11-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1976; GFX11-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
1977; GFX11-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
1978; GFX11-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
1979; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1980; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
1981; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
1982; GFX11-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
1983; GFX11-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
1984; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1985; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
1986; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
1987; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1988; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
1989; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
1990; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1991;
1992; GFX11-GISEL-LABEL: clpeak_umad_pat_v3i16:
1993; GFX11-GISEL:       ; %bb.0: ; %entry
1994; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1995; GFX11-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1996; GFX11-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
1997; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1998; GFX11-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
1999; GFX11-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2000; GFX11-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2001; GFX11-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
2002; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2003; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2004; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2005; GFX11-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2006; GFX11-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
2007; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2008; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2009; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2010; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2011; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2012; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2013; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
2014;
2015; GFX1200-SDAG-LABEL: clpeak_umad_pat_v3i16:
2016; GFX1200-SDAG:       ; %bb.0: ; %entry
2017; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
2018; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
2019; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
2020; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
2021; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
2022; GFX1200-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2023; GFX1200-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1
2024; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2025; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2026; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2027; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2028; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
2029; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2030; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2031; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2032; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
2033; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2034; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2035; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2036; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2037; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2038; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2039; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2040; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
2041;
2042; GFX1200-GISEL-LABEL: clpeak_umad_pat_v3i16:
2043; GFX1200-GISEL:       ; %bb.0: ; %entry
2044; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
2045; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
2046; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
2047; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
2048; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
2049; GFX1200-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2050; GFX1200-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1
2051; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2052; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2053; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2054; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2055; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1
2056; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2057; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2058; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2059; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2060; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1
2061; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2062; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2063; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2064; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2065; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2066; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2067; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
2068entry:
2069  %y48 = add <3 x i16> %x, <i16 1, i16 1, i16 1>
2070  %add = mul <3 x i16> %y48, %y
2071  %mul1249 = add <3 x i16> %add, %y48
2072  %add15 = mul <3 x i16> %mul1249, %y
2073  %add1550 = add <3 x i16> %add, <i16 1, i16 1, i16 1>
2074  %add2452 = add <3 x i16> %add15, <i16 1, i16 1, i16 1>
2075  %mul3051 = mul <3 x i16> %add15, %add1550
2076  %add33 = mul <3 x i16> %mul3051, %add2452
2077  ret <3 x i16> %add33
2078}
2079
2080define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
2081; GFX67-SDAG-LABEL: clpeak_umad_pat_v4i16:
2082; GFX67-SDAG:       ; %bb.0: ; %entry
2083; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2084; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, 1, v3
2085; GFX67-SDAG-NEXT:    v_and_b32_e32 v11, 0xffff, v3
2086; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v7
2087; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
2088; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v11, v7, v3
2089; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
2090; GFX67-SDAG-NEXT:    v_and_b32_e32 v9, 0xffff, v2
2091; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
2092; GFX67-SDAG-NEXT:    v_and_b32_e32 v6, 0xffff, v6
2093; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
2094; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v0
2095; GFX67-SDAG-NEXT:    v_and_b32_e32 v10, 0xffff, v1
2096; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
2097; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v5
2098; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v13, v11, v7
2099; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
2100; GFX67-SDAG-NEXT:    v_mad_u32_u24 v7, v9, v6, 1
2101; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v12, v10, v5
2102; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v9, v6, v2
2103; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v8, v4, v0
2104; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v10, v5, v1
2105; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v7
2106; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v9, 16, v13
2107; GFX67-SDAG-NEXT:    v_mad_u32_u24 v8, v8, v4, 1
2108; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2109; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2110; GFX67-SDAG-NEXT:    v_or_b32_e32 v7, v9, v7
2111; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v8
2112; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v9, 16, v12
2113; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2114; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v10, v0, v4
2115; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v5
2116; GFX67-SDAG-NEXT:    s_mov_b32 s4, 0x10000
2117; GFX67-SDAG-NEXT:    v_or_b32_e32 v8, v9, v8
2118; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
2119; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v5, v2, v6
2120; GFX67-SDAG-NEXT:    v_add_i32_e32 v8, vcc, s4, v8
2121; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v2, v6, 1
2122; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2123; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 16, v1
2124; GFX67-SDAG-NEXT:    v_add_i32_e32 v7, vcc, s4, v7
2125; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2126; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 16, v3
2127; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v4, v0
2128; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v8
2129; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2130; GFX67-SDAG-NEXT:    v_or_b32_e32 v2, v6, v2
2131; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v7
2132; GFX67-SDAG-NEXT:    v_and_b32_e32 v9, 0xffff, v10
2133; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v8
2134; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v4
2135; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v5
2136; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v7
2137; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
2138; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, s4, v2
2139; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
2140; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v8, v9, v8
2141; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
2142; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v6
2143; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
2144; GFX67-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
2145; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v8
2146; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2147; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2148; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
2149; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2150; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
2151; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v7, v0
2152; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v5
2153; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
2154; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v6
2155; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
2156;
2157; GFX67-GISEL-LABEL: clpeak_umad_pat_v4i16:
2158; GFX67-GISEL:       ; %bb.0: ; %entry
2159; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2160; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
2161; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
2162; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v3
2163; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v1
2164; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
2165; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v0
2166; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
2167; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v3
2168; GFX67-GISEL-NEXT:    v_or_b32_e32 v8, v8, v9
2169; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v2
2170; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
2171; GFX67-GISEL-NEXT:    v_or_b32_e32 v9, v9, v10
2172; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
2173; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
2174; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
2175; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
2176; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v10, v5, v1
2177; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v11, 16, v9
2178; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
2179; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
2180; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
2181; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v8, v4, v0
2182; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2183; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v9, v6, v2
2184; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v11, v7, v3
2185; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2186; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2187; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
2188; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
2189; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
2190; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2191; GFX67-GISEL-NEXT:    v_or_b32_e32 v1, v1, v2
2192; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
2193; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2194; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v13, v2, v5
2195; GFX67-GISEL-NEXT:    v_mad_u32_u24 v10, v10, v5, 1
2196; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v2, v5, 1
2197; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
2198; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v12, v0, v4
2199; GFX67-GISEL-NEXT:    v_mad_u32_u24 v8, v8, v4, 1
2200; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v10
2201; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
2202; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2203; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2204; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v15, v3, v7
2205; GFX67-GISEL-NEXT:    v_mad_u32_u24 v11, v11, v7, 1
2206; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
2207; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
2208; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v3, v7, 1
2209; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2210; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2211; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v14, v1, v6
2212; GFX67-GISEL-NEXT:    v_mad_u32_u24 v9, v9, v6, 1
2213; GFX67-GISEL-NEXT:    v_or_b32_e32 v8, v8, v10
2214; GFX67-GISEL-NEXT:    v_and_b32_e32 v10, 0xffff, v11
2215; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v6, 1
2216; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
2217; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
2218; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
2219; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
2220; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2221; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2222; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v12
2223; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v8
2224; GFX67-GISEL-NEXT:    v_or_b32_e32 v9, v9, v10
2225; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v1, v2
2226; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
2227; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
2228; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v13
2229; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v5, v1
2230; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v14
2231; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v9
2232; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
2233; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v5, v5, v6
2234; GFX67-GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v15
2235; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v6, v3
2236; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
2237; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
2238; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2239; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
2240; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v4, v0
2241; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
2242; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v5
2243; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
2244; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
2245; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v6
2246; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
2247; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v7
2248; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
2249;
2250; GFX8-SDAG-LABEL: clpeak_umad_pat_v4i16:
2251; GFX8-SDAG:       ; %bb.0: ; %entry
2252; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2253; GFX8-SDAG-NEXT:    v_mov_b32_e32 v6, 1
2254; GFX8-SDAG-NEXT:    v_add_u16_e32 v5, 1, v0
2255; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2256; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
2257; GFX8-SDAG-NEXT:    v_add_u16_e32 v4, 1, v1
2258; GFX8-SDAG-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2259; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
2260; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v9, v0, v8
2261; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v8, v0
2262; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v7, v1, v6
2263; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v10, v5, v2
2264; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v11, v4, v3
2265; GFX8-SDAG-NEXT:    v_mad_u16 v1, v1, v6, v1
2266; GFX8-SDAG-NEXT:    v_mad_u16 v4, v4, v3, v4
2267; GFX8-SDAG-NEXT:    v_mad_u16 v5, v5, v2, v5
2268; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v8
2269; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v6
2270; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v2, v5, v2
2271; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v3, v4, v3
2272; GFX8-SDAG-NEXT:    v_mad_u16 v4, v0, v9, v0
2273; GFX8-SDAG-NEXT:    v_mad_u16 v5, v1, v7, v1
2274; GFX8-SDAG-NEXT:    v_mad_u16 v7, v2, v10, v2
2275; GFX8-SDAG-NEXT:    v_mad_u16 v0, v4, v0, v4
2276; GFX8-SDAG-NEXT:    v_mad_u16 v6, v3, v11, v3
2277; GFX8-SDAG-NEXT:    v_mad_u16 v1, v5, v1, v5
2278; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2279; GFX8-SDAG-NEXT:    v_mad_u16 v2, v7, v2, v7
2280; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
2281; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2282; GFX8-SDAG-NEXT:    v_mad_u16 v2, v6, v3, v6
2283; GFX8-SDAG-NEXT:    v_or_b32_e32 v1, v2, v1
2284; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
2285;
2286; GFX8-GISEL-LABEL: clpeak_umad_pat_v4i16:
2287; GFX8-GISEL:       ; %bb.0: ; %entry
2288; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2289; GFX8-GISEL-NEXT:    v_mov_b32_e32 v5, 1
2290; GFX8-GISEL-NEXT:    v_add_u16_e32 v4, 1, v0
2291; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2292; GFX8-GISEL-NEXT:    v_add_u16_e32 v6, 1, v1
2293; GFX8-GISEL-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
2294; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
2295; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
2296; GFX8-GISEL-NEXT:    v_mad_u16 v8, v4, v2, v4
2297; GFX8-GISEL-NEXT:    v_mad_u16 v9, v0, v5, v0
2298; GFX8-GISEL-NEXT:    v_mad_u16 v10, v6, v3, v6
2299; GFX8-GISEL-NEXT:    v_mad_u16 v11, v1, v7, v1
2300; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v12, v8, v2
2301; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v13, v9, v5
2302; GFX8-GISEL-NEXT:    v_mad_u16 v4, v4, v2, 1
2303; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v5, 1
2304; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v14, v10, v3
2305; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v15, v11, v7
2306; GFX8-GISEL-NEXT:    v_mad_u16 v6, v6, v3, 1
2307; GFX8-GISEL-NEXT:    v_mad_u16 v1, v1, v7, 1
2308; GFX8-GISEL-NEXT:    v_mad_u16 v2, v8, v2, 1
2309; GFX8-GISEL-NEXT:    v_mad_u16 v5, v9, v5, 1
2310; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v12, v4
2311; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v13, v0
2312; GFX8-GISEL-NEXT:    v_mad_u16 v3, v10, v3, 1
2313; GFX8-GISEL-NEXT:    v_mad_u16 v7, v11, v7, 1
2314; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v14, v6
2315; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v15, v1
2316; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v4, v2
2317; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2318; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
2319; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v6, v3
2320; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
2321; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v2, v1
2322; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
2323;
2324; GFX9-SDAG-LABEL: clpeak_umad_pat_v4i16:
2325; GFX9-SDAG:       ; %bb.0: ; %entry
2326; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2327; GFX9-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2328; GFX9-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2329; GFX9-SDAG-NEXT:    v_pk_mad_u16 v4, v1, v3, v1
2330; GFX9-SDAG-NEXT:    v_pk_mad_u16 v5, v0, v2, v0
2331; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v5, v2
2332; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v4, v3
2333; GFX9-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2334; GFX9-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2335; GFX9-SDAG-NEXT:    v_pk_mad_u16 v3, v4, v3, 1 op_sel_hi:[1,1,0]
2336; GFX9-SDAG-NEXT:    v_pk_mad_u16 v2, v5, v2, 1 op_sel_hi:[1,1,0]
2337; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2338; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2339; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2340; GFX9-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2341; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
2342;
2343; GFX9-GISEL-LABEL: clpeak_umad_pat_v4i16:
2344; GFX9-GISEL:       ; %bb.0: ; %entry
2345; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2346; GFX9-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2347; GFX9-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2348; GFX9-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2349; GFX9-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2350; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2351; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2352; GFX9-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2353; GFX9-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2354; GFX9-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2355; GFX9-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
2356; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2357; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2358; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2359; GFX9-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2360; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
2361;
2362; GFX10-SDAG-LABEL: clpeak_umad_pat_v4i16:
2363; GFX10-SDAG:       ; %bb.0: ; %entry
2364; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2365; GFX10-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2366; GFX10-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2367; GFX10-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2368; GFX10-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2369; GFX10-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2370; GFX10-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2371; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2372; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2373; GFX10-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
2374; GFX10-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2375; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2376; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2377; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2378; GFX10-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2379; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
2380;
2381; GFX10-GISEL-LABEL: clpeak_umad_pat_v4i16:
2382; GFX10-GISEL:       ; %bb.0: ; %entry
2383; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2384; GFX10-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2385; GFX10-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2386; GFX10-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2387; GFX10-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2388; GFX10-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2389; GFX10-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2390; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2391; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2392; GFX10-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2393; GFX10-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
2394; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2395; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2396; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2397; GFX10-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2398; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
2399;
2400; GFX11-SDAG-LABEL: clpeak_umad_pat_v4i16:
2401; GFX11-SDAG:       ; %bb.0: ; %entry
2402; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2403; GFX11-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2404; GFX11-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2405; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2406; GFX11-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2407; GFX11-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2408; GFX11-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2409; GFX11-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2410; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2411; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2412; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2413; GFX11-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
2414; GFX11-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2415; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2416; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2417; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2418; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2419; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2420; GFX11-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2421; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
2422;
2423; GFX11-GISEL-LABEL: clpeak_umad_pat_v4i16:
2424; GFX11-GISEL:       ; %bb.0: ; %entry
2425; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2426; GFX11-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2427; GFX11-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2428; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2429; GFX11-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2430; GFX11-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2431; GFX11-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2432; GFX11-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2433; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2434; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2435; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2436; GFX11-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2437; GFX11-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
2438; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2439; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2440; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2441; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2442; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2443; GFX11-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2444; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
2445;
2446; GFX1200-SDAG-LABEL: clpeak_umad_pat_v4i16:
2447; GFX1200-SDAG:       ; %bb.0: ; %entry
2448; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
2449; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
2450; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
2451; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
2452; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
2453; GFX1200-SDAG-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2454; GFX1200-SDAG-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2455; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2456; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2457; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2458; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2459; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2460; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2461; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2462; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2463; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
2464; GFX1200-SDAG-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2465; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2466; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2467; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2468; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2469; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2470; GFX1200-SDAG-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2471; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
2472;
2473; GFX1200-GISEL-LABEL: clpeak_umad_pat_v4i16:
2474; GFX1200-GISEL:       ; %bb.0: ; %entry
2475; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
2476; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
2477; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
2478; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
2479; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
2480; GFX1200-GISEL-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
2481; GFX1200-GISEL-NEXT:    v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
2482; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2483; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v4, v0, v2, v0
2484; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v5, v1, v3, v1
2485; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v0, v0, v2, 1 op_sel_hi:[1,1,0]
2486; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v1, v1, v3, 1 op_sel_hi:[1,1,0]
2487; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2488; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v6, v4, v2
2489; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v7, v5, v3
2490; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v2, v4, v2, 1 op_sel_hi:[1,1,0]
2491; GFX1200-GISEL-NEXT:    v_pk_mad_u16 v3, v5, v3, 1 op_sel_hi:[1,1,0]
2492; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
2493; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v6, v0
2494; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v7, v1
2495; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2496; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v0, v0, v2
2497; GFX1200-GISEL-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
2498; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
2499entry:
2500  %y18 = add <4 x i16> %x, <i16 1, i16 1, i16 1, i16 1>
2501  %add = mul <4 x i16> %y18, %y
2502  %mul119 = add <4 x i16> %add, %y18
2503  %add2 = mul <4 x i16> %mul119, %y
2504  %add220 = add <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
2505  %add422 = add <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
2506  %mul521 = mul <4 x i16> %add2, %add220
2507  %add6 = mul <4 x i16> %mul521, %add422
2508  ret <4 x i16> %add6
2509}
2510
; <2 x i32> variant of the clpeak integer multiply-add pattern. Per lane, with
; t = x + 1 and a = t * y, the body computes b = (a + t) * y and returns
; (b * (a + 1)) * (b + 1).
;
; What the autogenerated expectations below record for this function
;  - SelectionDAG for the gfx6/gfx7 and gfx8 groups shows each of the two final
;    products as v_mul_lo_u32 followed by an add of one of its operands,
;    i.e. b * a + b and m * b + m, rather than adding 1 first.
;  - SelectionDAG for the gfx900, gfx90a, gfx10, gfx11 and gfx1200 groups shows
;    those same two steps per lane as v_mad_u64_u32 (spelled v_mad_co_u64_u32 in
;    the gfx1200 group).
;  - GlobalISel in every group keeps the explicit "add 1" instructions and
;    multiplies with v_mul_lo_u32 only.
2511define <2 x i32> @clpeak_imad_pat_v2i32(<2 x i32> %x, <2 x i32> %y) {
2512; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i32:
2513; GFX67-SDAG:       ; %bb.0: ; %entry
2514; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2515; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
2516; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
2517; GFX67-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
2518; GFX67-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
2519; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
2520; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
2521; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
2522; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
2523; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v4
2524; GFX67-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v5
2525; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v2, v0
2526; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v3, v1
2527; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
2528; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
2529; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
2530; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
2531; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
2532;
2533; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i32:
2534; GFX67-GISEL:       ; %bb.0: ; %entry
2535; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2536; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
2537; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
2538; GFX67-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
2539; GFX67-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
2540; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
2541; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
2542; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2543; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2544; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
2545; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
2546; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
2547; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2548; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v3
2549; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
2550; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
2551; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v2, v1
2552; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
2553;
2554; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i32:
2555; GFX8-SDAG:       ; %bb.0: ; %entry
2556; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2557; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
2558; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
2559; GFX8-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
2560; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
2561; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
2562; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v5, v1
2563; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
2564; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
2565; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v4
2566; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v5
2567; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v2, v0
2568; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v3, v1
2569; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
2570; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
2571; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
2572; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
2573; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
2574;
2575; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i32:
2576; GFX8-GISEL:       ; %bb.0: ; %entry
2577; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2578; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
2579; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
2580; GFX8-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
2581; GFX8-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
2582; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
2583; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, v5, v1
2584; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2585; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2586; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 1, v4
2587; GFX8-GISEL-NEXT:    v_add_u32_e32 v3, vcc, 1, v5
2588; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
2589; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2590; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v3
2591; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
2592; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
2593; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v2, v1
2594; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
2595;
2596; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i32:
2597; GFX900-SDAG:       ; %bb.0: ; %entry
2598; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2599; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
2600; GFX900-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
2601; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
2602; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
2603; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v4, v0
2604; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
2605; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, v5, v1
2606; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v3
2607; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[0:1]
2608; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v5, v[2:3]
2609; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4]
2610; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v2, v[4:5]
2611; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
2612;
2613; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i32:
2614; GFX900-GISEL:       ; %bb.0: ; %entry
2615; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2616; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
2617; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
2618; GFX900-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
2619; GFX900-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
2620; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v4, v0
2621; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, v5, v1
2622; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2623; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2624; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
2625; GFX900-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
2626; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
2627; GFX900-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
2628; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
2629; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
2630; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
2631; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
2632; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
2633;
2634; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i32:
2635; GFX90A-SDAG:       ; %bb.0: ; %entry
2636; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2637; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
2638; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
2639; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v2
2640; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v1, v3
2641; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, v4, v1
2642; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v6, v0
2643; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
2644; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v3
2645; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v4, v[2:3]
2646; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v0, v6, v[0:1]
2647; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
2648; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5]
2649; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, v2
2650; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
2651;
2652; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i32:
2653; GFX90A-GISEL:       ; %bb.0: ; %entry
2654; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2655; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
2656; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
2657; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
2658; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
2659; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v4, v0
2660; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v5, v1
2661; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2662; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2663; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
2664; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
2665; GFX90A-GISEL-NEXT:    v_add_u32_e32 v4, 1, v0
2666; GFX90A-GISEL-NEXT:    v_add_u32_e32 v5, 1, v1
2667; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2668; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2669; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
2670; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
2671; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
2672;
2673; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32:
2674; GFX10-SDAG:       ; %bb.0: ; %entry
2675; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2676; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2677; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2678; GFX10-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
2679; GFX10-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
2680; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v4, v0
2681; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, v5, v1
2682; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
2683; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v3
2684; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v0, v4, v[0:1]
2685; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[4:5], null, v2, v5, v[2:3]
2686; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
2687; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v4, v2, v[4:5]
2688; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
2689;
2690; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32:
2691; GFX10-GISEL:       ; %bb.0: ; %entry
2692; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2693; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2694; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2695; GFX10-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
2696; GFX10-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
2697; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v4, v0
2698; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, v5, v1
2699; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2700; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2701; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
2702; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
2703; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
2704; GFX10-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
2705; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2706; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2707; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
2708; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
2709; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
2710;
2711; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i32:
2712; GFX11-SDAG:       ; %bb.0: ; %entry
2713; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2714; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2715; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2716; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2717; GFX11-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v2
2718; GFX11-SDAG-NEXT:    v_mul_lo_u32 v7, v1, v3
2719; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2720; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v6, v0
2721; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, v7, v1
2722; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2723; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v2
2724; GFX11-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v3
2725; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2726; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[4:5], null, v2, v6, v[2:3]
2727; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v3, v7, v[3:4]
2728; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
2729; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, v[4:5]
2730; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v3, v[5:6]
2731; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
2732;
2733; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i32:
2734; GFX11-GISEL:       ; %bb.0: ; %entry
2735; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2736; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2737; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2738; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2739; GFX11-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
2740; GFX11-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
2741; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2742; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v4, v0
2743; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, v5, v1
2744; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2745; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2746; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2747; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
2748; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
2749; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2750; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
2751; GFX11-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
2752; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2753; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2754; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2755; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
2756; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
2757; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
2758;
2759; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i32:
2760; GFX1200-SDAG:       ; %bb.0: ; %entry
2761; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
2762; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
2763; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
2764; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
2765; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
2766; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2767; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2768; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2769; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
2770; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
2771; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2772; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v4, v0
2773; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, v5, v1
2774; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2775; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
2776; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v3
2777; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2778; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[3:4], null, v0, v4, v[0:1]
2779; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[4:5], null, v2, v5, v[2:3]
2780; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
2781; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4]
2782; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v4, v2, v[4:5]
2783; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
2784;
2785; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i32:
2786; GFX1200-GISEL:       ; %bb.0: ; %entry
2787; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
2788; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
2789; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
2790; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
2791; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
2792; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2793; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2794; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2795; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
2796; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
2797; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2798; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v4, v0
2799; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, v5, v1
2800; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2801; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
2802; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
2803; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
2804; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
2805; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2806; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
2807; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
2808; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
2809; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
2810; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2811; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
2812; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
2813; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
2814entry:
2815  %y18 = add <2 x i32> %x, <i32 1, i32 1>
2816  %add = mul <2 x i32> %y18, %y
2817  %mul119 = add <2 x i32> %add, %y18
2818  %add2 = mul <2 x i32> %mul119, %y
  ; %add220 and %add422 are the "+ 1" operands of the two final products
  ; (%mul521 and %add6); %add6 is the returned value.
2819  %add220 = add <2 x i32> %add, <i32 1, i32 1>
2820  %add422 = add <2 x i32> %add2, <i32 1, i32 1>
2821  %mul521 = mul <2 x i32> %add2, %add220
2822  %add6 = mul <2 x i32> %mul521, %add422
2823  ret <2 x i32> %add6
2824}
2825
2826define <3 x i32> @clpeak_imad_pat_v3i32(<3 x i32> %x, <3 x i32> %y) {
2827; GFX67-SDAG-LABEL: clpeak_imad_pat_v3i32:
2828; GFX67-SDAG:       ; %bb.0: ; %entry
2829; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2830; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
2831; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
2832; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
2833; GFX67-SDAG-NEXT:    v_mul_lo_u32 v6, v2, v5
2834; GFX67-SDAG-NEXT:    v_mul_lo_u32 v7, v0, v3
2835; GFX67-SDAG-NEXT:    v_mul_lo_u32 v8, v1, v4
2836; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v6, v2
2837; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v7, v0
2838; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
2839; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v5
2840; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v3
2841; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v4
2842; GFX67-SDAG-NEXT:    v_mul_lo_u32 v3, v2, v6
2843; GFX67-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v7
2844; GFX67-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v8
2845; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v3, v2
2846; GFX67-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v4, v0
2847; GFX67-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v5, v1
2848; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v4, v0
2849; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v5, v1
2850; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v3, v2
2851; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
2852; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
2853; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
2854; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
2855;
2856; GFX67-GISEL-LABEL: clpeak_imad_pat_v3i32:
2857; GFX67-GISEL:       ; %bb.0: ; %entry
2858; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2859; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
2860; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
2861; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
2862; GFX67-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v3
2863; GFX67-GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
2864; GFX67-GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
2865; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
2866; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v7, v1
2867; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v8, v2
2868; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
2869; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
2870; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
2871; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v6
2872; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v7
2873; GFX67-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v8
2874; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v0
2875; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v1
2876; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
2877; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
2878; GFX67-GISEL-NEXT:    v_mul_lo_u32 v3, v2, v5
2879; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
2880; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v6
2881; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v7
2882; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v3, v2
2883; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
2884;
2885; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i32:
2886; GFX8-SDAG:       ; %bb.0: ; %entry
2887; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2888; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, 1, v2
2889; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
2890; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
2891; GFX8-SDAG-NEXT:    v_mul_lo_u32 v6, v2, v5
2892; GFX8-SDAG-NEXT:    v_mul_lo_u32 v7, v0, v3
2893; GFX8-SDAG-NEXT:    v_mul_lo_u32 v8, v1, v4
2894; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v6, v2
2895; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v7, v0
2896; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v8, v1
2897; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v5
2898; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v3
2899; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v4
2900; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v2, v6
2901; GFX8-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v7
2902; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v8
2903; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v3, v2
2904; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, v4, v0
2905; GFX8-SDAG-NEXT:    v_add_u32_e32 v5, vcc, v5, v1
2906; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v4, v0
2907; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v5, v1
2908; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v3, v2
2909; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v4
2910; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v5
2911; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
2912; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
2913;
2914; GFX8-GISEL-LABEL: clpeak_imad_pat_v3i32:
2915; GFX8-GISEL:       ; %bb.0: ; %entry
2916; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2917; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
2918; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
2919; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 1, v2
2920; GFX8-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v3
2921; GFX8-GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
2922; GFX8-GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
2923; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v6, v0
2924; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, v7, v1
2925; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v8, v2
2926; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
2927; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
2928; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
2929; GFX8-GISEL-NEXT:    v_add_u32_e32 v3, vcc, 1, v6
2930; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, 1, v7
2931; GFX8-GISEL-NEXT:    v_add_u32_e32 v5, vcc, 1, v8
2932; GFX8-GISEL-NEXT:    v_add_u32_e32 v6, vcc, 1, v0
2933; GFX8-GISEL-NEXT:    v_add_u32_e32 v7, vcc, 1, v1
2934; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
2935; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
2936; GFX8-GISEL-NEXT:    v_mul_lo_u32 v3, v2, v5
2937; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 1, v2
2938; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v6
2939; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v7
2940; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v3, v2
2941; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
2942;
2943; GFX900-SDAG-LABEL: clpeak_imad_pat_v3i32:
2944; GFX900-SDAG:       ; %bb.0: ; %entry
2945; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2946; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, 1, v2
2947; GFX900-SDAG-NEXT:    v_mul_lo_u32 v6, v2, v5
2948; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
2949; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
2950; GFX900-SDAG-NEXT:    v_mul_lo_u32 v7, v0, v3
2951; GFX900-SDAG-NEXT:    v_mul_lo_u32 v8, v1, v4
2952; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, v6, v2
2953; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v5
2954; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v7, v0
2955; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, v8, v1
2956; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v3
2957; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v4
2958; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v6, v[5:6]
2959; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v0, v7, v[0:1]
2960; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v2, v8, v[2:3]
2961; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
2962; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v7, v2, v[7:8]
2963; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v3, v5, v[3:4]
2964; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
2965;
2966; GFX900-GISEL-LABEL: clpeak_imad_pat_v3i32:
2967; GFX900-GISEL:       ; %bb.0: ; %entry
2968; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2969; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
2970; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
2971; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, 1, v2
2972; GFX900-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v3
2973; GFX900-GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
2974; GFX900-GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
2975; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v6, v0
2976; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, v7, v1
2977; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, v8, v2
2978; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
2979; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
2980; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
2981; GFX900-GISEL-NEXT:    v_add_u32_e32 v3, 1, v6
2982; GFX900-GISEL-NEXT:    v_add_u32_e32 v4, 1, v7
2983; GFX900-GISEL-NEXT:    v_add_u32_e32 v5, 1, v8
2984; GFX900-GISEL-NEXT:    v_add_u32_e32 v6, 1, v0
2985; GFX900-GISEL-NEXT:    v_add_u32_e32 v7, 1, v1
2986; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
2987; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
2988; GFX900-GISEL-NEXT:    v_mul_lo_u32 v3, v2, v5
2989; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, 1, v2
2990; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v6
2991; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v7
2992; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v3, v2
2993; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
2994;
2995; GFX90A-SDAG-LABEL: clpeak_imad_pat_v3i32:
2996; GFX90A-SDAG:       ; %bb.0: ; %entry
2997; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2998; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
2999; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
3000; GFX90A-SDAG-NEXT:    v_add_u32_e32 v2, 1, v2
3001; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v10, v0, v3
3002; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v8, v1, v4
3003; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v2, v5
3004; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, v8, v1
3005; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v10, v0
3006; GFX90A-SDAG-NEXT:    v_add_u32_e32 v7, v6, v2
3007; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v3
3008; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v4
3009; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v7, v5
3010; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v2, v8, v[2:3]
3011; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v0, v10, v[0:1]
3012; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v4, v6, v[4:5]
3013; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v0, v[10:11]
3014; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v8, v2, v[8:9]
3015; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v4, v[6:7]
3016; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, v8
3017; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
3018;
3019; GFX90A-GISEL-LABEL: clpeak_imad_pat_v3i32:
3020; GFX90A-GISEL:       ; %bb.0: ; %entry
3021; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3022; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
3023; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
3024; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v2
3025; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v3
3026; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
3027; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
3028; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v6, v0
3029; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v7, v1
3030; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, v8, v2
3031; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
3032; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
3033; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
3034; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, 1, v6
3035; GFX90A-GISEL-NEXT:    v_add_u32_e32 v4, 1, v7
3036; GFX90A-GISEL-NEXT:    v_add_u32_e32 v5, 1, v8
3037; GFX90A-GISEL-NEXT:    v_add_u32_e32 v6, 1, v0
3038; GFX90A-GISEL-NEXT:    v_add_u32_e32 v7, 1, v1
3039; GFX90A-GISEL-NEXT:    v_add_u32_e32 v8, 1, v2
3040; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
3041; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
3042; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
3043; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v6
3044; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v7
3045; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v8
3046; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
3047;
3048; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i32:
3049; GFX10-SDAG:       ; %bb.0: ; %entry
3050; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3051; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3052; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3053; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3054; GFX10-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v3
3055; GFX10-SDAG-NEXT:    v_mul_lo_u32 v7, v1, v4
3056; GFX10-SDAG-NEXT:    v_mul_lo_u32 v8, v2, v5
3057; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v6, v0
3058; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, v7, v1
3059; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v9, v8, v2
3060; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v3
3061; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v4
3062; GFX10-SDAG-NEXT:    v_mul_lo_u32 v3, v9, v5
3063; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[4:5], null, v0, v6, v[0:1]
3064; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v2, v7, v[2:3]
3065; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[6:7], null, v3, v8, v[3:4]
3066; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v0, v[4:5]
3067; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v2, v[5:6]
3068; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v6, v3, v[6:7]
3069; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
3070;
3071; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i32:
3072; GFX10-GISEL:       ; %bb.0: ; %entry
3073; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3074; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3075; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3076; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3077; GFX10-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v3
3078; GFX10-GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
3079; GFX10-GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
3080; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v6, v0
3081; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, v7, v1
3082; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, v8, v2
3083; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
3084; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
3085; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
3086; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v6
3087; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v7
3088; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v5, 1, v8
3089; GFX10-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v3
3090; GFX10-GISEL-NEXT:    v_mul_lo_u32 v4, v1, v4
3091; GFX10-GISEL-NEXT:    v_mul_lo_u32 v5, v2, v5
3092; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3093; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3094; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3095; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v3, v0
3096; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
3097; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v5, v2
3098; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
3099;
3100; GFX11-SDAG-LABEL: clpeak_imad_pat_v3i32:
3101; GFX11-SDAG:       ; %bb.0: ; %entry
3102; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3103; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3104; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3105; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3106; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3107; GFX11-SDAG-NEXT:    v_mul_lo_u32 v7, v0, v3
3108; GFX11-SDAG-NEXT:    v_mul_lo_u32 v8, v1, v4
3109; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3110; GFX11-SDAG-NEXT:    v_mul_lo_u32 v9, v2, v5
3111; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v7, v0
3112; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3113; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, v8, v1
3114; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v6, v9, v2
3115; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3116; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v3
3117; GFX11-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v4
3118; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3119; GFX11-SDAG-NEXT:    v_mul_lo_u32 v4, v6, v5
3120; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v2, v7, v[2:3]
3121; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3122; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[6:7], null, v3, v8, v[3:4]
3123; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[7:8], null, v4, v9, v[4:5]
3124; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3125; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v2, v[5:6]
3126; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v6, v3, v[6:7]
3127; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
3128; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v7, v4, v[7:8]
3129; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
3130;
3131; GFX11-GISEL-LABEL: clpeak_imad_pat_v3i32:
3132; GFX11-GISEL:       ; %bb.0: ; %entry
3133; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3134; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3135; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3136; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3137; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3138; GFX11-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v3
3139; GFX11-GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
3140; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3141; GFX11-GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
3142; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v6, v0
3143; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3144; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, v7, v1
3145; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, v8, v2
3146; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3147; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
3148; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
3149; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
3150; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
3151; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v6
3152; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v7
3153; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v5, 1, v8
3154; GFX11-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v3
3155; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3156; GFX11-GISEL-NEXT:    v_mul_lo_u32 v4, v1, v4
3157; GFX11-GISEL-NEXT:    v_mul_lo_u32 v5, v2, v5
3158; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3159; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3160; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3161; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3162; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v3, v0
3163; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
3164; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3)
3165; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v5, v2
3166; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
3167;
3168; GFX1200-SDAG-LABEL: clpeak_imad_pat_v3i32:
3169; GFX1200-SDAG:       ; %bb.0: ; %entry
3170; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3171; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
3172; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
3173; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
3174; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
3175; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3176; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3177; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3178; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3179; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v3
3180; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v7, v1, v4
3181; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3182; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v8, v2, v5
3183; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v6, v0
3184; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3185; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, v7, v1
3186; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v9, v8, v2
3187; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3188; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v3
3189; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v4
3190; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3191; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v3, v9, v5
3192; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[4:5], null, v0, v6, v[0:1]
3193; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3194; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[5:6], null, v2, v7, v[2:3]
3195; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[6:7], null, v3, v8, v[3:4]
3196; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3197; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v4, v0, v[4:5]
3198; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v5, v2, v[5:6]
3199; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
3200; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[2:3], null, v6, v3, v[6:7]
3201; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
3202;
3203; GFX1200-GISEL-LABEL: clpeak_imad_pat_v3i32:
3204; GFX1200-GISEL:       ; %bb.0: ; %entry
3205; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3206; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
3207; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
3208; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
3209; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
3210; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3211; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3212; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3213; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3214; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v3
3215; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v7, v1, v4
3216; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3217; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v8, v2, v5
3218; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v6, v0
3219; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3220; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, v7, v1
3221; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, v8, v2
3222; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3223; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v3
3224; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
3225; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
3226; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v5
3227; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v6
3228; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v7
3229; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v5, 1, v8
3230; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v3
3231; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3232; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v4, v1, v4
3233; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v5, v2, v5
3234; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3235; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3236; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3237; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3238; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v3, v0
3239; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
3240; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3)
3241; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v5, v2
3242; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
3243entry:
3244  %y48 = add <3 x i32> %x, <i32 1, i32 1, i32 1>
3245  %add = mul <3 x i32> %y48, %y
3246  %mul1249 = add <3 x i32> %add, %y48
3247  %add15 = mul <3 x i32> %mul1249, %y
3248  %add1550 = add <3 x i32> %add, <i32 1, i32 1, i32 1>
3249  %add2452 = add <3 x i32> %add15, <i32 1, i32 1, i32 1>
3250  %mul3051 = mul <3 x i32> %add15, %add1550
3251  %add33 = mul <3 x i32> %mul3051, %add2452
3252  ret <3 x i32> %add33
3253}
3254
3255define <4 x i32> @clpeak_imad_pat_v4i32(<4 x i32> %x, <4 x i32> %y) {
3256; GFX67-SDAG-LABEL: clpeak_imad_pat_v4i32:
3257; GFX67-SDAG:       ; %bb.0: ; %entry
3258; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3259; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, 1, v3
3260; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
3261; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
3262; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
3263; GFX67-SDAG-NEXT:    v_mul_lo_u32 v8, v0, v4
3264; GFX67-SDAG-NEXT:    v_mul_lo_u32 v9, v3, v7
3265; GFX67-SDAG-NEXT:    v_mul_lo_u32 v10, v1, v5
3266; GFX67-SDAG-NEXT:    v_mul_lo_u32 v11, v2, v6
3267; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
3268; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v9, v3
3269; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v11, v2
3270; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v10, v1
3271; GFX67-SDAG-NEXT:    v_mul_lo_u32 v3, v3, v7
3272; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v6
3273; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v4
3274; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v5
3275; GFX67-SDAG-NEXT:    v_mul_lo_u32 v4, v3, v9
3276; GFX67-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v11
3277; GFX67-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v8
3278; GFX67-SDAG-NEXT:    v_mul_lo_u32 v7, v1, v10
3279; GFX67-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v4, v3
3280; GFX67-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v5, v2
3281; GFX67-SDAG-NEXT:    v_add_i32_e32 v6, vcc, v6, v0
3282; GFX67-SDAG-NEXT:    v_add_i32_e32 v7, vcc, v7, v1
3283; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v6, v0
3284; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v7, v1
3285; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v2
3286; GFX67-SDAG-NEXT:    v_mul_lo_u32 v3, v4, v3
3287; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
3288; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
3289; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
3290; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
3291; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
3292;
3293; GFX67-GISEL-LABEL: clpeak_imad_pat_v4i32:
3294; GFX67-GISEL:       ; %bb.0: ; %entry
3295; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3296; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
3297; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
3298; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
3299; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v3
3300; GFX67-GISEL-NEXT:    v_mul_lo_u32 v8, v0, v4
3301; GFX67-GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
3302; GFX67-GISEL-NEXT:    v_mul_lo_u32 v10, v2, v6
3303; GFX67-GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
3304; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v8, v0
3305; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v9, v1
3306; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
3307; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v11, v3
3308; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3309; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3310; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3311; GFX67-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3312; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v8
3313; GFX67-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v9
3314; GFX67-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v10
3315; GFX67-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v11
3316; GFX67-GISEL-NEXT:    v_add_i32_e32 v8, vcc, 1, v0
3317; GFX67-GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v1
3318; GFX67-GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v2
3319; GFX67-GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v3
3320; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3321; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3322; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3323; GFX67-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3324; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v8
3325; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v9
3326; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v10
3327; GFX67-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v11
3328; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
3329;
3330; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i32:
3331; GFX8-SDAG:       ; %bb.0: ; %entry
3332; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3333; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, 1, v3
3334; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, 1, v2
3335; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
3336; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
3337; GFX8-SDAG-NEXT:    v_mul_lo_u32 v8, v0, v4
3338; GFX8-SDAG-NEXT:    v_mul_lo_u32 v9, v3, v7
3339; GFX8-SDAG-NEXT:    v_mul_lo_u32 v10, v1, v5
3340; GFX8-SDAG-NEXT:    v_mul_lo_u32 v11, v2, v6
3341; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v8, v0
3342; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v9, v3
3343; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v11, v2
3344; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v10, v1
3345; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v3, v7
3346; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v6
3347; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v4
3348; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v5
3349; GFX8-SDAG-NEXT:    v_mul_lo_u32 v4, v3, v9
3350; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v11
3351; GFX8-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v8
3352; GFX8-SDAG-NEXT:    v_mul_lo_u32 v7, v1, v10
3353; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, v4, v3
3354; GFX8-SDAG-NEXT:    v_add_u32_e32 v5, vcc, v5, v2
3355; GFX8-SDAG-NEXT:    v_add_u32_e32 v6, vcc, v6, v0
3356; GFX8-SDAG-NEXT:    v_add_u32_e32 v7, vcc, v7, v1
3357; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v6, v0
3358; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v7, v1
3359; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v2
3360; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v4, v3
3361; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
3362; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v7
3363; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
3364; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v3, v4
3365; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
3366;
3367; GFX8-GISEL-LABEL: clpeak_imad_pat_v4i32:
3368; GFX8-GISEL:       ; %bb.0: ; %entry
3369; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3370; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
3371; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
3372; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 1, v2
3373; GFX8-GISEL-NEXT:    v_add_u32_e32 v3, vcc, 1, v3
3374; GFX8-GISEL-NEXT:    v_mul_lo_u32 v8, v0, v4
3375; GFX8-GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
3376; GFX8-GISEL-NEXT:    v_mul_lo_u32 v10, v2, v6
3377; GFX8-GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
3378; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v8, v0
3379; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, v9, v1
3380; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v10, v2
3381; GFX8-GISEL-NEXT:    v_add_u32_e32 v3, vcc, v11, v3
3382; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3383; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3384; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3385; GFX8-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3386; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, 1, v8
3387; GFX8-GISEL-NEXT:    v_add_u32_e32 v5, vcc, 1, v9
3388; GFX8-GISEL-NEXT:    v_add_u32_e32 v6, vcc, 1, v10
3389; GFX8-GISEL-NEXT:    v_add_u32_e32 v7, vcc, 1, v11
3390; GFX8-GISEL-NEXT:    v_add_u32_e32 v8, vcc, 1, v0
3391; GFX8-GISEL-NEXT:    v_add_u32_e32 v9, vcc, 1, v1
3392; GFX8-GISEL-NEXT:    v_add_u32_e32 v10, vcc, 1, v2
3393; GFX8-GISEL-NEXT:    v_add_u32_e32 v11, vcc, 1, v3
3394; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3395; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3396; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3397; GFX8-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3398; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v8
3399; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v9
3400; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v10
3401; GFX8-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v11
3402; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
3403;
3404; GFX900-SDAG-LABEL: clpeak_imad_pat_v4i32:
3405; GFX900-SDAG:       ; %bb.0: ; %entry
3406; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3407; GFX900-SDAG-NEXT:    v_add_u32_e32 v3, 1, v3
3408; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
3409; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, 1, v2
3410; GFX900-SDAG-NEXT:    v_mul_lo_u32 v8, v3, v7
3411; GFX900-SDAG-NEXT:    v_mul_lo_u32 v11, v0, v4
3412; GFX900-SDAG-NEXT:    v_mul_lo_u32 v9, v2, v6
3413; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
3414; GFX900-SDAG-NEXT:    v_mul_lo_u32 v10, v1, v5
3415; GFX900-SDAG-NEXT:    v_add_u32_e32 v3, v8, v3
3416; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v11, v0
3417; GFX900-SDAG-NEXT:    v_add_u32_e32 v12, v9, v2
3418; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v4
3419; GFX900-SDAG-NEXT:    v_mul_lo_u32 v4, v3, v7
3420; GFX900-SDAG-NEXT:    v_mul_lo_u32 v3, v12, v6
3421; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, v10, v1
3422; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v5
3423; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[4:5]
3424; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[3:4]
3425; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v0, v11, v[0:1]
3426; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v2, v10, v[2:3]
3427; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v7, v0, v[7:8]
3428; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v8, v2, v[8:9]
3429; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v3, v[6:7]
3430; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[5:6]
3431; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
3432;
3433; GFX9-GISEL-LABEL: clpeak_imad_pat_v4i32:
3434; GFX9-GISEL:       ; %bb.0: ; %entry
3435; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3436; GFX9-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
3437; GFX9-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
3438; GFX9-GISEL-NEXT:    v_add_u32_e32 v2, 1, v2
3439; GFX9-GISEL-NEXT:    v_add_u32_e32 v3, 1, v3
3440; GFX9-GISEL-NEXT:    v_mul_lo_u32 v8, v0, v4
3441; GFX9-GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
3442; GFX9-GISEL-NEXT:    v_mul_lo_u32 v10, v2, v6
3443; GFX9-GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
3444; GFX9-GISEL-NEXT:    v_add_u32_e32 v0, v8, v0
3445; GFX9-GISEL-NEXT:    v_add_u32_e32 v1, v9, v1
3446; GFX9-GISEL-NEXT:    v_add_u32_e32 v2, v10, v2
3447; GFX9-GISEL-NEXT:    v_add_u32_e32 v3, v11, v3
3448; GFX9-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3449; GFX9-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3450; GFX9-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3451; GFX9-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3452; GFX9-GISEL-NEXT:    v_add_u32_e32 v4, 1, v8
3453; GFX9-GISEL-NEXT:    v_add_u32_e32 v5, 1, v9
3454; GFX9-GISEL-NEXT:    v_add_u32_e32 v6, 1, v10
3455; GFX9-GISEL-NEXT:    v_add_u32_e32 v7, 1, v11
3456; GFX9-GISEL-NEXT:    v_add_u32_e32 v8, 1, v0
3457; GFX9-GISEL-NEXT:    v_add_u32_e32 v9, 1, v1
3458; GFX9-GISEL-NEXT:    v_add_u32_e32 v10, 1, v2
3459; GFX9-GISEL-NEXT:    v_add_u32_e32 v11, 1, v3
3460; GFX9-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3461; GFX9-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3462; GFX9-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3463; GFX9-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3464; GFX9-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v8
3465; GFX9-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v9
3466; GFX9-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v10
3467; GFX9-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v11
3468; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
3469;
3470; GFX90A-SDAG-LABEL: clpeak_imad_pat_v4i32:
3471; GFX90A-SDAG:       ; %bb.0: ; %entry
3472; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3473; GFX90A-SDAG-NEXT:    v_add_u32_e32 v3, 1, v3
3474; GFX90A-SDAG-NEXT:    v_add_u32_e32 v2, 1, v2
3475; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
3476; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
3477; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v14, v0, v4
3478; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v12, v1, v5
3479; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v10, v2, v6
3480; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v8, v3, v7
3481; GFX90A-SDAG-NEXT:    v_add_u32_e32 v3, v8, v3
3482; GFX90A-SDAG-NEXT:    v_add_u32_e32 v9, v10, v2
3483; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, v12, v1
3484; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v14, v0
3485; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v4
3486; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v5
3487; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v9, v6
3488; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v3, v7
3489; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[6:7]
3490; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v4, v10, v[4:5]
3491; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v2, v12, v[2:3]
3492; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v0, v14, v[0:1]
3493; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v14, v0, v[14:15]
3494; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[12:13], s[4:5], v12, v2, v[12:13]
3495; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v4, v[10:11]
3496; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v8, v6, v[8:9]
3497; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, v12
3498; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v3, v4
3499; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
3500;
3501; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i32:
3502; GFX10-SDAG:       ; %bb.0: ; %entry
3503; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3504; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3505; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3506; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3507; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3508; GFX10-SDAG-NEXT:    v_mul_lo_u32 v8, v0, v4
3509; GFX10-SDAG-NEXT:    v_mul_lo_u32 v9, v1, v5
3510; GFX10-SDAG-NEXT:    v_mul_lo_u32 v10, v2, v6
3511; GFX10-SDAG-NEXT:    v_mul_lo_u32 v11, v3, v7
3512; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v8, v0
3513; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, v9, v1
3514; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v12, v10, v2
3515; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v4
3516; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v4, v11, v3
3517; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v5
3518; GFX10-SDAG-NEXT:    v_mul_lo_u32 v3, v12, v6
3519; GFX10-SDAG-NEXT:    v_mul_lo_u32 v4, v4, v7
3520; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v0, v8, v[0:1]
3521; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[6:7], null, v2, v9, v[2:3]
3522; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[7:8], null, v3, v10, v[3:4]
3523; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[8:9], null, v4, v11, v[4:5]
3524; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v0, v[5:6]
3525; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v6, v2, v[6:7]
3526; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v7, v3, v[7:8]
3527; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v8, v4, v[8:9]
3528; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
3529;
3530; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i32:
3531; GFX10-GISEL:       ; %bb.0: ; %entry
3532; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3533; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3534; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3535; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3536; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3537; GFX10-GISEL-NEXT:    v_mul_lo_u32 v8, v0, v4
3538; GFX10-GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
3539; GFX10-GISEL-NEXT:    v_mul_lo_u32 v10, v2, v6
3540; GFX10-GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
3541; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v8, v0
3542; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, v9, v1
3543; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, v10, v2
3544; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, v11, v3
3545; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3546; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3547; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3548; GFX10-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3549; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v8
3550; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v5, 1, v9
3551; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v6, 1, v10
3552; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v7, 1, v11
3553; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v8, 1, v0
3554; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3555; GFX10-GISEL-NEXT:    v_mul_lo_u32 v4, v1, v5
3556; GFX10-GISEL-NEXT:    v_mul_lo_u32 v5, v2, v6
3557; GFX10-GISEL-NEXT:    v_mul_lo_u32 v6, v3, v7
3558; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3559; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3560; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3561; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v8
3562; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
3563; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v5, v2
3564; GFX10-GISEL-NEXT:    v_mul_lo_u32 v3, v6, v3
3565; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
3566;
3567; GFX11-SDAG-LABEL: clpeak_imad_pat_v4i32:
3568; GFX11-SDAG:       ; %bb.0: ; %entry
3569; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3570; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3571; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3572; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3573; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3574; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3575; GFX11-SDAG-NEXT:    v_mul_lo_u32 v8, v0, v4
3576; GFX11-SDAG-NEXT:    v_mul_lo_u32 v9, v1, v5
3577; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3578; GFX11-SDAG-NEXT:    v_mul_lo_u32 v10, v2, v6
3579; GFX11-SDAG-NEXT:    v_mul_lo_u32 v11, v3, v7
3580; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3581; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v8, v0
3582; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, v9, v1
3583; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
3584; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v12, v10, v2
3585; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v4
3586; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v11, v3
3587; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3588; GFX11-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v5
3589; GFX11-SDAG-NEXT:    v_mul_lo_u32 v4, v12, v6
3590; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3591; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v7
3592; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[6:7], null, v2, v8, v[2:3]
3593; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3594; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[7:8], null, v3, v9, v[3:4]
3595; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[8:9], null, v4, v10, v[4:5]
3596; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3597; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[9:10], null, v5, v11, v[5:6]
3598; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v6, v2, v[6:7]
3599; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3600; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v7, v3, v[7:8]
3601; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v8, v4, v[8:9]
3602; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4)
3603; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v9, v5, v[9:10]
3604; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
3605;
3606; GFX11-GISEL-LABEL: clpeak_imad_pat_v4i32:
3607; GFX11-GISEL:       ; %bb.0: ; %entry
3608; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3609; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3610; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3611; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3612; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3613; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3614; GFX11-GISEL-NEXT:    v_mul_lo_u32 v8, v0, v4
3615; GFX11-GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
3616; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3617; GFX11-GISEL-NEXT:    v_mul_lo_u32 v10, v2, v6
3618; GFX11-GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
3619; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3620; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v8, v0
3621; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, v9, v1
3622; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3623; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, v10, v2
3624; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, v11, v3
3625; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3626; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3627; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3628; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3629; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3630; GFX11-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3631; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v8
3632; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v5, 1, v9
3633; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v6, 1, v10
3634; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v7, 1, v11
3635; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v8, 1, v0
3636; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3637; GFX11-GISEL-NEXT:    v_mul_lo_u32 v4, v1, v5
3638; GFX11-GISEL-NEXT:    v_mul_lo_u32 v5, v2, v6
3639; GFX11-GISEL-NEXT:    v_mul_lo_u32 v6, v3, v7
3640; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3641; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3642; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3643; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v8
3644; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3645; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
3646; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v5, v2
3647; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4)
3648; GFX11-GISEL-NEXT:    v_mul_lo_u32 v3, v6, v3
3649; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
3650;
3651; GFX1200-SDAG-LABEL: clpeak_imad_pat_v4i32:
3652; GFX1200-SDAG:       ; %bb.0: ; %entry
3653; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3654; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
3655; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
3656; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
3657; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
3658; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3659; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3660; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3661; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3662; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3663; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v8, v0, v4
3664; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v9, v1, v5
3665; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3666; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v10, v2, v6
3667; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v11, v3, v7
3668; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3669; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v8, v0
3670; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, v9, v1
3671; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
3672; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v12, v10, v2
3673; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v4
3674; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v4, v11, v3
3675; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3676; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v5
3677; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v3, v12, v6
3678; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
3679; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v4, v4, v7
3680; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[5:6], null, v0, v8, v[0:1]
3681; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[6:7], null, v2, v9, v[2:3]
3682; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3683; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[7:8], null, v3, v10, v[3:4]
3684; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[8:9], null, v4, v11, v[4:5]
3685; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3686; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v5, v0, v[5:6]
3687; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v6, v2, v[6:7]
3688; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
3689; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[2:3], null, v7, v3, v[7:8]
3690; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[3:4], null, v8, v4, v[8:9]
3691; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
3692;
3693; GFX1200-GISEL-LABEL: clpeak_imad_pat_v4i32:
3694; GFX1200-GISEL:       ; %bb.0: ; %entry
3695; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3696; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
3697; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
3698; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
3699; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
3700; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3701; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3702; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3703; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3704; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3705; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v8, v0, v4
3706; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v9, v1, v5
3707; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3708; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v10, v2, v6
3709; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v11, v3, v7
3710; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3711; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v8, v0
3712; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, v9, v1
3713; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3714; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, v10, v2
3715; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, v11, v3
3716; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3717; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3718; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
3719; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3720; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v2, v6
3721; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v3, v3, v7
3722; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v4, 1, v8
3723; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v5, 1, v9
3724; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v6, 1, v10
3725; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v7, 1, v11
3726; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v8, 1, v0
3727; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
3728; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v4, v1, v5
3729; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v5, v2, v6
3730; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v6, v3, v7
3731; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
3732; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v2
3733; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v3
3734; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v8
3735; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
3736; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
3737; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v5, v2
3738; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4)
3739; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v3, v6, v3
3740; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
3741entry:
3742  %y18 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
3743  %add = mul <4 x i32> %y18, %y
3744  %mul119 = add <4 x i32> %add, %y18
3745  %add2 = mul <4 x i32> %mul119, %y
3746  %add220 = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
3747  %add422 = add <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
3748  %mul521 = mul <4 x i32> %add2, %add220
3749  %add6 = mul <4 x i32> %mul521, %add422
3750  ret <4 x i32> %add6
3751}
3752
; Signed 24-bit variant of the clpeak integer mad pattern.
; Both i32 arguments are narrowed to their low 24 bits and sign-extended
; (shl by 8 followed by ashr exact by 8) before feeding a chain of add and
; mul operations whose final product is returned. In the expected output
; below, every check block begins with a v_bfe_i32 ..., 0, 24 per argument.
; The global-isel=0 blocks from gfx900 onward finish with two 64-bit mad
; instructions; the older global-isel=0 blocks use mul followed by add, and
; the global-isel=1 blocks use add-1 followed by mul.
3753define i32 @clpeak_imad_pat_i24(i32 %x, i32 %y) {
3754; GFX67-SDAG-LABEL: clpeak_imad_pat_i24:
3755; GFX67-SDAG:       ; %bb.0: ; %entry
3756; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3757; GFX67-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 24
3758; GFX67-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 24
3759; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
3760; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
3761; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
3762; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
3763; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
3764; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v0
3765; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
3766; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
3767; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
3768;
3769; GFX67-GISEL-LABEL: clpeak_imad_pat_i24:
3770; GFX67-GISEL:       ; %bb.0: ; %entry
3771; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3772; GFX67-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 24
3773; GFX67-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 24
3774; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
3775; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
3776; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
3777; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3778; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
3779; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
3780; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
3781; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
3782; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
3783;
3784; GFX8-SDAG-LABEL: clpeak_imad_pat_i24:
3785; GFX8-SDAG:       ; %bb.0: ; %entry
3786; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3787; GFX8-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 24
3788; GFX8-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 24
3789; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
3790; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
3791; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
3792; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
3793; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
3794; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v0
3795; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
3796; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
3797; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
3798;
3799; GFX8-GISEL-LABEL: clpeak_imad_pat_i24:
3800; GFX8-GISEL:       ; %bb.0: ; %entry
3801; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3802; GFX8-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 24
3803; GFX8-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 24
3804; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
3805; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
3806; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
3807; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3808; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
3809; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
3810; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
3811; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
3812; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
3813;
3814; GFX900-SDAG-LABEL: clpeak_imad_pat_i24:
3815; GFX900-SDAG:       ; %bb.0: ; %entry
3816; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3817; GFX900-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 24
3818; GFX900-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 24
3819; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
3820; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
3821; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
3822; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
3823; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
3824; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
3825; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
3826;
3827; GFX900-GISEL-LABEL: clpeak_imad_pat_i24:
3828; GFX900-GISEL:       ; %bb.0: ; %entry
3829; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3830; GFX900-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 24
3831; GFX900-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 24
3832; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
3833; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
3834; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
3835; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3836; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
3837; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
3838; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
3839; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
3840; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
3841;
3842; GFX90A-SDAG-LABEL: clpeak_imad_pat_i24:
3843; GFX90A-SDAG:       ; %bb.0: ; %entry
3844; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3845; GFX90A-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 24
3846; GFX90A-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 24
3847; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
3848; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
3849; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
3850; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
3851; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
3852; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
3853; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
3854;
3855; GFX90A-GISEL-LABEL: clpeak_imad_pat_i24:
3856; GFX90A-GISEL:       ; %bb.0: ; %entry
3857; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3858; GFX90A-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 24
3859; GFX90A-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 24
3860; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
3861; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
3862; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
3863; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3864; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
3865; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
3866; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3867; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
3868; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
3869;
3870; GFX10-SDAG-LABEL: clpeak_imad_pat_i24:
3871; GFX10-SDAG:       ; %bb.0: ; %entry
3872; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3873; GFX10-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 24
3874; GFX10-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 24
3875; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3876; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
3877; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
3878; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
3879; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
3880; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
3881; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
3882;
3883; GFX10-GISEL-LABEL: clpeak_imad_pat_i24:
3884; GFX10-GISEL:       ; %bb.0: ; %entry
3885; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3886; GFX10-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 24
3887; GFX10-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 24
3888; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3889; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
3890; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
3891; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3892; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
3893; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
3894; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3895; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
3896; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
3897;
3898; GFX11-SDAG-LABEL: clpeak_imad_pat_i24:
3899; GFX11-SDAG:       ; %bb.0: ; %entry
3900; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3901; GFX11-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 24
3902; GFX11-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 24
3903; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3904; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3905; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v0
3906; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3907; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v5, v0
3908; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
3909; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3910; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3]
3911; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4]
3912; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
3913;
3914; GFX11-GISEL-LABEL: clpeak_imad_pat_i24:
3915; GFX11-GISEL:       ; %bb.0: ; %entry
3916; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3917; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 24
3918; GFX11-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 24
3919; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3920; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3921; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
3922; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3923; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
3924; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3925; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
3926; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3927; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
3928; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3929; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
3930; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
3931;
3932; GFX1200-SDAG-LABEL: clpeak_imad_pat_i24:
3933; GFX1200-SDAG:       ; %bb.0: ; %entry
3934; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
3935; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
3936; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
3937; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
3938; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
3939; GFX1200-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 24
3940; GFX1200-SDAG-NEXT:    v_bfe_i32 v1, v1, 0, 24
3941; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3942; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3943; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
3944; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3945; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
3946; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
3947; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3948; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1]
3949; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2]
3950; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
3951;
3952; GFX1200-GISEL-LABEL: clpeak_imad_pat_i24:
3953; GFX1200-GISEL:       ; %bb.0: ; %entry
3954; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
3955; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
3956; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
3957; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
3958; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
3959; GFX1200-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 24
3960; GFX1200-GISEL-NEXT:    v_bfe_i32 v1, v1, 0, 24
3961; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3962; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3963; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
3964; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3965; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
3966; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
3967; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
3968; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3969; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
3970; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
3971; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
3972; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
3973entry:
  ; x24 (%shr) and y24 (%shr2): the low 24 bits of each argument,
  ; sign-extended back to i32 by the shl/ashr pair.
3974  %shl = shl i32 %x, 8
3975  %shr = ashr exact i32 %shl, 8
3976  %shl1 = shl i32 %y, 8
3977  %shr2 = ashr exact i32 %shl1, 8
  ; The value names below do not track the opcode that produces them
  ; (for example %add comes from a mul and %mul323 from an add).
  ; %shr222 = x24 + 1
3978  %shr222 = add nsw i32 %shr, 1
  ; %add = y24 * %shr222
3979  %add = mul i32 %shr2, %shr222
  ; %mul323 = %add + %shr222
3980  %mul323 = add i32 %add, %shr222
  ; %add4 = %mul323 * y24
3981  %add4 = mul i32 %mul323, %shr2
  ; Both earlier products plus one, multiplied back into the running product:
  ; result = %add4 * (%add + 1) * (%add4 + 1)
3982  %add424 = add i32 %add, 1
3983  %add626 = add i32 %add4, 1
3984  %mul725 = mul i32 %add4, %add424
3985  %add8 = mul i32 %mul725, %add626
3986  ret i32 %add8
3987}
3988
3989define i32 @clpeak_imad_pat_u24(i32 %x, i32 %y) {
3990; GFX67-SDAG-LABEL: clpeak_imad_pat_u24:
3991; GFX67-SDAG:       ; %bb.0: ; %entry
3992; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3993; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
3994; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
3995; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
3996; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
3997; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
3998; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
3999; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
4000; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v0
4001; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
4002; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
4003; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
4004;
4005; GFX67-GISEL-LABEL: clpeak_imad_pat_u24:
4006; GFX67-GISEL:       ; %bb.0: ; %entry
4007; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4008; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4009; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4010; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
4011; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
4012; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
4013; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4014; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
4015; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
4016; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
4017; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
4018; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
4019;
4020; GFX8-SDAG-LABEL: clpeak_imad_pat_u24:
4021; GFX8-SDAG:       ; %bb.0: ; %entry
4022; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4023; GFX8-SDAG-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4024; GFX8-SDAG-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4025; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
4026; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
4027; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
4028; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
4029; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
4030; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v0
4031; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
4032; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
4033; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
4034;
4035; GFX8-GISEL-LABEL: clpeak_imad_pat_u24:
4036; GFX8-GISEL:       ; %bb.0: ; %entry
4037; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4038; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4039; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4040; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
4041; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
4042; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
4043; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4044; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
4045; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
4046; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
4047; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
4048; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
4049;
4050; GFX900-SDAG-LABEL: clpeak_imad_pat_u24:
4051; GFX900-SDAG:       ; %bb.0: ; %entry
4052; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4053; GFX900-SDAG-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4054; GFX900-SDAG-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4055; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
4056; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
4057; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
4058; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
4059; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
4060; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
4061; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
4062;
4063; GFX900-GISEL-LABEL: clpeak_imad_pat_u24:
4064; GFX900-GISEL:       ; %bb.0: ; %entry
4065; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4066; GFX900-GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4067; GFX900-GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4068; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
4069; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
4070; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
4071; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4072; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
4073; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
4074; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
4075; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
4076; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
4077;
4078; GFX90A-SDAG-LABEL: clpeak_imad_pat_u24:
4079; GFX90A-SDAG:       ; %bb.0: ; %entry
4080; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4081; GFX90A-SDAG-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4082; GFX90A-SDAG-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4083; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
4084; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
4085; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
4086; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
4087; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
4088; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
4089; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
4090;
4091; GFX90A-GISEL-LABEL: clpeak_imad_pat_u24:
4092; GFX90A-GISEL:       ; %bb.0: ; %entry
4093; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4094; GFX90A-GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4095; GFX90A-GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4096; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
4097; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
4098; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
4099; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4100; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
4101; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
4102; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4103; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
4104; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
4105;
4106; GFX10-SDAG-LABEL: clpeak_imad_pat_u24:
4107; GFX10-SDAG:       ; %bb.0: ; %entry
4108; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4109; GFX10-SDAG-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4110; GFX10-SDAG-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4111; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4112; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
4113; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
4114; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
4115; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
4116; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
4117; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
4118;
4119; GFX10-GISEL-LABEL: clpeak_imad_pat_u24:
4120; GFX10-GISEL:       ; %bb.0: ; %entry
4121; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4122; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4123; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4124; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4125; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
4126; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
4127; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4128; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
4129; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
4130; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4131; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
4132; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
4133;
4134; GFX11-SDAG-LABEL: clpeak_imad_pat_u24:
4135; GFX11-SDAG:       ; %bb.0: ; %entry
4136; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4137; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4138; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4139; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4140; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4141; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v0
4142; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4143; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v5, v0
4144; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
4145; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4146; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3]
4147; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4]
4148; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
4149;
4150; GFX11-GISEL-LABEL: clpeak_imad_pat_u24:
4151; GFX11-GISEL:       ; %bb.0: ; %entry
4152; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4153; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4154; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4155; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4156; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4157; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
4158; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4159; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
4160; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4161; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
4162; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4163; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
4164; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4165; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
4166; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
4167;
4168; GFX1200-SDAG-LABEL: clpeak_imad_pat_u24:
4169; GFX1200-SDAG:       ; %bb.0: ; %entry
4170; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
4171; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
4172; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
4173; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
4174; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
4175; GFX1200-SDAG-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4176; GFX1200-SDAG-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4177; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4178; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4179; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
4180; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4181; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
4182; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
4183; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4184; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1]
4185; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2]
4186; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
4187;
4188; GFX1200-GISEL-LABEL: clpeak_imad_pat_u24:
4189; GFX1200-GISEL:       ; %bb.0: ; %entry
4190; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
4191; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
4192; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
4193; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
4194; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
4195; GFX1200-GISEL-NEXT:    v_and_b32_e32 v0, 0xffffff, v0
4196; GFX1200-GISEL-NEXT:    v_and_b32_e32 v1, 0xffffff, v1
4197; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4198; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4199; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v0
4200; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4201; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
4202; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4203; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
4204; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4205; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
4206; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
4207; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
4208; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
4209entry:
4210  %shl = and i32 %x, 16777215
4211  %shl1 = and i32 %y, 16777215
4212  %shl122 = add nuw nsw i32 %shl, 1
4213  %add = mul i32 %shl1, %shl122
4214  %mul323 = add i32 %add, %shl122
4215  %add4 = mul i32 %mul323, %shl1
4216  %add424 = add i32 %add, 1
4217  %add626 = add i32 %add4, 1
4218  %mul725 = mul i32 %add4, %add424
4219  %add8 = mul i32 %mul725, %add626
4220  ret i32 %add8
4221}
4222
; i8 variant of the integer multiply-add pattern test. The IR body is a chain
; of multiplies in which one operand is an earlier value plus one:
;   %add   = (%x + 1) * %y
;   %add8  = (%y + 1) * %add
;   %add14 = (%add + 1) * %add8
;   %add20 = %add14 * (%add8 + 1)
; Each (a + 1) * b is algebraically a * b + b, so every step is a candidate for
; a single multiply-add instruction. In the expected output below, the
; SelectionDAG sequences for the GFX8 and later prefixes consist of four 16-bit
; mad instructions, while the GlobalISel sequences keep separate add/mul
; instructions and only use mad with the constant 1 as addend.
; Both arguments and the result are signext i8; every expected sequence ends
; with v_bfe_i32 v0, v0, 0, 8 right before the return.
; The assertions inside this function are autogenerated (see the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
4223define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
4224; GFX67-LABEL: clpeak_imad_pat_i8:
4225; GFX67:       ; %bb.0: ; %entry
4226; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4227; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
4228; GFX67-NEXT:    v_and_b32_e32 v0, 0xff, v0
4229; GFX67-NEXT:    v_and_b32_e32 v2, 0xff, v1
4230; GFX67-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
4231; GFX67-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
4232; GFX67-NEXT:    v_and_b32_e32 v1, 0xff, v1
4233; GFX67-NEXT:    v_and_b32_e32 v3, 0xff, v3
4234; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
4235; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
4236; GFX67-NEXT:    v_and_b32_e32 v0, 0xff, v0
4237; GFX67-NEXT:    v_and_b32_e32 v2, 0xff, v4
4238; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
4239; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
4240; GFX67-NEXT:    v_and_b32_e32 v0, 0xff, v0
4241; GFX67-NEXT:    v_and_b32_e32 v1, 0xff, v1
4242; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
4243; GFX67-NEXT:    v_bfe_i32 v0, v0, 0, 8
4244; GFX67-NEXT:    s_setpc_b64 s[30:31]
4245;
4246; GFX8-SDAG-LABEL: clpeak_imad_pat_i8:
4247; GFX8-SDAG:       ; %bb.0: ; %entry
4248; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4249; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4250; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
4251; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4252; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
4253; GFX8-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 8
4254; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
4255;
4256; GFX8-GISEL-LABEL: clpeak_imad_pat_i8:
4257; GFX8-GISEL:       ; %bb.0: ; %entry
4258; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4259; GFX8-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
4260; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
4261; GFX8-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
4262; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
4263; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
4264; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v4
4265; GFX8-GISEL-NEXT:    v_mad_u16 v1, v3, v2, 1
4266; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
4267; GFX8-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
4268; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
4269;
4270; GFX9-SDAG-LABEL: clpeak_imad_pat_i8:
4271; GFX9-SDAG:       ; %bb.0: ; %entry
4272; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4273; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
4274; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
4275; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
4276; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v0, v1, v0
4277; GFX9-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 8
4278; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
4279;
4280; GFX9-GISEL-LABEL: clpeak_imad_pat_i8:
4281; GFX9-GISEL:       ; %bb.0: ; %entry
4282; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4283; GFX9-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
4284; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
4285; GFX9-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
4286; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
4287; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v1, 1
4288; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v4
4289; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v3, v2, 1
4290; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
4291; GFX9-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
4292; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
4293;
4294; GFX10-SDAG-LABEL: clpeak_imad_pat_i8:
4295; GFX10-SDAG:       ; %bb.0: ; %entry
4296; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4297; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4298; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
4299; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4300; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
4301; GFX10-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 8
4302; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
4303;
4304; GFX10-GISEL-LABEL: clpeak_imad_pat_i8:
4305; GFX10-GISEL:       ; %bb.0: ; %entry
4306; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4307; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
4308; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
4309; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
4310; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
4311; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
4312; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
4313; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
4314; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
4315; GFX10-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
4316; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
4317;
4318; GFX11-SDAG-LABEL: clpeak_imad_pat_i8:
4319; GFX11-SDAG:       ; %bb.0: ; %entry
4320; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4321; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4322; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4323; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
4324; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4325; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4326; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
4327; GFX11-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 8
4328; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
4329;
4330; GFX11-GISEL-LABEL: clpeak_imad_pat_i8:
4331; GFX11-GISEL:       ; %bb.0: ; %entry
4332; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4333; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
4334; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
4335; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4336; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
4337; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
4338; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
4339; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4340; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
4341; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
4342; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
4343; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4344; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
4345; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
4346;
4347; GFX1200-SDAG-LABEL: clpeak_imad_pat_i8:
4348; GFX1200-SDAG:       ; %bb.0: ; %entry
4349; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
4350; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
4351; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
4352; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
4353; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
4354; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4355; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4356; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
4357; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
4358; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4359; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
4360; GFX1200-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 8
4361; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
4362;
4363; GFX1200-GISEL-LABEL: clpeak_imad_pat_i8:
4364; GFX1200-GISEL:       ; %bb.0: ; %entry
4365; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
4366; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
4367; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
4368; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
4369; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
4370; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
4371; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
4372; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4373; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
4374; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
4375; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
4376; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4377; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
4378; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
4379; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
4380; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4381; GFX1200-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 8
4382; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
4383entry:
4384  %conv33 = add i8 %x, 1
4385  %add = mul i8 %conv33, %y
4386  %conv434 = add i8 %y, 1
4387  %add8 = mul i8 %conv434, %add
4388  %conv1035 = add i8 %add, 1
4389  %add14 = mul i8 %conv1035, %add8
4390  %conv1636 = add i8 %add8, 1
4391  %add20 = mul i8 %add14, %conv1636
4392  ret i8 %add20
4393}
4394
; <2 x i8> variant of the integer multiply-add pattern test. All operations are
; element-wise on the two i8 lanes:
;   %y18    = %x + 1
;   %add    = %y18 * %y
;   %mul119 = %add + %y18
;   %add2   = %mul119 * %y
;   %add6   = (%add2 * (%add + 1)) * (%add2 + 1)
; In the expected output below, the GlobalISel sequences finish with one
; multiply writing v0 and one writing v1. The SelectionDAG sequences for the
; GFX8 and later prefixes instead finish by shifting v1 left by 8, or-ing it
; into v0 and masking v1 with 0xff, which looks like the two result bytes being
; packed into v0 (TODO confirm against the return-value lowering).
; The assertions inside this function are autogenerated (see the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
4395define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
4396; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i8:
4397; GFX67-SDAG:       ; %bb.0: ; %entry
4398; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4399; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
4400; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xff, v1
4401; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xff, v3
4402; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
4403; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
4404; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xff, v0
4405; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xff, v2
4406; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4407; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v6, v5, v3
4408; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
4409; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
4410; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v4, v2, 1
4411; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
4412; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xff, v3
4413; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 8, v6
4414; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v5, v0, v2
4415; GFX67-SDAG-NEXT:    v_or_b32_e32 v3, v4, v3
4416; GFX67-SDAG-NEXT:    s_movk_i32 s4, 0x100
4417; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
4418; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, s4, v3
4419; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
4420; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 8, v1
4421; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v2, v0
4422; GFX67-SDAG-NEXT:    v_bfe_u32 v2, v3, 8, 8
4423; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xff, v5
4424; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xff, v3
4425; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4426; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 0x100, v0
4427; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v4, v3
4428; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
4429; GFX67-SDAG-NEXT:    v_bfe_u32 v2, v0, 8, 8
4430; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xff, v3
4431; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
4432; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4433; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
4434; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
4435; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
4436;
4437; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i8:
4438; GFX67-GISEL:       ; %bb.0: ; %entry
4439; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4440; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
4441; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
4442; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v0
4443; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v2
4444; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xff, v1
4445; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v3
4446; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
4447; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
4448; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
4449; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
4450; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
4451; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v4, v2, 1
4452; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
4453; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v5, v3, 1
4454; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
4455; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
4456; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v6
4457; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v4
4458; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
4459; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xff, v7
4460; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xff, v5
4461; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
4462; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v2
4463; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xff, v0
4464; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
4465; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v3
4466; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xff, v1
4467; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
4468; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
4469;
4470; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i8:
4471; GFX8-SDAG:       ; %bb.0: ; %entry
4472; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4473; GFX8-SDAG-NEXT:    v_add_u16_e32 v1, 1, v1
4474; GFX8-SDAG-NEXT:    v_add_u16_e32 v0, 1, v0
4475; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v1, v3
4476; GFX8-SDAG-NEXT:    v_mad_u16 v1, v1, v3, v1
4477; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v0, v2
4478; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v2, v0
4479; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
4480; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
4481; GFX8-SDAG-NEXT:    v_mad_u16 v3, v1, v4, v1
4482; GFX8-SDAG-NEXT:    v_mad_u16 v2, v0, v5, v0
4483; GFX8-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
4484; GFX8-SDAG-NEXT:    v_lshlrev_b16_e32 v3, 8, v1
4485; GFX8-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
4486; GFX8-SDAG-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4487; GFX8-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4488; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
4489;
4490; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i8:
4491; GFX8-GISEL:       ; %bb.0: ; %entry
4492; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4493; GFX8-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
4494; GFX8-GISEL-NEXT:    v_add_u16_e32 v1, 1, v1
4495; GFX8-GISEL-NEXT:    v_mad_u16 v4, v0, v2, v0
4496; GFX8-GISEL-NEXT:    v_mad_u16 v5, v1, v3, v1
4497; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v4, v2
4498; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v7, v5, v3
4499; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v2, 1
4500; GFX8-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
4501; GFX8-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
4502; GFX8-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
4503; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v6, v0
4504; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v7, v1
4505; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
4506; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
4507; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
4508;
4509; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i8:
4510; GFX9-SDAG:       ; %bb.0: ; %entry
4511; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4512; GFX9-SDAG-NEXT:    v_add_u16_e32 v1, 1, v1
4513; GFX9-SDAG-NEXT:    v_add_u16_e32 v0, 1, v0
4514; GFX9-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v1, v3
4515; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v1, v3, v1
4516; GFX9-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v0, v2
4517; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v0, v2, v0
4518; GFX9-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
4519; GFX9-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
4520; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v3, v1, v4, v1
4521; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v2, v0, v5, v0
4522; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v3, v1, v3
4523; GFX9-SDAG-NEXT:    v_lshlrev_b16_e32 v3, 8, v1
4524; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v2, v0, v2
4525; GFX9-SDAG-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4526; GFX9-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4527; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
4528;
4529; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i8:
4530; GFX9-GISEL:       ; %bb.0: ; %entry
4531; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4532; GFX9-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
4533; GFX9-GISEL-NEXT:    v_add_u16_e32 v1, 1, v1
4534; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v4, v0, v2, v0
4535; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v5, v1, v3, v1
4536; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v4, v2
4537; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v7, v5, v3
4538; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v2, 1
4539; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v1, v3, 1
4540; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v2, v4, v2, 1
4541; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v3, v5, v3, 1
4542; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v6, v0
4543; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v7, v1
4544; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
4545; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
4546; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
4547;
4548; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i8:
4549; GFX10-SDAG:       ; %bb.0: ; %entry
4550; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4551; GFX10-SDAG-NEXT:    v_add_nc_u16 v1, v1, 1
4552; GFX10-SDAG-NEXT:    v_add_nc_u16 v0, v0, 1
4553; GFX10-SDAG-NEXT:    v_mad_u16 v4, v1, v3, v1
4554; GFX10-SDAG-NEXT:    v_mul_lo_u16 v1, v1, v3
4555; GFX10-SDAG-NEXT:    v_mad_u16 v5, v0, v2, v0
4556; GFX10-SDAG-NEXT:    v_mul_lo_u16 v0, v0, v2
4557; GFX10-SDAG-NEXT:    v_mul_lo_u16 v3, v4, v3
4558; GFX10-SDAG-NEXT:    v_mul_lo_u16 v2, v5, v2
4559; GFX10-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
4560; GFX10-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
4561; GFX10-SDAG-NEXT:    v_mad_u16 v1, v1, v3, v1
4562; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v2, v0
4563; GFX10-SDAG-NEXT:    v_lshlrev_b16 v2, 8, v1
4564; GFX10-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4565; GFX10-SDAG-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
4566; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
4567;
4568; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i8:
4569; GFX10-GISEL:       ; %bb.0: ; %entry
4570; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4571; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
4572; GFX10-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
4573; GFX10-GISEL-NEXT:    v_mad_u16 v4, v0, v2, v0
4574; GFX10-GISEL-NEXT:    v_mad_u16 v5, v1, v3, v1
4575; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v2, 1
4576; GFX10-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
4577; GFX10-GISEL-NEXT:    v_mul_lo_u16 v6, v4, v2
4578; GFX10-GISEL-NEXT:    v_mul_lo_u16 v7, v5, v3
4579; GFX10-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
4580; GFX10-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
4581; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v6, v0
4582; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v7, v1
4583; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
4584; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
4585; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
4586;
4587; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i8:
4588; GFX11-SDAG:       ; %bb.0: ; %entry
4589; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4590; GFX11-SDAG-NEXT:    v_add_nc_u16 v1, v1, 1
4591; GFX11-SDAG-NEXT:    v_add_nc_u16 v0, v0, 1
4592; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4593; GFX11-SDAG-NEXT:    v_mad_u16 v4, v1, v3, v1
4594; GFX11-SDAG-NEXT:    v_mad_u16 v5, v0, v2, v0
4595; GFX11-SDAG-NEXT:    v_mul_lo_u16 v1, v1, v3
4596; GFX11-SDAG-NEXT:    v_mul_lo_u16 v0, v0, v2
4597; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
4598; GFX11-SDAG-NEXT:    v_mul_lo_u16 v3, v4, v3
4599; GFX11-SDAG-NEXT:    v_mul_lo_u16 v2, v5, v2
4600; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4601; GFX11-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
4602; GFX11-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
4603; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4604; GFX11-SDAG-NEXT:    v_mad_u16 v1, v1, v3, v1
4605; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v2, v0
4606; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4607; GFX11-SDAG-NEXT:    v_lshlrev_b16 v2, 8, v1
4608; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
4609; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4610; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4611; GFX11-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
4612; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
4613;
4614; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i8:
4615; GFX11-GISEL:       ; %bb.0: ; %entry
4616; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4617; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
4618; GFX11-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
4619; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4620; GFX11-GISEL-NEXT:    v_mad_u16 v4, v0, v2, v0
4621; GFX11-GISEL-NEXT:    v_mad_u16 v5, v1, v3, v1
4622; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v2, 1
4623; GFX11-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
4624; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
4625; GFX11-GISEL-NEXT:    v_mul_lo_u16 v6, v4, v2
4626; GFX11-GISEL-NEXT:    v_mul_lo_u16 v7, v5, v3
4627; GFX11-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
4628; GFX11-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
4629; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
4630; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v6, v0
4631; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v7, v1
4632; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4633; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
4634; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
4635; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
4636;
4637; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i8:
4638; GFX1200-SDAG:       ; %bb.0: ; %entry
4639; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
4640; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
4641; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
4642; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
4643; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
4644; GFX1200-SDAG-NEXT:    v_add_nc_u16 v1, v1, 1
4645; GFX1200-SDAG-NEXT:    v_add_nc_u16 v0, v0, 1
4646; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4647; GFX1200-SDAG-NEXT:    v_mad_u16 v4, v1, v3, v1
4648; GFX1200-SDAG-NEXT:    v_mad_u16 v5, v0, v2, v0
4649; GFX1200-SDAG-NEXT:    v_mul_lo_u16 v1, v1, v3
4650; GFX1200-SDAG-NEXT:    v_mul_lo_u16 v0, v0, v2
4651; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
4652; GFX1200-SDAG-NEXT:    v_mul_lo_u16 v3, v4, v3
4653; GFX1200-SDAG-NEXT:    v_mul_lo_u16 v2, v5, v2
4654; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4655; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
4656; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
4657; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4658; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v1, v3, v1
4659; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v2, v0
4660; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4661; GFX1200-SDAG-NEXT:    v_lshlrev_b16 v2, 8, v1
4662; GFX1200-SDAG-NEXT:    v_and_b32_e32 v0, 0xff, v0
4663; GFX1200-SDAG-NEXT:    v_and_b32_e32 v1, 0xff, v1
4664; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4665; GFX1200-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
4666; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
4667;
4668; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i8:
4669; GFX1200-GISEL:       ; %bb.0: ; %entry
4670; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
4671; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
4672; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
4673; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
4674; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
4675; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
4676; GFX1200-GISEL-NEXT:    v_add_nc_u16 v1, v1, 1
4677; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4678; GFX1200-GISEL-NEXT:    v_mad_u16 v4, v0, v2, v0
4679; GFX1200-GISEL-NEXT:    v_mad_u16 v5, v1, v3, v1
4680; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v2, 1
4681; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v1, v3, 1
4682; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
4683; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v6, v4, v2
4684; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v7, v5, v3
4685; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
4686; GFX1200-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
4687; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
4688; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v6, v0
4689; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v7, v1
4690; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
4691; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v2
4692; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v1, v3
4693; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
4694entry:
4695  %y18 = add <2 x i8> %x, <i8 1, i8 1>
4696  %add = mul <2 x i8> %y18, %y
4697  %mul119 = add <2 x i8> %add, %y18
4698  %add2 = mul <2 x i8> %mul119, %y
4699  %add220 = add <2 x i8> %add, <i8 1, i8 1>
4700  %add422 = add <2 x i8> %add2, <i8 1, i8 1>
4701  %mul521 = mul <2 x i8> %add2, %add220
4702  %add6 = mul <2 x i8> %mul521, %add422
4703  ret <2 x i8> %add6
4704}
4705
4706define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) {
4707; GFX6-SDAG-LABEL: clpeak_imad_pat_i64:
4708; GFX6-SDAG:       ; %bb.0: ; %entry
4709; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4710; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
4711; GFX6-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v3
4712; GFX6-SDAG-NEXT:    v_mul_hi_u32 v5, v0, v2
4713; GFX6-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4714; GFX6-SDAG-NEXT:    v_mul_lo_u32 v6, v1, v2
4715; GFX6-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
4716; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v2
4717; GFX6-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
4718; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
4719; GFX6-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v3
4720; GFX6-SDAG-NEXT:    v_mul_hi_u32 v6, v0, v2
4721; GFX6-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
4722; GFX6-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
4723; GFX6-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v2
4724; GFX6-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v4
4725; GFX6-SDAG-NEXT:    v_mul_hi_u32 v4, v0, v5
4726; GFX6-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v6, v3
4727; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
4728; GFX6-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v5
4729; GFX6-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
4730; GFX6-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v5
4731; GFX6-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
4732; GFX6-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v4, v0
4733; GFX6-SDAG-NEXT:    v_addc_u32_e32 v2, vcc, v2, v1, vcc
4734; GFX6-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
4735; GFX6-SDAG-NEXT:    v_mul_hi_u32 v4, v3, v0
4736; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v0
4737; GFX6-SDAG-NEXT:    v_mul_lo_u32 v0, v3, v0
4738; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
4739; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
4740; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
4741; GFX6-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
4742; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
4743;
4744; GFX6-GISEL-LABEL: clpeak_imad_pat_i64:
4745; GFX6-GISEL:       ; %bb.0: ; %entry
4746; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4747; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
4748; GFX6-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4749; GFX6-GISEL-NEXT:    v_mul_lo_u32 v4, v1, v2
4750; GFX6-GISEL-NEXT:    v_mul_lo_u32 v5, v0, v3
4751; GFX6-GISEL-NEXT:    v_mul_hi_u32 v7, v0, v2
4752; GFX6-GISEL-NEXT:    v_mul_lo_u32 v6, v0, v2
4753; GFX6-GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
4754; GFX6-GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v7
4755; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v6, v0
4756; GFX6-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v4, v1, vcc
4757; GFX6-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v2
4758; GFX6-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v3
4759; GFX6-GISEL-NEXT:    v_mul_lo_u32 v5, v0, v2
4760; GFX6-GISEL-NEXT:    v_mul_hi_u32 v0, v0, v2
4761; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
4762; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
4763; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v6
4764; GFX6-GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
4765; GFX6-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
4766; GFX6-GISEL-NEXT:    v_addc_u32_e32 v4, vcc, 0, v0, vcc
4767; GFX6-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
4768; GFX6-GISEL-NEXT:    v_mul_lo_u32 v2, v5, v2
4769; GFX6-GISEL-NEXT:    v_mul_lo_u32 v6, v5, v1
4770; GFX6-GISEL-NEXT:    v_mul_hi_u32 v1, v5, v1
4771; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
4772; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
4773; GFX6-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v3
4774; GFX6-GISEL-NEXT:    v_mul_lo_u32 v2, v6, v4
4775; GFX6-GISEL-NEXT:    v_mul_lo_u32 v0, v6, v3
4776; GFX6-GISEL-NEXT:    v_mul_hi_u32 v3, v6, v3
4777; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
4778; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
4779; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
4780;
4781; GFX7-SDAG-LABEL: clpeak_imad_pat_i64:
4782; GFX7-SDAG:       ; %bb.0: ; %entry
4783; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4784; GFX7-SDAG-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
4785; GFX7-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
4786; GFX7-SDAG-NEXT:    v_mul_lo_u32 v6, v4, v3
4787; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
4788; GFX7-SDAG-NEXT:    v_mul_lo_u32 v7, v5, v2
4789; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
4790; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v7
4791; GFX7-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v0, v4
4792; GFX7-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, v1, v5, vcc
4793; GFX7-SDAG-NEXT:    v_mul_lo_u32 v6, v4, v3
4794; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v4, v2, 0
4795; GFX7-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v2
4796; GFX7-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
4797; GFX7-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v4, v2
4798; GFX7-SDAG-NEXT:    v_mul_lo_u32 v2, v4, v0
4799; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v0, v[3:4]
4800; GFX7-SDAG-NEXT:    v_mul_lo_u32 v0, v3, v1
4801; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v2, v6
4802; GFX7-SDAG-NEXT:    v_add_i32_e32 v6, vcc, v0, v1
4803; GFX7-SDAG-NEXT:    v_mul_lo_u32 v2, v6, v3
4804; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v3, v[5:6]
4805; GFX7-SDAG-NEXT:    v_mul_lo_u32 v3, v5, v4
4806; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
4807; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
4808; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
4809;
4810; GFX7-GISEL-LABEL: clpeak_imad_pat_i64:
4811; GFX7-GISEL:       ; %bb.0: ; %entry
4812; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4813; GFX7-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v0
4814; GFX7-GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v1, vcc
4815; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
4816; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
4817; GFX7-GISEL-NEXT:    v_add_i32_e32 v8, vcc, v0, v6
4818; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
4819; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
4820; GFX7-GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v4, v7, vcc
4821; GFX7-GISEL-NEXT:    v_mov_b32_e32 v1, v6
4822; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
4823; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
4824; GFX7-GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v0
4825; GFX7-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v4, vcc
4826; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
4827; GFX7-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
4828; GFX7-GISEL-NEXT:    v_mov_b32_e32 v0, v4
4829; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
4830; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
4831; GFX7-GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v2, vcc
4832; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
4833; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
4834; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
4835; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
4836;
4837; GFX8-SDAG-LABEL: clpeak_imad_pat_i64:
4838; GFX8-SDAG:       ; %bb.0: ; %entry
4839; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4840; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
4841; GFX8-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
4842; GFX8-SDAG-NEXT:    v_mul_lo_u32 v6, v4, v3
4843; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
4844; GFX8-SDAG-NEXT:    v_mul_lo_u32 v7, v5, v2
4845; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v6
4846; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v7
4847; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, v0, v4
4848; GFX8-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, v1, v5, vcc
4849; GFX8-SDAG-NEXT:    v_mul_lo_u32 v6, v4, v3
4850; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v4, v2, 0
4851; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v2
4852; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
4853; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, v4, v2
4854; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v4, v0
4855; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v3, v0, v[3:4]
4856; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v3, v1
4857; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v2, v6
4858; GFX8-SDAG-NEXT:    v_add_u32_e32 v6, vcc, v0, v1
4859; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v6, v3
4860; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v3, v[5:6]
4861; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v5, v4
4862; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
4863; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v3, v1
4864; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
4865;
4866; GFX8-GISEL-LABEL: clpeak_imad_pat_i64:
4867; GFX8-GISEL:       ; %bb.0: ; %entry
4868; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4869; GFX8-GISEL-NEXT:    v_add_u32_e32 v6, vcc, 1, v0
4870; GFX8-GISEL-NEXT:    v_addc_u32_e32 v7, vcc, 0, v1, vcc
4871; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
4872; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
4873; GFX8-GISEL-NEXT:    v_add_u32_e32 v8, vcc, v0, v6
4874; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
4875; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
4876; GFX8-GISEL-NEXT:    v_addc_u32_e32 v9, vcc, v4, v7, vcc
4877; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, v6
4878; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
4879; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
4880; GFX8-GISEL-NEXT:    v_add_u32_e32 v6, vcc, 1, v0
4881; GFX8-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v4, vcc
4882; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
4883; GFX8-GISEL-NEXT:    v_add_u32_e32 v7, vcc, 1, v5
4884; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, v4
4885; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
4886; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
4887; GFX8-GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v2, vcc
4888; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
4889; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
4890; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
4891; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
4892;
4893; GFX900-SDAG-LABEL: clpeak_imad_pat_i64:
4894; GFX900-SDAG:       ; %bb.0: ; %entry
4895; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4896; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v4, vcc, 1, v0
4897; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
4898; GFX900-SDAG-NEXT:    v_mul_lo_u32 v6, v5, v2
4899; GFX900-SDAG-NEXT:    v_mul_lo_u32 v7, v4, v3
4900; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
4901; GFX900-SDAG-NEXT:    v_add3_u32 v6, v1, v7, v6
4902; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v1, vcc, v0, v4
4903; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v4, vcc, v6, v5, vcc
4904; GFX900-SDAG-NEXT:    v_mul_lo_u32 v4, v4, v2
4905; GFX900-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v3
4906; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v1, v2, 0
4907; GFX900-SDAG-NEXT:    v_add3_u32 v2, v2, v3, v4
4908; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v0
4909; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v1, v0, v[1:2]
4910; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v6
4911; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v3, v2
4912; GFX900-SDAG-NEXT:    v_add3_u32 v4, v5, v4, v0
4913; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v4, v1
4914; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v1, v[3:4]
4915; GFX900-SDAG-NEXT:    v_add3_u32 v1, v5, v1, v2
4916; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
4917;
4918; GFX900-GISEL-LABEL: clpeak_imad_pat_i64:
4919; GFX900-GISEL:       ; %bb.0: ; %entry
4920; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4921; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 1, v0
4922; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
4923; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
4924; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
4925; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v8, vcc, v0, v6
4926; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
4927; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
4928; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v9, vcc, v4, v7, vcc
4929; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, v6
4930; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
4931; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
4932; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 1, v0
4933; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v4, vcc
4934; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
4935; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v7, vcc, 1, v5
4936; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, v4
4937; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
4938; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
4939; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v2, vcc
4940; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
4941; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
4942; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
4943; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
4944;
4945; GFX90A-SDAG-LABEL: clpeak_imad_pat_i64:
4946; GFX90A-SDAG:       ; %bb.0: ; %entry
4947; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4948; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
4949; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
4950; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v1, v2
4951; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v7, v0, v3
4952; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v0, v2, 0
4953; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
4954; GFX90A-SDAG-NEXT:    v_add3_u32 v1, v6, v1, v7
4955; GFX90A-SDAG-NEXT:    v_add3_u32 v5, v5, v7, v6
4956; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v3
4957; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v1, v2
4958; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
4959; GFX90A-SDAG-NEXT:    v_add3_u32 v1, v1, v3, v6
4960; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v1, v4
4961; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v4, v[0:1]
4962; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v5
4963; GFX90A-SDAG-NEXT:    v_add3_u32 v3, v6, v3, v4
4964; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v2, v1
4965; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v5, v3, v0
4966; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
4967; GFX90A-SDAG-NEXT:    v_add3_u32 v1, v5, v1, v4
4968; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
4969;
4970; GFX90A-GISEL-LABEL: clpeak_imad_pat_i64:
4971; GFX90A-GISEL:       ; %bb.0: ; %entry
4972; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4973; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 1, v0
4974; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
4975; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v3, 0
4976; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
4977; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
4978; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v1, v4
4979; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, v0, v6
4980; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v8, vcc, v1, v7, vcc
4981; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v2, 0
4982; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v6, v3, 0
4983; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v8, v2, v[6:7]
4984; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 1, v0
4985; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v1, vcc
4986; GFX90A-GISEL-NEXT:    v_add_u32_e32 v5, v5, v2
4987; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v7, vcc, 1, v4
4988; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
4989; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v5, vcc
4990; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v6, 0
4991; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v6, v[0:1]
4992; GFX90A-GISEL-NEXT:    v_add_u32_e32 v4, v3, v0
4993; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v7, 0
4994; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v2, v8, 0
4995; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[2:3]
4996; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v1, v2
4997; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
4998;
4999; GFX10-SDAG-LABEL: clpeak_imad_pat_i64:
5000; GFX10-SDAG:       ; %bb.0: ; %entry
5001; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5002; GFX10-SDAG-NEXT:    v_add_co_u32 v4, vcc_lo, v0, 1
5003; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
5004; GFX10-SDAG-NEXT:    v_mul_lo_u32 v7, v4, v3
5005; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, 0
5006; GFX10-SDAG-NEXT:    v_mul_lo_u32 v6, v5, v2
5007; GFX10-SDAG-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v4
5008; GFX10-SDAG-NEXT:    v_add3_u32 v1, v1, v7, v6
5009; GFX10-SDAG-NEXT:    v_mul_lo_u32 v6, v4, v3
5010; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v4, v2, 0
5011; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo
5012; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v2
5013; GFX10-SDAG-NEXT:    v_add3_u32 v4, v4, v6, v2
5014; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v3, v1
5015; GFX10-SDAG-NEXT:    v_mul_lo_u32 v5, v4, v0
5016; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
5017; GFX10-SDAG-NEXT:    v_add3_u32 v1, v5, v1, v2
5018; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v4
5019; GFX10-SDAG-NEXT:    v_mul_lo_u32 v4, v1, v3
5020; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1]
5021; GFX10-SDAG-NEXT:    v_add3_u32 v1, v4, v1, v2
5022; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
5023;
5024; GFX10-GISEL-LABEL: clpeak_imad_pat_i64:
5025; GFX10-GISEL:       ; %bb.0: ; %entry
5026; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5027; GFX10-GISEL-NEXT:    v_add_co_u32 v6, vcc_lo, v0, 1
5028; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo
5029; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v6, v2, 0
5030; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v6, v3, v[1:2]
5031; GFX10-GISEL-NEXT:    v_add_co_u32 v8, vcc_lo, v0, v6
5032; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v7, v2, v[4:5]
5033; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[5:6], null, v8, v2, 0
5034; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v4, v7, vcc_lo
5035; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, v6
5036; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[6:7], null, v8, v3, v[1:2]
5037; GFX10-GISEL-NEXT:    v_add_co_u32 v8, vcc_lo, v0, 1
5038; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, 0, v4, vcc_lo
5039; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[3:4], null, v5, v8, 0
5040; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[6:7], null, v9, v2, v[6:7]
5041; GFX10-GISEL-NEXT:    v_add_co_u32 v7, vcc_lo, v5, 1
5042; GFX10-GISEL-NEXT:    v_mov_b32_e32 v2, v4
5043; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v7, 0
5044; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v6, vcc_lo
5045; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v5, v10, v[2:3]
5046; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[1:2], null, v3, v9, v[1:2]
5047; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v6, v8, v[4:5]
5048; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[1:2], null, v4, v7, v[1:2]
5049; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
5050;
5051; GFX11-SDAG-LABEL: clpeak_imad_pat_i64:
5052; GFX11-SDAG:       ; %bb.0: ; %entry
5053; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5054; GFX11-SDAG-NEXT:    v_add_co_u32 v4, vcc_lo, v0, 1
5055; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
5056; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5057; GFX11-SDAG-NEXT:    v_mul_lo_u32 v7, v4, v3
5058; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, 0
5059; GFX11-SDAG-NEXT:    v_mul_lo_u32 v6, v5, v2
5060; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
5061; GFX11-SDAG-NEXT:    v_add3_u32 v1, v1, v7, v6
5062; GFX11-SDAG-NEXT:    v_add_co_u32 v6, vcc_lo, v0, v4
5063; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5064; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo
5065; GFX11-SDAG-NEXT:    v_mul_lo_u32 v7, v6, v3
5066; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v6, v2, 0
5067; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5068; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v2
5069; GFX11-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
5070; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5071; GFX11-SDAG-NEXT:    v_add3_u32 v4, v4, v7, v2
5072; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v4, v0
5073; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v3, v0, v[3:4]
5074; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5075; GFX11-SDAG-NEXT:    v_add3_u32 v6, v2, v6, v1
5076; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v4
5077; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5078; GFX11-SDAG-NEXT:    v_mul_lo_u32 v4, v6, v3
5079; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v3, v[5:6]
5080; GFX11-SDAG-NEXT:    v_add3_u32 v1, v4, v1, v2
5081; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
5082;
5083; GFX11-GISEL-LABEL: clpeak_imad_pat_i64:
5084; GFX11-GISEL:       ; %bb.0: ; %entry
5085; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5086; GFX11-GISEL-NEXT:    v_add_co_u32 v7, vcc_lo, v0, 1
5087; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, 0, v1, vcc_lo
5088; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5089; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v7, v2, 0
5090; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v7, v3, v[1:2]
5091; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
5092; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[5:6], null, v8, v2, v[4:5]
5093; GFX11-GISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v7
5094; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
5095; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[6:7], null, v4, v2, 0
5096; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo
5097; GFX11-GISEL-NEXT:    v_add_co_u32 v11, vcc_lo, v0, 1
5098; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, 0, v5, vcc_lo
5099; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
5100; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, v7
5101; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[7:8], null, v4, v3, v[1:2]
5102; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5103; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[3:4], null, v6, v11, 0
5104; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[8:9], null, v10, v2, v[7:8]
5105; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
5106; GFX11-GISEL-NEXT:    v_mov_b32_e32 v2, v4
5107; GFX11-GISEL-NEXT:    v_add_co_u32 v9, vcc_lo, v6, 1
5108; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v6, v12, v[2:3]
5109; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5110; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v9, 0
5111; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo
5112; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[5:6], null, v8, v11, v[4:5]
5113; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5114; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[6:7], null, v3, v10, v[1:2]
5115; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v9, v[6:7]
5116; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
5117;
5118; GFX1200-SDAG-LABEL: clpeak_imad_pat_i64:
5119; GFX1200-SDAG:       ; %bb.0: ; %entry
5120; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
5121; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
5122; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
5123; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
5124; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
5125; GFX1200-SDAG-NEXT:    v_add_co_u32 v4, vcc_lo, v0, 1
5126; GFX1200-SDAG-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
5127; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5128; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v7, v4, v3
5129; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v4, v2, 0
5130; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v6, v5, v2
5131; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5132; GFX1200-SDAG-NEXT:    v_add_co_u32 v4, vcc_lo, v0, v4
5133; GFX1200-SDAG-NEXT:    v_add3_u32 v1, v1, v7, v6
5134; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5135; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v6, v4, v3
5136; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[3:4], null, v4, v2, 0
5137; GFX1200-SDAG-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo
5138; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5139; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v2
5140; GFX1200-SDAG-NEXT:    v_add3_u32 v4, v4, v6, v2
5141; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5142; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v3, v1
5143; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v5, v4, v0
5144; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4]
5145; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5146; GFX1200-SDAG-NEXT:    v_add3_u32 v1, v5, v1, v2
5147; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v4
5148; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5149; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v4, v1, v3
5150; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v3, v[0:1]
5151; GFX1200-SDAG-NEXT:    v_add3_u32 v1, v4, v1, v2
5152; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
5153;
5154; GFX1200-GISEL-LABEL: clpeak_imad_pat_i64:
5155; GFX1200-GISEL:       ; %bb.0: ; %entry
5156; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
5157; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
5158; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
5159; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
5160; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
5161; GFX1200-GISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v0, 1
5162; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
5163; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
5164; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v0, v4, v2
5165; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
5166; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[0:1], null, v4, v3, v[0:1]
5167; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5168; GFX1200-GISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v6, v4
5169; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[0:1], null, v5, v2, v[0:1]
5170; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
5171; GFX1200-GISEL-NEXT:    v_mov_b32_e32 v7, v0
5172; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v0, v4, v2
5173; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5174; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v7, v5, vcc_lo
5175; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[0:1], null, v4, v3, v[0:1]
5176; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v3, v4, v2
5177; GFX1200-GISEL-NEXT:    v_add_co_u32 v4, vcc_lo, v6, 1
5178; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5179; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[0:1], null, v5, v2, v[0:1]
5180; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v1, v3, v4
5181; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v5, v3, v4
5182; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
5183; GFX1200-GISEL-NEXT:    v_mov_b32_e32 v6, v0
5184; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v7, vcc_lo
5185; GFX1200-GISEL-NEXT:    v_add_co_u32 v7, vcc_lo, v3, 1
5186; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, 0, v6, vcc_lo
5187; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
5188; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[1:2], null, v3, v2, v[1:2]
5189; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v2, v5, v7
5190; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
5191; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[3:4], null, v0, v4, v[1:2]
5192; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v5, v7
5193; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[1:2], null, v5, v6, v[2:3]
5194; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
5195; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[1:2], null, v3, v7, v[1:2]
5196; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
5197entry:
5198  %y18 = add i64 %x, 1
5199  %add = mul i64 %y18, %y
5200  %mul119 = add i64 %add, %y18
5201  %add2 = mul i64 %mul119, %y
5202  %add220 = add i64 %add, 1
5203  %add422 = add i64 %add2, 1
5204  %mul521 = mul i64 %add2, %add220
5205  %add6 = mul i64 %mul521, %add422
5206  ret i64 %add6
5207}
5208
5209define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) {
5210; GFX6-SDAG-LABEL: clpeak_imad_pat_v2i64:
5211; GFX6-SDAG:       ; %bb.0: ; %entry
5212; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5213; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
5214; GFX6-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5215; GFX6-SDAG-NEXT:    v_mul_lo_u32 v8, v0, v5
5216; GFX6-SDAG-NEXT:    v_mul_hi_u32 v9, v0, v4
5217; GFX6-SDAG-NEXT:    v_mul_lo_u32 v10, v1, v4
5218; GFX6-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
5219; GFX6-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5220; GFX6-SDAG-NEXT:    v_add_i32_e32 v8, vcc, v9, v8
5221; GFX6-SDAG-NEXT:    v_add_i32_e32 v8, vcc, v8, v10
5222; GFX6-SDAG-NEXT:    v_mul_lo_u32 v9, v2, v7
5223; GFX6-SDAG-NEXT:    v_mul_hi_u32 v10, v2, v6
5224; GFX6-SDAG-NEXT:    v_mul_lo_u32 v12, v3, v6
5225; GFX6-SDAG-NEXT:    v_mul_lo_u32 v11, v0, v4
5226; GFX6-SDAG-NEXT:    v_add_i32_e32 v9, vcc, v10, v9
5227; GFX6-SDAG-NEXT:    v_add_i32_e32 v9, vcc, v9, v12
5228; GFX6-SDAG-NEXT:    v_mul_lo_u32 v10, v2, v6
5229; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v11, v0
5230; GFX6-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, v8, v1, vcc
5231; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v5
5232; GFX6-SDAG-NEXT:    v_mul_hi_u32 v12, v0, v4
5233; GFX6-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v4
5234; GFX6-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v10, v2
5235; GFX6-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, v9, v3, vcc
5236; GFX6-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v12, v5
5237; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
5238; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v7
5239; GFX6-SDAG-NEXT:    v_mul_hi_u32 v7, v2, v6
5240; GFX6-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v6
5241; GFX6-SDAG-NEXT:    v_mul_lo_u32 v3, v3, v6
5242; GFX6-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v4
5243; GFX6-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v7, v5
5244; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v9
5245; GFX6-SDAG-NEXT:    v_mul_hi_u32 v6, v2, v10
5246; GFX6-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
5247; GFX6-SDAG-NEXT:    v_mul_lo_u32 v4, v3, v10
5248; GFX6-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
5249; GFX6-SDAG-NEXT:    v_mul_hi_u32 v6, v0, v11
5250; GFX6-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
5251; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v8
5252; GFX6-SDAG-NEXT:    v_mul_lo_u32 v8, v1, v11
5253; GFX6-SDAG-NEXT:    v_mul_lo_u32 v7, v2, v10
5254; GFX6-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
5255; GFX6-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v11
5256; GFX6-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
5257; GFX6-SDAG-NEXT:    v_add_i32_e32 v6, vcc, v6, v0
5258; GFX6-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, v5, v1, vcc
5259; GFX6-SDAG-NEXT:    v_mul_lo_u32 v1, v6, v1
5260; GFX6-SDAG-NEXT:    v_mul_hi_u32 v8, v6, v0
5261; GFX6-SDAG-NEXT:    v_mul_lo_u32 v9, v5, v0
5262; GFX6-SDAG-NEXT:    v_mul_lo_u32 v0, v6, v0
5263; GFX6-SDAG-NEXT:    v_add_i32_e32 v7, vcc, v7, v2
5264; GFX6-SDAG-NEXT:    v_addc_u32_e32 v4, vcc, v4, v3, vcc
5265; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v8, v1
5266; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
5267; GFX6-SDAG-NEXT:    v_mul_lo_u32 v3, v7, v3
5268; GFX6-SDAG-NEXT:    v_mul_hi_u32 v8, v7, v2
5269; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
5270; GFX6-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, v1, v5, vcc
5271; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v4, v2
5272; GFX6-SDAG-NEXT:    v_mul_lo_u32 v2, v7, v2
5273; GFX6-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v8, v3
5274; GFX6-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
5275; GFX6-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v2, v7
5276; GFX6-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, v3, v4, vcc
5277; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
5278;
5279; GFX6-GISEL-LABEL: clpeak_imad_pat_v2i64:
5280; GFX6-GISEL:       ; %bb.0: ; %entry
5281; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5282; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
5283; GFX6-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5284; GFX6-GISEL-NEXT:    v_mul_lo_u32 v8, v1, v4
5285; GFX6-GISEL-NEXT:    v_mul_lo_u32 v9, v0, v5
5286; GFX6-GISEL-NEXT:    v_mul_hi_u32 v11, v0, v4
5287; GFX6-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v2
5288; GFX6-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
5289; GFX6-GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v9
5290; GFX6-GISEL-NEXT:    v_add_i32_e32 v8, vcc, v8, v11
5291; GFX6-GISEL-NEXT:    v_mul_lo_u32 v9, v3, v6
5292; GFX6-GISEL-NEXT:    v_mul_lo_u32 v11, v2, v7
5293; GFX6-GISEL-NEXT:    v_mul_hi_u32 v13, v2, v6
5294; GFX6-GISEL-NEXT:    v_mul_lo_u32 v10, v0, v4
5295; GFX6-GISEL-NEXT:    v_mul_lo_u32 v12, v2, v6
5296; GFX6-GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v11
5297; GFX6-GISEL-NEXT:    v_add_i32_e32 v9, vcc, v9, v13
5298; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v10, v0
5299; GFX6-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, v8, v1, vcc
5300; GFX6-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v4
5301; GFX6-GISEL-NEXT:    v_mul_lo_u32 v5, v0, v5
5302; GFX6-GISEL-NEXT:    v_mul_lo_u32 v11, v0, v4
5303; GFX6-GISEL-NEXT:    v_mul_hi_u32 v0, v0, v4
5304; GFX6-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v12, v2
5305; GFX6-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, v9, v3, vcc
5306; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
5307; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
5308; GFX6-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v6
5309; GFX6-GISEL-NEXT:    v_mul_lo_u32 v3, v2, v7
5310; GFX6-GISEL-NEXT:    v_mul_lo_u32 v4, v2, v6
5311; GFX6-GISEL-NEXT:    v_mul_hi_u32 v2, v2, v6
5312; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
5313; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
5314; GFX6-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v10
5315; GFX6-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v8, vcc
5316; GFX6-GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v12
5317; GFX6-GISEL-NEXT:    v_addc_u32_e32 v6, vcc, 0, v9, vcc
5318; GFX6-GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v11
5319; GFX6-GISEL-NEXT:    v_addc_u32_e32 v8, vcc, 0, v0, vcc
5320; GFX6-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
5321; GFX6-GISEL-NEXT:    v_mul_lo_u32 v3, v11, v3
5322; GFX6-GISEL-NEXT:    v_mul_lo_u32 v12, v11, v2
5323; GFX6-GISEL-NEXT:    v_mul_hi_u32 v2, v11, v2
5324; GFX6-GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
5325; GFX6-GISEL-NEXT:    v_addc_u32_e32 v10, vcc, 0, v1, vcc
5326; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
5327; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
5328; GFX6-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
5329; GFX6-GISEL-NEXT:    v_mul_lo_u32 v2, v4, v6
5330; GFX6-GISEL-NEXT:    v_mul_lo_u32 v3, v4, v5
5331; GFX6-GISEL-NEXT:    v_mul_hi_u32 v4, v4, v5
5332; GFX6-GISEL-NEXT:    v_mul_hi_u32 v5, v12, v7
5333; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
5334; GFX6-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v1, v4
5335; GFX6-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v7
5336; GFX6-GISEL-NEXT:    v_mul_lo_u32 v4, v12, v8
5337; GFX6-GISEL-NEXT:    v_mul_lo_u32 v0, v12, v7
5338; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
5339; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
5340; GFX6-GISEL-NEXT:    v_mul_lo_u32 v4, v2, v9
5341; GFX6-GISEL-NEXT:    v_mul_lo_u32 v5, v3, v10
5342; GFX6-GISEL-NEXT:    v_mul_lo_u32 v2, v3, v9
5343; GFX6-GISEL-NEXT:    v_mul_hi_u32 v3, v3, v9
5344; GFX6-GISEL-NEXT:    v_add_i32_e32 v4, vcc, v4, v5
5345; GFX6-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
5346; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
5347;
5348; GFX7-SDAG-LABEL: clpeak_imad_pat_v2i64:
5349; GFX7-SDAG:       ; %bb.0: ; %entry
5350; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5351; GFX7-SDAG-NEXT:    v_add_i32_e32 v8, vcc, 1, v0
5352; GFX7-SDAG-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
5353; GFX7-SDAG-NEXT:    v_mul_lo_u32 v10, v8, v5
5354; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0
5355; GFX7-SDAG-NEXT:    v_add_i32_e32 v11, vcc, 1, v2
5356; GFX7-SDAG-NEXT:    v_addc_u32_e32 v12, vcc, 0, v3, vcc
5357; GFX7-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v1, v10
5358; GFX7-SDAG-NEXT:    v_mul_lo_u32 v10, v9, v4
5359; GFX7-SDAG-NEXT:    v_mul_lo_u32 v13, v11, v7
5360; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v11, v6, 0
5361; GFX7-SDAG-NEXT:    v_add_i32_e32 v14, vcc, v3, v10
5362; GFX7-SDAG-NEXT:    v_mul_lo_u32 v3, v12, v6
5363; GFX7-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v2, v13
5364; GFX7-SDAG-NEXT:    v_add_i32_e32 v13, vcc, v2, v3
5365; GFX7-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v0, v8
5366; GFX7-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v5
5367; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v2, v4, 0
5368; GFX7-SDAG-NEXT:    v_addc_u32_e32 v9, vcc, v14, v9, vcc
5369; GFX7-SDAG-NEXT:    v_add_i32_e32 v8, vcc, v1, v11
5370; GFX7-SDAG-NEXT:    v_addc_u32_e32 v10, vcc, v13, v12, vcc
5371; GFX7-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
5372; GFX7-SDAG-NEXT:    v_mul_lo_u32 v5, v8, v7
5373; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v8, v6, 0
5374; GFX7-SDAG-NEXT:    v_mul_lo_u32 v9, v9, v4
5375; GFX7-SDAG-NEXT:    v_mul_lo_u32 v4, v10, v6
5376; GFX7-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v8, v5
5377; GFX7-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v3, v9
5378; GFX7-SDAG-NEXT:    v_add_i32_e32 v8, vcc, v5, v4
5379; GFX7-SDAG-NEXT:    v_mul_lo_u32 v6, v8, v1
5380; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v1, v[7:8]
5381; GFX7-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v0
5382; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v2, v0, v[2:3]
5383; GFX7-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
5384; GFX7-SDAG-NEXT:    v_mul_lo_u32 v6, v7, v13
5385; GFX7-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v14
5386; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v10
5387; GFX7-SDAG-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
5388; GFX7-SDAG-NEXT:    v_add_i32_e32 v10, vcc, v0, v1
5389; GFX7-SDAG-NEXT:    v_mul_lo_u32 v11, v10, v2
5390; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v9, v2, v[9:10]
5391; GFX7-SDAG-NEXT:    v_mul_lo_u32 v6, v9, v3
5392; GFX7-SDAG-NEXT:    v_mul_lo_u32 v9, v5, v7
5393; GFX7-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[4:5]
5394; GFX7-SDAG-NEXT:    v_mul_lo_u32 v4, v4, v8
5395; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v11, v1
5396; GFX7-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v9, v3
5397; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v6, v1
5398; GFX7-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
5399; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
5400;
5401; GFX7-GISEL-LABEL: clpeak_imad_pat_v2i64:
5402; GFX7-GISEL:       ; %bb.0: ; %entry
5403; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5404; GFX7-GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v0
5405; GFX7-GISEL-NEXT:    v_addc_u32_e32 v13, vcc, 0, v1, vcc
5406; GFX7-GISEL-NEXT:    v_add_i32_e32 v14, vcc, 1, v2
5407; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
5408; GFX7-GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v3, vcc
5409; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
5410; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
5411; GFX7-GISEL-NEXT:    v_mov_b32_e32 v1, v3
5412; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
5413; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
5414; GFX7-GISEL-NEXT:    v_add_i32_e32 v3, vcc, v0, v12
5415; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
5416; GFX7-GISEL-NEXT:    v_addc_u32_e32 v16, vcc, v8, v13, vcc
5417; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
5418; GFX7-GISEL-NEXT:    v_add_i32_e32 v17, vcc, v2, v14
5419; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0
5420; GFX7-GISEL-NEXT:    v_mov_b32_e32 v1, v11
5421; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2]
5422; GFX7-GISEL-NEXT:    v_mov_b32_e32 v1, v14
5423; GFX7-GISEL-NEXT:    v_addc_u32_e32 v18, vcc, v9, v15, vcc
5424; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2]
5425; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12]
5426; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15]
5427; GFX7-GISEL-NEXT:    v_add_i32_e32 v11, vcc, 1, v0
5428; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
5429; GFX7-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v8, vcc
5430; GFX7-GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v2
5431; GFX7-GISEL-NEXT:    v_mov_b32_e32 v0, v6
5432; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
5433; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0
5434; GFX7-GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v9, vcc
5435; GFX7-GISEL-NEXT:    v_add_i32_e32 v14, vcc, 1, v10
5436; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
5437; GFX7-GISEL-NEXT:    v_mov_b32_e32 v0, v7
5438; GFX7-GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v3, vcc
5439; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1]
5440; GFX7-GISEL-NEXT:    v_add_i32_e32 v16, vcc, 1, v13
5441; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
5442; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3]
5443; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
5444; GFX7-GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v4, vcc
5445; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
5446; GFX7-GISEL-NEXT:    v_mov_b32_e32 v1, v3
5447; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
5448; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
5449; GFX7-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
5450; GFX7-GISEL-NEXT:    v_mov_b32_e32 v1, v5
5451; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
5452;
5453; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i64:
5454; GFX8-SDAG:       ; %bb.0: ; %entry
5455; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5456; GFX8-SDAG-NEXT:    v_add_u32_e32 v8, vcc, 1, v0
5457; GFX8-SDAG-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
5458; GFX8-SDAG-NEXT:    v_mul_lo_u32 v10, v8, v5
5459; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0
5460; GFX8-SDAG-NEXT:    v_add_u32_e32 v11, vcc, 1, v2
5461; GFX8-SDAG-NEXT:    v_addc_u32_e32 v12, vcc, 0, v3, vcc
5462; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v1, v10
5463; GFX8-SDAG-NEXT:    v_mul_lo_u32 v10, v9, v4
5464; GFX8-SDAG-NEXT:    v_mul_lo_u32 v13, v11, v7
5465; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v11, v6, 0
5466; GFX8-SDAG-NEXT:    v_add_u32_e32 v14, vcc, v3, v10
5467; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v12, v6
5468; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v2, v13
5469; GFX8-SDAG-NEXT:    v_add_u32_e32 v13, vcc, v2, v3
5470; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v0, v8
5471; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v2, v5
5472; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v2, v4, 0
5473; GFX8-SDAG-NEXT:    v_addc_u32_e32 v9, vcc, v14, v9, vcc
5474; GFX8-SDAG-NEXT:    v_add_u32_e32 v8, vcc, v1, v11
5475; GFX8-SDAG-NEXT:    v_addc_u32_e32 v10, vcc, v13, v12, vcc
5476; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
5477; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v8, v7
5478; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v8, v6, 0
5479; GFX8-SDAG-NEXT:    v_mul_lo_u32 v9, v9, v4
5480; GFX8-SDAG-NEXT:    v_mul_lo_u32 v4, v10, v6
5481; GFX8-SDAG-NEXT:    v_add_u32_e32 v5, vcc, v8, v5
5482; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v3, v9
5483; GFX8-SDAG-NEXT:    v_add_u32_e32 v8, vcc, v5, v4
5484; GFX8-SDAG-NEXT:    v_mul_lo_u32 v6, v8, v1
5485; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v1, v[7:8]
5486; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v0
5487; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v2, v0, v[2:3]
5488; GFX8-SDAG-NEXT:    v_add_u32_e32 v5, vcc, v6, v5
5489; GFX8-SDAG-NEXT:    v_mul_lo_u32 v6, v7, v13
5490; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v14
5491; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v10
5492; GFX8-SDAG-NEXT:    v_add_u32_e32 v5, vcc, v6, v5
5493; GFX8-SDAG-NEXT:    v_add_u32_e32 v10, vcc, v0, v1
5494; GFX8-SDAG-NEXT:    v_mul_lo_u32 v11, v10, v2
5495; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v9, v2, v[9:10]
5496; GFX8-SDAG-NEXT:    v_mul_lo_u32 v6, v9, v3
5497; GFX8-SDAG-NEXT:    v_mul_lo_u32 v9, v5, v7
5498; GFX8-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[4:5]
5499; GFX8-SDAG-NEXT:    v_mul_lo_u32 v4, v4, v8
5500; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v11, v1
5501; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v9, v3
5502; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v6, v1
5503; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v4, v3
5504; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
5505;
5506; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i64:
5507; GFX8-GISEL:       ; %bb.0: ; %entry
5508; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5509; GFX8-GISEL-NEXT:    v_add_u32_e32 v12, vcc, 1, v0
5510; GFX8-GISEL-NEXT:    v_addc_u32_e32 v13, vcc, 0, v1, vcc
5511; GFX8-GISEL-NEXT:    v_add_u32_e32 v14, vcc, 1, v2
5512; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
5513; GFX8-GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v3, vcc
5514; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
5515; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
5516; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, v3
5517; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
5518; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
5519; GFX8-GISEL-NEXT:    v_add_u32_e32 v3, vcc, v0, v12
5520; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
5521; GFX8-GISEL-NEXT:    v_addc_u32_e32 v16, vcc, v8, v13, vcc
5522; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
5523; GFX8-GISEL-NEXT:    v_add_u32_e32 v17, vcc, v2, v14
5524; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0
5525; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, v11
5526; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2]
5527; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, v14
5528; GFX8-GISEL-NEXT:    v_addc_u32_e32 v18, vcc, v9, v15, vcc
5529; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2]
5530; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12]
5531; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15]
5532; GFX8-GISEL-NEXT:    v_add_u32_e32 v11, vcc, 1, v0
5533; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
5534; GFX8-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v8, vcc
5535; GFX8-GISEL-NEXT:    v_add_u32_e32 v12, vcc, 1, v2
5536; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, v6
5537; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
5538; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0
5539; GFX8-GISEL-NEXT:    v_addc_u32_e32 v2, vcc, 0, v9, vcc
5540; GFX8-GISEL-NEXT:    v_add_u32_e32 v14, vcc, 1, v10
5541; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
5542; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, v7
5543; GFX8-GISEL-NEXT:    v_addc_u32_e32 v15, vcc, 0, v3, vcc
5544; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1]
5545; GFX8-GISEL-NEXT:    v_add_u32_e32 v16, vcc, 1, v13
5546; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
5547; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3]
5548; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
5549; GFX8-GISEL-NEXT:    v_addc_u32_e32 v17, vcc, 0, v4, vcc
5550; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
5551; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, v3
5552; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
5553; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
5554; GFX8-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
5555; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, v5
5556; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
5557;
5558; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i64:
5559; GFX900-SDAG:       ; %bb.0: ; %entry
5560; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5561; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v8, vcc, 1, v2
5562; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v9, vcc, 0, v3, vcc
5563; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v10, vcc, 1, v0
5564; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v11, vcc, 0, v1, vcc
5565; GFX900-SDAG-NEXT:    v_mul_lo_u32 v12, v11, v4
5566; GFX900-SDAG-NEXT:    v_mul_lo_u32 v13, v10, v5
5567; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v4, 0
5568; GFX900-SDAG-NEXT:    v_mul_lo_u32 v14, v9, v6
5569; GFX900-SDAG-NEXT:    v_mul_lo_u32 v15, v8, v7
5570; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v8, v6, 0
5571; GFX900-SDAG-NEXT:    v_add3_u32 v12, v1, v13, v12
5572; GFX900-SDAG-NEXT:    v_add3_u32 v1, v3, v15, v14
5573; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v8, vcc, v2, v8
5574; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v9, vcc, v1, v9, vcc
5575; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v3, vcc, v0, v10
5576; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v10, vcc, v12, v11, vcc
5577; GFX900-SDAG-NEXT:    v_mul_lo_u32 v10, v10, v4
5578; GFX900-SDAG-NEXT:    v_mul_lo_u32 v11, v3, v5
5579; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v3, v4, 0
5580; GFX900-SDAG-NEXT:    v_mul_lo_u32 v9, v9, v6
5581; GFX900-SDAG-NEXT:    v_mul_lo_u32 v7, v8, v7
5582; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v6, 0
5583; GFX900-SDAG-NEXT:    v_add3_u32 v4, v4, v11, v10
5584; GFX900-SDAG-NEXT:    v_mul_lo_u32 v10, v4, v0
5585; GFX900-SDAG-NEXT:    v_add3_u32 v6, v6, v7, v9
5586; GFX900-SDAG-NEXT:    v_mul_lo_u32 v9, v6, v2
5587; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v5, v2, v[5:6]
5588; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v1
5589; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4]
5590; GFX900-SDAG-NEXT:    v_mul_lo_u32 v11, v3, v12
5591; GFX900-SDAG-NEXT:    v_add3_u32 v8, v9, v8, v2
5592; GFX900-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v4
5593; GFX900-SDAG-NEXT:    v_mul_lo_u32 v6, v7, v6
5594; GFX900-SDAG-NEXT:    v_add3_u32 v1, v10, v1, v11
5595; GFX900-SDAG-NEXT:    v_mul_lo_u32 v9, v1, v3
5596; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v3, v[0:1]
5597; GFX900-SDAG-NEXT:    v_mul_lo_u32 v10, v8, v5
5598; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v5, v[7:8]
5599; GFX900-SDAG-NEXT:    v_add3_u32 v1, v9, v1, v4
5600; GFX900-SDAG-NEXT:    v_add3_u32 v3, v10, v3, v6
5601; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
5602;
5603; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i64:
5604; GFX900-GISEL:       ; %bb.0: ; %entry
5605; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5606; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v12, vcc, 1, v0
5607; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
5608; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v14, vcc, 1, v2
5609; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
5610; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v3, vcc
5611; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
5612; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
5613; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, v3
5614; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
5615; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
5616; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v3, vcc, v0, v12
5617; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
5618; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v16, vcc, v8, v13, vcc
5619; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
5620; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v17, vcc, v2, v14
5621; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[13:14], s[4:5], v17, v6, 0
5622; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, v11
5623; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[11:12], s[4:5], v3, v5, v[1:2]
5624; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, v14
5625; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v18, vcc, v9, v15, vcc
5626; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[14:15], s[4:5], v17, v7, v[1:2]
5627; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v16, v4, v[11:12]
5628; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v18, v6, v[14:15]
5629; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v11, vcc, 1, v0
5630; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
5631; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v8, vcc
5632; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v12, vcc, 1, v2
5633; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, v6
5634; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
5635; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v13, v12, 0
5636; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v9, vcc
5637; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v14, vcc, 1, v10
5638; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
5639; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, v7
5640; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v3, vcc
5641; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v13, v2, v[0:1]
5642; GFX900-GISEL-NEXT:    v_add_co_u32_e32 v16, vcc, 1, v13
5643; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
5644; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v4, v12, v[2:3]
5645; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
5646; GFX900-GISEL-NEXT:    v_addc_co_u32_e32 v17, vcc, 0, v4, vcc
5647; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
5648; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, v3
5649; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
5650; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
5651; GFX900-GISEL-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
5652; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, v5
5653; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
5654;
5655; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i64:
5656; GFX90A-SDAG:       ; %bb.0: ; %entry
5657; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5658; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v2, vcc, 1, v2
5659; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
5660; GFX90A-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 1, v0
5661; GFX90A-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
5662; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v12, v1, v4
5663; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v13, v0, v5
5664; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v0, v4, 0
5665; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v4, v[0:1]
5666; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v14, v3, v6
5667; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v15, v2, v7
5668; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v2, v6, 0
5669; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v2, v6, v[2:3]
5670; GFX90A-SDAG-NEXT:    v_add3_u32 v1, v12, v1, v13
5671; GFX90A-SDAG-NEXT:    v_add3_u32 v9, v9, v13, v12
5672; GFX90A-SDAG-NEXT:    v_add3_u32 v3, v14, v3, v15
5673; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v5
5674; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v12, v1, v4
5675; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v4, 0
5676; GFX90A-SDAG-NEXT:    v_add3_u32 v1, v1, v5, v12
5677; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v2, v7
5678; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v5, v3, v6
5679; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v2, v6, 0
5680; GFX90A-SDAG-NEXT:    v_add3_u32 v11, v11, v15, v14
5681; GFX90A-SDAG-NEXT:    v_add3_u32 v3, v3, v4, v5
5682; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v3, v10
5683; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v10, v[2:3]
5684; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v7, v2, v11
5685; GFX90A-SDAG-NEXT:    v_add3_u32 v5, v6, v5, v7
5686; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v10, v1, v8
5687; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v0, v8, v[0:1]
5688; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v8, v0, v9
5689; GFX90A-SDAG-NEXT:    v_add3_u32 v7, v10, v7, v8
5690; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v8, v6, v1
5691; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v9, v7, v0
5692; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
5693; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v4, v3
5694; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v7, v5, v2
5695; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5]
5696; GFX90A-SDAG-NEXT:    v_add3_u32 v1, v9, v1, v8
5697; GFX90A-SDAG-NEXT:    v_add3_u32 v3, v7, v3, v6
5698; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
5699;
5700; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i64:
5701; GFX90A-GISEL:       ; %bb.0: ; %entry
5702; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5703; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v10, vcc, 1, v0
5704; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v11, vcc, 0, v1, vcc
5705; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v12, vcc, 1, v2
5706; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v13, vcc, 0, v3, vcc
5707; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v10, v5, 0
5708; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v10, v4, 0
5709; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v11, v4, v[2:3]
5710; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v1, v2
5711; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v7, 0
5712; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v10, vcc, v0, v10
5713; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v6, 0
5714; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v13, v6, v[8:9]
5715; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v14, vcc, v1, v11, vcc
5716; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, v3, v8
5717; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v12, vcc, v2, v12
5718; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v10, v4, 0
5719; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v10, v5, 0
5720; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v13, vcc, v3, v13, vcc
5721; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v14, v4, v[10:11]
5722; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v12, v7, 0
5723; GFX90A-GISEL-NEXT:    v_add_u32_e32 v9, v9, v4
5724; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v6, 0
5725; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v13, v6, v[10:11]
5726; GFX90A-GISEL-NEXT:    v_add_u32_e32 v5, v5, v6
5727; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v6, vcc, 1, v0
5728; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v1, vcc
5729; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v10, vcc, 1, v2
5730; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v11, vcc, 0, v3, vcc
5731; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0
5732; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v12, vcc, 1, v8
5733; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v8, v6, 0
5734; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v9, v6, v[0:1]
5735; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v13, vcc, 0, v9, vcc
5736; GFX90A-GISEL-NEXT:    v_add_u32_e32 v8, v3, v0
5737; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v11, 0
5738; GFX90A-GISEL-NEXT:    v_add_co_u32_e32 v14, vcc, 1, v4
5739; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v4, v10, 0
5740; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v10, v[0:1]
5741; GFX90A-GISEL-NEXT:    v_addc_co_u32_e32 v15, vcc, 0, v5, vcc
5742; GFX90A-GISEL-NEXT:    v_add_u32_e32 v7, v7, v0
5743; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v12, 0
5744; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v2, v13, 0
5745; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v8, v12, v[2:3]
5746; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v15, 0
5747; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v1, v2
5748; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v14, 0
5749; GFX90A-GISEL-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v7, v14, v[4:5]
5750; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, v3, v4
5751; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
5752;
5753; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i64:
5754; GFX10-SDAG:       ; %bb.0: ; %entry
5755; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5756; GFX10-SDAG-NEXT:    v_add_co_u32 v8, vcc_lo, v0, 1
5757; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo
5758; GFX10-SDAG-NEXT:    v_add_co_u32 v10, vcc_lo, v2, 1
5759; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo
5760; GFX10-SDAG-NEXT:    v_mul_lo_u32 v12, v9, v4
5761; GFX10-SDAG-NEXT:    v_mul_lo_u32 v13, v8, v5
5762; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v8, v4, 0
5763; GFX10-SDAG-NEXT:    v_mul_lo_u32 v14, v11, v6
5764; GFX10-SDAG-NEXT:    v_mul_lo_u32 v15, v10, v7
5765; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v10, v6, 0
5766; GFX10-SDAG-NEXT:    v_add3_u32 v12, v1, v13, v12
5767; GFX10-SDAG-NEXT:    v_add_co_u32 v1, vcc_lo, v0, v8
5768; GFX10-SDAG-NEXT:    v_add3_u32 v13, v3, v15, v14
5769; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v12, v9, vcc_lo
5770; GFX10-SDAG-NEXT:    v_add_co_u32 v8, vcc_lo, v2, v10
5771; GFX10-SDAG-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v13, v11, vcc_lo
5772; GFX10-SDAG-NEXT:    v_mul_lo_u32 v10, v3, v4
5773; GFX10-SDAG-NEXT:    v_mul_lo_u32 v11, v1, v5
5774; GFX10-SDAG-NEXT:    v_mul_lo_u32 v7, v8, v7
5775; GFX10-SDAG-NEXT:    v_mul_lo_u32 v9, v9, v6
5776; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v8, v6, 0
5777; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v1, v4, 0
5778; GFX10-SDAG-NEXT:    v_add3_u32 v6, v6, v7, v9
5779; GFX10-SDAG-NEXT:    v_add3_u32 v4, v4, v11, v10
5780; GFX10-SDAG-NEXT:    v_mul_lo_u32 v11, v3, v12
5781; GFX10-SDAG-NEXT:    v_mul_lo_u32 v9, v6, v2
5782; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[7:8], null, v5, v2, v[5:6]
5783; GFX10-SDAG-NEXT:    v_mul_lo_u32 v10, v4, v0
5784; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
5785; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v13
5786; GFX10-SDAG-NEXT:    v_mul_lo_u32 v6, v7, v6
5787; GFX10-SDAG-NEXT:    v_add3_u32 v1, v10, v1, v11
5788; GFX10-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v4
5789; GFX10-SDAG-NEXT:    v_add3_u32 v8, v9, v8, v2
5790; GFX10-SDAG-NEXT:    v_mul_lo_u32 v9, v1, v3
5791; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1]
5792; GFX10-SDAG-NEXT:    v_mul_lo_u32 v10, v8, v5
5793; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v7, v5, v[7:8]
5794; GFX10-SDAG-NEXT:    v_add3_u32 v1, v9, v1, v4
5795; GFX10-SDAG-NEXT:    v_add3_u32 v3, v10, v3, v6
5796; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
5797;
5798; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i64:
5799; GFX10-GISEL:       ; %bb.0: ; %entry
5800; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5801; GFX10-GISEL-NEXT:    v_add_co_u32 v12, vcc_lo, v0, 1
5802; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, 0, v1, vcc_lo
5803; GFX10-GISEL-NEXT:    v_add_co_u32 v14, vcc_lo, v2, 1
5804; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v12, v4, 0
5805; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v15, vcc_lo, 0, v3, vcc_lo
5806; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[2:3], null, v14, v6, 0
5807; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[8:9], null, v12, v5, v[1:2]
5808; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[9:10], null, v14, v7, v[3:4]
5809; GFX10-GISEL-NEXT:    v_add_co_u32 v3, vcc_lo, v0, v12
5810; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[10:11], null, v13, v4, v[8:9]
5811; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[11:12], null, v3, v4, 0
5812; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, v10, v13, vcc_lo
5813; GFX10-GISEL-NEXT:    v_add_co_u32 v17, vcc_lo, v2, v14
5814; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[8:9], null, v15, v6, v[9:10]
5815; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[13:14], null, v17, v6, 0
5816; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, v8, v15, vcc_lo
5817; GFX10-GISEL-NEXT:    v_add_co_u32 v19, vcc_lo, v0, 1
5818; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, v12
5819; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, v14
5820; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v20, vcc_lo, 0, v10, vcc_lo
5821; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[9:10], null, v11, v19, 0
5822; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[14:15], null, v3, v5, v[0:1]
5823; GFX10-GISEL-NEXT:    v_add_co_u32 v15, vcc_lo, v2, 1
5824; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v17, v7, v[1:2]
5825; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v12, vcc_lo, 0, v8, vcc_lo
5826; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[7:8], null, v13, v15, 0
5827; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, v10
5828; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v16, v4, v[14:15]
5829; GFX10-GISEL-NEXT:    v_add_co_u32 v14, vcc_lo, v11, 1
5830; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[5:6], null, v18, v6, v[0:1]
5831; GFX10-GISEL-NEXT:    v_mov_b32_e32 v6, v8
5832; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[10:11], null, v11, v20, v[1:2]
5833; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, 0, v4, vcc_lo
5834; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[11:12], null, v13, v12, v[6:7]
5835; GFX10-GISEL-NEXT:    v_add_co_u32 v17, vcc_lo, v13, 1
5836; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v9, v14, 0
5837; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v18, vcc_lo, 0, v5, vcc_lo
5838; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[12:13], null, v4, v19, v[10:11]
5839; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[2:3], null, v7, v17, 0
5840; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v5, v15, v[11:12]
5841; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[5:6], null, v9, v16, v[1:2]
5842; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[7:8], null, v7, v18, v[3:4]
5843; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[5:6], null, v12, v14, v[5:6]
5844; GFX10-GISEL-NEXT:    v_mad_u64_u32 v[3:4], null, v4, v17, v[7:8]
5845; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, v5
5846; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
5847;
5848; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i64:
5849; GFX11-SDAG:       ; %bb.0: ; %entry
5850; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5851; GFX11-SDAG-NEXT:    v_add_co_u32 v8, vcc_lo, v0, 1
5852; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo
5853; GFX11-SDAG-NEXT:    v_add_co_u32 v10, vcc_lo, v2, 1
5854; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo
5855; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
5856; GFX11-SDAG-NEXT:    v_mul_lo_u32 v12, v9, v4
5857; GFX11-SDAG-NEXT:    v_mul_lo_u32 v13, v8, v5
5858; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v8, v4, 0
5859; GFX11-SDAG-NEXT:    v_mul_lo_u32 v14, v11, v6
5860; GFX11-SDAG-NEXT:    v_mul_lo_u32 v15, v10, v7
5861; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v10, v6, 0
5862; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5863; GFX11-SDAG-NEXT:    v_add3_u32 v1, v1, v13, v12
5864; GFX11-SDAG-NEXT:    v_add3_u32 v12, v3, v15, v14
5865; GFX11-SDAG-NEXT:    v_add_co_u32 v3, vcc_lo, v0, v8
5866; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
5867; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v8, vcc_lo, v1, v9, vcc_lo
5868; GFX11-SDAG-NEXT:    v_add_co_u32 v9, vcc_lo, v2, v10
5869; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v10, vcc_lo, v12, v11, vcc_lo
5870; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
5871; GFX11-SDAG-NEXT:    v_mul_lo_u32 v11, v8, v4
5872; GFX11-SDAG-NEXT:    v_mul_lo_u32 v13, v3, v5
5873; GFX11-SDAG-NEXT:    v_mul_lo_u32 v14, v9, v7
5874; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
5875; GFX11-SDAG-NEXT:    v_mul_lo_u32 v10, v10, v6
5876; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[7:8], null, v9, v6, 0
5877; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v3, v4, 0
5878; GFX11-SDAG-NEXT:    v_add3_u32 v8, v8, v14, v10
5879; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5880; GFX11-SDAG-NEXT:    v_add3_u32 v6, v6, v13, v11
5881; GFX11-SDAG-NEXT:    v_mul_lo_u32 v11, v8, v2
5882; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[9:10], null, v7, v2, v[7:8]
5883; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
5884; GFX11-SDAG-NEXT:    v_mul_lo_u32 v13, v6, v0
5885; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v5, v0, v[5:6]
5886; GFX11-SDAG-NEXT:    v_mul_lo_u32 v0, v5, v1
5887; GFX11-SDAG-NEXT:    v_mul_lo_u32 v1, v7, v12
5888; GFX11-SDAG-NEXT:    v_mul_lo_u32 v6, v3, v6
5889; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
5890; GFX11-SDAG-NEXT:    v_add3_u32 v4, v13, v4, v0
5891; GFX11-SDAG-NEXT:    v_add3_u32 v10, v11, v10, v1
5892; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
5893; GFX11-SDAG-NEXT:    v_mul_lo_u32 v11, v4, v5
5894; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v5, v[3:4]
5895; GFX11-SDAG-NEXT:    v_mul_lo_u32 v4, v9, v8
5896; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v10, v7
5897; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[2:3], null, v9, v7, v[9:10]
5898; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5899; GFX11-SDAG-NEXT:    v_add3_u32 v1, v11, v1, v6
5900; GFX11-SDAG-NEXT:    v_add3_u32 v3, v5, v3, v4
5901; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
5902;
5903; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i64:
5904; GFX11-GISEL:       ; %bb.0: ; %entry
5905; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5906; GFX11-GISEL-NEXT:    v_add_co_u32 v13, vcc_lo, v0, 1
5907; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v14, vcc_lo, 0, v1, vcc_lo
5908; GFX11-GISEL-NEXT:    v_add_co_u32 v15, vcc_lo, v2, 1
5909; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5910; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v13, v4, 0
5911; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, 0, v3, vcc_lo
5912; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[2:3], null, v15, v6, 0
5913; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5914; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[8:9], null, v13, v5, v[1:2]
5915; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[9:10], null, v15, v7, v[3:4]
5916; GFX11-GISEL-NEXT:    v_add_co_u32 v3, vcc_lo, v0, v13
5917; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5918; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[10:11], null, v14, v4, v[8:9]
5919; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[11:12], null, v16, v6, v[9:10]
5920; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, v10, v14, vcc_lo
5921; GFX11-GISEL-NEXT:    v_add_co_u32 v18, vcc_lo, v2, v15
5922; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[8:9], null, v3, v4, 0
5923; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
5924; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v19, vcc_lo, v11, v16, vcc_lo
5925; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[12:13], null, v18, v6, 0
5926; GFX11-GISEL-NEXT:    v_add_co_u32 v20, vcc_lo, v0, 1
5927; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, 0, v10, vcc_lo
5928; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, v9
5929; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
5930; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[9:10], null, v8, v20, 0
5931; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, v13
5932; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[13:14], null, v3, v5, v[0:1]
5933; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_3)
5934; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, v10
5935; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[14:15], null, v18, v7, v[1:2]
5936; GFX11-GISEL-NEXT:    v_add_co_u32 v18, vcc_lo, v2, 1
5937; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v11, vcc_lo
5938; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[15:16], null, v17, v4, v[13:14]
5939; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5940; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[4:5], null, v12, v18, 0
5941; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[10:11], null, v19, v6, v[14:15]
5942; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[6:7], null, v8, v21, v[0:1]
5943; GFX11-GISEL-NEXT:    v_add_co_u32 v14, vcc_lo, v8, 1
5944; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, 0, v15, vcc_lo
5945; GFX11-GISEL-NEXT:    v_add_co_u32 v17, vcc_lo, v12, 1
5946; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5947; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[7:8], null, v12, v22, v[5:6]
5948; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[0:1], null, v9, v14, 0
5949; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
5950; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[2:3], null, v4, v17, 0
5951; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v19, vcc_lo, 0, v10, vcc_lo
5952; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[11:12], null, v15, v20, v[6:7]
5953; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[12:13], null, v10, v18, v[7:8]
5954; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5955; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[7:8], null, v9, v16, v[1:2]
5956; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[8:9], null, v4, v19, v[3:4]
5957; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5958; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[5:6], null, v11, v14, v[7:8]
5959; GFX11-GISEL-NEXT:    v_mad_u64_u32 v[3:4], null, v12, v17, v[8:9]
5960; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
5961; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, v5
5962; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
5963;
5964; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i64:
5965; GFX1200-SDAG:       ; %bb.0: ; %entry
5966; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
5967; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
5968; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
5969; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
5970; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
5971; GFX1200-SDAG-NEXT:    v_add_co_u32 v8, vcc_lo, v0, 1
5972; GFX1200-SDAG-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo
5973; GFX1200-SDAG-NEXT:    v_add_co_u32 v10, vcc_lo, v2, 1
5974; GFX1200-SDAG-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo
5975; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
5976; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v12, v9, v4
5977; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v13, v8, v5
5978; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v8, v4, 0
5979; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v14, v11, v6
5980; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v15, v10, v7
5981; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[2:3], null, v10, v6, 0
5982; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5983; GFX1200-SDAG-NEXT:    v_add3_u32 v12, v1, v13, v12
5984; GFX1200-SDAG-NEXT:    v_add_co_u32 v1, vcc_lo, v0, v8
5985; GFX1200-SDAG-NEXT:    v_add3_u32 v13, v3, v15, v14
5986; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
5987; GFX1200-SDAG-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v12, v9, vcc_lo
5988; GFX1200-SDAG-NEXT:    v_add_co_u32 v8, vcc_lo, v2, v10
5989; GFX1200-SDAG-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v13, v11, vcc_lo
5990; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
5991; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v10, v3, v4
5992; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v11, v1, v5
5993; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v7, v8, v7
5994; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2)
5995; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v9, v9, v6
5996; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[5:6], null, v8, v6, 0
5997; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[3:4], null, v1, v4, 0
5998; GFX1200-SDAG-NEXT:    v_add3_u32 v6, v6, v7, v9
5999; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
6000; GFX1200-SDAG-NEXT:    v_add3_u32 v4, v4, v11, v10
6001; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v11, v3, v12
6002; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
6003; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v9, v6, v2
6004; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[7:8], null, v5, v2, v[5:6]
6005; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v10, v4, v0
6006; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4]
6007; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v5, v13
6008; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
6009; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v6, v7, v6
6010; GFX1200-SDAG-NEXT:    v_add3_u32 v1, v10, v1, v11
6011; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6012; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v4
6013; GFX1200-SDAG-NEXT:    v_add3_u32 v8, v9, v8, v2
6014; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6015; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v9, v1, v3
6016; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v3, v[0:1]
6017; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v10, v8, v5
6018; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[2:3], null, v7, v5, v[7:8]
6019; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
6020; GFX1200-SDAG-NEXT:    v_add3_u32 v1, v9, v1, v4
6021; GFX1200-SDAG-NEXT:    v_add3_u32 v3, v10, v3, v6
6022; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
6023;
6024; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i64:
6025; GFX1200-GISEL:       ; %bb.0: ; %entry
6026; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
6027; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
6028; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
6029; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
6030; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
6031; GFX1200-GISEL-NEXT:    v_add_co_u32 v8, vcc_lo, v0, 1
6032; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo
6033; GFX1200-GISEL-NEXT:    v_add_co_u32 v10, vcc_lo, v2, 1
6034; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6035; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v0, v8, v4
6036; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo
6037; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v1, v10, v6
6038; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v12, v8, v4
6039; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v13, v10, v6
6040; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
6041; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[2:3], null, v8, v5, v[0:1]
6042; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[0:1], null, v10, v7, v[1:2]
6043; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
6044; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[1:2], null, v9, v4, v[2:3]
6045; GFX1200-GISEL-NEXT:    v_mov_b32_e32 v14, v1
6046; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6047; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[2:3], null, v11, v6, v[0:1]
6048; GFX1200-GISEL-NEXT:    v_add_co_u32 v3, vcc_lo, v12, v8
6049; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v14, v9, vcc_lo
6050; GFX1200-GISEL-NEXT:    v_add_co_u32 v10, vcc_lo, v13, v10
6051; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
6052; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v0, v3, v4
6053; GFX1200-GISEL-NEXT:    v_mov_b32_e32 v8, v2
6054; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v15, v3, v4
6055; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v1, v10, v6
6056; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v16, v10, v6
6057; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
6058; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, v8, v11, vcc_lo
6059; GFX1200-GISEL-NEXT:    v_add_co_u32 v12, vcc_lo, v12, 1
6060; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[2:3], null, v3, v5, v[0:1]
6061; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, 0, v14, vcc_lo
6062; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
6063; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[0:1], null, v10, v7, v[1:2]
6064; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v1, v15, v12
6065; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6066; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[2:3], null, v9, v4, v[2:3]
6067; GFX1200-GISEL-NEXT:    v_add_co_u32 v9, vcc_lo, v13, 1
6068; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[3:4], null, v11, v6, v[0:1]
6069; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
6070; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[4:5], null, v15, v5, v[1:2]
6071; GFX1200-GISEL-NEXT:    v_mov_b32_e32 v1, v2
6072; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v0, v16, v9
6073; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v14, v16, v9
6074; GFX1200-GISEL-NEXT:    v_mov_b32_e32 v11, v3
6075; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v8, vcc_lo
6076; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v8, v15, v12
6077; GFX1200-GISEL-NEXT:    v_add_co_u32 v10, vcc_lo, v15, 1
6078; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, 0, v1, vcc_lo
6079; GFX1200-GISEL-NEXT:    v_add_co_u32 v15, vcc_lo, v16, 1
6080; GFX1200-GISEL-NEXT:    v_add_co_ci_u32_e32 v11, vcc_lo, 0, v11, vcc_lo
6081; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
6082; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v1, v8, v10
6083; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[5:6], null, v16, v7, v[0:1]
6084; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
6085; GFX1200-GISEL-NEXT:    v_mul_hi_u32 v0, v14, v15
6086; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[6:7], null, v2, v12, v[4:5]
6087; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[1:2], null, v8, v13, v[1:2]
6088; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6089; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[3:4], null, v3, v9, v[5:6]
6090; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[4:5], null, v14, v11, v[0:1]
6091; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
6092; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[1:2], null, v6, v10, v[1:2]
6093; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v8, v10
6094; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v14, v15
6095; GFX1200-GISEL-NEXT:    v_mad_co_u64_u32 v[3:4], null, v3, v15, v[4:5]
6096; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
6097entry:
6098  %y18 = add <2 x i64> %x, <i64 1, i64 1>
6099  %add = mul <2 x i64> %y18, %y
6100  %mul119 = add <2 x i64> %add, %y18
6101  %add2 = mul <2 x i64> %mul119, %y
6102  %add220 = add <2 x i64> %add, <i64 1, i64 1>
6103  %add422 = add <2 x i64> %add2, <i64 1, i64 1>
6104  %mul521 = mul <2 x i64> %add2, %add220
6105  %add6 = mul <2 x i64> %mul521, %add422
6106  ret <2 x i64> %add6
6107}
6108
; Test case: a chain of i32 add/mul steps in which every product (%i3, %i5 and
; %i7) is also written to %ptr with a volatile store. The function returns
; %i7 + %i6. The %arg2 parameter is not referenced anywhere in the body.
;
; The assertions below are autogenerated (see the note at the top of the file).
; In every expected sequence the three multiplies appear as v_mul_lo_u32 with
; separate add instructions; no v_mad instruction is expected for any target.
; NOTE(review): presumably the extra store uses of each product are the reason
; the mul/add pairs stay separate -- confirm against the backend's combine logic.
6109define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %ptr) {
6110; GFX6-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
6111; GFX6-SDAG:       ; %bb.0: ; %bb
6112; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6113; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6114; GFX6-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6115; GFX6-SDAG-NEXT:    s_mov_b32 s6, 0
6116; GFX6-SDAG-NEXT:    s_mov_b32 s7, 0xf000
6117; GFX6-SDAG-NEXT:    s_mov_b32 s4, s6
6118; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6119; GFX6-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6120; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6121; GFX6-SDAG-NEXT:    s_mov_b32 s5, s6
6122; GFX6-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v1
6123; GFX6-SDAG-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6124; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0)
6125; GFX6-SDAG-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6126; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0)
6127; GFX6-SDAG-NEXT:    buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
6128; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(1)
6129; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v5, v1
6130; GFX6-SDAG-NEXT:    s_waitcnt expcnt(0)
6131; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
6132;
6133; GFX6-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
6134; GFX6-GISEL:       ; %bb.0: ; %bb
6135; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6136; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6137; GFX6-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6138; GFX6-GISEL-NEXT:    s_mov_b32 s6, 0
6139; GFX6-GISEL-NEXT:    s_mov_b32 s7, 0xf000
6140; GFX6-GISEL-NEXT:    s_mov_b64 s[4:5], 0
6141; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6142; GFX6-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6143; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6144; GFX6-GISEL-NEXT:    v_mul_lo_u32 v5, v0, v1
6145; GFX6-GISEL-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6146; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0)
6147; GFX6-GISEL-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6148; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0)
6149; GFX6-GISEL-NEXT:    buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
6150; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(1)
6151; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v1
6152; GFX6-GISEL-NEXT:    s_waitcnt expcnt(0)
6153; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
6154;
6155; GFX7-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
6156; GFX7-SDAG:       ; %bb.0: ; %bb
6157; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6158; GFX7-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6159; GFX7-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6160; GFX7-SDAG-NEXT:    s_mov_b32 s6, 0
6161; GFX7-SDAG-NEXT:    s_mov_b32 s7, 0xf000
6162; GFX7-SDAG-NEXT:    s_mov_b32 s4, s6
6163; GFX7-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6164; GFX7-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6165; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6166; GFX7-SDAG-NEXT:    s_mov_b32 s5, s6
6167; GFX7-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v1
6168; GFX7-SDAG-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6169; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
6170; GFX7-SDAG-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6171; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
6172; GFX7-SDAG-NEXT:    buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
6173; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
6174; GFX7-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v5, v1
6175; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
6176;
6177; GFX7-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
6178; GFX7-GISEL:       ; %bb.0: ; %bb
6179; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6180; GFX7-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6181; GFX7-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6182; GFX7-GISEL-NEXT:    s_mov_b32 s6, 0
6183; GFX7-GISEL-NEXT:    s_mov_b32 s7, 0xf000
6184; GFX7-GISEL-NEXT:    s_mov_b64 s[4:5], 0
6185; GFX7-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6186; GFX7-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6187; GFX7-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6188; GFX7-GISEL-NEXT:    v_mul_lo_u32 v5, v0, v1
6189; GFX7-GISEL-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6190; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
6191; GFX7-GISEL-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6192; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
6193; GFX7-GISEL-NEXT:    buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
6194; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
6195; GFX7-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v5, v1
6196; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
6197;
6198; GFX8-LABEL: v_multi_use_mul_chain_add_other_use_all:
6199; GFX8:       ; %bb.0: ; %bb
6200; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6201; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
6202; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
6203; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
6204; GFX8-NEXT:    v_mul_lo_u32 v0, v0, v1
6205; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
6206; GFX8-NEXT:    v_mul_lo_u32 v5, v0, v1
6207; GFX8-NEXT:    flat_store_dword v[3:4], v2
6208; GFX8-NEXT:    s_waitcnt vmcnt(0)
6209; GFX8-NEXT:    flat_store_dword v[3:4], v0
6210; GFX8-NEXT:    s_waitcnt vmcnt(0)
6211; GFX8-NEXT:    flat_store_dword v[3:4], v5
6212; GFX8-NEXT:    s_waitcnt vmcnt(0)
6213; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v5, v1
6214; GFX8-NEXT:    s_setpc_b64 s[30:31]
6215;
6216; GFX900-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
6217; GFX900-SDAG:       ; %bb.0: ; %bb
6218; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6219; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
6220; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6221; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
6222; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6223; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6224; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v1
6225; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v2, off
6226; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
6227; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v0, off
6228; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
6229; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v5, off
6230; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
6231; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v5, v1
6232; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
6233;
6234; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
6235; GFX900-GISEL:       ; %bb.0: ; %bb
6236; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6237; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
6238; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6239; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
6240; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6241; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6242; GFX900-GISEL-NEXT:    v_mul_lo_u32 v5, v0, v1
6243; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v2, off
6244; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
6245; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v0, off
6246; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
6247; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v5, off
6248; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
6249; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v5, v1
6250; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
6251;
6252; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
6253; GFX90A-SDAG:       ; %bb.0: ; %bb
6254; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6255; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
6256; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6257; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
6258; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v5, v4
6259; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v4, v3
6260; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6261; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6262; GFX90A-SDAG-NEXT:    global_store_dword v[4:5], v2, off
6263; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
6264; GFX90A-SDAG-NEXT:    global_store_dword v[4:5], v0, off
6265; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
6266; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6267; GFX90A-SDAG-NEXT:    global_store_dword v[4:5], v0, off
6268; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
6269; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v0, v1
6270; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
6271;
6272; GFX90A-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
6273; GFX90A-GISEL:       ; %bb.0: ; %bb
6274; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6275; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
6276; GFX90A-GISEL-NEXT:    v_mov_b32_e32 v2, v3
6277; GFX90A-GISEL-NEXT:    v_mov_b32_e32 v3, v4
6278; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v1
6279; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v4, v0
6280; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6281; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v4
6282; GFX90A-GISEL-NEXT:    global_store_dword v[2:3], v4, off
6283; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0)
6284; GFX90A-GISEL-NEXT:    global_store_dword v[2:3], v0, off
6285; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0)
6286; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6287; GFX90A-GISEL-NEXT:    global_store_dword v[2:3], v0, off
6288; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0)
6289; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v0, v1
6290; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
6291;
6292; GFX10-LABEL: v_multi_use_mul_chain_add_other_use_all:
6293; GFX10:       ; %bb.0: ; %bb
6294; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6295; GFX10-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6296; GFX10-NEXT:    v_mul_lo_u32 v2, v0, v1
6297; GFX10-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6298; GFX10-NEXT:    v_mul_lo_u32 v1, v0, v1
6299; GFX10-NEXT:    v_add_nc_u32_e32 v0, 1, v2
6300; GFX10-NEXT:    v_mul_lo_u32 v5, v1, v0
6301; GFX10-NEXT:    global_store_dword v[3:4], v2, off
6302; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6303; GFX10-NEXT:    global_store_dword v[3:4], v1, off
6304; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6305; GFX10-NEXT:    global_store_dword v[3:4], v5, off
6306; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6307; GFX10-NEXT:    v_add_nc_u32_e32 v0, v5, v0
6308; GFX10-NEXT:    s_setpc_b64 s[30:31]
6309;
6310; GFX11-LABEL: v_multi_use_mul_chain_add_other_use_all:
6311; GFX11:       ; %bb.0: ; %bb
6312; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6313; GFX11-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6314; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6315; GFX11-NEXT:    v_mul_lo_u32 v2, v0, v1
6316; GFX11-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6317; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6318; GFX11-NEXT:    v_mul_lo_u32 v1, v0, v1
6319; GFX11-NEXT:    v_add_nc_u32_e32 v0, 1, v2
6320; GFX11-NEXT:    v_mul_lo_u32 v5, v1, v0
6321; GFX11-NEXT:    global_store_b32 v[3:4], v2, off dlc
6322; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6323; GFX11-NEXT:    global_store_b32 v[3:4], v1, off dlc
6324; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6325; GFX11-NEXT:    global_store_b32 v[3:4], v5, off dlc
6326; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6327; GFX11-NEXT:    v_add_nc_u32_e32 v0, v5, v0
6328; GFX11-NEXT:    s_setpc_b64 s[30:31]
6329;
6330; GFX1200-LABEL: v_multi_use_mul_chain_add_other_use_all:
6331; GFX1200:       ; %bb.0: ; %bb
6332; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
6333; GFX1200-NEXT:    s_wait_expcnt 0x0
6334; GFX1200-NEXT:    s_wait_samplecnt 0x0
6335; GFX1200-NEXT:    s_wait_bvhcnt 0x0
6336; GFX1200-NEXT:    s_wait_kmcnt 0x0
6337; GFX1200-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6338; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6339; GFX1200-NEXT:    v_mul_lo_u32 v2, v0, v1
6340; GFX1200-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6341; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6342; GFX1200-NEXT:    v_mul_lo_u32 v1, v0, v1
6343; GFX1200-NEXT:    v_add_nc_u32_e32 v0, 1, v2
6344; GFX1200-NEXT:    v_mul_lo_u32 v5, v1, v0
6345; GFX1200-NEXT:    s_wait_storecnt 0x0
6346; GFX1200-NEXT:    global_store_b32 v[3:4], v2, off scope:SCOPE_SYS
6347; GFX1200-NEXT:    s_wait_storecnt 0x0
6348; GFX1200-NEXT:    global_store_b32 v[3:4], v1, off scope:SCOPE_SYS
6349; GFX1200-NEXT:    s_wait_storecnt 0x0
6350; GFX1200-NEXT:    global_store_b32 v[3:4], v5, off scope:SCOPE_SYS
6351; GFX1200-NEXT:    s_wait_storecnt 0x0
6352; GFX1200-NEXT:    v_add_nc_u32_e32 v0, v5, v0
6353; GFX1200-NEXT:    s_setpc_b64 s[30:31]
6354bb:
  ; First product: %i3 = (%arg + 1) * %arg1, stored to %ptr.
6355  %i = add i32 %arg, 1
6356  %i3 = mul i32 %i, %arg1
6357  store volatile i32 %i3, ptr addrspace(1) %ptr
  ; Second product: %i5 = (%i3 + %i) * %arg1, stored to %ptr.
6358  %i4 = add i32 %i3, %i
6359  %i5 = mul i32 %i4, %arg1
6360  store volatile i32 %i5, ptr addrspace(1) %ptr
  ; Third product: %i7 = %i5 * (%i3 + 1), stored to %ptr.
6361  %i6 = add i32 %i3, 1
6362  %i7 = mul i32 %i5, %i6
6363  store volatile i32 %i7, ptr addrspace(1) %ptr
  ; Result: the third product plus its second factor (%i6).
6364  %i8 = add i32 %i7, %i6
6365  ret i32 %i8
6366}
6367
6368define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %ptr) {
6369; GFX6-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
6370; GFX6-SDAG:       ; %bb.0: ; %bb
6371; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6372; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6373; GFX6-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6374; GFX6-SDAG-NEXT:    s_mov_b32 s6, 0
6375; GFX6-SDAG-NEXT:    s_mov_b32 s7, 0xf000
6376; GFX6-SDAG-NEXT:    s_mov_b32 s4, s6
6377; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6378; GFX6-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6379; GFX6-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6380; GFX6-SDAG-NEXT:    s_mov_b32 s5, s6
6381; GFX6-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6382; GFX6-SDAG-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6383; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0)
6384; GFX6-SDAG-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6385; GFX6-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
6386; GFX6-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
6387; GFX6-SDAG-NEXT:    s_setpc_b64 s[30:31]
6388;
6389; GFX6-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
6390; GFX6-GISEL:       ; %bb.0: ; %bb
6391; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6392; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6393; GFX6-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6394; GFX6-GISEL-NEXT:    s_mov_b32 s6, 0
6395; GFX6-GISEL-NEXT:    s_mov_b32 s7, 0xf000
6396; GFX6-GISEL-NEXT:    s_mov_b64 s[4:5], 0
6397; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6398; GFX6-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6399; GFX6-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6400; GFX6-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6401; GFX6-GISEL-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6402; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0)
6403; GFX6-GISEL-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6404; GFX6-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
6405; GFX6-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
6406; GFX6-GISEL-NEXT:    s_setpc_b64 s[30:31]
6407;
6408; GFX7-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
6409; GFX7-SDAG:       ; %bb.0: ; %bb
6410; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6411; GFX7-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6412; GFX7-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6413; GFX7-SDAG-NEXT:    s_mov_b32 s6, 0
6414; GFX7-SDAG-NEXT:    s_mov_b32 s7, 0xf000
6415; GFX7-SDAG-NEXT:    s_mov_b32 s4, s6
6416; GFX7-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6417; GFX7-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6418; GFX7-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6419; GFX7-SDAG-NEXT:    s_mov_b32 s5, s6
6420; GFX7-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6421; GFX7-SDAG-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6422; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
6423; GFX7-SDAG-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6424; GFX7-SDAG-NEXT:    s_waitcnt vmcnt(0)
6425; GFX7-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
6426; GFX7-SDAG-NEXT:    s_setpc_b64 s[30:31]
6427;
6428; GFX7-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
6429; GFX7-GISEL:       ; %bb.0: ; %bb
6430; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6431; GFX7-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6432; GFX7-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6433; GFX7-GISEL-NEXT:    s_mov_b32 s6, 0
6434; GFX7-GISEL-NEXT:    s_mov_b32 s7, 0xf000
6435; GFX7-GISEL-NEXT:    s_mov_b64 s[4:5], 0
6436; GFX7-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6437; GFX7-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6438; GFX7-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6439; GFX7-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6440; GFX7-GISEL-NEXT:    buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
6441; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
6442; GFX7-GISEL-NEXT:    buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
6443; GFX7-GISEL-NEXT:    s_waitcnt vmcnt(0)
6444; GFX7-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
6445; GFX7-GISEL-NEXT:    s_setpc_b64 s[30:31]
6446;
6447; GFX8-LABEL: v_multi_use_mul_chain_add_other_use_some:
6448; GFX8:       ; %bb.0: ; %bb
6449; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6450; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
6451; GFX8-NEXT:    v_mul_lo_u32 v2, v0, v1
6452; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
6453; GFX8-NEXT:    v_mul_lo_u32 v0, v0, v1
6454; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
6455; GFX8-NEXT:    v_mul_lo_u32 v0, v0, v1
6456; GFX8-NEXT:    flat_store_dword v[3:4], v2
6457; GFX8-NEXT:    s_waitcnt vmcnt(0)
6458; GFX8-NEXT:    flat_store_dword v[3:4], v0
6459; GFX8-NEXT:    s_waitcnt vmcnt(0)
6460; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
6461; GFX8-NEXT:    s_setpc_b64 s[30:31]
6462;
6463; GFX900-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
6464; GFX900-SDAG:       ; %bb.0: ; %bb
6465; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6466; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
6467; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6468; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
6469; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6470; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6471; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6472; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v2, off
6473; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
6474; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v0, off
6475; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
6476; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v0, v1
6477; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
6478;
6479; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
6480; GFX900-GISEL:       ; %bb.0: ; %bb
6481; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6482; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
6483; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6484; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
6485; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6486; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6487; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6488; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v2, off
6489; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
6490; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v0, off
6491; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
6492; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v0, v1
6493; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
6494;
6495; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
6496; GFX90A-SDAG:       ; %bb.0: ; %bb
6497; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6498; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
6499; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6500; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
6501; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6502; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6503; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v5, v4
6504; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v4, v3
6505; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6506; GFX90A-SDAG-NEXT:    global_store_dword v[4:5], v2, off
6507; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
6508; GFX90A-SDAG-NEXT:    global_store_dword v[4:5], v0, off
6509; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0)
6510; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v0, v1
6511; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
6512;
6513; GFX90A-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
6514; GFX90A-GISEL:       ; %bb.0: ; %bb
6515; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6516; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
6517; GFX90A-GISEL-NEXT:    v_mov_b32_e32 v2, v3
6518; GFX90A-GISEL-NEXT:    v_mov_b32_e32 v3, v4
6519; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v1
6520; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v4, v0
6521; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6522; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v4
6523; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6524; GFX90A-GISEL-NEXT:    global_store_dword v[2:3], v4, off
6525; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0)
6526; GFX90A-GISEL-NEXT:    global_store_dword v[2:3], v0, off
6527; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0)
6528; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v0, v1
6529; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
6530;
6531; GFX10-LABEL: v_multi_use_mul_chain_add_other_use_some:
6532; GFX10:       ; %bb.0: ; %bb
6533; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6534; GFX10-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6535; GFX10-NEXT:    v_mul_lo_u32 v2, v0, v1
6536; GFX10-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6537; GFX10-NEXT:    v_mul_lo_u32 v0, v0, v1
6538; GFX10-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6539; GFX10-NEXT:    v_mul_lo_u32 v5, v0, v1
6540; GFX10-NEXT:    global_store_dword v[3:4], v2, off
6541; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6542; GFX10-NEXT:    global_store_dword v[3:4], v5, off
6543; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
6544; GFX10-NEXT:    v_add_nc_u32_e32 v0, v5, v1
6545; GFX10-NEXT:    s_setpc_b64 s[30:31]
6546;
6547; GFX11-LABEL: v_multi_use_mul_chain_add_other_use_some:
6548; GFX11:       ; %bb.0: ; %bb
6549; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6550; GFX11-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6551; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6552; GFX11-NEXT:    v_mul_lo_u32 v2, v0, v1
6553; GFX11-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6554; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6555; GFX11-NEXT:    v_mul_lo_u32 v0, v0, v1
6556; GFX11-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6557; GFX11-NEXT:    v_mul_lo_u32 v5, v0, v1
6558; GFX11-NEXT:    global_store_b32 v[3:4], v2, off dlc
6559; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6560; GFX11-NEXT:    global_store_b32 v[3:4], v5, off dlc
6561; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
6562; GFX11-NEXT:    v_add_nc_u32_e32 v0, v5, v1
6563; GFX11-NEXT:    s_setpc_b64 s[30:31]
6564;
6565; GFX1200-LABEL: v_multi_use_mul_chain_add_other_use_some:
6566; GFX1200:       ; %bb.0: ; %bb
6567; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
6568; GFX1200-NEXT:    s_wait_expcnt 0x0
6569; GFX1200-NEXT:    s_wait_samplecnt 0x0
6570; GFX1200-NEXT:    s_wait_bvhcnt 0x0
6571; GFX1200-NEXT:    s_wait_kmcnt 0x0
6572; GFX1200-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6573; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6574; GFX1200-NEXT:    v_mul_lo_u32 v2, v0, v1
6575; GFX1200-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6576; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6577; GFX1200-NEXT:    v_mul_lo_u32 v0, v0, v1
6578; GFX1200-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6579; GFX1200-NEXT:    v_mul_lo_u32 v5, v0, v1
6580; GFX1200-NEXT:    s_wait_storecnt 0x0
6581; GFX1200-NEXT:    global_store_b32 v[3:4], v2, off scope:SCOPE_SYS
6582; GFX1200-NEXT:    s_wait_storecnt 0x0
6583; GFX1200-NEXT:    global_store_b32 v[3:4], v5, off scope:SCOPE_SYS
6584; GFX1200-NEXT:    s_wait_storecnt 0x0
6585; GFX1200-NEXT:    v_add_nc_u32_e32 v0, v5, v1
6586; GFX1200-NEXT:    s_setpc_b64 s[30:31]
6587bb:
6588  %i = add i32 %arg, 1
6589  %i3 = mul i32 %i, %arg1
6590  store volatile i32 %i3, ptr addrspace(1) %ptr
6591  %i4 = add i32 %i3, %i
6592  %i5 = mul i32 %i4, %arg1
6593  %i6 = add i32 %i3, 1
6594  %i7 = mul i32 %i5, %i6
6595  store volatile i32 %i7, ptr addrspace(1) %ptr
6596  %i8 = add i32 %i7, %i6
6597  ret i32 %i8
6598}
6599
; Scalar i32 chain of interleaved add/mul operations on %x and %y.
; Every mul result except the last two is consumed by two later instructions,
; and several multiplies take an operand of the form (earlier value + 1).
; In the autogenerated assertions below, the SelectionDAG output for gfx900
; and newer selects the last two multiplies as v_mad_u64_u32 (spelled
; v_mad_co_u64_u32 under the gfx1200 prefixes), while the GlobalISel output
; and the gfx6/7/8 output consist only of 32-bit adds and v_mul_lo_u32.
; NOTE(review): presumably this guards the mul-by-(n + 1) to mad combine --
; confirm against the change that introduced the test.
6600define i32 @clpeak_imad_pat_i32_x2(i32 %x, i32 %y) {
6601; GFX67-SDAG-LABEL: clpeak_imad_pat_i32_x2:
6602; GFX67-SDAG:       ; %bb.0: ; %entry
6603; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6604; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6605; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6606; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6607; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6608; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6609; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6610; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
6611; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6612; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6613; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6614; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
6615; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6616; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
6617; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v0
6618; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6619; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
6620; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
6621;
6622; GFX67-GISEL-LABEL: clpeak_imad_pat_i32_x2:
6623; GFX67-GISEL:       ; %bb.0: ; %entry
6624; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6625; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6626; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6627; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
6628; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6629; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6630; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6631; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
6632; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6633; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6634; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6635; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
6636; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6637; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v2
6638; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
6639; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6640; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6641; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
6642;
6643; GFX8-SDAG-LABEL: clpeak_imad_pat_i32_x2:
6644; GFX8-SDAG:       ; %bb.0: ; %entry
6645; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6646; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
6647; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6648; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
6649; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6650; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
6651; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6652; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
6653; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6654; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
6655; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6656; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
6657; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6658; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v0, v2
6659; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v0
6660; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6661; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
6662; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
6663;
6664; GFX8-GISEL-LABEL: clpeak_imad_pat_i32_x2:
6665; GFX8-GISEL:       ; %bb.0: ; %entry
6666; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6667; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
6668; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6669; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
6670; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6671; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
6672; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6673; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
6674; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6675; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
6676; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6677; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, v2, v1
6678; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6679; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v2
6680; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
6681; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
6682; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6683; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
6684;
6685; GFX900-SDAG-LABEL: clpeak_imad_pat_i32_x2:
6686; GFX900-SDAG:       ; %bb.0: ; %entry
6687; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6688; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
6689; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6690; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
6691; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6692; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6693; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6694; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, v2, v1
6695; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6696; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6697; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6698; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, v2, v1
6699; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6700; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
6701; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
6702; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
6703;
6704; GFX900-GISEL-LABEL: clpeak_imad_pat_i32_x2:
6705; GFX900-GISEL:       ; %bb.0: ; %entry
6706; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6707; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
6708; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6709; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
6710; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6711; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6712; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6713; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, v2, v1
6714; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6715; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6716; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6717; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, v2, v1
6718; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6719; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6720; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
6721; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
6722; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6723; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
6724;
6725; GFX90A-SDAG-LABEL: clpeak_imad_pat_i32_x2:
6726; GFX90A-SDAG:       ; %bb.0: ; %entry
6727; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6728; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
6729; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6730; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
6731; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6732; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6733; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6734; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, v2, v1
6735; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6736; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
6737; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6738; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, v2, v1
6739; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6740; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
6741; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
6742; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
6743;
6744; GFX90A-GISEL-LABEL: clpeak_imad_pat_i32_x2:
6745; GFX90A-GISEL:       ; %bb.0: ; %entry
6746; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6747; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
6748; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6749; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
6750; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6751; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6752; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6753; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v2, v1
6754; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6755; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6756; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6757; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v2, v1
6758; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6759; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
6760; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v0
6761; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6762; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
6763; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
6764;
6765; GFX10-SDAG-LABEL: clpeak_imad_pat_i32_x2:
6766; GFX10-SDAG:       ; %bb.0: ; %entry
6767; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6768; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6769; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6770; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6771; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6772; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6773; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6774; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6775; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6776; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6777; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6778; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6779; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6780; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
6781; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
6782; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
6783;
6784; GFX10-GISEL-LABEL: clpeak_imad_pat_i32_x2:
6785; GFX10-GISEL:       ; %bb.0: ; %entry
6786; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6787; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6788; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6789; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6790; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6791; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6792; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6793; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6794; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6795; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6796; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6797; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6798; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6799; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6800; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
6801; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6802; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6803; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
6804;
6805; GFX11-SDAG-LABEL: clpeak_imad_pat_i32_x2:
6806; GFX11-SDAG:       ; %bb.0: ; %entry
6807; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6808; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6809; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6810; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6811; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6812; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6813; GFX11-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6814; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6815; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6816; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6817; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6818; GFX11-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6819; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6820; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6821; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v1
6822; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, v5, v1
6823; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6824; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v1, v0
6825; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v2, v5, v[2:3]
6826; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6827; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, v[3:4]
6828; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
6829;
6830; GFX11-GISEL-LABEL: clpeak_imad_pat_i32_x2:
6831; GFX11-GISEL:       ; %bb.0: ; %entry
6832; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6833; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6834; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6835; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6836; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6837; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6838; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6839; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6840; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6841; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6842; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6843; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6844; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6845; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6846; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6847; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6848; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6849; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6850; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6851; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
6852; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6853; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6854; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6855; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
6856;
6857; GFX1200-SDAG-LABEL: clpeak_imad_pat_i32_x2:
6858; GFX1200-SDAG:       ; %bb.0: ; %entry
6859; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
6860; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
6861; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
6862; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
6863; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
6864; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6865; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6866; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6867; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6868; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6869; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
6870; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6871; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6872; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6873; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6874; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6875; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6876; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6877; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
6878; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6879; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6880; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v1, v0
6881; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v0, v2, v[0:1]
6882; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6883; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v1, v0, v[1:2]
6884; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
6885;
6886; GFX1200-GISEL-LABEL: clpeak_imad_pat_i32_x2:
6887; GFX1200-GISEL:       ; %bb.0: ; %entry
6888; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
6889; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
6890; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
6891; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
6892; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
6893; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6894; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6895; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6896; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v2, v0
6897; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6898; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
6899; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6900; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6901; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6902; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6903; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6904; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6905; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
6906; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
6907; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, v2, v1
6908; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
6909; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6910; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v2
6911; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v0, v1
6912; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
6913; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
6914; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v1, v0
6915; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
6916entry:
  ; The value names do not track the opcodes: for example %add is produced by
  ; a mul and %mul139 by an add. Read the opcode, not the name.
6917  %y38 = add i32 %x, 1
6918  %add = mul i32 %y38, %y
6919  %mul139 = add i32 %add, %y38
6920  %add2 = mul i32 %mul139, %y
6921  %add240 = add i32 %add, 1
6922  %add4 = mul i32 %add2, %add240
6923  %mul541 = add i32 %add4, %add240
6924  %add6 = mul i32 %mul541, %add2
6925  %add642 = add i32 %add4, 1
6926  %add8 = mul i32 %add6, %add642
6927  %mul943 = add i32 %add8, %add642
6928  %add10 = mul i32 %mul943, %add6
  ; Tail: result = %add10 * (%add8 + 1) * (%add10 + 1). These two multiplies
  ; are the ones that appear as v_mad_u64_u32 in the SelectionDAG checks for
  ; gfx900 and newer.
6929  %add1044 = add i32 %add8, 1
6930  %add1246 = add i32 %add10, 1
6931  %mul1345 = mul i32 %add10, %add1044
6932  %add14 = mul i32 %mul1345, %add1246
6933  ret i32 %add14
6934}
6935
6936define <2 x i32> @clpeak_imad_pat_v2i32_x2(<2 x i32> %x, <2 x i32> %y) {
6937; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
6938; GFX67-SDAG:       ; %bb.0: ; %entry
6939; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6940; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6941; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
6942; GFX67-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v2
6943; GFX67-SDAG-NEXT:    v_mul_lo_u32 v4, v1, v3
6944; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v5, v0
6945; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v4, v1
6946; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
6947; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
6948; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v5
6949; GFX67-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v2
6950; GFX67-SDAG-NEXT:    v_add_i32_e32 v4, vcc, 1, v4
6951; GFX67-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v4
6952; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
6953; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
6954; GFX67-SDAG-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
6955; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v4, v1
6956; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, 1, v3
6957; GFX67-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v2
6958; GFX67-SDAG-NEXT:    v_add_i32_e32 v4, vcc, 1, v5
6959; GFX67-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v4
6960; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
6961; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
6962; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v5, v4
6963; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v2, v1
6964; GFX67-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v3
6965; GFX67-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v5
6966; GFX67-SDAG-NEXT:    v_add_i32_e32 v2, vcc, v2, v0
6967; GFX67-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
6968; GFX67-SDAG-NEXT:    v_add_i32_e32 v3, vcc, v3, v1
6969; GFX67-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
6970; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
6971; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
6972; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
6973;
6974; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
6975; GFX67-GISEL:       ; %bb.0: ; %entry
6976; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6977; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
6978; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
6979; GFX67-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
6980; GFX67-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
6981; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, v4, v0
6982; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, v5, v1
6983; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
6984; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
6985; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v4
6986; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v5
6987; GFX67-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v2
6988; GFX67-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v4
6989; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
6990; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
6991; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
6992; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
6993; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v3
6994; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v5
6995; GFX67-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v2
6996; GFX67-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v4
6997; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
6998; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
6999; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7000; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
7001; GFX67-GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v3
7002; GFX67-GISEL-NEXT:    v_add_i32_e32 v3, vcc, 1, v5
7003; GFX67-GISEL-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
7004; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7005; GFX67-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v3
7006; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
7007; GFX67-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
7008; GFX67-GISEL-NEXT:    v_mul_lo_u32 v1, v2, v1
7009; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
7010;
7011; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
7012; GFX8-SDAG:       ; %bb.0: ; %entry
7013; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7014; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
7015; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
7016; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v2
7017; GFX8-SDAG-NEXT:    v_mul_lo_u32 v4, v1, v3
7018; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v5, v0
7019; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v4, v1
7020; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
7021; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
7022; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, 1, v5
7023; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v2
7024; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, 1, v4
7025; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v4
7026; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
7027; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7028; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, v5, v4
7029; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v4, v1
7030; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, 1, v3
7031; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v2
7032; GFX8-SDAG-NEXT:    v_add_u32_e32 v4, vcc, 1, v5
7033; GFX8-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v4
7034; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
7035; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7036; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v5, v4
7037; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v2, v1
7038; GFX8-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v3
7039; GFX8-SDAG-NEXT:    v_mul_lo_u32 v3, v1, v5
7040; GFX8-SDAG-NEXT:    v_add_u32_e32 v2, vcc, v2, v0
7041; GFX8-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7042; GFX8-SDAG-NEXT:    v_add_u32_e32 v3, vcc, v3, v1
7043; GFX8-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
7044; GFX8-SDAG-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
7045; GFX8-SDAG-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
7046; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
7047;
7048; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
7049; GFX8-GISEL:       ; %bb.0: ; %entry
7050; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7051; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 1, v0
7052; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
7053; GFX8-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7054; GFX8-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7055; GFX8-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v0
7056; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, v5, v1
7057; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7058; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
7059; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 1, v4
7060; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, 1, v5
7061; GFX8-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v2
7062; GFX8-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v4
7063; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
7064; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v5, v4
7065; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7066; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
7067; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 1, v3
7068; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, 1, v5
7069; GFX8-GISEL-NEXT:    v_mul_lo_u32 v3, v0, v2
7070; GFX8-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v4
7071; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
7072; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v5, v4
7073; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7074; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v4, v1
7075; GFX8-GISEL-NEXT:    v_add_u32_e32 v2, vcc, 1, v3
7076; GFX8-GISEL-NEXT:    v_add_u32_e32 v3, vcc, 1, v5
7077; GFX8-GISEL-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
7078; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7079; GFX8-GISEL-NEXT:    v_mul_lo_u32 v2, v1, v3
7080; GFX8-GISEL-NEXT:    v_add_u32_e32 v1, vcc, 1, v1
7081; GFX8-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
7082; GFX8-GISEL-NEXT:    v_mul_lo_u32 v1, v2, v1
7083; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
7084;
7085; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
7086; GFX900-SDAG:       ; %bb.0: ; %entry
7087; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7088; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
7089; GFX900-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7090; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
7091; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7092; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v4, v0
7093; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
7094; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, v5, v1
7095; GFX900-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
7096; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, 1, v4
7097; GFX900-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v2
7098; GFX900-SDAG-NEXT:    v_add_u32_e32 v4, 1, v5
7099; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v4
7100; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, v3, v2
7101; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7102; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, v5, v4
7103; GFX900-SDAG-NEXT:    v_mul_lo_u32 v1, v2, v1
7104; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, 1, v3
7105; GFX900-SDAG-NEXT:    v_mul_lo_u32 v3, v0, v2
7106; GFX900-SDAG-NEXT:    v_add_u32_e32 v4, 1, v5
7107; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v4
7108; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, v3, v2
7109; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7110; GFX900-SDAG-NEXT:    v_add_u32_e32 v2, v5, v4
7111; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v1
7112; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v0, v3, v[0:1]
7113; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v5, v[2:3]
7114; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4]
7115; GFX900-SDAG-NEXT:    v_mad_u64_u32 v[1:2], s[4:5], v4, v2, v[4:5]
7116; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
7117;
7118; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
7119; GFX900-GISEL:       ; %bb.0: ; %entry
7120; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7121; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
7122; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
7123; GFX900-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7124; GFX900-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7125; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v4, v0
7126; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, v5, v1
7127; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7128; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
7129; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
7130; GFX900-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
7131; GFX900-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7132; GFX900-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7133; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, v4, v2
7134; GFX900-GISEL-NEXT:    v_add_u32_e32 v3, v5, v3
7135; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7136; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7137; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
7138; GFX900-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
7139; GFX900-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7140; GFX900-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7141; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, v4, v2
7142; GFX900-GISEL-NEXT:    v_add_u32_e32 v3, v5, v3
7143; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7144; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7145; GFX900-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
7146; GFX900-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
7147; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
7148; GFX900-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
7149; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
7150; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
7151; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7152; GFX900-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7153; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
7154;
7155; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
7156; GFX90A-SDAG:       ; %bb.0: ; %entry
7157; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7158; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, 1, v1
7159; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
7160; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7161; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7162; GFX90A-SDAG-NEXT:    v_add_u32_e32 v1, v5, v1
7163; GFX90A-SDAG-NEXT:    v_add_u32_e32 v0, v4, v0
7164; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
7165; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
7166; GFX90A-SDAG-NEXT:    v_add_u32_e32 v2, 1, v4
7167; GFX90A-SDAG-NEXT:    v_add_u32_e32 v3, 1, v5
7168; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v1, v3
7169; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v2
7170; GFX90A-SDAG-NEXT:    v_add_u32_e32 v2, v5, v2
7171; GFX90A-SDAG-NEXT:    v_add_u32_e32 v3, v4, v3
7172; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
7173; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7174; GFX90A-SDAG-NEXT:    v_add_u32_e32 v2, 1, v4
7175; GFX90A-SDAG-NEXT:    v_add_u32_e32 v3, 1, v5
7176; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v3
7177; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v4, v1, v2
7178; GFX90A-SDAG-NEXT:    v_add_u32_e32 v2, v4, v2
7179; GFX90A-SDAG-NEXT:    v_add_u32_e32 v3, v6, v3
7180; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v0, v3, v0
7181; GFX90A-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v1
7182; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v4, v[2:3]
7183; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v0, v6, v[0:1]
7184; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
7185; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5]
7186; GFX90A-SDAG-NEXT:    v_mov_b32_e32 v1, v2
7187; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
7188;
7189; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
7190; GFX90A-GISEL:       ; %bb.0: ; %entry
7191; GFX90A-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7192; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
7193; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, 1, v1
7194; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7195; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7196; GFX90A-GISEL-NEXT:    v_add_u32_e32 v0, v4, v0
7197; GFX90A-GISEL-NEXT:    v_add_u32_e32 v1, v5, v1
7198; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7199; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
7200; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
7201; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
7202; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7203; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7204; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, v4, v2
7205; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, v5, v3
7206; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7207; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7208; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
7209; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
7210; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7211; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7212; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, v4, v2
7213; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, v5, v3
7214; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7215; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7216; GFX90A-GISEL-NEXT:    v_add_u32_e32 v2, 1, v4
7217; GFX90A-GISEL-NEXT:    v_add_u32_e32 v3, 1, v5
7218; GFX90A-GISEL-NEXT:    v_add_u32_e32 v4, 1, v0
7219; GFX90A-GISEL-NEXT:    v_add_u32_e32 v5, 1, v1
7220; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7221; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
7222; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v4
7223; GFX90A-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v5
7224; GFX90A-GISEL-NEXT:    s_setpc_b64 s[30:31]
7225;
7226; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
7227; GFX10-SDAG:       ; %bb.0: ; %entry
7228; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7229; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7230; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7231; GFX10-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7232; GFX10-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7233; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v0, v4, v0
7234; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v1, v5, v1
7235; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
7236; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7237; GFX10-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
7238; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7239; GFX10-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7240; GFX10-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7241; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7242; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7243; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7244; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7245; GFX10-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
7246; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7247; GFX10-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7248; GFX10-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7249; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7250; GFX10-SDAG-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7251; GFX10-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7252; GFX10-SDAG-NEXT:    v_mul_lo_u32 v2, v3, v1
7253; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[3:4], null, v0, v4, v[0:1]
7254; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[4:5], null, v2, v5, v[2:3]
7255; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
7256; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v4, v2, v[4:5]
7257; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
7258;
7259; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
7260; GFX10-GISEL:       ; %bb.0: ; %entry
7261; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7262; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7263; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7264; GFX10-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7265; GFX10-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7266; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, v4, v0
7267; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, v5, v1
7268; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7269; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
7270; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7271; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7272; GFX10-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7273; GFX10-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7274; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7275; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7276; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7277; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7278; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7279; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7280; GFX10-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7281; GFX10-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7282; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7283; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7284; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7285; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7286; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7287; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7288; GFX10-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
7289; GFX10-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
7290; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7291; GFX10-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7292; GFX10-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7293; GFX10-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7294; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
7295;
7296; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
7297; GFX11-SDAG:       ; %bb.0: ; %entry
7298; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7299; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7300; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7301; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7302; GFX11-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7303; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7304; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7305; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v0, v4, v0
7306; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v1, v5, v1
7307; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7308; GFX11-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
7309; GFX11-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
7310; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7311; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7312; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7313; GFX11-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7314; GFX11-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7315; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7316; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7317; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7318; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7319; GFX11-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7320; GFX11-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
7321; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7322; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7323; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7324; GFX11-SDAG-NEXT:    v_mul_lo_u32 v6, v0, v2
7325; GFX11-SDAG-NEXT:    v_mul_lo_u32 v7, v1, v3
7326; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7327; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v2, v6, v2
7328; GFX11-SDAG-NEXT:    v_add_nc_u32_e32 v3, v7, v3
7329; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7330; GFX11-SDAG-NEXT:    v_mul_lo_u32 v2, v2, v0
7331; GFX11-SDAG-NEXT:    v_mul_lo_u32 v3, v3, v1
7332; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7333; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[4:5], null, v2, v6, v[2:3]
7334; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[5:6], null, v3, v7, v[3:4]
7335; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
7336; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, v[4:5]
7337; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v3, v[5:6]
7338; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
7339;
7340; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
7341; GFX11-GISEL:       ; %bb.0: ; %entry
7342; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7343; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7344; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7345; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7346; GFX11-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7347; GFX11-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7348; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7349; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, v4, v0
7350; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, v5, v1
7351; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7352; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7353; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
7354; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7355; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7356; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7357; GFX11-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7358; GFX11-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7359; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7360; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7361; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7362; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7363; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7364; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7365; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7366; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7367; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7368; GFX11-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7369; GFX11-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7370; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7371; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7372; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7373; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7374; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7375; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7376; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7377; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7378; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7379; GFX11-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
7380; GFX11-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
7381; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7382; GFX11-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7383; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7384; GFX11-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7385; GFX11-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7386; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
7387;
7388; GFX1200-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
7389; GFX1200-SDAG:       ; %bb.0: ; %entry
7390; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
7391; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
7392; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
7393; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
7394; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
7395; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7396; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7397; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7398; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7399; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7400; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7401; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v0, v4, v0
7402; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v1, v5, v1
7403; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
7404; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v2
7405; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7406; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v1, v1, v3
7407; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7408; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
7409; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7410; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7411; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7412; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7413; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7414; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
7415; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7416; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7417; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v1, v3, v1
7418; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7419; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
7420; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v4, v0, v2
7421; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v5, v1, v3
7422; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7423; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7424; GFX1200-SDAG-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7425; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7426; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v0, v2, v0
7427; GFX1200-SDAG-NEXT:    v_mul_lo_u32 v2, v3, v1
7428; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
7429; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[3:4], null, v0, v4, v[0:1]
7430; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[4:5], null, v2, v5, v[2:3]
7431; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
7432; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[0:1], null, v3, v0, v[3:4]
7433; GFX1200-SDAG-NEXT:    v_mad_co_u64_u32 v[1:2], null, v4, v2, v[4:5]
7434; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
7435;
7436; GFX1200-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
7437; GFX1200-GISEL:       ; %bb.0: ; %entry
7438; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
7439; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
7440; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
7441; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
7442; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
7443; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7444; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7445; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7446; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7447; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7448; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7449; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, v4, v0
7450; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, v5, v1
7451; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7452; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v2
7453; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v1, v3
7454; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7455; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7456; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7457; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7458; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7459; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7460; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7461; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7462; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7463; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7464; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7465; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7466; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7467; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7468; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v4, v0, v2
7469; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v5, v1, v3
7470; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7471; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, v4, v2
7472; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, v5, v3
7473; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7474; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7475; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7476; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v2, 1, v4
7477; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v3, 1, v5
7478; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7479; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v2
7480; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v3, v1, v3
7481; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v0, 1, v0
7482; GFX1200-GISEL-NEXT:    v_add_nc_u32_e32 v1, 1, v1
7483; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7484; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v0, v2, v0
7485; GFX1200-GISEL-NEXT:    v_mul_lo_u32 v1, v3, v1
7486; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
7487entry:
7488  %y38 = add <2 x i32> %x, <i32 1, i32 1>
7489  %add = mul <2 x i32> %y38, %y
7490  %mul139 = add <2 x i32> %add, %y38
7491  %add2 = mul <2 x i32> %mul139, %y
7492  %add240 = add <2 x i32> %add, <i32 1, i32 1>
7493  %add4 = mul <2 x i32> %add2, %add240
7494  %mul541 = add <2 x i32> %add4, %add240
7495  %add6 = mul <2 x i32> %mul541, %add2
7496  %add642 = add <2 x i32> %add4, <i32 1, i32 1>
7497  %add8 = mul <2 x i32> %add6, %add642
7498  %mul943 = add <2 x i32> %add8, %add642
7499  %add10 = mul <2 x i32> %mul943, %add6
7500  %add1044 = add <2 x i32> %add8, <i32 1, i32 1>
7501  %add1246 = add <2 x i32> %add10, <i32 1, i32 1>
7502  %mul1345 = mul <2 x i32> %add10, %add1044
7503  %add14 = mul <2 x i32> %mul1345, %add1246
7504  ret <2 x i32> %add14
7505}
7506
7507define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
7508; GFX67-LABEL: clpeak_imad_pat_i16_x2:
7509; GFX67:       ; %bb.0: ; %entry
7510; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7511; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
7512; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7513; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v1
7514; GFX67-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
7515; GFX67-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
7516; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
7517; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
7518; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
7519; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
7520; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7521; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
7522; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v2
7523; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
7524; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
7525; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
7526; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
7527; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
7528; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7529; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
7530; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v2
7531; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
7532; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
7533; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
7534; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v1, v3
7535; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
7536; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7537; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v4
7538; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
7539; GFX67-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
7540; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7541; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
7542; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
7543; GFX67-NEXT:    v_bfe_i32 v0, v0, 0, 16
7544; GFX67-NEXT:    s_setpc_b64 s[30:31]
7545;
7546; GFX8-SDAG-LABEL: clpeak_imad_pat_i16_x2:
7547; GFX8-SDAG:       ; %bb.0: ; %entry
7548; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7549; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7550; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7551; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7552; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7553; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7554; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7555; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7556; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7557; GFX8-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
7558; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
7559;
7560; GFX8-GISEL-LABEL: clpeak_imad_pat_i16_x2:
7561; GFX8-GISEL:       ; %bb.0: ; %entry
7562; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7563; GFX8-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
7564; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
7565; GFX8-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
7566; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
7567; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7568; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v0, v4
7569; GFX8-GISEL-NEXT:    v_mad_u16 v2, v3, v2, 1
7570; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v3, v2, v1
7571; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v4, 1
7572; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v0, v3
7573; GFX8-GISEL-NEXT:    v_mad_u16 v1, v2, v1, 1
7574; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v1, v4
7575; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v3, 1
7576; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
7577; GFX8-GISEL-NEXT:    v_mad_u16 v1, v1, v4, 1
7578; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
7579; GFX8-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
7580; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
7581;
7582; GFX9-SDAG-LABEL: clpeak_imad_pat_i16_x2:
7583; GFX9-SDAG:       ; %bb.0: ; %entry
7584; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7585; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7586; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
7587; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7588; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
7589; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7590; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
7591; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7592; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v0, v1, v0
7593; GFX9-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
7594; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
7595;
7596; GFX9-GISEL-LABEL: clpeak_imad_pat_i16_x2:
7597; GFX9-GISEL:       ; %bb.0: ; %entry
7598; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7599; GFX9-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
7600; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
7601; GFX9-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
7602; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
7603; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v1, 1
7604; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v0, v4
7605; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v2, v3, v2, 1
7606; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v3, v2, v1
7607; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v4, 1
7608; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v0, v3
7609; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v2, v1, 1
7610; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v1, v4
7611; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v3, 1
7612; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
7613; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v1, v4, 1
7614; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
7615; GFX9-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
7616; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
7617;
7618; GFX10-SDAG-LABEL: clpeak_imad_pat_i16_x2:
7619; GFX10-SDAG:       ; %bb.0: ; %entry
7620; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7621; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7622; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7623; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7624; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7625; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7626; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7627; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7628; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7629; GFX10-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
7630; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
7631;
7632; GFX10-GISEL-LABEL: clpeak_imad_pat_i16_x2:
7633; GFX10-GISEL:       ; %bb.0: ; %entry
7634; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7635; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
7636; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
7637; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7638; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7639; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7640; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7641; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7642; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7643; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7644; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7645; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7646; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7647; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7648; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7649; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
7650; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7651; GFX10-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
7652; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
7653;
7654; GFX11-SDAG-LABEL: clpeak_imad_pat_i16_x2:
7655; GFX11-SDAG:       ; %bb.0: ; %entry
7656; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7657; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7658; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7659; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7660; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7661; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7662; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7663; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7664; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7665; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7666; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7667; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7668; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7669; GFX11-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
7670; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
7671;
7672; GFX11-GISEL-LABEL: clpeak_imad_pat_i16_x2:
7673; GFX11-GISEL:       ; %bb.0: ; %entry
7674; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7675; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
7676; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
7677; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7678; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7679; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7680; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7681; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7682; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7683; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7684; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7685; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7686; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7687; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7688; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7689; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7690; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7691; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
7692; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7693; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
7694; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7695; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7696; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
7697; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
7698;
7699; GFX1200-SDAG-LABEL: clpeak_imad_pat_i16_x2:
7700; GFX1200-SDAG:       ; %bb.0: ; %entry
7701; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
7702; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
7703; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
7704; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
7705; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
7706; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7707; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7708; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7709; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7710; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7711; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7712; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7713; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7714; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7715; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7716; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7717; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7718; GFX1200-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
7719; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
7720;
7721; GFX1200-GISEL-LABEL: clpeak_imad_pat_i16_x2:
7722; GFX1200-GISEL:       ; %bb.0: ; %entry
7723; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
7724; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
7725; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
7726; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
7727; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
7728; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
7729; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
7730; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7731; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7732; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7733; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7734; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7735; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7736; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7737; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7738; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7739; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7740; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7741; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7742; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7743; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7744; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
7745; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7746; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
7747; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7748; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7749; GFX1200-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
7750; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
7751entry:
7752  %conv69 = add i16 %x, 1
7753  %add = mul i16 %conv69, %y
7754  %conv470 = add i16 %y, 1
7755  %add8 = mul i16 %conv470, %add
7756  %conv1071 = add i16 %add, 1
7757  %add14 = mul i16 %conv1071, %add8
7758  %conv1672 = add i16 %add8, 1
7759  %add20 = mul i16 %conv1672, %add14
7760  %conv2273 = add i16 %add14, 1
7761  %add26 = mul i16 %conv2273, %add20
7762  %conv2874 = add i16 %add20, 1
7763  %add32 = mul i16 %conv2874, %add26
7764  %conv3475 = add i16 %add26, 1
7765  %add38 = mul i16 %conv3475, %add32
7766  %conv4076 = add i16 %add32, 1
7767  %add44 = mul i16 %add38, %conv4076
7768  ret i16 %add44
7769}
7770
; Scalar i16 multiply-add chain with zeroext arguments and a zeroext return
; value. The IR body is eight dependent `mul i16` instructions, each taking
; one operand produced by an `add i16 <value>, 1`.
;
; What the autogenerated checks below record for this function
; - The SDAG checks for gfx8 and newer contain eight 16-bit mads and no
;   separate add or multiply (v_mad_u16, spelled v_mad_legacy_u16 under the
;   GFX9 prefixes).
; - The GISEL checks for the same targets contain a mix of 16-bit adds,
;   v_mul_lo_u16 and mads whose addend is the constant 1.
; - The GFX67 checks use the 24-bit forms v_mul_u32_u24 and v_mad_u32_u24,
;   with v_and_b32 0xffff masks between steps.
; - The GFX10, GFX11 and GFX1200 checks end with v_and_b32 0xffff before the
;   return, while the GFX8 and GFX9 checks return without a final mask.
7771define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
7772; GFX67-LABEL: clpeak_umad_pat_i16_x2:
7773; GFX67:       ; %bb.0: ; %entry
7774; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7775; GFX67-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
7776; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7777; GFX67-NEXT:    v_mul_u32_u24_e32 v2, v0, v1
7778; GFX67-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
7779; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v3
7780; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
7781; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v3, v2
7782; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
7783; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7784; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
7785; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
7786; GFX67-NEXT:    v_mad_u32_u24 v2, v3, v2, 1
7787; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
7788; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
7789; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v2, v3
7790; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
7791; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7792; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
7793; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
7794; GFX67-NEXT:    v_mad_u32_u24 v2, v2, v3, 1
7795; GFX67-NEXT:    v_and_b32_e32 v2, 0xffff, v2
7796; GFX67-NEXT:    v_and_b32_e32 v3, 0xffff, v4
7797; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v2, v3
7798; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, 1
7799; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7800; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v4
7801; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
7802; GFX67-NEXT:    v_mad_u32_u24 v1, v2, v3, 1
7803; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7804; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
7805; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
7806; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7807; GFX67-NEXT:    s_setpc_b64 s[30:31]
7808;
7809; GFX8-SDAG-LABEL: clpeak_umad_pat_i16_x2:
7810; GFX8-SDAG:       ; %bb.0: ; %entry
7811; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7812; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7813; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7814; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7815; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7816; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7817; GFX8-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7818; GFX8-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7819; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7820; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
7821;
7822; GFX8-GISEL-LABEL: clpeak_umad_pat_i16_x2:
7823; GFX8-GISEL:       ; %bb.0: ; %entry
7824; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7825; GFX8-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
7826; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
7827; GFX8-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
7828; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
7829; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7830; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v0, v4
7831; GFX8-GISEL-NEXT:    v_mad_u16 v2, v3, v2, 1
7832; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v3, v2, v1
7833; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v4, 1
7834; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v0, v3
7835; GFX8-GISEL-NEXT:    v_mad_u16 v1, v2, v1, 1
7836; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v1, v4
7837; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v3, 1
7838; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
7839; GFX8-GISEL-NEXT:    v_mad_u16 v1, v1, v4, 1
7840; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
7841; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
7842;
7843; GFX9-SDAG-LABEL: clpeak_umad_pat_i16_x2:
7844; GFX9-SDAG:       ; %bb.0: ; %entry
7845; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7846; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7847; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
7848; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7849; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
7850; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7851; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v1, v0, v1, v0
7852; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v1, v0, v1
7853; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v0, v1, v0
7854; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
7855;
7856; GFX9-GISEL-LABEL: clpeak_umad_pat_i16_x2:
7857; GFX9-GISEL:       ; %bb.0: ; %entry
7858; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7859; GFX9-GISEL-NEXT:    v_add_u16_e32 v0, 1, v0
7860; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
7861; GFX9-GISEL-NEXT:    v_add_u16_e32 v3, 1, v1
7862; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v3, v2
7863; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v1, 1
7864; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v0, v4
7865; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v2, v3, v2, 1
7866; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v3, v2, v1
7867; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v4, 1
7868; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v0, v3
7869; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v2, v1, 1
7870; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v1, v4
7871; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v3, 1
7872; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
7873; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v1, v1, v4, 1
7874; GFX9-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v0, v1
7875; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
7876;
7877; GFX10-SDAG-LABEL: clpeak_umad_pat_i16_x2:
7878; GFX10-SDAG:       ; %bb.0: ; %entry
7879; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7880; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7881; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7882; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7883; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7884; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7885; GFX10-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7886; GFX10-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7887; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7888; GFX10-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7889; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
7890;
7891; GFX10-GISEL-LABEL: clpeak_umad_pat_i16_x2:
7892; GFX10-GISEL:       ; %bb.0: ; %entry
7893; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7894; GFX10-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
7895; GFX10-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
7896; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7897; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7898; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7899; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7900; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7901; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7902; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7903; GFX10-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7904; GFX10-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7905; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7906; GFX10-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7907; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7908; GFX10-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
7909; GFX10-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7910; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7911; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
7912;
7913; GFX11-SDAG-LABEL: clpeak_umad_pat_i16_x2:
7914; GFX11-SDAG:       ; %bb.0: ; %entry
7915; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7916; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7917; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7918; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7919; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7920; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7921; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7922; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7923; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7924; GFX11-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7925; GFX11-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7926; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7927; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7928; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7929; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
7930;
7931; GFX11-GISEL-LABEL: clpeak_umad_pat_i16_x2:
7932; GFX11-GISEL:       ; %bb.0: ; %entry
7933; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7934; GFX11-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
7935; GFX11-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
7936; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7937; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7938; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7939; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7940; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7941; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7942; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7943; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7944; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7945; GFX11-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7946; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7947; GFX11-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7948; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7949; GFX11-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7950; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
7951; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7952; GFX11-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
7953; GFX11-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
7954; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
7955; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7956; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
7957;
7958; GFX1200-SDAG-LABEL: clpeak_umad_pat_i16_x2:
7959; GFX1200-SDAG:       ; %bb.0: ; %entry
7960; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
7961; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
7962; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
7963; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
7964; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
7965; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7966; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7967; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7968; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7969; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7970; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7971; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7972; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7973; GFX1200-SDAG-NEXT:    v_mad_u16 v1, v0, v1, v0
7974; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v1, v0, v1
7975; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
7976; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v0
7977; GFX1200-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
7978; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
7979;
7980; GFX1200-GISEL-LABEL: clpeak_umad_pat_i16_x2:
7981; GFX1200-GISEL:       ; %bb.0: ; %entry
7982; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
7983; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
7984; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
7985; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
7986; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
7987; GFX1200-GISEL-NEXT:    v_add_nc_u16 v0, v0, 1
7988; GFX1200-GISEL-NEXT:    v_add_nc_u16 v2, v1, 1
7989; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7990; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7991; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7992; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7993; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7994; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
7995; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
7996; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
7997; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
7998; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v2, v3, 1
7999; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8000; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v3, v0, v1
8001; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, 1
8002; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v1, v2, v3
8003; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
8004; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
8005; GFX1200-GISEL-NEXT:    v_mad_u16 v1, v2, v3, 1
8006; GFX1200-GISEL-NEXT:    v_mul_lo_u16 v0, v0, v1
8007; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8008; GFX1200-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8009; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
; Shape of the chain below. The first two products are seeded from the
; arguments, (x + 1) * y and then (y + 1) * that result. Every later product
; multiplies the previous product by (the product two steps back + 1). The
; final mul uses the same two inputs with the operand order swapped.
8010entry:
8011  %conv69 = add i16 %x, 1
8012  %add = mul i16 %conv69, %y
8013  %conv470 = add i16 %y, 1
8014  %add8 = mul i16 %conv470, %add
8015  %conv1071 = add i16 %add, 1
8016  %add14 = mul i16 %conv1071, %add8
8017  %conv1672 = add i16 %add8, 1
8018  %add20 = mul i16 %conv1672, %add14
8019  %conv2273 = add i16 %add14, 1
8020  %add26 = mul i16 %conv2273, %add20
8021  %conv2874 = add i16 %add20, 1
8022  %add32 = mul i16 %conv2874, %add26
8023  %conv3475 = add i16 %add26, 1
8024  %add38 = mul i16 %conv3475, %add32
8025  %conv4076 = add i16 %add32, 1
8026  %add44 = mul i16 %add38, %conv4076
8027  ret i16 %add44
8028}
8029
8030define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
8031; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
8032; GFX67-SDAG:       ; %bb.0: ; %entry
8033; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8034; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
8035; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8036; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
8037; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8038; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8039; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8040; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
8041; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8042; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
8043; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8044; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
8045; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v4, v2, 1
8046; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
8047; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v2
8048; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v5, v3, 1
8049; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8050; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v3
8051; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8052; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v0, v4, v2
8053; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8054; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v1, v5, v3
8055; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8056; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v2, v0
8057; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
8058; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v1
8059; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8060; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v5, 1
8061; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8062; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8063; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8064; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v2, v4, v0
8065; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8066; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v3, v5, v1
8067; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8068; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
8069; GFX67-SDAG-NEXT:    v_mad_u32_u24 v4, v2, v4, 1
8070; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
8071; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8072; GFX67-SDAG-NEXT:    v_mad_u32_u24 v5, v3, v5, 1
8073; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
8074; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v6
8075; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
8076; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v5
8077; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
8078; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v7
8079; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
8080; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8081; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8082; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8083; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8084; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
8085; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
8086; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
8087;
8088; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
8089; GFX67-GISEL:       ; %bb.0: ; %entry
8090; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8091; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
8092; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
8093; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8094; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8095; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8096; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8097; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
8098; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8099; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8100; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8101; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
8102; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
8103; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8104; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8105; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
8106; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
8107; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
8108; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8109; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
8110; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v5, v3, 1
8111; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
8112; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v4, v2, 1
8113; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v3
8114; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v2
8115; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8116; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8117; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
8118; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8119; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8120; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8121; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v1, v5, v3
8122; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v0, v4, v2
8123; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8124; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8125; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
8126; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
8127; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
8128; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8129; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v1
8130; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v5, 1
8131; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v0
8132; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
8133; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8134; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8135; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8136; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8137; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
8138; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8139; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8140; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8141; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v3, v5, v1
8142; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v2, v4, v0
8143; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8144; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8145; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
8146; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
8147; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v3, v5, 1
8148; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
8149; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v2, v4, 1
8150; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
8151; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8152; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
8153; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8154; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8155; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
8156; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
8157; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8158; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
8159; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8160; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8161; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
8162; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
8163; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
8164; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
8165; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
8166; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
8167; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
8168; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
8169; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
8170; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8171; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8172; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8173; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
8174; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
8175; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
8176;
8177; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
8178; GFX8-SDAG:       ; %bb.0: ; %entry
8179; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8180; GFX8-SDAG-NEXT:    v_mov_b32_e32 v3, 1
8181; GFX8-SDAG-NEXT:    v_add_u16_e32 v2, 1, v0
8182; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
8183; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
8184; GFX8-SDAG-NEXT:    v_mad_u16 v4, v0, v3, v0
8185; GFX8-SDAG-NEXT:    v_mad_u16 v5, v2, v1, v2
8186; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v4, v3
8187; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v3, 1
8188; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v5, v1
8189; GFX8-SDAG-NEXT:    v_mad_u16 v1, v2, v1, 1
8190; GFX8-SDAG-NEXT:    v_mad_u16 v2, v4, v0, v0
8191; GFX8-SDAG-NEXT:    v_mad_u16 v3, v5, v1, v1
8192; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v2, v2, v4
8193; GFX8-SDAG-NEXT:    v_mad_u16 v0, v4, v0, 1
8194; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v3, v3, v5
8195; GFX8-SDAG-NEXT:    v_mad_u16 v1, v5, v1, 1
8196; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v2, v0
8197; GFX8-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v0
8198; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v3, v1
8199; GFX8-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v1
8200; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
8201; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
8202; GFX8-SDAG-NEXT:    v_mad_u16 v2, v0, v4, v0
8203; GFX8-SDAG-NEXT:    v_mad_u16 v3, v1, v5, v1
8204; GFX8-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
8205; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
8206; GFX8-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
8207; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
8208; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
8209;
8210; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
8211; GFX8-GISEL:       ; %bb.0: ; %entry
8212; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8213; GFX8-GISEL-NEXT:    v_mov_b32_e32 v3, 1
8214; GFX8-GISEL-NEXT:    v_add_u16_e32 v2, 1, v0
8215; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
8216; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
8217; GFX8-GISEL-NEXT:    v_mad_u16 v4, v2, v1, v2
8218; GFX8-GISEL-NEXT:    v_mad_u16 v5, v0, v3, v0
8219; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v4, v1
8220; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
8221; GFX8-GISEL-NEXT:    v_mad_u16 v1, v2, v1, 1
8222; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v3, 1
8223; GFX8-GISEL-NEXT:    v_mad_u16 v2, v4, v1, v1
8224; GFX8-GISEL-NEXT:    v_mad_u16 v3, v5, v0, v0
8225; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v2, v4
8226; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v3, v3, v5
8227; GFX8-GISEL-NEXT:    v_mad_u16 v1, v4, v1, 1
8228; GFX8-GISEL-NEXT:    v_mad_u16 v0, v5, v0, 1
8229; GFX8-GISEL-NEXT:    v_mad_u16 v4, v2, v1, v1
8230; GFX8-GISEL-NEXT:    v_mad_u16 v5, v3, v0, v0
8231; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v4, v2
8232; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v7, v5, v3
8233; GFX8-GISEL-NEXT:    v_mad_u16 v1, v2, v1, 1
8234; GFX8-GISEL-NEXT:    v_mad_u16 v0, v3, v0, 1
8235; GFX8-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
8236; GFX8-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
8237; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v6, v1
8238; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v7, v0
8239; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v1, v2
8240; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
8241; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
8242; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
8243;
8244; GFX9-LABEL: clpeak_imad_pat_v2i16_x2:
8245; GFX9:       ; %bb.0: ; %entry
8246; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8247; GFX9-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8248; GFX9-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8249; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v2, v1
8250; GFX9-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8251; GFX9-NEXT:    v_pk_mad_u16 v1, v2, v0, v0
8252; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
8253; GFX9-NEXT:    v_pk_mad_u16 v0, v2, v0, 1 op_sel_hi:[1,1,0]
8254; GFX9-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8255; GFX9-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8256; GFX9-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8257; GFX9-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8258; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8259; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8260; GFX9-NEXT:    s_setpc_b64 s[30:31]
8261;
8262; GFX10-LABEL: clpeak_imad_pat_v2i16_x2:
8263; GFX10:       ; %bb.0: ; %entry
8264; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8265; GFX10-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8266; GFX10-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8267; GFX10-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8268; GFX10-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8269; GFX10-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8270; GFX10-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8271; GFX10-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8272; GFX10-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8273; GFX10-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8274; GFX10-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8275; GFX10-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8276; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8277; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8278; GFX10-NEXT:    s_setpc_b64 s[30:31]
8279;
8280; GFX11-LABEL: clpeak_imad_pat_v2i16_x2:
8281; GFX11:       ; %bb.0: ; %entry
8282; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8283; GFX11-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8284; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8285; GFX11-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8286; GFX11-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8287; GFX11-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8288; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8289; GFX11-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8290; GFX11-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8291; GFX11-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8292; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8293; GFX11-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8294; GFX11-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8295; GFX11-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8296; GFX11-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8297; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8298; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8299; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8300; GFX11-NEXT:    s_setpc_b64 s[30:31]
8301;
8302; GFX1200-LABEL: clpeak_imad_pat_v2i16_x2:
8303; GFX1200:       ; %bb.0: ; %entry
8304; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
8305; GFX1200-NEXT:    s_wait_expcnt 0x0
8306; GFX1200-NEXT:    s_wait_samplecnt 0x0
8307; GFX1200-NEXT:    s_wait_bvhcnt 0x0
8308; GFX1200-NEXT:    s_wait_kmcnt 0x0
8309; GFX1200-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8310; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8311; GFX1200-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8312; GFX1200-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8313; GFX1200-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8314; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8315; GFX1200-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8316; GFX1200-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8317; GFX1200-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8318; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8319; GFX1200-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8320; GFX1200-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8321; GFX1200-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8322; GFX1200-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8323; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8324; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8325; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8326; GFX1200-NEXT:    s_setpc_b64 s[30:31]
8327entry:
8328  %y38 = add <2 x i16> %x, <i16 1, i16 1>
8329  %add = mul <2 x i16> %y38, %y
8330  %mul139 = add <2 x i16> %add, %y38
8331  %add2 = mul <2 x i16> %mul139, %y
8332  %add240 = add <2 x i16> %add, <i16 1, i16 1>
8333  %add4 = mul <2 x i16> %add2, %add240
8334  %mul541 = add <2 x i16> %add4, %add240
8335  %add6 = mul <2 x i16> %mul541, %add2
8336  %add642 = add <2 x i16> %add4, <i16 1, i16 1>
8337  %add8 = mul <2 x i16> %add6, %add642
8338  %mul943 = add <2 x i16> %add8, %add642
8339  %add10 = mul <2 x i16> %mul943, %add6
8340  %add1044 = add <2 x i16> %add8, <i16 1, i16 1>
8341  %add1246 = add <2 x i16> %add10, <i16 1, i16 1>
8342  %mul1345 = mul <2 x i16> %add10, %add1044
8343  %add14 = mul <2 x i16> %mul1345, %add1246
8344  ret <2 x i16> %add14
8345}
8346
; Packed <2 x i16> multiply/add chain in which most intermediate products have
; two users (typically an add with a variable addend plus an add of splat 1).
; The IR body is identical to that of clpeak_imad_pat_v2i16_x2 above.
; The recorded output shows v_mad_u32_u24 on gfx6/gfx7, v_mad_u16 on each
; 16-bit half on gfx8, and v_pk_mad_u16 on gfx9 and newer.
; NOTE(review): the "clpeak"/"umad" naming presumably refers to the clpeak
; benchmark's unsigned mad kernel -- confirm against the originating commit.
8347define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
8348; GFX67-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
8349; GFX67-SDAG:       ; %bb.0: ; %entry
8350; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8351; GFX67-SDAG-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
8352; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8353; GFX67-SDAG-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
8354; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8355; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8356; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8357; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
8358; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8359; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
8360; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8361; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
8362; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v4, v2, 1
8363; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
8364; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v2
8365; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v5, v3, 1
8366; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8367; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v3
8368; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8369; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v0, v4, v2
8370; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8371; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v1, v5, v3
8372; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8373; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v2, v0
8374; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
8375; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v1
8376; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8377; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v5, 1
8378; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8379; GFX67-SDAG-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8380; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8381; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v2, v4, v0
8382; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8383; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v3, v5, v1
8384; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8385; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
8386; GFX67-SDAG-NEXT:    v_mad_u32_u24 v4, v2, v4, 1
8387; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
8388; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8389; GFX67-SDAG-NEXT:    v_mad_u32_u24 v5, v3, v5, 1
8390; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
8391; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v6
8392; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
8393; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v5
8394; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v3, v3, v4
8395; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v7
8396; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v2, v4, v2
8397; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8398; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8399; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8400; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8401; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v3, v0
8402; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v1, v2, v1
8403; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
8404;
8405; GFX67-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
8406; GFX67-GISEL:       ; %bb.0: ; %entry
8407; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8408; GFX67-GISEL-NEXT:    v_add_i32_e32 v1, vcc, 1, v1
8409; GFX67-GISEL-NEXT:    v_add_i32_e32 v0, vcc, 1, v0
8410; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8411; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8412; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8413; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8414; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
8415; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8416; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8417; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8418; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v5, v3, v1
8419; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v4, v2, v0
8420; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8421; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8422; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
8423; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
8424; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
8425; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8426; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
8427; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v5, v3, 1
8428; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
8429; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v4, v2, 1
8430; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v3
8431; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v2
8432; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8433; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8434; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
8435; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8436; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8437; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8438; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v1, v5, v3
8439; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v0, v4, v2
8440; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8441; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8442; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
8443; GFX67-GISEL-NEXT:    v_or_b32_e32 v2, v2, v3
8444; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
8445; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8446; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v3, v3, v1
8447; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v5, 1
8448; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v0
8449; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v4, 1
8450; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v1
8451; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8452; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8453; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8454; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
8455; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8456; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8457; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8458; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v3, v5, v1
8459; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v2, v4, v0
8460; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8461; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8462; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
8463; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
8464; GFX67-GISEL-NEXT:    v_mad_u32_u24 v5, v3, v5, 1
8465; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
8466; GFX67-GISEL-NEXT:    v_mad_u32_u24 v4, v2, v4, 1
8467; GFX67-GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
8468; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8469; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v1, v3
8470; GFX67-GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8471; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
8472; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, 1
8473; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v6, v0, v2
8474; GFX67-GISEL-NEXT:    v_or_b32_e32 v4, v4, v5
8475; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, 1
8476; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8477; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8478; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
8479; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v6
8480; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v4
8481; GFX67-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
8482; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v4
8483; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v2, v2, v3
8484; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v7
8485; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v3, v1
8486; GFX67-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
8487; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8488; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8489; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8490; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v2, v0
8491; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v1, v1, v3
8492; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
8493;
8494; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
8495; GFX8-SDAG:       ; %bb.0: ; %entry
8496; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8497; GFX8-SDAG-NEXT:    v_mov_b32_e32 v3, 1
8498; GFX8-SDAG-NEXT:    v_add_u16_e32 v2, 1, v0
8499; GFX8-SDAG-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
8500; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
8501; GFX8-SDAG-NEXT:    v_mad_u16 v4, v0, v3, v0
8502; GFX8-SDAG-NEXT:    v_mad_u16 v5, v2, v1, v2
8503; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v4, v3
8504; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v3, 1
8505; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v5, v1
8506; GFX8-SDAG-NEXT:    v_mad_u16 v1, v2, v1, 1
8507; GFX8-SDAG-NEXT:    v_mad_u16 v2, v4, v0, v0
8508; GFX8-SDAG-NEXT:    v_mad_u16 v3, v5, v1, v1
8509; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v2, v2, v4
8510; GFX8-SDAG-NEXT:    v_mad_u16 v0, v4, v0, 1
8511; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v3, v3, v5
8512; GFX8-SDAG-NEXT:    v_mad_u16 v1, v5, v1, 1
8513; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v4, v2, v0
8514; GFX8-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v0
8515; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v5, v3, v1
8516; GFX8-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v1
8517; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v0, v0, v2
8518; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v1, v1, v3
8519; GFX8-SDAG-NEXT:    v_mad_u16 v2, v0, v4, v0
8520; GFX8-SDAG-NEXT:    v_mad_u16 v3, v1, v5, v1
8521; GFX8-SDAG-NEXT:    v_mad_u16 v0, v2, v0, v2
8522; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
8523; GFX8-SDAG-NEXT:    v_mad_u16 v1, v3, v1, v3
8524; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
8525; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
8526;
8527; GFX8-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
8528; GFX8-GISEL:       ; %bb.0: ; %entry
8529; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8530; GFX8-GISEL-NEXT:    v_mov_b32_e32 v3, 1
8531; GFX8-GISEL-NEXT:    v_add_u16_e32 v2, 1, v0
8532; GFX8-GISEL-NEXT:    v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
8533; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
8534; GFX8-GISEL-NEXT:    v_mad_u16 v4, v2, v1, v2
8535; GFX8-GISEL-NEXT:    v_mad_u16 v5, v0, v3, v0
8536; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v4, v4, v1
8537; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
8538; GFX8-GISEL-NEXT:    v_mad_u16 v1, v2, v1, 1
8539; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v3, 1
8540; GFX8-GISEL-NEXT:    v_mad_u16 v2, v4, v1, v1
8541; GFX8-GISEL-NEXT:    v_mad_u16 v3, v5, v0, v0
8542; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v2, v2, v4
8543; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v3, v3, v5
8544; GFX8-GISEL-NEXT:    v_mad_u16 v1, v4, v1, 1
8545; GFX8-GISEL-NEXT:    v_mad_u16 v0, v5, v0, 1
8546; GFX8-GISEL-NEXT:    v_mad_u16 v4, v2, v1, v1
8547; GFX8-GISEL-NEXT:    v_mad_u16 v5, v3, v0, v0
8548; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v4, v2
8549; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v7, v5, v3
8550; GFX8-GISEL-NEXT:    v_mad_u16 v1, v2, v1, 1
8551; GFX8-GISEL-NEXT:    v_mad_u16 v0, v3, v0, 1
8552; GFX8-GISEL-NEXT:    v_mad_u16 v2, v4, v2, 1
8553; GFX8-GISEL-NEXT:    v_mad_u16 v3, v5, v3, 1
8554; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v6, v1
8555; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v0, v7, v0
8556; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v1, v1, v2
8557; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
8558; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v1, v0
8559; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
8560;
8561; GFX9-LABEL: clpeak_umad_pat_v2i16_x2:
8562; GFX9:       ; %bb.0: ; %entry
8563; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8564; GFX9-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8565; GFX9-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8566; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v2, v1
8567; GFX9-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8568; GFX9-NEXT:    v_pk_mad_u16 v1, v2, v0, v0
8569; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
8570; GFX9-NEXT:    v_pk_mad_u16 v0, v2, v0, 1 op_sel_hi:[1,1,0]
8571; GFX9-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8572; GFX9-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8573; GFX9-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8574; GFX9-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8575; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8576; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8577; GFX9-NEXT:    s_setpc_b64 s[30:31]
8578;
8579; GFX10-LABEL: clpeak_umad_pat_v2i16_x2:
8580; GFX10:       ; %bb.0: ; %entry
8581; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8582; GFX10-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8583; GFX10-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8584; GFX10-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8585; GFX10-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8586; GFX10-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8587; GFX10-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8588; GFX10-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8589; GFX10-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8590; GFX10-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8591; GFX10-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8592; GFX10-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8593; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8594; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8595; GFX10-NEXT:    s_setpc_b64 s[30:31]
8596;
8597; GFX11-LABEL: clpeak_umad_pat_v2i16_x2:
8598; GFX11:       ; %bb.0: ; %entry
8599; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8600; GFX11-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8601; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8602; GFX11-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8603; GFX11-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8604; GFX11-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8605; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8606; GFX11-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8607; GFX11-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8608; GFX11-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8609; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8610; GFX11-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8611; GFX11-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8612; GFX11-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8613; GFX11-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8614; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8615; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8616; GFX11-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8617; GFX11-NEXT:    s_setpc_b64 s[30:31]
8618;
8619; GFX1200-LABEL: clpeak_umad_pat_v2i16_x2:
8620; GFX1200:       ; %bb.0: ; %entry
8621; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
8622; GFX1200-NEXT:    s_wait_expcnt 0x0
8623; GFX1200-NEXT:    s_wait_samplecnt 0x0
8624; GFX1200-NEXT:    s_wait_bvhcnt 0x0
8625; GFX1200-NEXT:    s_wait_kmcnt 0x0
8626; GFX1200-NEXT:    v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
8627; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8628; GFX1200-NEXT:    v_pk_mad_u16 v2, v0, v1, v0
8629; GFX1200-NEXT:    v_pk_mad_u16 v0, v0, v1, 1 op_sel_hi:[1,1,0]
8630; GFX1200-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8631; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8632; GFX1200-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8633; GFX1200-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8634; GFX1200-NEXT:    v_pk_mul_lo_u16 v1, v2, v1
8635; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8636; GFX1200-NEXT:    v_pk_mad_u16 v2, v1, v0, v0
8637; GFX1200-NEXT:    v_pk_mad_u16 v0, v1, v0, 1 op_sel_hi:[1,1,0]
8638; GFX1200-NEXT:    v_pk_mul_lo_u16 v3, v2, v1
8639; GFX1200-NEXT:    v_pk_mad_u16 v1, v2, v1, 1 op_sel_hi:[1,1,0]
8640; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8641; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v3, v0
8642; GFX1200-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
8643; GFX1200-NEXT:    s_setpc_b64 s[30:31]
8644entry:
  ; Round 1: %add = (%x + 1) * %y feeds both %mul139 (+ %y38) and %add240 (+ 1).
8645  %y38 = add <2 x i16> %x, <i16 1, i16 1>
8646  %add = mul <2 x i16> %y38, %y
8647  %mul139 = add <2 x i16> %add, %y38
8648  %add2 = mul <2 x i16> %mul139, %y
8649  %add240 = add <2 x i16> %add, <i16 1, i16 1>
  ; Round 2: %add4 feeds both %mul541 (+ %add240) and %add642 (+ 1).
8650  %add4 = mul <2 x i16> %add2, %add240
8651  %mul541 = add <2 x i16> %add4, %add240
8652  %add6 = mul <2 x i16> %mul541, %add2
8653  %add642 = add <2 x i16> %add4, <i16 1, i16 1>
  ; Round 3: %add8 feeds both %mul943 (+ %add642) and %add1044 (+ 1).
8654  %add8 = mul <2 x i16> %add6, %add642
8655  %mul943 = add <2 x i16> %add8, %add642
8656  %add10 = mul <2 x i16> %mul943, %add6
8657  %add1044 = add <2 x i16> %add8, <i16 1, i16 1>
  ; Tail: multiply %add10 by (%add8 + 1) and then by (%add10 + 1).
8658  %add1246 = add <2 x i16> %add10, <i16 1, i16 1>
8659  %mul1345 = mul <2 x i16> %add10, %add1044
8660  %add14 = mul <2 x i16> %mul1345, %add1246
8661  ret <2 x i16> %add14
8662}
8663
8664; Multiple uses of mul with different variable addend
; i32 case: %mul is shared by two adds with different variable addends
; (%z0 and %z1). In the recorded output every checked target keeps a single
; v_mul_lo_u32 followed by two separate adds; no mad instruction appears.
8665define <2 x i32> @multi_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z0, i32 %z1) {
8666; GFX67-LABEL: multi_use_mul_mad_i32_var:
8667; GFX67:       ; %bb.0: ; %entry
8668; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8669; GFX67-NEXT:    v_mul_lo_u32 v1, v0, v1
8670; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v1, v2
8671; GFX67-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
8672; GFX67-NEXT:    s_setpc_b64 s[30:31]
8673;
8674; GFX8-LABEL: multi_use_mul_mad_i32_var:
8675; GFX8:       ; %bb.0: ; %entry
8676; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8677; GFX8-NEXT:    v_mul_lo_u32 v1, v0, v1
8678; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v1, v2
8679; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
8680; GFX8-NEXT:    s_setpc_b64 s[30:31]
8681;
8682; GFX9-LABEL: multi_use_mul_mad_i32_var:
8683; GFX9:       ; %bb.0: ; %entry
8684; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8685; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v1
8686; GFX9-NEXT:    v_add_u32_e32 v0, v1, v2
8687; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
8688; GFX9-NEXT:    s_setpc_b64 s[30:31]
8689;
8690; GFX10-LABEL: multi_use_mul_mad_i32_var:
8691; GFX10:       ; %bb.0: ; %entry
8692; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8693; GFX10-NEXT:    v_mul_lo_u32 v1, v0, v1
8694; GFX10-NEXT:    v_add_nc_u32_e32 v0, v1, v2
8695; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v3
8696; GFX10-NEXT:    s_setpc_b64 s[30:31]
8697;
8698; GFX11-LABEL: multi_use_mul_mad_i32_var:
8699; GFX11:       ; %bb.0: ; %entry
8700; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8701; GFX11-NEXT:    v_mul_lo_u32 v1, v0, v1
8702; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8703; GFX11-NEXT:    v_add_nc_u32_e32 v0, v1, v2
8704; GFX11-NEXT:    v_add_nc_u32_e32 v1, v1, v3
8705; GFX11-NEXT:    s_setpc_b64 s[30:31]
8706;
8707; GFX1200-LABEL: multi_use_mul_mad_i32_var:
8708; GFX1200:       ; %bb.0: ; %entry
8709; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
8710; GFX1200-NEXT:    s_wait_expcnt 0x0
8711; GFX1200-NEXT:    s_wait_samplecnt 0x0
8712; GFX1200-NEXT:    s_wait_bvhcnt 0x0
8713; GFX1200-NEXT:    s_wait_kmcnt 0x0
8714; GFX1200-NEXT:    v_mul_lo_u32 v1, v0, v1
8715; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8716; GFX1200-NEXT:    v_add_nc_u32_e32 v0, v1, v2
8717; GFX1200-NEXT:    v_add_nc_u32_e32 v1, v1, v3
8718; GFX1200-NEXT:    s_setpc_b64 s[30:31]
8719entry:
  ; One product, two different addends.
8720  %mul = mul i32 %x, %y
8721  %add0 = add i32 %mul, %z0
8722  %add1 = add i32 %mul, %z1
  ; Return both sums as elements 0 and 1 of a <2 x i32>.
8723  %insert.0 = insertelement <2 x i32> poison, i32 %add0, i32 0
8724  %insert.1 = insertelement <2 x i32> %insert.0, i32 %add1, i32 1
8725  ret <2 x i32> %insert.1
8726}
8727
; i16 case: %mul is shared by two adds with different variable addends
; (%z0 and %z1). Unlike the i32 variant above, the recorded output contains
; two mad instructions and no standalone multiply: v_mad_u32_u24 on gfx6/gfx7
; (after masking %x and %y to 16 bits), v_mad_legacy_u16 on gfx9, and
; v_mad_u16 on the remaining targets. The two 16-bit results are then packed
; into one 32-bit register.
8728define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
8729; GFX67-LABEL: multi_use_mul_mad_i16_var:
8730; GFX67:       ; %bb.0: ; %entry
8731; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8732; GFX67-NEXT:    v_and_b32_e32 v4, 0xffff, v0
8733; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8734; GFX67-NEXT:    v_mad_u32_u24 v0, v4, v1, v2
8735; GFX67-NEXT:    v_mad_u32_u24 v1, v4, v1, v3
8736; GFX67-NEXT:    s_setpc_b64 s[30:31]
8737;
8738; GFX8-SDAG-LABEL: multi_use_mul_mad_i16_var:
8739; GFX8-SDAG:       ; %bb.0: ; %entry
8740; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8741; GFX8-SDAG-NEXT:    v_mad_u16 v3, v0, v1, v3
8742; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v2
8743; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
8744; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v1
8745; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
8746;
8747; GFX8-GISEL-LABEL: multi_use_mul_mad_i16_var:
8748; GFX8-GISEL:       ; %bb.0: ; %entry
8749; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8750; GFX8-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
8751; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
8752; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
8753; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
8754; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
8755;
8756; GFX9-SDAG-LABEL: multi_use_mul_mad_i16_var:
8757; GFX9-SDAG:       ; %bb.0: ; %entry
8758; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8759; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v2, v0, v1, v2
8760; GFX9-SDAG-NEXT:    v_mad_legacy_u16 v0, v0, v1, v3
8761; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
8762; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v2, s4
8763; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
8764;
8765; GFX9-GISEL-LABEL: multi_use_mul_mad_i16_var:
8766; GFX9-GISEL:       ; %bb.0: ; %entry
8767; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8768; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v2, v0, v1, v2
8769; GFX9-GISEL-NEXT:    v_mad_legacy_u16 v0, v0, v1, v3
8770; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v2
8771; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
8772;
8773; GFX10-SDAG-LABEL: multi_use_mul_mad_i16_var:
8774; GFX10-SDAG:       ; %bb.0: ; %entry
8775; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8776; GFX10-SDAG-NEXT:    v_mad_u16 v2, v0, v1, v2
8777; GFX10-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v3
8778; GFX10-SDAG-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
8779; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
8780;
8781; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var:
8782; GFX10-GISEL:       ; %bb.0: ; %entry
8783; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8784; GFX10-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
8785; GFX10-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
8786; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
8787; GFX10-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
8788; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
8789;
8790; GFX11-SDAG-LABEL: multi_use_mul_mad_i16_var:
8791; GFX11-SDAG:       ; %bb.0: ; %entry
8792; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8793; GFX11-SDAG-NEXT:    v_mad_u16 v2, v0, v1, v2
8794; GFX11-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v3
8795; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8796; GFX11-SDAG-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
8797; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
8798;
8799; GFX11-GISEL-LABEL: multi_use_mul_mad_i16_var:
8800; GFX11-GISEL:       ; %bb.0: ; %entry
8801; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8802; GFX11-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
8803; GFX11-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
8804; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8805; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
8806; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
8807; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
8808;
8809; GFX1200-SDAG-LABEL: multi_use_mul_mad_i16_var:
8810; GFX1200-SDAG:       ; %bb.0: ; %entry
8811; GFX1200-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
8812; GFX1200-SDAG-NEXT:    s_wait_expcnt 0x0
8813; GFX1200-SDAG-NEXT:    s_wait_samplecnt 0x0
8814; GFX1200-SDAG-NEXT:    s_wait_bvhcnt 0x0
8815; GFX1200-SDAG-NEXT:    s_wait_kmcnt 0x0
8816; GFX1200-SDAG-NEXT:    v_mad_u16 v2, v0, v1, v2
8817; GFX1200-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v3
8818; GFX1200-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8819; GFX1200-SDAG-NEXT:    v_perm_b32 v0, v0, v2, 0x5040100
8820; GFX1200-SDAG-NEXT:    s_setpc_b64 s[30:31]
8821;
8822; GFX1200-GISEL-LABEL: multi_use_mul_mad_i16_var:
8823; GFX1200-GISEL:       ; %bb.0: ; %entry
8824; GFX1200-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
8825; GFX1200-GISEL-NEXT:    s_wait_expcnt 0x0
8826; GFX1200-GISEL-NEXT:    s_wait_samplecnt 0x0
8827; GFX1200-GISEL-NEXT:    s_wait_bvhcnt 0x0
8828; GFX1200-GISEL-NEXT:    s_wait_kmcnt 0x0
8829; GFX1200-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
8830; GFX1200-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
8831; GFX1200-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
8832; GFX1200-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v2
8833; GFX1200-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
8834; GFX1200-GISEL-NEXT:    s_setpc_b64 s[30:31]
8835entry:
  ; One product, two different addends.
8836  %mul = mul i16 %x, %y
8837  %add0 = add i16 %mul, %z0
8838  %add1 = add i16 %mul, %z1
  ; Return both sums as elements 0 and 1 of a <2 x i16>.
8839  %insert.0 = insertelement <2 x i16> poison, i16 %add0, i16 0
8840  %insert.1 = insertelement <2 x i16> %insert.0, i16 %add1, i16 1
8841  ret <2 x i16> %insert.1
8842}
8843
; %mul has a second use that is not an add: besides feeding %add0 it is
; stored through %ptr (addrspace(3)). The recorded output keeps v_mul_lo_u32
; plus a separate add on every checked target, and writes the product with
; ds_write_b32 (ds_store_b32 on gfx11 and gfx1200).
8844define i32 @other_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z, ptr addrspace(3) %ptr) {
8845; GFX67-LABEL: other_use_mul_mad_i32_var:
8846; GFX67:       ; %bb.0: ; %entry
8847; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8848; GFX67-NEXT:    v_mul_lo_u32 v1, v0, v1
8849; GFX67-NEXT:    s_mov_b32 m0, -1
8850; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v1, v2
8851; GFX67-NEXT:    ds_write_b32 v3, v1
8852; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
8853; GFX67-NEXT:    s_setpc_b64 s[30:31]
8854;
8855; GFX8-LABEL: other_use_mul_mad_i32_var:
8856; GFX8:       ; %bb.0: ; %entry
8857; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8858; GFX8-NEXT:    v_mul_lo_u32 v1, v0, v1
8859; GFX8-NEXT:    s_mov_b32 m0, -1
8860; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v1, v2
8861; GFX8-NEXT:    ds_write_b32 v3, v1
8862; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
8863; GFX8-NEXT:    s_setpc_b64 s[30:31]
8864;
8865; GFX9-LABEL: other_use_mul_mad_i32_var:
8866; GFX9:       ; %bb.0: ; %entry
8867; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8868; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v1
8869; GFX9-NEXT:    v_add_u32_e32 v0, v1, v2
8870; GFX9-NEXT:    ds_write_b32 v3, v1
8871; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
8872; GFX9-NEXT:    s_setpc_b64 s[30:31]
8873;
8874; GFX10-LABEL: other_use_mul_mad_i32_var:
8875; GFX10:       ; %bb.0: ; %entry
8876; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8877; GFX10-NEXT:    v_mul_lo_u32 v1, v0, v1
8878; GFX10-NEXT:    v_add_nc_u32_e32 v0, v1, v2
8879; GFX10-NEXT:    ds_write_b32 v3, v1
8880; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
8881; GFX10-NEXT:    s_setpc_b64 s[30:31]
8882;
8883; GFX11-LABEL: other_use_mul_mad_i32_var:
8884; GFX11:       ; %bb.0: ; %entry
8885; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8886; GFX11-NEXT:    v_mul_lo_u32 v1, v0, v1
8887; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8888; GFX11-NEXT:    v_add_nc_u32_e32 v0, v1, v2
8889; GFX11-NEXT:    ds_store_b32 v3, v1
8890; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
8891; GFX11-NEXT:    s_setpc_b64 s[30:31]
8892;
8893; GFX1200-LABEL: other_use_mul_mad_i32_var:
8894; GFX1200:       ; %bb.0: ; %entry
8895; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
8896; GFX1200-NEXT:    s_wait_expcnt 0x0
8897; GFX1200-NEXT:    s_wait_samplecnt 0x0
8898; GFX1200-NEXT:    s_wait_bvhcnt 0x0
8899; GFX1200-NEXT:    s_wait_kmcnt 0x0
8900; GFX1200-NEXT:    v_mul_lo_u32 v1, v0, v1
8901; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1)
8902; GFX1200-NEXT:    v_add_nc_u32_e32 v0, v1, v2
8903; GFX1200-NEXT:    ds_store_b32 v3, v1
8904; GFX1200-NEXT:    s_wait_dscnt 0x0
8905; GFX1200-NEXT:    s_setpc_b64 s[30:31]
8906entry:
8907  %mul = mul i32 %x, %y
8908  %add0 = add i32 %mul, %z
  ; Second use of %mul: the raw product (not the sum) is written to memory.
8909  store i32 %mul, ptr addrspace(3) %ptr
8910  ret i32 %add0
8911}
8912
; Scalar i16 case where the product has a user other than the add. %mul is
; added to %z to form the returned value and is also stored through %ptr, a
; pointer in address space 3. In the expectations below every checked target
; keeps a standalone multiply for the stored value and still uses a
; multiply-add instruction for the returned sum.
8913define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %ptr) {
8914; GFX67-LABEL: other_use_mul_mad_i16_var:
8915; GFX67:       ; %bb.0: ; %entry
8916; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8917; GFX67-NEXT:    v_and_b32_e32 v0, 0xffff, v0
8918; GFX67-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8919; GFX67-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
8920; GFX67-NEXT:    v_mad_u32_u24 v0, v0, v1, v2
8921; GFX67-NEXT:    s_mov_b32 m0, -1
8922; GFX67-NEXT:    ds_write_b16 v3, v4
8923; GFX67-NEXT:    s_waitcnt lgkmcnt(0)
8924; GFX67-NEXT:    s_setpc_b64 s[30:31]
8925;
8926; GFX8-LABEL: other_use_mul_mad_i16_var:
8927; GFX8:       ; %bb.0: ; %entry
8928; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8929; GFX8-NEXT:    v_mul_lo_u16_e32 v4, v0, v1
8930; GFX8-NEXT:    v_mad_u16 v0, v0, v1, v2
8931; GFX8-NEXT:    s_mov_b32 m0, -1
8932; GFX8-NEXT:    ds_write_b16 v3, v4
8933; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
8934; GFX8-NEXT:    s_setpc_b64 s[30:31]
8935;
8936; GFX9-LABEL: other_use_mul_mad_i16_var:
8937; GFX9:       ; %bb.0: ; %entry
8938; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8939; GFX9-NEXT:    v_mul_lo_u16_e32 v4, v0, v1
8940; GFX9-NEXT:    v_mad_legacy_u16 v0, v0, v1, v2
8941; GFX9-NEXT:    ds_write_b16 v3, v4
8942; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
8943; GFX9-NEXT:    s_setpc_b64 s[30:31]
8944;
8945; GFX10-LABEL: other_use_mul_mad_i16_var:
8946; GFX10:       ; %bb.0: ; %entry
8947; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8948; GFX10-NEXT:    v_mul_lo_u16 v4, v0, v1
8949; GFX10-NEXT:    v_mad_u16 v0, v0, v1, v2
8950; GFX10-NEXT:    ds_write_b16 v3, v4
8951; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
8952; GFX10-NEXT:    s_setpc_b64 s[30:31]
8953;
8954; GFX11-LABEL: other_use_mul_mad_i16_var:
8955; GFX11:       ; %bb.0: ; %entry
8956; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8957; GFX11-NEXT:    v_mul_lo_u16 v4, v0, v1
8958; GFX11-NEXT:    v_mad_u16 v0, v0, v1, v2
8959; GFX11-NEXT:    ds_store_b16 v3, v4
8960; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
8961; GFX11-NEXT:    s_setpc_b64 s[30:31]
8962;
8963; GFX1200-LABEL: other_use_mul_mad_i16_var:
8964; GFX1200:       ; %bb.0: ; %entry
8965; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
8966; GFX1200-NEXT:    s_wait_expcnt 0x0
8967; GFX1200-NEXT:    s_wait_samplecnt 0x0
8968; GFX1200-NEXT:    s_wait_bvhcnt 0x0
8969; GFX1200-NEXT:    s_wait_kmcnt 0x0
8970; GFX1200-NEXT:    v_mul_lo_u16 v4, v0, v1
8971; GFX1200-NEXT:    v_mad_u16 v0, v0, v1, v2
8972; GFX1200-NEXT:    ds_store_b16 v3, v4
8973; GFX1200-NEXT:    s_wait_dscnt 0x0
8974; GFX1200-NEXT:    s_setpc_b64 s[30:31]
8975entry:
8976  %mul = mul i16 %x, %y ; product has two users, the add and the store
8977  %add0 = add i16 %mul, %z
8978  store i16 %mul, ptr addrspace(3) %ptr ; the extra use that keeps the multiply alive
8979  ret i16 %add0
8980}
8981
; Packed <2 x i16> case where the product feeds two adds and nothing else.
; %mul is added to %z0 and to %z1, and the two sums are concatenated into the
; <4 x i16> return value. The expectations below contain only multiply-add
; instructions for the arithmetic (packed v_pk_mad_u16 on gfx9 and newer,
; per-lane v_mad_u16 or v_mad_u32_u24 on the older targets) and no standalone
; multiply.
8982define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z0, <2 x i16> %z1) {
8983; GFX67-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
8984; GFX67-SDAG:       ; %bb.0: ; %entry
8985; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8986; GFX67-SDAG-NEXT:    v_and_b32_e32 v8, 0xffff, v0
8987; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8988; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
8989; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
8990; GFX67-SDAG-NEXT:    v_mad_u32_u24 v5, v1, v3, v5
8991; GFX67-SDAG-NEXT:    v_mad_u32_u24 v4, v8, v2, v4
8992; GFX67-SDAG-NEXT:    v_mad_u32_u24 v3, v1, v3, v7
8993; GFX67-SDAG-NEXT:    v_mad_u32_u24 v2, v8, v2, v6
8994; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 16, v5
8995; GFX67-SDAG-NEXT:    v_and_b32_e32 v4, 0xffff, v4
8996; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
8997; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
8998; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v4, v0
8999; GFX67-SDAG-NEXT:    v_or_b32_e32 v2, v2, v1
9000; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v5
9001; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
9002; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
9003;
9004; GFX67-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
9005; GFX67-GISEL:       ; %bb.0: ; %entry
9006; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9007; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v0
9008; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
9009; GFX67-GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v1
9010; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
9011; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v8, v2, v4
9012; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v9, v3, v5
9013; GFX67-GISEL-NEXT:    v_mad_u32_u24 v2, v8, v2, v6
9014; GFX67-GISEL-NEXT:    v_mad_u32_u24 v3, v9, v3, v7
9015; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
9016;
9017; GFX8-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
9018; GFX8-SDAG:       ; %bb.0: ; %entry
9019; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9020; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
9021; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
9022; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
9023; GFX8-SDAG-NEXT:    v_mad_u16 v6, v5, v4, v6
9024; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
9025; GFX8-SDAG-NEXT:    v_mad_u16 v2, v0, v1, v2
9026; GFX8-SDAG-NEXT:    v_or_b32_e32 v2, v2, v6
9027; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
9028; GFX8-SDAG-NEXT:    v_mad_u16 v4, v5, v4, v6
9029; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
9030; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v3
9031; GFX8-SDAG-NEXT:    v_or_b32_e32 v1, v0, v4
9032; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, v2
9033; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
9034;
9035; GFX8-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
9036; GFX8-GISEL:       ; %bb.0: ; %entry
9037; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9038; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
9039; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
9040; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
9041; GFX8-GISEL-NEXT:    v_mad_u16 v6, v4, v5, v6
9042; GFX8-GISEL-NEXT:    v_mad_u16 v2, v0, v1, v2
9043; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
9044; GFX8-GISEL-NEXT:    v_or_b32_e32 v2, v2, v6
9045; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v6, 16, v3
9046; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v3
9047; GFX8-GISEL-NEXT:    v_mad_u16 v1, v4, v5, v6
9048; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
9049; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v0, v1
9050; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, v2
9051; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
9052;
9053; GFX9-LABEL: multi_use_mul_mad_v2i16_var:
9054; GFX9:       ; %bb.0: ; %entry
9055; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9056; GFX9-NEXT:    v_pk_mad_u16 v2, v0, v1, v2
9057; GFX9-NEXT:    v_pk_mad_u16 v1, v0, v1, v3
9058; GFX9-NEXT:    v_mov_b32_e32 v0, v2
9059; GFX9-NEXT:    s_setpc_b64 s[30:31]
9060;
9061; GFX10-LABEL: multi_use_mul_mad_v2i16_var:
9062; GFX10:       ; %bb.0: ; %entry
9063; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9064; GFX10-NEXT:    v_pk_mad_u16 v2, v0, v1, v2
9065; GFX10-NEXT:    v_pk_mad_u16 v1, v0, v1, v3
9066; GFX10-NEXT:    v_mov_b32_e32 v0, v2
9067; GFX10-NEXT:    s_setpc_b64 s[30:31]
9068;
9069; GFX11-LABEL: multi_use_mul_mad_v2i16_var:
9070; GFX11:       ; %bb.0: ; %entry
9071; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9072; GFX11-NEXT:    v_pk_mad_u16 v2, v0, v1, v2
9073; GFX11-NEXT:    v_pk_mad_u16 v1, v0, v1, v3
9074; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
9075; GFX11-NEXT:    v_mov_b32_e32 v0, v2
9076; GFX11-NEXT:    s_setpc_b64 s[30:31]
9077;
9078; GFX1200-LABEL: multi_use_mul_mad_v2i16_var:
9079; GFX1200:       ; %bb.0: ; %entry
9080; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
9081; GFX1200-NEXT:    s_wait_expcnt 0x0
9082; GFX1200-NEXT:    s_wait_samplecnt 0x0
9083; GFX1200-NEXT:    s_wait_bvhcnt 0x0
9084; GFX1200-NEXT:    s_wait_kmcnt 0x0
9085; GFX1200-NEXT:    v_pk_mad_u16 v2, v0, v1, v2
9086; GFX1200-NEXT:    v_pk_mad_u16 v1, v0, v1, v3
9087; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_2)
9088; GFX1200-NEXT:    v_mov_b32_e32 v0, v2
9089; GFX1200-NEXT:    s_setpc_b64 s[30:31]
9090entry:
9091  %mul = mul <2 x i16> %x, %y ; shared by both adds below
9092  %add0 = add <2 x i16> %mul, %z0
9093  %add1 = add <2 x i16> %mul, %z1
9094  %shuffle = shufflevector <2 x i16> %add0, <2 x i16> %add1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; identity mask, concatenates %add0 then %add1
9095  ret <4 x i16> %shuffle
9096}
9097
; Packed <2 x i16> case where the product has a user other than the add.
; %mul is added to %z to form the returned value and is also stored through
; %ptr, a pointer in address space 3. The expectations below keep a multiply
; for the stored value (v_pk_mul_lo_u16 on gfx9 and newer, per-lane multiplies
; on the older targets) alongside the multiply-add used for the returned sum.
9098define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z, ptr addrspace(3) %ptr) {
9099; GFX67-SDAG-LABEL: other_use_mul_mad_v2i16_var:
9100; GFX67-SDAG:       ; %bb.0: ; %entry
9101; GFX67-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9102; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
9103; GFX67-SDAG-NEXT:    v_and_b32_e32 v2, 0xffff, v2
9104; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
9105; GFX67-SDAG-NEXT:    v_and_b32_e32 v3, 0xffff, v3
9106; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v7, v0, v2
9107; GFX67-SDAG-NEXT:    v_mul_u32_u24_e32 v8, v1, v3
9108; GFX67-SDAG-NEXT:    v_mad_u32_u24 v1, v1, v3, v5
9109; GFX67-SDAG-NEXT:    v_mad_u32_u24 v0, v0, v2, v4
9110; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
9111; GFX67-SDAG-NEXT:    v_and_b32_e32 v7, 0xffff, v7
9112; GFX67-SDAG-NEXT:    v_lshlrev_b32_e32 v3, 16, v1
9113; GFX67-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
9114; GFX67-SDAG-NEXT:    v_or_b32_e32 v7, v7, v8
9115; GFX67-SDAG-NEXT:    v_or_b32_e32 v0, v0, v3
9116; GFX67-SDAG-NEXT:    s_mov_b32 m0, -1
9117; GFX67-SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v1
9118; GFX67-SDAG-NEXT:    ds_write_b32 v6, v7
9119; GFX67-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
9120; GFX67-SDAG-NEXT:    s_setpc_b64 s[30:31]
9121;
9122; GFX67-GISEL-LABEL: other_use_mul_mad_v2i16_var:
9123; GFX67-GISEL:       ; %bb.0: ; %entry
9124; GFX67-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9125; GFX67-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
9126; GFX67-GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
9127; GFX67-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
9128; GFX67-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
9129; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v8, v1, v3
9130; GFX67-GISEL-NEXT:    v_mul_u32_u24_e32 v7, v0, v2
9131; GFX67-GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v8
9132; GFX67-GISEL-NEXT:    v_and_b32_e32 v7, 0xffff, v7
9133; GFX67-GISEL-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
9134; GFX67-GISEL-NEXT:    v_or_b32_e32 v7, v7, v8
9135; GFX67-GISEL-NEXT:    v_mad_u32_u24 v0, v0, v2, v4
9136; GFX67-GISEL-NEXT:    v_mad_u32_u24 v1, v1, v3, v5
9137; GFX67-GISEL-NEXT:    s_mov_b32 m0, -1
9138; GFX67-GISEL-NEXT:    ds_write_b32 v6, v7
9139; GFX67-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
9140; GFX67-GISEL-NEXT:    s_setpc_b64 s[30:31]
9141;
9142; GFX8-SDAG-LABEL: other_use_mul_mad_v2i16_var:
9143; GFX8-SDAG:       ; %bb.0: ; %entry
9144; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9145; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
9146; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
9147; GFX8-SDAG-NEXT:    v_mul_lo_u16_sdwa v6, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
9148; GFX8-SDAG-NEXT:    v_mul_lo_u16_e32 v7, v0, v1
9149; GFX8-SDAG-NEXT:    v_or_b32_e32 v6, v7, v6
9150; GFX8-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
9151; GFX8-SDAG-NEXT:    v_mad_u16 v4, v5, v4, v7
9152; GFX8-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
9153; GFX8-SDAG-NEXT:    v_mad_u16 v0, v0, v1, v2
9154; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v4
9155; GFX8-SDAG-NEXT:    s_mov_b32 m0, -1
9156; GFX8-SDAG-NEXT:    ds_write_b32 v3, v6
9157; GFX8-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
9158; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
9159;
9160; GFX8-GISEL-LABEL: other_use_mul_mad_v2i16_var:
9161; GFX8-GISEL:       ; %bb.0: ; %entry
9162; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9163; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
9164; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
9165; GFX8-GISEL-NEXT:    v_mul_lo_u16_e32 v6, v0, v1
9166; GFX8-GISEL-NEXT:    v_mul_lo_u16_sdwa v7, v4, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
9167; GFX8-GISEL-NEXT:    v_or_b32_e32 v6, v6, v7
9168; GFX8-GISEL-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
9169; GFX8-GISEL-NEXT:    v_mad_u16 v0, v0, v1, v2
9170; GFX8-GISEL-NEXT:    v_mad_u16 v1, v4, v5, v7
9171; GFX8-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
9172; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
9173; GFX8-GISEL-NEXT:    s_mov_b32 m0, -1
9174; GFX8-GISEL-NEXT:    ds_write_b32 v3, v6
9175; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
9176; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
9177;
9178; GFX9-LABEL: other_use_mul_mad_v2i16_var:
9179; GFX9:       ; %bb.0: ; %entry
9180; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9181; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v0, v1
9182; GFX9-NEXT:    v_pk_mad_u16 v0, v0, v1, v2
9183; GFX9-NEXT:    ds_write_b32 v3, v4
9184; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
9185; GFX9-NEXT:    s_setpc_b64 s[30:31]
9186;
9187; GFX10-LABEL: other_use_mul_mad_v2i16_var:
9188; GFX10:       ; %bb.0: ; %entry
9189; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9190; GFX10-NEXT:    v_pk_mul_lo_u16 v4, v0, v1
9191; GFX10-NEXT:    v_pk_mad_u16 v0, v0, v1, v2
9192; GFX10-NEXT:    ds_write_b32 v3, v4
9193; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
9194; GFX10-NEXT:    s_setpc_b64 s[30:31]
9195;
9196; GFX11-LABEL: other_use_mul_mad_v2i16_var:
9197; GFX11:       ; %bb.0: ; %entry
9198; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9199; GFX11-NEXT:    v_pk_mul_lo_u16 v4, v0, v1
9200; GFX11-NEXT:    v_pk_mad_u16 v0, v0, v1, v2
9201; GFX11-NEXT:    ds_store_b32 v3, v4
9202; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
9203; GFX11-NEXT:    s_setpc_b64 s[30:31]
9204;
9205; GFX1200-LABEL: other_use_mul_mad_v2i16_var:
9206; GFX1200:       ; %bb.0: ; %entry
9207; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
9208; GFX1200-NEXT:    s_wait_expcnt 0x0
9209; GFX1200-NEXT:    s_wait_samplecnt 0x0
9210; GFX1200-NEXT:    s_wait_bvhcnt 0x0
9211; GFX1200-NEXT:    s_wait_kmcnt 0x0
9212; GFX1200-NEXT:    v_pk_mul_lo_u16 v4, v0, v1
9213; GFX1200-NEXT:    v_pk_mad_u16 v0, v0, v1, v2
9214; GFX1200-NEXT:    ds_store_b32 v3, v4
9215; GFX1200-NEXT:    s_wait_dscnt 0x0
9216; GFX1200-NEXT:    s_setpc_b64 s[30:31]
9217entry:
9218  %mul = mul <2 x i16> %x, %y ; product has two users, the add and the store
9219  %add0 = add <2 x i16> %mul, %z
9220  store <2 x i16> %mul, ptr addrspace(3) %ptr ; the extra use that keeps the multiply alive
9221  ret <2 x i16> %add0
9222}
9223
; Adds the i64 %z to the i64 result of the llvm.amdgcn.mul.u24.i64 intrinsic.
; The expectations below differ by target and by instruction selector. The
; SDAG prefixes for gfx9, gfx10 and gfx11 expect a single v_mad_u64_u32 and
; the GFX1200 prefix expects a single v_mad_co_u64_u32. The gfx6/7/8 prefixes
; and the GISEL prefixes for gfx9, gfx10 and gfx11 expect v_mul_u32_u24 plus
; v_mul_hi_u32_u24 followed by an add and an add-with-carry.
9224define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
9225; GFX67-LABEL: mul_u24_add64:
9226; GFX67:       ; %bb.0:
9227; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9228; GFX67-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
9229; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9230; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
9231; GFX67-NEXT:    v_addc_u32_e32 v1, vcc, v4, v3, vcc
9232; GFX67-NEXT:    s_setpc_b64 s[30:31]
9233;
9234; GFX8-LABEL: mul_u24_add64:
9235; GFX8:       ; %bb.0:
9236; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9237; GFX8-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
9238; GFX8-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9239; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
9240; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v3, vcc
9241; GFX8-NEXT:    s_setpc_b64 s[30:31]
9242;
9243; GFX9-SDAG-LABEL: mul_u24_add64:
9244; GFX9-SDAG:       ; %bb.0:
9245; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9246; GFX9-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
9247; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
9248;
9249; GFX9-GISEL-LABEL: mul_u24_add64:
9250; GFX9-GISEL:       ; %bb.0:
9251; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9252; GFX9-GISEL-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
9253; GFX9-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9254; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
9255; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v3, vcc
9256; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
9257;
9258; GFX10-SDAG-LABEL: mul_u24_add64:
9259; GFX10-SDAG:       ; %bb.0:
9260; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9261; GFX10-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
9262; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
9263;
9264; GFX10-GISEL-LABEL: mul_u24_add64:
9265; GFX10-GISEL:       ; %bb.0:
9266; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9267; GFX10-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
9268; GFX10-GISEL-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v1
9269; GFX10-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v4, v2
9270; GFX10-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
9271; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
9272;
9273; GFX11-SDAG-LABEL: mul_u24_add64:
9274; GFX11-SDAG:       ; %bb.0:
9275; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9276; GFX11-SDAG-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
9277; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9278; GFX11-SDAG-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
9279; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
9280;
9281; GFX11-GISEL-LABEL: mul_u24_add64:
9282; GFX11-GISEL:       ; %bb.0:
9283; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9284; GFX11-GISEL-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
9285; GFX11-GISEL-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v1
9286; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
9287; GFX11-GISEL-NEXT:    v_add_co_u32 v0, vcc_lo, v4, v2
9288; GFX11-GISEL-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
9289; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
9290;
9291; GFX1200-LABEL: mul_u24_add64:
9292; GFX1200:       ; %bb.0:
9293; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
9294; GFX1200-NEXT:    s_wait_expcnt 0x0
9295; GFX1200-NEXT:    s_wait_samplecnt 0x0
9296; GFX1200-NEXT:    s_wait_bvhcnt 0x0
9297; GFX1200-NEXT:    s_wait_kmcnt 0x0
9298; GFX1200-NEXT:    v_mad_co_u64_u32 v[0:1], null, v0, v1, v[2:3]
9299; GFX1200-NEXT:    s_setpc_b64 s[30:31]
9300  %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y) ; i64-returning form of the intrinsic
9301  %add = add i64 %mul, %z
9302  ret i64 %add
9303}
9304
; Variant of the 64-bit accumulate where the multiply is the i32-returning
; llvm.amdgcn.mul.u24 intrinsic and its result is zero-extended to i64 before
; being added to %z. Every expectation below uses v_mul_u32_u24 followed by an
; add and an add-with-carry that takes the constant 0 as one operand. None of
; them uses a 64-bit multiply-add.
9305define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
9306; GFX67-LABEL: mul_u24_zext_add64:
9307; GFX67:       ; %bb.0:
9308; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9309; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9310; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
9311; GFX67-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
9312; GFX67-NEXT:    s_setpc_b64 s[30:31]
9313;
9314; GFX8-LABEL: mul_u24_zext_add64:
9315; GFX8:       ; %bb.0:
9316; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9317; GFX8-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9318; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
9319; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
9320; GFX8-NEXT:    s_setpc_b64 s[30:31]
9321;
9322; GFX9-LABEL: mul_u24_zext_add64:
9323; GFX9:       ; %bb.0:
9324; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9325; GFX9-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9326; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
9327; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
9328; GFX9-NEXT:    s_setpc_b64 s[30:31]
9329;
9330; GFX10-LABEL: mul_u24_zext_add64:
9331; GFX10:       ; %bb.0:
9332; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9333; GFX10-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9334; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
9335; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
9336; GFX10-NEXT:    s_setpc_b64 s[30:31]
9337;
9338; GFX11-LABEL: mul_u24_zext_add64:
9339; GFX11:       ; %bb.0:
9340; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9341; GFX11-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9342; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9343; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
9344; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
9345; GFX11-NEXT:    s_setpc_b64 s[30:31]
9346;
9347; GFX1200-LABEL: mul_u24_zext_add64:
9348; GFX1200:       ; %bb.0:
9349; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0
9350; GFX1200-NEXT:    s_wait_expcnt 0x0
9351; GFX1200-NEXT:    s_wait_samplecnt 0x0
9352; GFX1200-NEXT:    s_wait_bvhcnt 0x0
9353; GFX1200-NEXT:    s_wait_kmcnt 0x0
9354; GFX1200-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
9355; GFX1200-NEXT:    s_delay_alu instid0(VALU_DEP_1)
9356; GFX1200-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
9357; GFX1200-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
9358; GFX1200-NEXT:    s_setpc_b64 s[30:31]
9359  %mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y) ; i32-returning form of the intrinsic
9360  %mul.zext = zext i32 %mul to i64 ; widen the 32-bit product before the 64-bit add
9361  %add = add i64 %mul.zext, %z
9362  ret i64 %add
9363}
9364
; Declarations of the two mul.u24 intrinsic forms called by mul_u24_add64
; (i64 result) and mul_u24_zext_add64 (i32 result).
9365declare i64 @llvm.amdgcn.mul.u24.i64(i32, i32)
9366declare i32 @llvm.amdgcn.mul.u24(i32, i32)
9367
9368;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
9369; GFX6: {{.*}}
9370; GFX7: {{.*}}
9371; GFX900: {{.*}}
9372; GFX90A: {{.*}}
9373