xref: /llvm-project/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll (revision 4e70720139ff8b03ab02087df249ce4fd4bbfd59)
1; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s
6
7declare float @llvm.fabs.f32(float)
8declare float @llvm.fma.f32(float, float, float)
9
; Baseline case: a plain float multiply of the two arguments. Every checked
; target is expected to emit the multiply as a single 4-byte v_mul_f32_e32,
; so the body is three 4-byte instructions before gfx1200 and seven on
; gfx1200, which emits five separate s_wait_* instructions where the older
; targets emit one s_waitcnt. The codeLenInByte checks that follow the
; function pin those totals.
10define float @v_mul_f32_vop2(float %x, float %y) {
11; GFX9-LABEL: v_mul_f32_vop2:
12; GFX9:       ; %bb.0:
13; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
14; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x0a]
15; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
16;
17; GFX10-LABEL: v_mul_f32_vop2:
18; GFX10:       ; %bb.0:
19; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
20; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
21; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
22;
23; GFX11-LABEL: v_mul_f32_vop2:
24; GFX11:       ; %bb.0:
25; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
26; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
27; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
28;
29; GFX1200-LABEL: v_mul_f32_vop2:
30; GFX1200:       ; %bb.0:
31; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
32; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
33; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
34; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
35; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
36; GFX1200-NEXT:    v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
37; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
38  %mul = fmul float %x, %y
39  ret float %mul
40}
41; NOT-GFX12: codeLenInByte = 12
42; GFX1200: codeLenInByte = 28
43
; Multiply by the constant 4.0. The expected v_mul_f32_e32 stays 4 bytes on
; every checked target: the constant is printed as the operand "4.0" and no
; extra literal dword follows the instruction. Total code size therefore
; matches the two-register baseline (see the codeLenInByte checks after the
; function).
44define float @v_mul_f32_vop2_inline_imm(float %x) {
45; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
46; GFX9:       ; %bb.0:
47; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
48; GFX9-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x0a]
49; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
50;
51; GFX10-LABEL: v_mul_f32_vop2_inline_imm:
52; GFX10:       ; %bb.0:
53; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
54; GFX10-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
55; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
56;
57; GFX11-LABEL: v_mul_f32_vop2_inline_imm:
58; GFX11:       ; %bb.0:
59; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
60; GFX11-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
61; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
62;
63; GFX1200-LABEL: v_mul_f32_vop2_inline_imm:
64; GFX1200:       ; %bb.0:
65; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
66; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
67; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
68; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
69; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
70; GFX1200-NEXT:    v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
71; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
72  %mul = fmul float %x, 4.0
73  ret float %mul
74}
75; NOT-GFX12: codeLenInByte = 12
76; GFX1200: codeLenInByte = 28
77
; Multiply by 123.0, which the checks show as the 32-bit literal 0x42f60000.
; The expected v_mul_f32_e32 grows to 8 bytes on every checked target: the
; 4-byte base encoding followed by the literal's four bytes. That makes the
; function 4 bytes larger than the inline-constant variant, as pinned by the
; codeLenInByte checks after the function.
78define float @v_mul_f32_vop2_literal(float %x) {
79; GFX9-LABEL: v_mul_f32_vop2_literal:
80; GFX9:       ; %bb.0:
81; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
82; GFX9-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x0a,0x00,0x00,0xf6,0x42]
83; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
84;
85; GFX10-LABEL: v_mul_f32_vop2_literal:
86; GFX10:       ; %bb.0:
87; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
88; GFX10-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
89; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
90;
91; GFX11-LABEL: v_mul_f32_vop2_literal:
92; GFX11:       ; %bb.0:
93; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
94; GFX11-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
95; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
96;
97; GFX1200-LABEL: v_mul_f32_vop2_literal:
98; GFX1200:       ; %bb.0:
99; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
100; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
101; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
102; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
103; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
104; GFX1200-NEXT:    v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
105; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
106  %mul = fmul float %x, 123.0
107  ret float %mul
108}
109; NOT-GFX12: codeLenInByte = 16
110; GFX1200: codeLenInByte = 32
111
; Multiply fabs(x) by y. The checks expect the fabs to be folded into the
; multiply as the |v0| source modifier, which is emitted with the 8-byte
; v_mul_f32_e64 form instead of the 4-byte e32 form used by the baseline.
; No separate instruction is expected for the fabs itself.
112define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
113; GFX9-LABEL: v_mul_f32_vop3_src_mods:
114; GFX9:       ; %bb.0:
115; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
116; GFX9-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x03,0x02,0x00]
117; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
118;
119; GFX10-LABEL: v_mul_f32_vop3_src_mods:
120; GFX10:       ; %bb.0:
121; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
122; GFX10-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
123; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
124;
125; GFX11-LABEL: v_mul_f32_vop3_src_mods:
126; GFX11:       ; %bb.0:
127; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
128; GFX11-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
129; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
130;
131; GFX1200-LABEL: v_mul_f32_vop3_src_mods:
132; GFX1200:       ; %bb.0:
133; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
134; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
135; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
136; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
137; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
138; GFX1200-NEXT:    v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
139; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
140  %fabs.x = call float @llvm.fabs.f32(float %x)
141  %mul = fmul float %fabs.x, %y
142  ret float %mul
143}
144; NOT-GFX12: codeLenInByte = 16
145; GFX1200: codeLenInByte = 32
146
; Multiply fabs(x) by the constant 4.0. The checks expect one 8-byte
; v_mul_f32_e64 carrying both the |v0| source modifier and the constant as
; the operand "4.0", with no trailing literal dword, so the size equals the
; register-operand source-modifier case.
; Note: the %y parameter is declared but never used by the body.
147define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
148; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
149; GFX9:       ; %bb.0:
150; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
151; GFX9-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0xed,0x01,0x00]
152; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
153;
154; GFX10-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
155; GFX10:       ; %bb.0:
156; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
157; GFX10-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
158; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
159;
160; GFX11-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
161; GFX11:       ; %bb.0:
162; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
163; GFX11-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
164; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
165;
166; GFX1200-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
167; GFX1200:       ; %bb.0:
168; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
169; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
170; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
171; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
172; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
173; GFX1200-NEXT:    v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
174; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
175  %fabs.x = call float @llvm.fabs.f32(float %x)
176  %mul = fmul float %fabs.x, 4.0
177  ret float %mul
178}
179
180; NOT-GFX12: codeLenInByte = 16
181; GFX1200: codeLenInByte = 32
182
; Multiply fabs(x) by 123.0 (literal 0x42f60000). The expected code differs
; by target:
;   - gfx900 first moves the literal into s4 with an 8-byte s_mov_b32 and
;     then issues an 8-byte v_mul_f32_e64 that reads s4.
;   - gfx1030, gfx11 and gfx1200 place the literal directly in a single
;     12-byte v_mul_f32_e64 (8-byte encoding plus 4 literal bytes).
; The codeLenInByte checks after the function reflect that 4-byte saving on
; the newer targets.
; Note: the %y parameter is declared but never used by the body.
183define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
184; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
185; GFX9:       ; %bb.0:
186; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
187; GFX9-NEXT:    s_mov_b32 s4, 0x42f60000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0xf6,0x42]
188; GFX9-NEXT:    v_mul_f32_e64 v0, |v0|, s4 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x09,0x00,0x00]
189; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
190;
191; GFX10-LABEL: v_mul_f32_vop3_src_mods_literal:
192; GFX10:       ; %bb.0:
193; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
194; GFX10-NEXT:    v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
195; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
196;
197; GFX11-LABEL: v_mul_f32_vop3_src_mods_literal:
198; GFX11:       ; %bb.0:
199; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
200; GFX11-NEXT:    v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
201; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
202;
203; GFX1200-LABEL: v_mul_f32_vop3_src_mods_literal:
204; GFX1200:       ; %bb.0:
205; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
206; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
207; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
208; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
209; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
210; GFX1200-NEXT:    v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
211; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
212  %fabs.x = call float @llvm.fabs.f32(float %x)
213  %mul = fmul float %fabs.x, 123.0
214  ret float %mul
215}
216
217; GFX9: codeLenInByte = 24
218; GFX10: codeLenInByte = 20
219; GFX11: codeLenInByte = 20
220; GFX1200: codeLenInByte = 36
221
; Multiply x by the address of a stack object, reinterpreted as a float, so
; that a frame index ends up as an operand of the multiply. The expected
; code differs by target:
;   - gfx900 and gfx1030 first compute the address into v1 with an 8-byte
;     v_lshrrev_b32_e64 of s32 (shift amount 6 on gfx900, 5 on gfx1030) and
;     then issue a 4-byte v_mul_f32_e32 on v1.
;   - gfx11 and gfx1200 use s32 directly as the multiply's first source, so
;     only the 4-byte v_mul_f32_e32 is needed.
222define float @v_mul_f32_vop2_frame_index(float %x) {
223; GFX9-LABEL: v_mul_f32_vop2_frame_index:
224; GFX9:       ; %bb.0:
225; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
226; GFX9-NEXT:    v_lshrrev_b32_e64 v1, 6, s32 ; encoding: [0x01,0x00,0x10,0xd1,0x86,0x40,0x00,0x00]
227; GFX9-NEXT:    v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x0a]
228; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
229;
230; GFX10-LABEL: v_mul_f32_vop2_frame_index:
231; GFX10:       ; %bb.0:
232; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
233; GFX10-NEXT:    v_lshrrev_b32_e64 v1, 5, s32 ; encoding: [0x01,0x00,0x16,0xd5,0x85,0x40,0x00,0x00]
234; GFX10-NEXT:    v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x10]
235; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
236;
237; GFX11-LABEL: v_mul_f32_vop2_frame_index:
238; GFX11:       ; %bb.0:
239; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
240; GFX11-NEXT:    v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
241; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
242;
243; GFX1200-LABEL: v_mul_f32_vop2_frame_index:
244; GFX1200:       ; %bb.0:
245; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
246; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
247; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
248; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
249; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
250; GFX1200-NEXT:    v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
251; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
; The alloca's addrspace(5) pointer is converted to i32 and bitcast to float
; purely to feed the stack object's address into the fmul.
252  %alloca = alloca i32, addrspace(5)
253  %ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
254  %cast = bitcast i32 %ptrtoint to float
255  %mul = fmul float %x, %cast
256  ret float %mul
257}
258
259; GFX9: codeLenInByte = 20
260; GFX10: codeLenInByte = 20
261; GFX11: codeLenInByte = 12
262; GFX1200: codeLenInByte = 28
263
; Fused multiply-add of three distinct register arguments via the
; llvm.fma.f32 intrinsic. Every checked target is expected to emit a single
; 8-byte v_fma_f32 reading v0, v1 and v2; only the opcode bytes of the
; encoding differ between targets.
264define float @v_fma_f32(float %x, float %y, float %z) {
265; GFX9-LABEL: v_fma_f32:
266; GFX9:       ; %bb.0:
267; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
268; GFX9-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x0a,0x04]
269; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
270;
271; GFX10-LABEL: v_fma_f32:
272; GFX10:       ; %bb.0:
273; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
274; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x4b,0xd5,0x00,0x03,0x0a,0x04]
275; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
276;
277; GFX11-LABEL: v_fma_f32:
278; GFX11:       ; %bb.0:
279; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
280; GFX11-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
281; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
282;
283; GFX1200-LABEL: v_fma_f32:
284; GFX1200:       ; %bb.0:
285; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
286; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
287; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
288; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
289; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
290; GFX1200-NEXT:    v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
291; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
292  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
293  ret float %fma
294}
295
296; NOT-GFX12: codeLenInByte = 16
297; GFX1200: codeLenInByte = 32
298
; Fused multiply-add whose first operand is fabs(x). The checks expect the
; fabs to be folded into the v_fma_f32 as the |v0| source modifier. The
; instruction stays 8 bytes on every checked target, so the size equals the
; plain three-register fma case.
299define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
300; GFX9-LABEL: v_fma_f32_src_mods:
301; GFX9:       ; %bb.0:
302; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
303; GFX9-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x0a,0x04]
304; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
305;
306; GFX10-LABEL: v_fma_f32_src_mods:
307; GFX10:       ; %bb.0:
308; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
309; GFX10-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0x0a,0x04]
310; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
311;
312; GFX11-LABEL: v_fma_f32_src_mods:
313; GFX11:       ; %bb.0:
314; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
315; GFX11-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
316; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
317;
318; GFX1200-LABEL: v_fma_f32_src_mods:
319; GFX1200:       ; %bb.0:
320; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
321; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
322; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
323; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
324; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
325; GFX1200-NEXT:    v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
326; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
327  %fabs.x = call float @llvm.fabs.f32(float %x)
328  %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
329  ret float %fma
330}
331
332; NOT-GFX12: codeLenInByte = 16
333; GFX1200: codeLenInByte = 32
334
; Fused multiply-add whose addend is the same value as the first multiplicand
; (fma(x, y, x)), so the accumulator and the destination share v0. The
; expected code differs by target:
;   - gfx900 emits the 8-byte three-source v_fma_f32.
;   - gfx1030, gfx11 and gfx1200 emit the 4-byte v_fmac_f32_e32, saving
;     4 bytes (see the codeLenInByte checks after the function).
335define float @v_fmac_f32(float %x, float %y) {
336; GFX9-LABEL: v_fmac_f32:
337; GFX9:       ; %bb.0:
338; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
339; GFX9-NEXT:    v_fma_f32 v0, v0, v1, v0 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x02,0x04]
340; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
341;
342; GFX10-LABEL: v_fmac_f32:
343; GFX10:       ; %bb.0:
344; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
345; GFX10-NEXT:    v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
346; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
347;
348; GFX11-LABEL: v_fmac_f32:
349; GFX11:       ; %bb.0:
350; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
351; GFX11-NEXT:    v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
352; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
353;
354; GFX1200-LABEL: v_fmac_f32:
355; GFX1200:       ; %bb.0:
356; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
357; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
358; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
359; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
360; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
361; GFX1200-NEXT:    v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
362; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
363  %fma = call float @llvm.fma.f32(float %x, float %y, float %x)
364  ret float %fma
365}
366
367; GFX9: codeLenInByte = 16
368; GFX10: codeLenInByte = 12
369; GFX11: codeLenInByte = 12
370; GFX1200: codeLenInByte = 28
371
; Fused multiply-add with the constant addend 256.0 (literal 0x43800000).
; The expected code differs by target:
;   - gfx900 moves the literal into s4 with an 8-byte s_mov_b32 and then
;     issues an 8-byte v_fma_f32 that reads s4.
;   - gfx1030, gfx11 and gfx1200 emit a single 8-byte v_fmaak_f32 whose
;     last four encoding bytes are the literal.
372define float @v_fmaak_f32(float %x, float %y) {
373; GFX9-LABEL: v_fmaak_f32:
374; GFX9:       ; %bb.0:
375; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
376; GFX9-NEXT:    s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
377; GFX9-NEXT:    v_fma_f32 v0, v0, v1, s4 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x12,0x00]
378; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
379;
380; GFX10-LABEL: v_fmaak_f32:
381; GFX10:       ; %bb.0:
382; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
383; GFX10-NEXT:    v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
384; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
385;
386; GFX11-LABEL: v_fmaak_f32:
387; GFX11:       ; %bb.0:
388; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
389; GFX11-NEXT:    v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
390; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
391;
392; GFX1200-LABEL: v_fmaak_f32:
393; GFX1200:       ; %bb.0:
394; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
395; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
396; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
397; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
398; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
399; GFX1200-NEXT:    v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
400; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
401  %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
402  ret float %fma
403}
404
405; GFX9: codeLenInByte = 24
406; GFX10: codeLenInByte = 16
407; GFX11: codeLenInByte = 16
408; GFX1200: codeLenInByte = 32
409
; Fused multiply-add of fabs(x), y and the constant 256.0 (literal
; 0x43800000). Unlike the version without fabs, no target is expected to
; use v_fmaak_f32 here; the |v0| modifier appears on a v_fma_f32 instead:
;   - gfx900 moves the literal into s4 (8-byte s_mov_b32) and issues an
;     8-byte v_fma_f32 that reads s4.
;   - gfx1030, gfx11 and gfx1200 emit one 12-byte v_fma_f32 with the
;     literal appended, 4 bytes larger than the v_fmaak_f32 form.
410define float @v_fma_k_f32_src_mods(float %x, float %y) {
411; GFX9-LABEL: v_fma_k_f32_src_mods:
412; GFX9:       ; %bb.0:
413; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
414; GFX9-NEXT:    s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
415; GFX9-NEXT:    v_fma_f32 v0, |v0|, v1, s4 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x12,0x00]
416; GFX9-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
417;
418; GFX10-LABEL: v_fma_k_f32_src_mods:
419; GFX10:       ; %bb.0:
420; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
421; GFX10-NEXT:    v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
422; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
423;
424; GFX11-LABEL: v_fma_k_f32_src_mods:
425; GFX11:       ; %bb.0:
426; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
427; GFX11-NEXT:    v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
428; GFX11-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
429;
430; GFX1200-LABEL: v_fma_k_f32_src_mods:
431; GFX1200:       ; %bb.0:
432; GFX1200-NEXT:    s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
433; GFX1200-NEXT:    s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
434; GFX1200-NEXT:    s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
435; GFX1200-NEXT:    s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
436; GFX1200-NEXT:    s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
437; GFX1200-NEXT:    v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
438; GFX1200-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
439  %fabs.x = call float @llvm.fabs.f32(float %x)
440  %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
441  ret float %fma
442}
443
444; GFX9: codeLenInByte = 24
445; GFX10: codeLenInByte = 20
446; GFX11: codeLenInByte = 20
447; GFX1200: codeLenInByte = 36
448
; Same fma with the constant addend 256.0 (literal 0x43800000), but in an
; amdgpu_ps function whose arguments are marked inreg; the checks show them
; arriving in s0 and s1. No entry wait or s_setpc_b64 is expected here; each
; body ends with the "return to shader part epilog" marker instead.
; Expected code by target:
;   - gfx900: copies s1 and the literal into VGPRs (4-byte and 8-byte
;     v_mov_b32_e32) and issues an 8-byte v_fma_f32.
;   - gfx1030: copies s1 into v0 and issues an 8-byte v_fmaak_f32.
;   - gfx1100: same as gfx1030 plus a 4-byte s_delay_alu between the two.
;   - gfx1150 and gfx1200: compute on the scalar side with an 8-byte
;     s_fmaak_f32, then s_delay_alu, then copy the result s0 into v0.
; gfx1100 and gfx1150 expect different code, which is why this function uses
; their dedicated check prefixes instead of the shared gfx11 prefix.
449define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
450; GFX9-LABEL: s_fmaak_f32:
451; GFX9:       ; %bb.0:
452; GFX9-NEXT:    v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e]
453; GFX9-NEXT:    v_mov_b32_e32 v1, 0x43800000 ; encoding: [0xff,0x02,0x02,0x7e,0x00,0x00,0x80,0x43]
454; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x00,0x06,0x04]
455; GFX9-NEXT:    ; return to shader part epilog
456;
457; GFX10-LABEL: s_fmaak_f32:
458; GFX10:       ; %bb.0:
459; GFX10-NEXT:    v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e]
460; GFX10-NEXT:    v_fmaak_f32 v0, s0, v0, 0x43800000 ; encoding: [0x00,0x00,0x00,0x5a,0x00,0x00,0x80,0x43]
461; GFX10-NEXT:    ; return to shader part epilog
462;
463; GFX1100-LABEL: s_fmaak_f32:
464; GFX1100:       ; %bb.0:
465; GFX1100-NEXT:    v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e]
466; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf]
467; GFX1100-NEXT:    v_fmaak_f32 v0, s0, v0, 0x43800000 ; encoding: [0x00,0x00,0x00,0x5a,0x00,0x00,0x80,0x43]
468; GFX1100-NEXT:    ; return to shader part epilog
469;
470; GFX1150-LABEL: s_fmaak_f32:
471; GFX1150:       ; %bb.0:
472; GFX1150-NEXT:    s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
473; GFX1150-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
474; GFX1150-NEXT:    v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
475; GFX1150-NEXT:    ; return to shader part epilog
476;
477; GFX1200-LABEL: s_fmaak_f32:
478; GFX1200:       ; %bb.0:
479; GFX1200-NEXT:    s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
480; GFX1200-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
481; GFX1200-NEXT:    v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
482; GFX1200-NEXT:    ; return to shader part epilog
483  %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
484  ret float %fma
485}
486
487; GFX9: codeLenInByte = 20
488; GFX10: codeLenInByte = 12
489; GFX1100: codeLenInByte = 16
490; GFX1150: codeLenInByte = 16
491; GFX1200: codeLenInByte = 16
492