xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.powi.ll (revision f2c164c8150548d983565c4ddc0fde790f9e2a5b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX78,GFX7 %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GFX78,GFX8 %s
4; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
5
; powi on half with a variable i32 exponent. The half value enters and leaves
; the function as i16 and is bitcast to/from half around the intrinsic call.
; The expected code converts both operands to f32, then issues v_log_f32, a
; multiply (v_mul_legacy_f32 on hawaii/fiji, v_mul_dx9_zero_f32 on gfx1100)
; and v_exp_f32 before converting the result back to f16.
6define i16 @v_powi_f16(i16 %l, i32 %r) {
7; GFX78-LABEL: v_powi_f16:
8; GFX78:       ; %bb.0:
9; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GFX78-NEXT:    v_cvt_f32_f16_e32 v0, v0
11; GFX78-NEXT:    v_cvt_f32_i32_e32 v1, v1
12; GFX78-NEXT:    v_log_f32_e32 v0, v0
13; GFX78-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
14; GFX78-NEXT:    v_exp_f32_e32 v0, v0
15; GFX78-NEXT:    v_cvt_f16_f32_e32 v0, v0
16; GFX78-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX11-LABEL: v_powi_f16:
19; GFX11:       ; %bb.0:
20; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
22; GFX11-NEXT:    v_cvt_f32_i32_e32 v1, v1
23; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
24; GFX11-NEXT:    v_log_f32_e32 v0, v0
25; GFX11-NEXT:    s_waitcnt_depctr 0xfff
26; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
27; GFX11-NEXT:    v_exp_f32_e32 v0, v0
28; GFX11-NEXT:    s_waitcnt_depctr 0xfff
29; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
30; GFX11-NEXT:    s_setpc_b64 s[30:31]
31  %l.cast = bitcast i16 %l to half
32  %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
33  %res.cast = bitcast half %res to i16
34  ret i16 %res.cast
35}
36
; powi on float with a variable i32 exponent. The expected code takes
; v_log_f32 of the base, converts the exponent to f32, multiplies the two
; (v_mul_legacy_f32 on hawaii/fiji, v_mul_dx9_zero_f32 on gfx1100) and feeds
; the product to v_exp_f32.
37define float @v_powi_f32(float %l, i32 %r) {
38; GFX78-LABEL: v_powi_f32:
39; GFX78:       ; %bb.0:
40; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41; GFX78-NEXT:    v_log_f32_e32 v0, v0
42; GFX78-NEXT:    v_cvt_f32_i32_e32 v1, v1
43; GFX78-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
44; GFX78-NEXT:    v_exp_f32_e32 v0, v0
45; GFX78-NEXT:    s_setpc_b64 s[30:31]
46;
47; GFX11-LABEL: v_powi_f32:
48; GFX11:       ; %bb.0:
49; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GFX11-NEXT:    v_log_f32_e32 v0, v0
51; GFX11-NEXT:    v_cvt_f32_i32_e32 v1, v1
52; GFX11-NEXT:    s_waitcnt_depctr 0xfff
53; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
54; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
55; GFX11-NEXT:    v_exp_f32_e32 v0, v0
56; GFX11-NEXT:    s_setpc_b64 s[30:31]
57  %res = call float @llvm.powi.f32.i32(float %l, i32 %r)
58  ret float %res
59}
60
; powi on float with the constant exponent 0. On every tested target the
; expected code is just a move of the constant 1.0 into v0; the input value is
; not used.
61define float @v_powi_0_f32(float %l) {
62; GFX78-LABEL: v_powi_0_f32:
63; GFX78:       ; %bb.0:
64; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65; GFX78-NEXT:    v_mov_b32_e32 v0, 1.0
66; GFX78-NEXT:    s_setpc_b64 s[30:31]
67;
68; GFX11-LABEL: v_powi_0_f32:
69; GFX11:       ; %bb.0:
70; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
72; GFX11-NEXT:    s_setpc_b64 s[30:31]
73  %res = call float @llvm.powi.f32.i32(float %l, i32 0)
74  ret float %res
75}
76
; powi on float with the constant exponent 1. The expected code contains no
; arithmetic at all: only the entry s_waitcnt and the return, so v0 is left
; untouched.
77define float @v_powi_1_f32(float %l) {
78; GFX78-LABEL: v_powi_1_f32:
79; GFX78:       ; %bb.0:
80; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81; GFX78-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX11-LABEL: v_powi_1_f32:
84; GFX11:       ; %bb.0:
85; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX11-NEXT:    s_setpc_b64 s[30:31]
87  %res = call float @llvm.powi.f32.i32(float %l, i32 1)
88  ret float %res
89}
90
; powi on float with the constant exponent -1. The expected code is a division
; sequence with 1.0 as the numerator and the input as the denominator:
; v_div_scale_f32, v_rcp_f32, a chain of fma refinements, v_div_fmas_f32 and
; v_div_fixup_f32. hawaii and fiji are checked with separate prefixes here
; because their instruction order and register assignment differ.
91define float @v_powi_neg1_f32(float %l) {
92; GFX7-LABEL: v_powi_neg1_f32:
93; GFX7:       ; %bb.0:
94; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
96; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
97; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
98; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
99; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
100; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
101; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
102; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
103; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
104; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
105; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
106; GFX7-NEXT:    s_setpc_b64 s[30:31]
107;
108; GFX8-LABEL: v_powi_neg1_f32:
109; GFX8:       ; %bb.0:
110; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
112; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
113; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
114; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
115; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
116; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
117; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
118; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
119; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
120; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
121; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
122; GFX8-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX11-LABEL: v_powi_neg1_f32:
125; GFX11:       ; %bb.0:
126; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
128; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
129; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
130; GFX11-NEXT:    s_waitcnt_depctr 0xfff
131; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
132; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
133; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
134; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
135; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
136; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
137; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
138; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
139; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
140; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
141; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
142; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
143; GFX11-NEXT:    s_setpc_b64 s[30:31]
144  %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
145  ret float %res
146}
147
; powi on float with the constant exponent 2. The expected code is a single
; v_mul_f32 of the input with itself on every tested target.
148define float @v_powi_2_f32(float %l) {
149; GFX78-LABEL: v_powi_2_f32:
150; GFX78:       ; %bb.0:
151; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
153; GFX78-NEXT:    s_setpc_b64 s[30:31]
154;
155; GFX11-LABEL: v_powi_2_f32:
156; GFX11:       ; %bb.0:
157; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
159; GFX11-NEXT:    s_setpc_b64 s[30:31]
160  %res = call float @llvm.powi.f32.i32(float %l, i32 2)
161  ret float %res
162}
163
; powi on float with the constant exponent -2. The expected code first squares
; the input with one v_mul_f32 and then divides 1.0 by that square using the
; v_div_scale_f32 / v_rcp_f32 / fma / v_div_fmas_f32 / v_div_fixup_f32
; sequence. hawaii and fiji are checked with separate prefixes because their
; instruction order and register assignment differ.
164define float @v_powi_neg2_f32(float %l) {
165; GFX7-LABEL: v_powi_neg2_f32:
166; GFX7:       ; %bb.0:
167; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
169; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
170; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
171; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
172; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
173; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
174; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
175; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
176; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
177; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
178; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
179; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
180; GFX7-NEXT:    s_setpc_b64 s[30:31]
181;
182; GFX8-LABEL: v_powi_neg2_f32:
183; GFX8:       ; %bb.0:
184; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
186; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
187; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
188; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
189; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
190; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
191; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
192; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
193; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
194; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
195; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
196; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
197; GFX8-NEXT:    s_setpc_b64 s[30:31]
198;
199; GFX11-LABEL: v_powi_neg2_f32:
200; GFX11:       ; %bb.0:
201; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
203; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
204; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
205; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
206; GFX11-NEXT:    s_waitcnt_depctr 0xfff
207; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
208; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
209; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
210; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
211; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
212; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
213; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
214; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
215; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
216; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
217; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
218; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
219; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
220; GFX11-NEXT:    s_setpc_b64 s[30:31]
221  %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
222  ret float %res
223}
224
; powi on float with the constant exponent 4. The expected code is two
; back-to-back squarings (v_mul_f32 v0, v0, v0) on every tested target.
225define float @v_powi_4_f32(float %l) {
226; GFX78-LABEL: v_powi_4_f32:
227; GFX78:       ; %bb.0:
228; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
230; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
231; GFX78-NEXT:    s_setpc_b64 s[30:31]
232;
233; GFX11-LABEL: v_powi_4_f32:
234; GFX11:       ; %bb.0:
235; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
237; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
238; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
239; GFX11-NEXT:    s_setpc_b64 s[30:31]
240  %res = call float @llvm.powi.f32.i32(float %l, i32 4)
241  ret float %res
242}
243
; powi on float with the constant exponent 8. The expected code is three
; successive squarings (v_mul_f32 v0, v0, v0) on every tested target.
244define float @v_powi_8_f32(float %l) {
245; GFX78-LABEL: v_powi_8_f32:
246; GFX78:       ; %bb.0:
247; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
249; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
250; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
251; GFX78-NEXT:    s_setpc_b64 s[30:31]
252;
253; GFX11-LABEL: v_powi_8_f32:
254; GFX11:       ; %bb.0:
255; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
257; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
258; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
259; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
260; GFX11-NEXT:    s_setpc_b64 s[30:31]
261  %res = call float @llvm.powi.f32.i32(float %l, i32 8)
262  ret float %res
263}
264
; powi on float with the constant exponent 16. The expected code is four
; successive squarings (v_mul_f32 v0, v0, v0) on every tested target.
265define float @v_powi_16_f32(float %l) {
266; GFX78-LABEL: v_powi_16_f32:
267; GFX78:       ; %bb.0:
268; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
270; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
271; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
272; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
273; GFX78-NEXT:    s_setpc_b64 s[30:31]
274;
275; GFX11-LABEL: v_powi_16_f32:
276; GFX11:       ; %bb.0:
277; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
278; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
279; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
280; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
281; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
282; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
283; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
284; GFX11-NEXT:    s_setpc_b64 s[30:31]
285  %res = call float @llvm.powi.f32.i32(float %l, i32 16)
286  ret float %res
287}
288
; powi on float with the constant exponent 128. The expected code is seven
; successive squarings (v_mul_f32 v0, v0, v0) on every tested target.
289define float @v_powi_128_f32(float %l) {
290; GFX78-LABEL: v_powi_128_f32:
291; GFX78:       ; %bb.0:
292; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
294; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
295; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
296; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
297; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
298; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
299; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
300; GFX78-NEXT:    s_setpc_b64 s[30:31]
301;
302; GFX11-LABEL: v_powi_128_f32:
303; GFX11:       ; %bb.0:
304; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
306; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
307; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
308; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
309; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
310; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
311; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
312; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
313; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
314; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
315; GFX11-NEXT:    s_setpc_b64 s[30:31]
316  %res = call float @llvm.powi.f32.i32(float %l, i32 128)
317  ret float %res
318}
319
; powi on float with the constant exponent -128. The expected code performs
; seven successive squarings (v_mul_f32 v0, v0, v0) and then divides 1.0 by
; the result using the v_div_scale_f32 / v_rcp_f32 / fma / v_div_fmas_f32 /
; v_div_fixup_f32 sequence. hawaii and fiji are checked with separate prefixes
; because their instruction order and register assignment differ in the
; division part.
320define float @v_powi_neg128_f32(float %l) {
321; GFX7-LABEL: v_powi_neg128_f32:
322; GFX7:       ; %bb.0:
323; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
325; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
326; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
327; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
328; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
329; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
330; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
331; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
332; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
333; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
334; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
335; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
336; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
337; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
338; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
339; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
340; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
341; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
342; GFX7-NEXT:    s_setpc_b64 s[30:31]
343;
344; GFX8-LABEL: v_powi_neg128_f32:
345; GFX8:       ; %bb.0:
346; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
348; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
349; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
350; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
351; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
352; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
353; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
354; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
355; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
356; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
357; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
358; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
359; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
360; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
361; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
362; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
363; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
364; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
365; GFX8-NEXT:    s_setpc_b64 s[30:31]
366;
367; GFX11-LABEL: v_powi_neg128_f32:
368; GFX11:       ; %bb.0:
369; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
371; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
372; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
373; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
374; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
375; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
376; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
377; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
378; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
379; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
380; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
381; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
382; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
383; GFX11-NEXT:    s_waitcnt_depctr 0xfff
384; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
385; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
386; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
387; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
388; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
389; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
390; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
391; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
392; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
393; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
394; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
395; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
396; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
397; GFX11-NEXT:    s_setpc_b64 s[30:31]
398  %res = call float @llvm.powi.f32.i32(float %l, i32 -128)
399  ret float %res
400}
401
402; FIXME: powi on f64 is broken, so the f64 test below is kept commented out.
403; define double @v_powi_f64(double %l, i32 %r) {
404;   %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
405;   ret double %res
406; }
407
; Declarations of the powi intrinsics, all sharing attribute group #0. In the
; visible part of this file only the f16 and f32 variants are called; the f64
; variant is referenced only by the commented-out f64 test.
408declare half @llvm.powi.f16.i32(half, i32) #0
409declare float @llvm.powi.f32.i32(float, i32) #0
410declare double @llvm.powi.f64.i32(double, i32) #0
411
412attributes #0 = { nounwind readnone speculatable willreturn }
413