xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll (revision fd6f8b3ce33cc2cbe378f8f1b391f3f40fa7bd54)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX78,GFX7 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GFX78,GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
5; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
6
; powi on half with a variable i32 exponent. The half value is passed in and
; returned as i16, and is bitcast on each side of the intrinsic call.
; The GFX7 checks expect the whole computation in f32 (v_log_f32,
; v_mul_legacy_f32, v_exp_f32) with conversions at both ends. The GFX8 and
; GFX11 checks expect v_log_f16 and v_exp_f16, with only the multiply by the
; converted exponent done in f32.
7define i16 @v_powi_f16(i16 %l, i32 %r) {
8; GFX7-LABEL: v_powi_f16:
9; GFX7:       ; %bb.0:
10; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
12; GFX7-NEXT:    v_cvt_f32_i32_e32 v1, v1
13; GFX7-NEXT:    v_mov_b32_e32 v2, 0xc2fc0000
14; GFX7-NEXT:    v_mov_b32_e32 v3, 0x42800000
15; GFX7-NEXT:    v_log_f32_e32 v0, v0
16; GFX7-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
17; GFX7-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
18; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v3, vcc
19; GFX7-NEXT:    v_add_f32_e32 v0, v0, v1
20; GFX7-NEXT:    v_exp_f32_e32 v0, v0
21; GFX7-NEXT:    v_not_b32_e32 v1, 63
22; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
23; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
24; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
25; GFX7-NEXT:    s_setpc_b64 s[30:31]
26;
27; GFX8-LABEL: v_powi_f16:
28; GFX8:       ; %bb.0:
29; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v1
31; GFX8-NEXT:    v_log_f16_e32 v0, v0
32; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v1
33; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
34; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
35; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
36; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
37; GFX8-NEXT:    v_exp_f16_e32 v0, v0
38; GFX8-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX11-TRUE16-LABEL: v_powi_f16:
41; GFX11-TRUE16:       ; %bb.0:
42; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX11-TRUE16-NEXT:    v_log_f16_e32 v0.l, v0.l
44; GFX11-TRUE16-NEXT:    v_cvt_f32_i32_e32 v1, v1
45; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
46; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.h, v1
47; GFX11-TRUE16-NEXT:    s_waitcnt_depctr 0xfff
48; GFX11-TRUE16-NEXT:    v_cvt_f32_f16_e32 v1, v0.l
49; GFX11-TRUE16-NEXT:    v_cvt_f32_f16_e32 v0, v0.h
50; GFX11-TRUE16-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
51; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
52; GFX11-TRUE16-NEXT:    v_cvt_f16_f32_e32 v0.l, v0
53; GFX11-TRUE16-NEXT:    v_exp_f16_e32 v0.l, v0.l
54; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
55;
56; GFX11-FAKE16-LABEL: v_powi_f16:
57; GFX11-FAKE16:       ; %bb.0:
58; GFX11-FAKE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX11-FAKE16-NEXT:    v_log_f16_e32 v0, v0
60; GFX11-FAKE16-NEXT:    v_cvt_f32_i32_e32 v1, v1
61; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
62; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v1, v1
63; GFX11-FAKE16-NEXT:    s_waitcnt_depctr 0xfff
64; GFX11-FAKE16-NEXT:    v_cvt_f32_f16_e32 v0, v0
65; GFX11-FAKE16-NEXT:    v_cvt_f32_f16_e32 v1, v1
66; GFX11-FAKE16-NEXT:    v_mul_dx9_zero_f32_e32 v0, v0, v1
67; GFX11-FAKE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
68; GFX11-FAKE16-NEXT:    v_cvt_f16_f32_e32 v0, v0
69; GFX11-FAKE16-NEXT:    v_exp_f16_e32 v0, v0
70; GFX11-FAKE16-NEXT:    s_setpc_b64 s[30:31]
71  %l.cast = bitcast i16 %l to half
72  %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
73  %res.cast = bitcast half %res to i16
74  ret i16 %res.cast
75}
76
; powi on float with a variable i32 exponent. Every check block expects
; v_log_f32, a multiply by the exponent converted to float, and v_exp_f32,
; with v_ldexp_f32 applied to the input before the log and to the result
; after the exp.
77define float @v_powi_f32(float %l, i32 %r) {
78; GFX7-LABEL: v_powi_f32:
79; GFX7:       ; %bb.0:
80; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
81; GFX7-NEXT:    v_mov_b32_e32 v2, 0x800000
82; GFX7-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
83; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
84; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
85; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v2
86; GFX7-NEXT:    v_log_f32_e32 v0, v0
87; GFX7-NEXT:    v_cvt_f32_i32_e32 v1, v1
88; GFX7-NEXT:    v_mov_b32_e32 v2, 0x42000000
89; GFX7-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
90; GFX7-NEXT:    v_sub_f32_e32 v0, v0, v2
91; GFX7-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
92; GFX7-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
93; GFX7-NEXT:    v_mov_b32_e32 v2, 0x42800000
94; GFX7-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
95; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
96; GFX7-NEXT:    v_add_f32_e32 v0, v0, v1
97; GFX7-NEXT:    v_exp_f32_e32 v0, v0
98; GFX7-NEXT:    v_not_b32_e32 v1, 63
99; GFX7-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
100; GFX7-NEXT:    v_ldexp_f32_e32 v0, v0, v1
101; GFX7-NEXT:    s_setpc_b64 s[30:31]
102;
103; GFX8-LABEL: v_powi_f32:
104; GFX8:       ; %bb.0:
105; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX8-NEXT:    v_mov_b32_e32 v2, 0x800000
107; GFX8-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v2
108; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
109; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
110; GFX8-NEXT:    v_ldexp_f32 v0, v0, v2
111; GFX8-NEXT:    v_log_f32_e32 v0, v0
112; GFX8-NEXT:    v_cvt_f32_i32_e32 v1, v1
113; GFX8-NEXT:    v_mov_b32_e32 v2, 0x42000000
114; GFX8-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
115; GFX8-NEXT:    v_sub_f32_e32 v0, v0, v2
116; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
117; GFX8-NEXT:    v_mov_b32_e32 v1, 0xc2fc0000
118; GFX8-NEXT:    v_mov_b32_e32 v2, 0x42800000
119; GFX8-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
120; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v2, vcc
121; GFX8-NEXT:    v_add_f32_e32 v0, v0, v1
122; GFX8-NEXT:    v_exp_f32_e32 v0, v0
123; GFX8-NEXT:    v_not_b32_e32 v1, 63
124; GFX8-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
125; GFX8-NEXT:    v_ldexp_f32 v0, v0, v1
126; GFX8-NEXT:    s_setpc_b64 s[30:31]
127;
128; GFX11-LABEL: v_powi_f32:
129; GFX11:       ; %bb.0:
130; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
132; GFX11-NEXT:    v_cvt_f32_i32_e32 v1, v1
133; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
134; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
135; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 5, v2
136; GFX11-NEXT:    v_ldexp_f32 v0, v0, v2
137; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
138; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
139; GFX11-NEXT:    v_log_f32_e32 v0, v0
140; GFX11-NEXT:    s_waitcnt_depctr 0xfff
141; GFX11-NEXT:    v_sub_f32_e32 v0, v0, v2
142; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v0, v1
143; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
144; GFX11-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
145; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
146; GFX11-NEXT:    v_add_f32_e32 v0, v0, v1
147; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
148; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
149; GFX11-NEXT:    v_exp_f32_e32 v0, v0
150; GFX11-NEXT:    s_waitcnt_depctr 0xfff
151; GFX11-NEXT:    v_ldexp_f32 v0, v0, v1
152; GFX11-NEXT:    s_setpc_b64 s[30:31]
153  %res = call float @llvm.powi.f32.i32(float %l, i32 %r)
154  ret float %res
155}
156
; powi on float with the constant exponent 0. All check blocks expect only a
; move of the constant 1.0 into v0 before the return.
157define float @v_powi_0_f32(float %l) {
158; GFX78-LABEL: v_powi_0_f32:
159; GFX78:       ; %bb.0:
160; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX78-NEXT:    v_mov_b32_e32 v0, 1.0
162; GFX78-NEXT:    s_setpc_b64 s[30:31]
163;
164; GFX11-LABEL: v_powi_0_f32:
165; GFX11:       ; %bb.0:
166; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
168; GFX11-NEXT:    s_setpc_b64 s[30:31]
169  %res = call float @llvm.powi.f32.i32(float %l, i32 0)
170  ret float %res
171}
172
; powi on float with the constant exponent 1. The checks expect no arithmetic
; at all: only the entry s_waitcnt followed by the return, leaving v0 as it
; was passed in.
173define float @v_powi_1_f32(float %l) {
174; GFX78-LABEL: v_powi_1_f32:
175; GFX78:       ; %bb.0:
176; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177; GFX78-NEXT:    s_setpc_b64 s[30:31]
178;
179; GFX11-LABEL: v_powi_1_f32:
180; GFX11:       ; %bb.0:
181; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX11-NEXT:    s_setpc_b64 s[30:31]
183  %res = call float @llvm.powi.f32.i32(float %l, i32 1)
184  ret float %res
185}
186
; powi on float with the constant exponent -1. The checks expect only a
; division-style sequence (v_div_scale_f32, v_rcp_f32, v_fma_f32 or
; v_fmac_f32, v_div_fmas_f32, v_div_fixup_f32) that takes the input and the
; constant 1.0 as operands.
187define float @v_powi_neg1_f32(float %l) {
188; GFX7-LABEL: v_powi_neg1_f32:
189; GFX7:       ; %bb.0:
190; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
192; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
193; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
194; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
195; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
196; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
197; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
198; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
199; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
200; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
201; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
202; GFX7-NEXT:    s_setpc_b64 s[30:31]
203;
204; GFX8-LABEL: v_powi_neg1_f32:
205; GFX8:       ; %bb.0:
206; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
207; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
208; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
209; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
210; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
211; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
212; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
213; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
214; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
215; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
216; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
217; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
218; GFX8-NEXT:    s_setpc_b64 s[30:31]
219;
220; GFX11-LABEL: v_powi_neg1_f32:
221; GFX11:       ; %bb.0:
222; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
224; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
225; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
226; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
227; GFX11-NEXT:    s_waitcnt_depctr 0xfff
228; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
229; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
230; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
231; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v2
232; GFX11-NEXT:    v_fma_f32 v5, -v1, v3, v4
233; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
234; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v2
235; GFX11-NEXT:    v_fma_f32 v1, -v1, v3, v4
236; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
237; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v3
238; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
239; GFX11-NEXT:    s_setpc_b64 s[30:31]
240  %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
241  ret float %res
242}
243
; powi on float with the constant exponent 2. All check blocks expect a single
; v_mul_f32 of v0 with itself.
244define float @v_powi_2_f32(float %l) {
245; GFX78-LABEL: v_powi_2_f32:
246; GFX78:       ; %bb.0:
247; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
249; GFX78-NEXT:    s_setpc_b64 s[30:31]
250;
251; GFX11-LABEL: v_powi_2_f32:
252; GFX11:       ; %bb.0:
253; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
255; GFX11-NEXT:    s_setpc_b64 s[30:31]
256  %res = call float @llvm.powi.f32.i32(float %l, i32 2)
257  ret float %res
258}
259
; powi on float with the constant exponent -2. The checks expect one
; v_mul_f32 of v0 with itself, followed by a division-style sequence
; (v_div_scale_f32 through v_div_fixup_f32) that takes that product and the
; constant 1.0 as operands.
260define float @v_powi_neg2_f32(float %l) {
261; GFX7-LABEL: v_powi_neg2_f32:
262; GFX7:       ; %bb.0:
263; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
265; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
266; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
267; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
268; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
269; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
270; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
271; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
272; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
273; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
274; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
275; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
276; GFX7-NEXT:    s_setpc_b64 s[30:31]
277;
278; GFX8-LABEL: v_powi_neg2_f32:
279; GFX8:       ; %bb.0:
280; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
282; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
283; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
284; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
285; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
286; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
287; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
288; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
289; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
290; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
291; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
292; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
293; GFX8-NEXT:    s_setpc_b64 s[30:31]
294;
295; GFX11-LABEL: v_powi_neg2_f32:
296; GFX11:       ; %bb.0:
297; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
299; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
300; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
301; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
302; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
303; GFX11-NEXT:    s_waitcnt_depctr 0xfff
304; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
305; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
306; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
307; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v2
308; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
309; GFX11-NEXT:    v_fma_f32 v5, -v1, v3, v4
310; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v2
311; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
312; GFX11-NEXT:    v_fma_f32 v1, -v1, v3, v4
313; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v3
314; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
315; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
316; GFX11-NEXT:    s_setpc_b64 s[30:31]
317  %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
318  ret float %res
319}
320
; powi on float with the constant exponent 4. The checks expect two
; successive v_mul_f32 of v0 with itself; the GFX11 block additionally has an
; s_delay_alu between them.
321define float @v_powi_4_f32(float %l) {
322; GFX78-LABEL: v_powi_4_f32:
323; GFX78:       ; %bb.0:
324; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
325; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
326; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
327; GFX78-NEXT:    s_setpc_b64 s[30:31]
328;
329; GFX11-LABEL: v_powi_4_f32:
330; GFX11:       ; %bb.0:
331; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
333; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
334; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
335; GFX11-NEXT:    s_setpc_b64 s[30:31]
336  %res = call float @llvm.powi.f32.i32(float %l, i32 4)
337  ret float %res
338}
339
; powi on float with the constant exponent 8. The checks expect three
; successive v_mul_f32 of v0 with itself; the GFX11 block additionally
; contains an s_delay_alu.
340define float @v_powi_8_f32(float %l) {
341; GFX78-LABEL: v_powi_8_f32:
342; GFX78:       ; %bb.0:
343; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
345; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
346; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
347; GFX78-NEXT:    s_setpc_b64 s[30:31]
348;
349; GFX11-LABEL: v_powi_8_f32:
350; GFX11:       ; %bb.0:
351; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
353; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
354; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
355; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
356; GFX11-NEXT:    s_setpc_b64 s[30:31]
357  %res = call float @llvm.powi.f32.i32(float %l, i32 8)
358  ret float %res
359}
360
; powi on float with the constant exponent 16. The checks expect four
; successive v_mul_f32 of v0 with itself; the GFX11 block additionally
; contains s_delay_alu instructions.
361define float @v_powi_16_f32(float %l) {
362; GFX78-LABEL: v_powi_16_f32:
363; GFX78:       ; %bb.0:
364; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
366; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
367; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
368; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
369; GFX78-NEXT:    s_setpc_b64 s[30:31]
370;
371; GFX11-LABEL: v_powi_16_f32:
372; GFX11:       ; %bb.0:
373; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
375; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
376; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
377; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
378; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
379; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
380; GFX11-NEXT:    s_setpc_b64 s[30:31]
381  %res = call float @llvm.powi.f32.i32(float %l, i32 16)
382  ret float %res
383}
384
; powi on float with the constant exponent 128. The checks expect seven
; successive v_mul_f32 of v0 with itself; the GFX11 block additionally
; contains s_delay_alu instructions.
385define float @v_powi_128_f32(float %l) {
386; GFX78-LABEL: v_powi_128_f32:
387; GFX78:       ; %bb.0:
388; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
390; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
391; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
392; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
393; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
394; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
395; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
396; GFX78-NEXT:    s_setpc_b64 s[30:31]
397;
398; GFX11-LABEL: v_powi_128_f32:
399; GFX11:       ; %bb.0:
400; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
402; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
403; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
404; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
405; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
406; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
407; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
408; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
409; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
410; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
411; GFX11-NEXT:    s_setpc_b64 s[30:31]
412  %res = call float @llvm.powi.f32.i32(float %l, i32 128)
413  ret float %res
414}
415
; powi on float with the constant exponent -128. The checks expect seven
; successive v_mul_f32 of v0 with itself, followed by a division-style
; sequence (v_div_scale_f32 through v_div_fixup_f32) that takes that product
; and the constant 1.0 as operands.
416define float @v_powi_neg128_f32(float %l) {
417; GFX7-LABEL: v_powi_neg128_f32:
418; GFX7:       ; %bb.0:
419; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
421; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
422; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
423; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
424; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
425; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
426; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
427; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
428; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
429; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
430; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
431; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
432; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
433; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
434; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
435; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
436; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
437; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
438; GFX7-NEXT:    s_setpc_b64 s[30:31]
439;
440; GFX8-LABEL: v_powi_neg128_f32:
441; GFX8:       ; %bb.0:
442; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
444; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
445; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
446; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
447; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
448; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
449; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
450; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
451; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
452; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
453; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
454; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
455; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
456; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
457; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
458; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
459; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
460; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
461; GFX8-NEXT:    s_setpc_b64 s[30:31]
462;
463; GFX11-LABEL: v_powi_neg128_f32:
464; GFX11:       ; %bb.0:
465; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
467; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
468; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
469; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
470; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
471; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
472; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
473; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
474; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
475; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
476; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
477; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
478; GFX11-NEXT:    v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
479; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
480; GFX11-NEXT:    s_waitcnt_depctr 0xfff
481; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
482; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
483; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
484; GFX11-NEXT:    v_mul_f32_e32 v3, v4, v2
485; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
486; GFX11-NEXT:    v_fma_f32 v5, -v1, v3, v4
487; GFX11-NEXT:    v_fmac_f32_e32 v3, v5, v2
488; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
489; GFX11-NEXT:    v_fma_f32 v1, -v1, v3, v4
490; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v3
491; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
492; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
493; GFX11-NEXT:    s_setpc_b64 s[30:31]
494  %res = call float @llvm.powi.f32.i32(float %l, i32 -128)
495  ret float %res
496}
497
498; FIXME: f64 is broken, so the test below is left commented out.
499; define double @v_powi_f64(double %l, i32 %r) {
500;   %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
501;   ret double %res
502; }
503
; Declarations of the powi intrinsic overloads, all sharing attribute group
; #0. In the visible part of this file the f64 overload is referenced only by
; the commented-out v_powi_f64 test.
504declare half @llvm.powi.f16.i32(half, i32) #0
505declare float @llvm.powi.f32.i32(float, i32) #0
506declare double @llvm.powi.f64.i32(double, i32) #0
507
; Attribute group applied to the three intrinsic declarations above.
508attributes #0 = { nounwind readnone speculatable willreturn }
509