xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fpow.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
8
; Scalar f32 llvm.pow with plain register operands.
; On every checked target the call is expanded inline into three VALU steps:
; v_log_f32 on %x (v0), a legacy multiply of that result by %y (v1), and
; v_exp_f32 on the product. The gfx90a output prints the multiply without an
; _e32 suffix, and the gfx1100 output names it v_mul_dx9_zero_f32 and adds
; s_waitcnt_depctr / s_delay_alu between the steps.
9define float @v_pow_f32(float %x, float %y) {
10; GFX6-LABEL: v_pow_f32:
11; GFX6:       ; %bb.0:
12; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX6-NEXT:    v_log_f32_e32 v0, v0
14; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
15; GFX6-NEXT:    v_exp_f32_e32 v0, v0
16; GFX6-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX8-LABEL: v_pow_f32:
19; GFX8:       ; %bb.0:
20; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX8-NEXT:    v_log_f32_e32 v0, v0
22; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
23; GFX8-NEXT:    v_exp_f32_e32 v0, v0
24; GFX8-NEXT:    s_setpc_b64 s[30:31]
25;
26; GFX9-LABEL: v_pow_f32:
27; GFX9:       ; %bb.0:
28; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29; GFX9-NEXT:    v_log_f32_e32 v0, v0
30; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
31; GFX9-NEXT:    v_exp_f32_e32 v0, v0
32; GFX9-NEXT:    s_setpc_b64 s[30:31]
33;
34; GFX90A-LABEL: v_pow_f32:
35; GFX90A:       ; %bb.0:
36; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX90A-NEXT:    v_log_f32_e32 v0, v0
38; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
39; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
40; GFX90A-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX10-LABEL: v_pow_f32:
43; GFX10:       ; %bb.0:
44; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX10-NEXT:    v_log_f32_e32 v0, v0
46; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
47; GFX10-NEXT:    v_exp_f32_e32 v0, v0
48; GFX10-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX11-LABEL: v_pow_f32:
51; GFX11:       ; %bb.0:
52; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX11-NEXT:    v_log_f32_e32 v0, v0
54; GFX11-NEXT:    s_waitcnt_depctr 0xfff
55; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
56; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
57; GFX11-NEXT:    v_exp_f32_e32 v0, v0
58; GFX11-NEXT:    s_setpc_b64 s[30:31]
59  %pow = call float @llvm.pow.f32(float %x, float %y)
60  ret float %pow
61}
62
; Two-element f32 vector llvm.pow.
; The expected code handles each element separately with the same
; log / legacy-multiply / exp sequence as the scalar f32 case: element 0 uses
; v0 (base) and v2 (exponent), element 1 uses v1 and v3. In the gfx1100
; output the two multiplies are emitted as one dual-issue
; v_dual_mul_dx9_zero_f32 instruction.
63define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
64; GFX6-LABEL: v_pow_v2f32:
65; GFX6:       ; %bb.0:
66; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; GFX6-NEXT:    v_log_f32_e32 v0, v0
68; GFX6-NEXT:    v_log_f32_e32 v1, v1
69; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
70; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
71; GFX6-NEXT:    v_exp_f32_e32 v0, v0
72; GFX6-NEXT:    v_exp_f32_e32 v1, v1
73; GFX6-NEXT:    s_setpc_b64 s[30:31]
74;
75; GFX8-LABEL: v_pow_v2f32:
76; GFX8:       ; %bb.0:
77; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX8-NEXT:    v_log_f32_e32 v0, v0
79; GFX8-NEXT:    v_log_f32_e32 v1, v1
80; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
81; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
82; GFX8-NEXT:    v_exp_f32_e32 v0, v0
83; GFX8-NEXT:    v_exp_f32_e32 v1, v1
84; GFX8-NEXT:    s_setpc_b64 s[30:31]
85;
86; GFX9-LABEL: v_pow_v2f32:
87; GFX9:       ; %bb.0:
88; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX9-NEXT:    v_log_f32_e32 v0, v0
90; GFX9-NEXT:    v_log_f32_e32 v1, v1
91; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
92; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
93; GFX9-NEXT:    v_exp_f32_e32 v0, v0
94; GFX9-NEXT:    v_exp_f32_e32 v1, v1
95; GFX9-NEXT:    s_setpc_b64 s[30:31]
96;
97; GFX90A-LABEL: v_pow_v2f32:
98; GFX90A:       ; %bb.0:
99; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX90A-NEXT:    v_log_f32_e32 v0, v0
101; GFX90A-NEXT:    v_log_f32_e32 v1, v1
102; GFX90A-NEXT:    v_mul_legacy_f32 v0, v2, v0
103; GFX90A-NEXT:    v_mul_legacy_f32 v1, v3, v1
104; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
105; GFX90A-NEXT:    v_exp_f32_e32 v1, v1
106; GFX90A-NEXT:    s_setpc_b64 s[30:31]
107;
108; GFX10-LABEL: v_pow_v2f32:
109; GFX10:       ; %bb.0:
110; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111; GFX10-NEXT:    v_log_f32_e32 v0, v0
112; GFX10-NEXT:    v_log_f32_e32 v1, v1
113; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
114; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
115; GFX10-NEXT:    v_exp_f32_e32 v0, v0
116; GFX10-NEXT:    v_exp_f32_e32 v1, v1
117; GFX10-NEXT:    s_setpc_b64 s[30:31]
118;
119; GFX11-LABEL: v_pow_v2f32:
120; GFX11:       ; %bb.0:
121; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122; GFX11-NEXT:    v_log_f32_e32 v0, v0
123; GFX11-NEXT:    v_log_f32_e32 v1, v1
124; GFX11-NEXT:    s_waitcnt_depctr 0xfff
125; GFX11-NEXT:    v_dual_mul_dx9_zero_f32 v0, v2, v0 :: v_dual_mul_dx9_zero_f32 v1, v3, v1
126; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
127; GFX11-NEXT:    v_exp_f32_e32 v0, v0
128; GFX11-NEXT:    v_exp_f32_e32 v1, v1
129; GFX11-NEXT:    s_setpc_b64 s[30:31]
130  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
131  ret <2 x float> %pow
132}
133
; Scalar f16 llvm.pow.
; No target in this file computes pow in f16 directly; the expected code
; always runs the f32 log / legacy-multiply / exp sequence.
;  - tahiti: both operands are first narrowed with v_cvt_f16_f32 and widened
;    again with v_cvt_f32_f16, and the exp result is returned with no
;    conversion after it.
;  - fiji, gfx900, gfx90a, gfx1010, gfx1100: operands are widened with
;    v_cvt_f32_f16 and the exp result is narrowed with v_cvt_f16_f32.
133define half @v_pow_f16(half %x, half %y) {
134; GFX6-LABEL: v_pow_f16:
135; GFX6:       ; %bb.0:
136; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
138; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
139; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
140; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
141; GFX6-NEXT:    v_log_f32_e32 v0, v0
142; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
143; GFX6-NEXT:    v_exp_f32_e32 v0, v0
144; GFX6-NEXT:    s_setpc_b64 s[30:31]
145;
146; GFX8-LABEL: v_pow_f16:
147; GFX8:       ; %bb.0:
148; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
150; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
151; GFX8-NEXT:    v_log_f32_e32 v0, v0
152; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
153; GFX8-NEXT:    v_exp_f32_e32 v0, v0
154; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
155; GFX8-NEXT:    s_setpc_b64 s[30:31]
156;
157; GFX9-LABEL: v_pow_f16:
158; GFX9:       ; %bb.0:
159; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
161; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
162; GFX9-NEXT:    v_log_f32_e32 v0, v0
163; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
164; GFX9-NEXT:    v_exp_f32_e32 v0, v0
165; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
166; GFX9-NEXT:    s_setpc_b64 s[30:31]
167;
168; GFX90A-LABEL: v_pow_f16:
169; GFX90A:       ; %bb.0:
170; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
172; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
173; GFX90A-NEXT:    v_log_f32_e32 v0, v0
174; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
175; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
176; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
177; GFX90A-NEXT:    s_setpc_b64 s[30:31]
178;
179; GFX10-LABEL: v_pow_f16:
180; GFX10:       ; %bb.0:
181; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
183; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
184; GFX10-NEXT:    v_log_f32_e32 v0, v0
185; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
186; GFX10-NEXT:    v_exp_f32_e32 v0, v0
187; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
188; GFX10-NEXT:    s_setpc_b64 s[30:31]
189;
190; GFX11-LABEL: v_pow_f16:
191; GFX11:       ; %bb.0:
192; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
194; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
195; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
196; GFX11-NEXT:    v_log_f32_e32 v0, v0
197; GFX11-NEXT:    s_waitcnt_depctr 0xfff
198; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
199; GFX11-NEXT:    v_exp_f32_e32 v0, v0
200; GFX11-NEXT:    s_waitcnt_depctr 0xfff
201; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
202; GFX11-NEXT:    s_setpc_b64 s[30:31]
203  %pow = call half @llvm.pow.f16(half %x, half %y)
204  ret half %pow
205}
207
; Two-element f16 vector llvm.pow with unmodified operands.
; Each element goes through the f32 log / legacy-multiply / exp sequence;
; the targets differ only in how the halves are unpacked and repacked:
;  - tahiti: the four elements arrive in v0..v3, each is narrowed with
;    v_cvt_f16_f32 and widened again, and the two results are left in v0/v1.
;  - fiji: the high halves are read with SDWA WORD_1 source selects; the high
;    result is written to WORD_1 via SDWA and merged with v_or_b32.
;  - gfx900, gfx90a, gfx1010: same SDWA reads, results merged with
;    v_pack_b32_f16.
;  - gfx1100: the high halves are extracted with v_lshrrev_b32 by 16 (no SDWA
;    in the expected output) and the results merged with v_pack_b32_f16.
208define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
209; GFX6-LABEL: v_pow_v2f16:
210; GFX6:       ; %bb.0:
211; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
213; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
214; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
215; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
216; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
217; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
218; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
219; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
220; GFX6-NEXT:    v_log_f32_e32 v0, v0
221; GFX6-NEXT:    v_log_f32_e32 v1, v1
222; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
223; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
224; GFX6-NEXT:    v_exp_f32_e32 v0, v0
225; GFX6-NEXT:    v_exp_f32_e32 v1, v1
226; GFX6-NEXT:    s_setpc_b64 s[30:31]
227;
228; GFX8-LABEL: v_pow_v2f16:
229; GFX8:       ; %bb.0:
230; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
232; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
233; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
234; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
235; GFX8-NEXT:    v_log_f32_e32 v2, v2
236; GFX8-NEXT:    v_log_f32_e32 v0, v0
237; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
238; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
239; GFX8-NEXT:    v_exp_f32_e32 v1, v2
240; GFX8-NEXT:    v_exp_f32_e32 v0, v0
241; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
242; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
243; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
244; GFX8-NEXT:    s_setpc_b64 s[30:31]
245;
246; GFX9-LABEL: v_pow_v2f16:
247; GFX9:       ; %bb.0:
248; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
250; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
251; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
252; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
253; GFX9-NEXT:    v_log_f32_e32 v2, v2
254; GFX9-NEXT:    v_log_f32_e32 v0, v0
255; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
256; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
257; GFX9-NEXT:    v_exp_f32_e32 v1, v2
258; GFX9-NEXT:    v_exp_f32_e32 v0, v0
259; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
260; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
261; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
262; GFX9-NEXT:    s_setpc_b64 s[30:31]
263;
264; GFX90A-LABEL: v_pow_v2f16:
265; GFX90A:       ; %bb.0:
266; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
268; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
269; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
270; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
271; GFX90A-NEXT:    v_log_f32_e32 v2, v2
272; GFX90A-NEXT:    v_log_f32_e32 v0, v0
273; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
274; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
275; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
276; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
277; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
278; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
279; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
280; GFX90A-NEXT:    s_setpc_b64 s[30:31]
281;
282; GFX10-LABEL: v_pow_v2f16:
283; GFX10:       ; %bb.0:
284; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
286; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
287; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
288; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
289; GFX10-NEXT:    v_log_f32_e32 v2, v2
290; GFX10-NEXT:    v_log_f32_e32 v0, v0
291; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
292; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
293; GFX10-NEXT:    v_exp_f32_e32 v1, v2
294; GFX10-NEXT:    v_exp_f32_e32 v0, v0
295; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
296; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
297; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
298; GFX10-NEXT:    s_setpc_b64 s[30:31]
299;
300; GFX11-LABEL: v_pow_v2f16:
301; GFX11:       ; %bb.0:
302; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
304; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
305; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
306; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
307; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
308; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
309; GFX11-NEXT:    v_log_f32_e32 v0, v0
310; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
311; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v3
312; GFX11-NEXT:    v_log_f32_e32 v2, v2
313; GFX11-NEXT:    s_waitcnt_depctr 0xfff
314; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
315; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
316; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
317; GFX11-NEXT:    v_exp_f32_e32 v0, v0
318; GFX11-NEXT:    v_exp_f32_e32 v1, v2
319; GFX11-NEXT:    s_waitcnt_depctr 0xfff
320; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
321; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
322; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
323; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
324; GFX11-NEXT:    s_setpc_b64 s[30:31]
325  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
326  ret <2 x half> %pow
327}
328
; Two-element f16 vector llvm.pow where the base (%x) is negated first.
; The checks record how the fneg reaches the f16-to-f32 conversions:
;  - tahiti: the two base halves are packed into one register (shift left by
;    16, then or), both sign bits are flipped with a single v_xor_b32 against
;    0x80008000, and the halves are split apart again before converting.
;  - fiji, gfx900, gfx90a, gfx1010: no separate negate instruction; the
;    conversions of the base read -v0 as a source modifier (e64 / SDWA forms).
;  - gfx1100: same folding, applied to v0 and to the shifted-out high half v2.
; The exponent (%y) conversions carry no modifier on any target.
329define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
330; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
331; GFX6:       ; %bb.0:
332; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
334; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
335; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
336; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
337; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
338; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
339; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
340; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
341; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
342; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
343; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
344; GFX6-NEXT:    v_log_f32_e32 v3, v3
345; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
346; GFX6-NEXT:    v_log_f32_e32 v4, v0
347; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
348; GFX6-NEXT:    v_exp_f32_e32 v0, v0
349; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
350; GFX6-NEXT:    v_exp_f32_e32 v1, v1
351; GFX6-NEXT:    s_setpc_b64 s[30:31]
352;
353; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
354; GFX8:       ; %bb.0:
355; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
356; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
357; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
358; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
359; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
360; GFX8-NEXT:    v_log_f32_e32 v2, v2
361; GFX8-NEXT:    v_log_f32_e32 v0, v0
362; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
363; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
364; GFX8-NEXT:    v_exp_f32_e32 v1, v2
365; GFX8-NEXT:    v_exp_f32_e32 v0, v0
366; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
367; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
368; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
369; GFX8-NEXT:    s_setpc_b64 s[30:31]
370;
371; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
372; GFX9:       ; %bb.0:
373; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
375; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
376; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
377; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
378; GFX9-NEXT:    v_log_f32_e32 v2, v2
379; GFX9-NEXT:    v_log_f32_e32 v0, v0
380; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
381; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
382; GFX9-NEXT:    v_exp_f32_e32 v1, v2
383; GFX9-NEXT:    v_exp_f32_e32 v0, v0
384; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
385; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
386; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
387; GFX9-NEXT:    s_setpc_b64 s[30:31]
388;
389; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
390; GFX90A:       ; %bb.0:
391; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
393; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
394; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
395; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
396; GFX90A-NEXT:    v_log_f32_e32 v2, v2
397; GFX90A-NEXT:    v_log_f32_e32 v0, v0
398; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
399; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
400; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
401; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
402; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
403; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
404; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
405; GFX90A-NEXT:    s_setpc_b64 s[30:31]
406;
407; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
408; GFX10:       ; %bb.0:
409; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
411; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
412; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
413; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
414; GFX10-NEXT:    v_log_f32_e32 v2, v2
415; GFX10-NEXT:    v_log_f32_e32 v0, v0
416; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
417; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
418; GFX10-NEXT:    v_exp_f32_e32 v1, v2
419; GFX10-NEXT:    v_exp_f32_e32 v0, v0
420; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
421; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
422; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
423; GFX10-NEXT:    s_setpc_b64 s[30:31]
424;
425; GFX11-LABEL: v_pow_v2f16_fneg_lhs:
426; GFX11:       ; %bb.0:
427; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
429; GFX11-NEXT:    v_cvt_f32_f16_e64 v0, -v0
430; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
431; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
432; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
433; GFX11-NEXT:    v_cvt_f32_f16_e64 v2, -v2
434; GFX11-NEXT:    v_log_f32_e32 v0, v0
435; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
436; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v3
437; GFX11-NEXT:    v_log_f32_e32 v2, v2
438; GFX11-NEXT:    s_waitcnt_depctr 0xfff
439; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
440; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
441; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
442; GFX11-NEXT:    v_exp_f32_e32 v0, v0
443; GFX11-NEXT:    v_exp_f32_e32 v1, v2
444; GFX11-NEXT:    s_waitcnt_depctr 0xfff
445; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
446; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
447; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
448; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
449; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Only the base is negated; %y is passed to the intrinsic unchanged.
450  %x.fneg = fneg <2 x half> %x
451  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
452  ret <2 x half> %pow
453}
454
; Two-element f16 vector llvm.pow where the exponent (%y) is negated first.
; This is the counterpart of the negated-base test, with the fneg on the
; other operand:
;  - tahiti: the two exponent halves (v2, v3) are packed, both sign bits are
;    flipped with v_xor_b32 against 0x80008000, and the halves are split
;    again before converting to f32.
;  - fiji, gfx900, gfx90a, gfx1010: the exponent conversions read -v1 as a
;    source modifier (e64 / SDWA forms); the base conversions are unmodified.
;  - gfx1100: the modifier appears on v1 and on the shifted-out high half v3.
455define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
456; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
457; GFX6:       ; %bb.0:
458; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
460; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
461; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
462; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
463; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
464; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
465; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
466; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
467; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
468; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
469; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
470; GFX6-NEXT:    v_log_f32_e32 v0, v0
471; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
472; GFX6-NEXT:    v_log_f32_e32 v1, v1
473; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
474; GFX6-NEXT:    v_exp_f32_e32 v0, v0
475; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
476; GFX6-NEXT:    v_exp_f32_e32 v1, v1
477; GFX6-NEXT:    s_setpc_b64 s[30:31]
478;
479; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
480; GFX8:       ; %bb.0:
481; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
483; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
484; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
485; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
486; GFX8-NEXT:    v_log_f32_e32 v2, v2
487; GFX8-NEXT:    v_log_f32_e32 v0, v0
488; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
489; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
490; GFX8-NEXT:    v_exp_f32_e32 v1, v2
491; GFX8-NEXT:    v_exp_f32_e32 v0, v0
492; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
493; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
494; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
495; GFX8-NEXT:    s_setpc_b64 s[30:31]
496;
497; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
498; GFX9:       ; %bb.0:
499; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
500; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
501; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
502; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
503; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
504; GFX9-NEXT:    v_log_f32_e32 v2, v2
505; GFX9-NEXT:    v_log_f32_e32 v0, v0
506; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
507; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
508; GFX9-NEXT:    v_exp_f32_e32 v1, v2
509; GFX9-NEXT:    v_exp_f32_e32 v0, v0
510; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
511; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
512; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
513; GFX9-NEXT:    s_setpc_b64 s[30:31]
514;
515; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
516; GFX90A:       ; %bb.0:
517; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
519; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
520; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
521; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
522; GFX90A-NEXT:    v_log_f32_e32 v2, v2
523; GFX90A-NEXT:    v_log_f32_e32 v0, v0
524; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
525; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
526; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
527; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
528; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
529; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
530; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
531; GFX90A-NEXT:    s_setpc_b64 s[30:31]
532;
533; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
534; GFX10:       ; %bb.0:
535; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
537; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
538; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
539; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
540; GFX10-NEXT:    v_log_f32_e32 v2, v2
541; GFX10-NEXT:    v_log_f32_e32 v0, v0
542; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
543; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
544; GFX10-NEXT:    v_exp_f32_e32 v1, v2
545; GFX10-NEXT:    v_exp_f32_e32 v0, v0
546; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
547; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
548; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
549; GFX10-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX11-LABEL: v_pow_v2f16_fneg_rhs:
552; GFX11:       ; %bb.0:
553; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
555; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
556; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
557; GFX11-NEXT:    v_cvt_f32_f16_e64 v1, -v1
558; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
559; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
560; GFX11-NEXT:    v_log_f32_e32 v0, v0
561; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
562; GFX11-NEXT:    v_cvt_f32_f16_e64 v3, -v3
563; GFX11-NEXT:    v_log_f32_e32 v2, v2
564; GFX11-NEXT:    s_waitcnt_depctr 0xfff
565; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
566; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
567; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
568; GFX11-NEXT:    v_exp_f32_e32 v0, v0
569; GFX11-NEXT:    v_exp_f32_e32 v1, v2
570; GFX11-NEXT:    s_waitcnt_depctr 0xfff
571; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
572; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
573; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
574; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
575; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Only the exponent is negated; %x is passed to the intrinsic unchanged.
576  %y.fneg = fneg <2 x half> %y
577  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
578  ret <2 x half> %pow
579}
580
; Two-element f16 vector llvm.pow where both the base (%x) and the exponent
; (%y) are negated first.
;  - tahiti: each operand pair is packed into one register and its sign bits
;    flipped with v_xor_b32 against 0x80008000 (two xors in total), then
;    split again before the f32 conversions.
;  - fiji, gfx900, gfx90a, gfx1010: all four f16-to-f32 conversions carry a
;    negated source (-v0 / -v1, in e64 and SDWA forms).
;  - gfx1100: the negated source appears on v0, v1 and on the shifted-out
;    high halves v2, v3.
; The log / multiply / exp / repack tail matches the unnegated v2f16 test.
581define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
582; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
583; GFX6:       ; %bb.0:
584; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
586; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
587; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
588; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
589; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
590; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
591; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
592; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
593; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
594; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
595; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
596; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
597; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
598; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
599; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
600; GFX6-NEXT:    v_log_f32_e32 v0, v0
601; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
602; GFX6-NEXT:    v_log_f32_e32 v1, v1
603; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
604; GFX6-NEXT:    v_exp_f32_e32 v0, v0
605; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
606; GFX6-NEXT:    v_exp_f32_e32 v1, v1
607; GFX6-NEXT:    s_setpc_b64 s[30:31]
608;
609; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
610; GFX8:       ; %bb.0:
611; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
613; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
614; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
615; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
616; GFX8-NEXT:    v_log_f32_e32 v2, v2
617; GFX8-NEXT:    v_log_f32_e32 v0, v0
618; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
619; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
620; GFX8-NEXT:    v_exp_f32_e32 v1, v2
621; GFX8-NEXT:    v_exp_f32_e32 v0, v0
622; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
623; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
624; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
625; GFX8-NEXT:    s_setpc_b64 s[30:31]
626;
627; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
628; GFX9:       ; %bb.0:
629; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
631; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
632; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
633; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
634; GFX9-NEXT:    v_log_f32_e32 v2, v2
635; GFX9-NEXT:    v_log_f32_e32 v0, v0
636; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
637; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
638; GFX9-NEXT:    v_exp_f32_e32 v1, v2
639; GFX9-NEXT:    v_exp_f32_e32 v0, v0
640; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
641; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
642; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
643; GFX9-NEXT:    s_setpc_b64 s[30:31]
644;
645; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
646; GFX90A:       ; %bb.0:
647; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
649; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
650; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
651; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
652; GFX90A-NEXT:    v_log_f32_e32 v2, v2
653; GFX90A-NEXT:    v_log_f32_e32 v0, v0
654; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
655; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
656; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
657; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
658; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
659; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
660; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
661; GFX90A-NEXT:    s_setpc_b64 s[30:31]
662;
663; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
664; GFX10:       ; %bb.0:
665; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
667; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
668; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
669; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
670; GFX10-NEXT:    v_log_f32_e32 v2, v2
671; GFX10-NEXT:    v_log_f32_e32 v0, v0
672; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
673; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
674; GFX10-NEXT:    v_exp_f32_e32 v1, v2
675; GFX10-NEXT:    v_exp_f32_e32 v0, v0
676; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
677; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
678; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
679; GFX10-NEXT:    s_setpc_b64 s[30:31]
680;
681; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs:
682; GFX11:       ; %bb.0:
683; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
685; GFX11-NEXT:    v_cvt_f32_f16_e64 v0, -v0
686; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
687; GFX11-NEXT:    v_cvt_f32_f16_e64 v1, -v1
688; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
689; GFX11-NEXT:    v_cvt_f32_f16_e64 v2, -v2
690; GFX11-NEXT:    v_log_f32_e32 v0, v0
691; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
692; GFX11-NEXT:    v_cvt_f32_f16_e64 v3, -v3
693; GFX11-NEXT:    v_log_f32_e32 v2, v2
694; GFX11-NEXT:    s_waitcnt_depctr 0xfff
695; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
696; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
697; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
698; GFX11-NEXT:    v_exp_f32_e32 v0, v0
699; GFX11-NEXT:    v_exp_f32_e32 v1, v2
700; GFX11-NEXT:    s_waitcnt_depctr 0xfff
701; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
702; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
703; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
704; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
705; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Both operands are negated before the call.
706  %x.fneg = fneg <2 x half> %x
707  %y.fneg = fneg <2 x half> %y
708  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
709  ret <2 x half> %pow
710}
711
712; FIXME: The f64 variant below is disabled (commented out); restore it once llvm.pow.f64 is handled.
713; define double @v_pow_f64(double %x, double %y) {
714;   %pow = call double @llvm.pow.f64(double %x, double %y)
715;   ret double %pow
716; }
717
; Checks llvm.pow.f32 when the base operand (%x) is wrapped in llvm.fabs.f32.
; Every run expects the fabs to show up as a |v0| source modifier on the
; v_log_f32 (its _e64 form), followed by a multiply with %y in v1 and a final
; v_exp_f32. The gfx1100 run expects v_mul_dx9_zero_f32 where the other runs
; expect v_mul_legacy_f32.
718define float @v_pow_f32_fabs_lhs(float %x, float %y) {
719; GFX6-LABEL: v_pow_f32_fabs_lhs:
720; GFX6:       ; %bb.0:
721; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
723; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
724; GFX6-NEXT:    v_exp_f32_e32 v0, v0
725; GFX6-NEXT:    s_setpc_b64 s[30:31]
726;
727; GFX8-LABEL: v_pow_f32_fabs_lhs:
728; GFX8:       ; %bb.0:
729; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
731; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
732; GFX8-NEXT:    v_exp_f32_e32 v0, v0
733; GFX8-NEXT:    s_setpc_b64 s[30:31]
734;
735; GFX9-LABEL: v_pow_f32_fabs_lhs:
736; GFX9:       ; %bb.0:
737; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
739; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
740; GFX9-NEXT:    v_exp_f32_e32 v0, v0
741; GFX9-NEXT:    s_setpc_b64 s[30:31]
742;
743; GFX90A-LABEL: v_pow_f32_fabs_lhs:
744; GFX90A:       ; %bb.0:
745; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
746; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
747; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
748; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
749; GFX90A-NEXT:    s_setpc_b64 s[30:31]
750;
751; GFX10-LABEL: v_pow_f32_fabs_lhs:
752; GFX10:       ; %bb.0:
753; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
755; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
756; GFX10-NEXT:    v_exp_f32_e32 v0, v0
757; GFX10-NEXT:    s_setpc_b64 s[30:31]
758;
759; GFX11-LABEL: v_pow_f32_fabs_lhs:
760; GFX11:       ; %bb.0:
761; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762; GFX11-NEXT:    v_log_f32_e64 v0, |v0|
763; GFX11-NEXT:    s_waitcnt_depctr 0xfff
764; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
765; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
766; GFX11-NEXT:    v_exp_f32_e32 v0, v0
767; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: only the base of the pow call goes through fabs.
768  %fabs.x = call float @llvm.fabs.f32(float %x)
769  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
770  ret float %pow
771}
772
; Checks llvm.pow.f32 when the exponent operand (%y) is wrapped in
; llvm.fabs.f32. Every run expects a plain v_log_f32 of v0, with the fabs
; showing up as a |v1| source modifier on the multiply that follows, and then
; a final v_exp_f32. The gfx1100 run expects v_mul_dx9_zero_f32 where the
; other runs expect v_mul_legacy_f32.
773define float @v_pow_f32_fabs_rhs(float %x, float %y) {
774; GFX6-LABEL: v_pow_f32_fabs_rhs:
775; GFX6:       ; %bb.0:
776; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777; GFX6-NEXT:    v_log_f32_e32 v0, v0
778; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
779; GFX6-NEXT:    v_exp_f32_e32 v0, v0
780; GFX6-NEXT:    s_setpc_b64 s[30:31]
781;
782; GFX8-LABEL: v_pow_f32_fabs_rhs:
783; GFX8:       ; %bb.0:
784; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
785; GFX8-NEXT:    v_log_f32_e32 v0, v0
786; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
787; GFX8-NEXT:    v_exp_f32_e32 v0, v0
788; GFX8-NEXT:    s_setpc_b64 s[30:31]
789;
790; GFX9-LABEL: v_pow_f32_fabs_rhs:
791; GFX9:       ; %bb.0:
792; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793; GFX9-NEXT:    v_log_f32_e32 v0, v0
794; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
795; GFX9-NEXT:    v_exp_f32_e32 v0, v0
796; GFX9-NEXT:    s_setpc_b64 s[30:31]
797;
798; GFX90A-LABEL: v_pow_f32_fabs_rhs:
799; GFX90A:       ; %bb.0:
800; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801; GFX90A-NEXT:    v_log_f32_e32 v0, v0
802; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
803; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
804; GFX90A-NEXT:    s_setpc_b64 s[30:31]
805;
806; GFX10-LABEL: v_pow_f32_fabs_rhs:
807; GFX10:       ; %bb.0:
808; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
809; GFX10-NEXT:    v_log_f32_e32 v0, v0
810; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
811; GFX10-NEXT:    v_exp_f32_e32 v0, v0
812; GFX10-NEXT:    s_setpc_b64 s[30:31]
813;
814; GFX11-LABEL: v_pow_f32_fabs_rhs:
815; GFX11:       ; %bb.0:
816; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817; GFX11-NEXT:    v_log_f32_e32 v0, v0
818; GFX11-NEXT:    s_waitcnt_depctr 0xfff
819; GFX11-NEXT:    v_mul_dx9_zero_f32_e64 v0, |v1|, v0
820; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
821; GFX11-NEXT:    v_exp_f32_e32 v0, v0
822; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: only the exponent of the pow call goes through fabs.
823  %fabs.y = call float @llvm.fabs.f32(float %y)
824  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
825  ret float %pow
826}
827
; Checks llvm.pow.f32 when both the base (%x) and the exponent (%y) are
; wrapped in llvm.fabs.f32. Every run expects both fabs calls as source
; modifiers: |v0| on the v_log_f32 and |v1| on the multiply, followed by a
; final v_exp_f32. The gfx1100 run expects v_mul_dx9_zero_f32 where the other
; runs expect v_mul_legacy_f32.
828define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
829; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
830; GFX6:       ; %bb.0:
831; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
833; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
834; GFX6-NEXT:    v_exp_f32_e32 v0, v0
835; GFX6-NEXT:    s_setpc_b64 s[30:31]
836;
837; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
838; GFX8:       ; %bb.0:
839; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
840; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
841; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
842; GFX8-NEXT:    v_exp_f32_e32 v0, v0
843; GFX8-NEXT:    s_setpc_b64 s[30:31]
844;
845; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
846; GFX9:       ; %bb.0:
847; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
848; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
849; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
850; GFX9-NEXT:    v_exp_f32_e32 v0, v0
851; GFX9-NEXT:    s_setpc_b64 s[30:31]
852;
853; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
854; GFX90A:       ; %bb.0:
855; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
857; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
858; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
859; GFX90A-NEXT:    s_setpc_b64 s[30:31]
860;
861; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
862; GFX10:       ; %bb.0:
863; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
865; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
866; GFX10-NEXT:    v_exp_f32_e32 v0, v0
867; GFX10-NEXT:    s_setpc_b64 s[30:31]
868;
869; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
870; GFX11:       ; %bb.0:
871; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872; GFX11-NEXT:    v_log_f32_e64 v0, |v0|
873; GFX11-NEXT:    s_waitcnt_depctr 0xfff
874; GFX11-NEXT:    v_mul_dx9_zero_f32_e64 v0, |v1|, v0
875; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
876; GFX11-NEXT:    v_exp_f32_e32 v0, v0
877; GFX11-NEXT:    s_setpc_b64 s[30:31]
; IR under test: both pow operands go through fabs.
878  %fabs.x = call float @llvm.fabs.f32(float %x)
879  %fabs.y = call float @llvm.fabs.f32(float %y)
880  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
881  ret float %pow
882}
883
; Checks llvm.pow.f32 in an amdgpu_ps function whose base (%x) is marked inreg
; while the exponent (%y) is not. The checks expect v_log_f32 to read the base
; straight from s0 into v1, the multiply to combine it with the exponent in
; v0, and a final v_exp_f32. The expected output has no entry s_waitcnt and
; ends with the "return to shader part epilog" marker rather than s_setpc_b64.
; The gfx1100 run expects v_mul_dx9_zero_f32 where the other runs expect
; v_mul_legacy_f32.
884define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
885; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
886; GFX6:       ; %bb.0:
887; GFX6-NEXT:    v_log_f32_e32 v1, s0
888; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
889; GFX6-NEXT:    v_exp_f32_e32 v0, v0
890; GFX6-NEXT:    ; return to shader part epilog
891;
892; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
893; GFX8:       ; %bb.0:
894; GFX8-NEXT:    v_log_f32_e32 v1, s0
895; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
896; GFX8-NEXT:    v_exp_f32_e32 v0, v0
897; GFX8-NEXT:    ; return to shader part epilog
898;
899; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
900; GFX9:       ; %bb.0:
901; GFX9-NEXT:    v_log_f32_e32 v1, s0
902; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
903; GFX9-NEXT:    v_exp_f32_e32 v0, v0
904; GFX9-NEXT:    ; return to shader part epilog
905;
906; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
907; GFX90A:       ; %bb.0:
908; GFX90A-NEXT:    v_log_f32_e32 v1, s0
909; GFX90A-NEXT:    v_mul_legacy_f32 v0, v0, v1
910; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
911; GFX90A-NEXT:    ; return to shader part epilog
912;
913; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
914; GFX10:       ; %bb.0:
915; GFX10-NEXT:    v_log_f32_e32 v1, s0
916; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
917; GFX10-NEXT:    v_exp_f32_e32 v0, v0
918; GFX10-NEXT:    ; return to shader part epilog
919;
920; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
921; GFX11:       ; %bb.0:
922; GFX11-NEXT:    v_log_f32_e32 v1, s0
923; GFX11-NEXT:    s_waitcnt_depctr 0xfff
924; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v0, v1
925; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
926; GFX11-NEXT:    v_exp_f32_e32 v0, v0
927; GFX11-NEXT:    ; return to shader part epilog
928  %pow = call float @llvm.pow.f32(float %x, float %y)
929  ret float %pow
930}
931
; Checks llvm.pow.f32 in an amdgpu_ps function whose exponent (%y) is marked
; inreg while the base (%x) is not. The checks expect v_log_f32 on the base in
; v0, the multiply to take the exponent straight from s0 as its first source,
; and a final v_exp_f32. The expected output has no entry s_waitcnt and ends
; with the "return to shader part epilog" marker rather than s_setpc_b64.
; The gfx1100 run expects v_mul_dx9_zero_f32 where the other runs expect
; v_mul_legacy_f32.
932define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
933; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
934; GFX6:       ; %bb.0:
935; GFX6-NEXT:    v_log_f32_e32 v0, v0
936; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
937; GFX6-NEXT:    v_exp_f32_e32 v0, v0
938; GFX6-NEXT:    ; return to shader part epilog
939;
940; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
941; GFX8:       ; %bb.0:
942; GFX8-NEXT:    v_log_f32_e32 v0, v0
943; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
944; GFX8-NEXT:    v_exp_f32_e32 v0, v0
945; GFX8-NEXT:    ; return to shader part epilog
946;
947; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
948; GFX9:       ; %bb.0:
949; GFX9-NEXT:    v_log_f32_e32 v0, v0
950; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
951; GFX9-NEXT:    v_exp_f32_e32 v0, v0
952; GFX9-NEXT:    ; return to shader part epilog
953;
954; GFX90A-LABEL: v_pow_f32_vgpr_sgpr:
955; GFX90A:       ; %bb.0:
956; GFX90A-NEXT:    v_log_f32_e32 v0, v0
957; GFX90A-NEXT:    v_mul_legacy_f32 v0, s0, v0
958; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
959; GFX90A-NEXT:    ; return to shader part epilog
960;
961; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
962; GFX10:       ; %bb.0:
963; GFX10-NEXT:    v_log_f32_e32 v0, v0
964; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
965; GFX10-NEXT:    v_exp_f32_e32 v0, v0
966; GFX10-NEXT:    ; return to shader part epilog
967;
968; GFX11-LABEL: v_pow_f32_vgpr_sgpr:
969; GFX11:       ; %bb.0:
970; GFX11-NEXT:    v_log_f32_e32 v0, v0
971; GFX11-NEXT:    s_waitcnt_depctr 0xfff
972; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, s0, v0
973; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
974; GFX11-NEXT:    v_exp_f32_e32 v0, v0
975; GFX11-NEXT:    ; return to shader part epilog
976  %pow = call float @llvm.pow.f32(float %x, float %y)
977  ret float %pow
978}
979
; Checks llvm.pow.f32 in an amdgpu_ps function where both the base (%x) and
; the exponent (%y) are marked inreg. The checks expect v_log_f32 to read the
; base from s0, the multiply to read the exponent from s1, and a final
; v_exp_f32 producing the result in v0. The expected output has no entry
; s_waitcnt and ends with the "return to shader part epilog" marker rather
; than s_setpc_b64. The gfx1100 run expects v_mul_dx9_zero_f32 where the other
; runs expect v_mul_legacy_f32.
980define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
981; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
982; GFX6:       ; %bb.0:
983; GFX6-NEXT:    v_log_f32_e32 v0, s0
984; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
985; GFX6-NEXT:    v_exp_f32_e32 v0, v0
986; GFX6-NEXT:    ; return to shader part epilog
987;
988; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
989; GFX8:       ; %bb.0:
990; GFX8-NEXT:    v_log_f32_e32 v0, s0
991; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
992; GFX8-NEXT:    v_exp_f32_e32 v0, v0
993; GFX8-NEXT:    ; return to shader part epilog
994;
995; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
996; GFX9:       ; %bb.0:
997; GFX9-NEXT:    v_log_f32_e32 v0, s0
998; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
999; GFX9-NEXT:    v_exp_f32_e32 v0, v0
1000; GFX9-NEXT:    ; return to shader part epilog
1001;
1002; GFX90A-LABEL: v_pow_f32_sgpr_sgpr:
1003; GFX90A:       ; %bb.0:
1004; GFX90A-NEXT:    v_log_f32_e32 v0, s0
1005; GFX90A-NEXT:    v_mul_legacy_f32 v0, s1, v0
1006; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
1007; GFX90A-NEXT:    ; return to shader part epilog
1008;
1009; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
1010; GFX10:       ; %bb.0:
1011; GFX10-NEXT:    v_log_f32_e32 v0, s0
1012; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
1013; GFX10-NEXT:    v_exp_f32_e32 v0, v0
1014; GFX10-NEXT:    ; return to shader part epilog
1015;
1016; GFX11-LABEL: v_pow_f32_sgpr_sgpr:
1017; GFX11:       ; %bb.0:
1018; GFX11-NEXT:    v_log_f32_e32 v0, s0
1019; GFX11-NEXT:    s_waitcnt_depctr 0xfff
1020; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, s1, v0
1021; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1022; GFX11-NEXT:    v_exp_f32_e32 v0, v0
1023; GFX11-NEXT:    ; return to shader part epilog
1024  %pow = call float @llvm.pow.f32(float %x, float %y)
1025  ret float %pow
1026}
1027
1028declare half @llvm.pow.f16(half, half)
1029declare float @llvm.pow.f32(float, float)
1030declare double @llvm.pow.f64(double, double)
1031
1032declare half @llvm.fabs.f16(half)
1033declare float @llvm.fabs.f32(float)
1034
1035declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
1036declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
1037