; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s

; Scalar f32 pow with both operands in VGPRs. The expected code on every
; target is a v_log_f32 of the base, a legacy (dx9-zero on gfx11) multiply by
; the exponent, then v_exp_f32.
define float @v_pow_f32(float %x, float %y) {
; GFX6-LABEL: v_pow_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.pow.f32(float %x, float %y)
  ret float %result
}

; <2 x float> pow. The expected code applies the log / multiply / exp sequence
; to each lane; gfx11 pairs the two multiplies in one v_dual instruction.
define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6-LABEL: v_pow_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_log_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_log_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_log_f32_e32 v1, v1
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v2, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v1, v3, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_log_f32_e32 v1, v1
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    v_log_f32_e32 v1, v1
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_dual_mul_dx9_zero_f32 v0, v2, v0 :: v_dual_mul_dx9_zero_f32 v1, v3, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    v_exp_f32_e32 v1, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %result
}

; Scalar f16 pow. The expected code converts the operands with v_cvt_f32_f16
; and runs the same f32 log / multiply / exp sequence; fiji and newer convert
; the result back with v_cvt_f16_f32.
define half @v_pow_f16(half %x, half %y) {
; GFX6-LABEL: v_pow_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call half @llvm.pow.f16(half %x, half %y)
  ret half %result
}

; <2 x half> pow. Each half is converted to f32 and run through the f32
; sequence. On fiji and newer the two results are recombined with v_or_b32
; (fiji) or v_pack_b32_f16 (gfx9 and later).
define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX11-NEXT:    v_log_f32_e32 v2, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    v_exp_f32_e32 v1, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %result
}

; <2 x half> pow with a negated base. On fiji and newer the fneg appears as a
; source modifier (-v0) on the f16->f32 conversions; tahiti uses an explicit
; v_xor_b32 with 0x80008000.
define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v4, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16_fneg_lhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_cvt_f32_f16_e64 v2, -v2
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX11-NEXT:    v_log_f32_e32 v2, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    v_exp_f32_e32 v1, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg <2 x half> %x
  %result = call <2 x half> @llvm.pow.v2f16(<2 x half> %neg.x, <2 x half> %y)
  ret <2 x half> %result
}

; <2 x half> pow with a negated exponent. Mirrors the fneg_lhs case: the
; negation lands on the exponent's conversions (-v1) on fiji and newer, and on
; an explicit v_xor_b32 on tahiti.
define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16_fneg_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_cvt_f32_f16_e64 v3, -v3
; GFX11-NEXT:    v_log_f32_e32 v2, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    v_exp_f32_e32 v1, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.y = fneg <2 x half> %y
  %result = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %neg.y)
  ret <2 x half> %result
}

; <2 x half> pow with both base and exponent negated; combines the two
; single-operand fneg cases.
define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; GFX11-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-NEXT:    v_cvt_f32_f16_e64 v2, -v2
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_cvt_f32_f16_e64 v3, -v3
; GFX11-NEXT:    v_log_f32_e32 v2, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    v_exp_f32_e32 v1, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg <2 x half> %x
  %neg.y = fneg <2 x half> %y
  %result = call <2 x half> @llvm.pow.v2f16(<2 x half> %neg.x, <2 x half> %neg.y)
  ret <2 x half> %result
}

; FIXME
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }

; f32 pow with fabs on the base. The expected code puts the |v0| source
; modifier on v_log_f32 (e64 encoding).
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_log_f32_e64 v0, |v0|
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %abs.x = call float @llvm.fabs.f32(float %x)
  %result = call float @llvm.pow.f32(float %abs.x, float %y)
  ret float %result
}

; f32 pow with fabs on the exponent. The expected code puts the |v1| source
; modifier on the multiply.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e64 v0, |v1|, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %abs.y = call float @llvm.fabs.f32(float %y)
  %result = call float @llvm.pow.f32(float %x, float %abs.y)
  ret float %result
}

; f32 pow with fabs on both operands. The |v0| modifier sits on the log and
; the |v1| modifier on the multiply.
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_log_f32_e64 v0, |v0|
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e64 v0, |v1|, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %abs.x = call float @llvm.fabs.f32(float %x)
  %abs.y = call float @llvm.fabs.f32(float %y)
  %result = call float @llvm.pow.f32(float %abs.x, float %abs.y)
  ret float %result
}

define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v1, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v1, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v1, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v1, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v0, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v1, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_log_f32_e32 v1, s0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float
%pow 930} 931 932define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { 933; GFX6-LABEL: v_pow_f32_vgpr_sgpr: 934; GFX6: ; %bb.0: 935; GFX6-NEXT: v_log_f32_e32 v0, v0 936; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 937; GFX6-NEXT: v_exp_f32_e32 v0, v0 938; GFX6-NEXT: ; return to shader part epilog 939; 940; GFX8-LABEL: v_pow_f32_vgpr_sgpr: 941; GFX8: ; %bb.0: 942; GFX8-NEXT: v_log_f32_e32 v0, v0 943; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 944; GFX8-NEXT: v_exp_f32_e32 v0, v0 945; GFX8-NEXT: ; return to shader part epilog 946; 947; GFX9-LABEL: v_pow_f32_vgpr_sgpr: 948; GFX9: ; %bb.0: 949; GFX9-NEXT: v_log_f32_e32 v0, v0 950; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 951; GFX9-NEXT: v_exp_f32_e32 v0, v0 952; GFX9-NEXT: ; return to shader part epilog 953; 954; GFX90A-LABEL: v_pow_f32_vgpr_sgpr: 955; GFX90A: ; %bb.0: 956; GFX90A-NEXT: v_log_f32_e32 v0, v0 957; GFX90A-NEXT: v_mul_legacy_f32 v0, s0, v0 958; GFX90A-NEXT: v_exp_f32_e32 v0, v0 959; GFX90A-NEXT: ; return to shader part epilog 960; 961; GFX10-LABEL: v_pow_f32_vgpr_sgpr: 962; GFX10: ; %bb.0: 963; GFX10-NEXT: v_log_f32_e32 v0, v0 964; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 965; GFX10-NEXT: v_exp_f32_e32 v0, v0 966; GFX10-NEXT: ; return to shader part epilog 967; 968; GFX11-LABEL: v_pow_f32_vgpr_sgpr: 969; GFX11: ; %bb.0: 970; GFX11-NEXT: v_log_f32_e32 v0, v0 971; GFX11-NEXT: s_waitcnt_depctr 0xfff 972; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0 973; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 974; GFX11-NEXT: v_exp_f32_e32 v0, v0 975; GFX11-NEXT: ; return to shader part epilog 976 %pow = call float @llvm.pow.f32(float %x, float %y) 977 ret float %pow 978} 979 980define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { 981; GFX6-LABEL: v_pow_f32_sgpr_sgpr: 982; GFX6: ; %bb.0: 983; GFX6-NEXT: v_log_f32_e32 v0, s0 984; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 985; GFX6-NEXT: v_exp_f32_e32 v0, v0 986; GFX6-NEXT: ; return to shader part epilog 987; 988; 
GFX8-LABEL: v_pow_f32_sgpr_sgpr: 989; GFX8: ; %bb.0: 990; GFX8-NEXT: v_log_f32_e32 v0, s0 991; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 992; GFX8-NEXT: v_exp_f32_e32 v0, v0 993; GFX8-NEXT: ; return to shader part epilog 994; 995; GFX9-LABEL: v_pow_f32_sgpr_sgpr: 996; GFX9: ; %bb.0: 997; GFX9-NEXT: v_log_f32_e32 v0, s0 998; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 999; GFX9-NEXT: v_exp_f32_e32 v0, v0 1000; GFX9-NEXT: ; return to shader part epilog 1001; 1002; GFX90A-LABEL: v_pow_f32_sgpr_sgpr: 1003; GFX90A: ; %bb.0: 1004; GFX90A-NEXT: v_log_f32_e32 v0, s0 1005; GFX90A-NEXT: v_mul_legacy_f32 v0, s1, v0 1006; GFX90A-NEXT: v_exp_f32_e32 v0, v0 1007; GFX90A-NEXT: ; return to shader part epilog 1008; 1009; GFX10-LABEL: v_pow_f32_sgpr_sgpr: 1010; GFX10: ; %bb.0: 1011; GFX10-NEXT: v_log_f32_e32 v0, s0 1012; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 1013; GFX10-NEXT: v_exp_f32_e32 v0, v0 1014; GFX10-NEXT: ; return to shader part epilog 1015; 1016; GFX11-LABEL: v_pow_f32_sgpr_sgpr: 1017; GFX11: ; %bb.0: 1018; GFX11-NEXT: v_log_f32_e32 v0, s0 1019; GFX11-NEXT: s_waitcnt_depctr 0xfff 1020; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s1, v0 1021; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1022; GFX11-NEXT: v_exp_f32_e32 v0, v0 1023; GFX11-NEXT: ; return to shader part epilog 1024 %pow = call float @llvm.pow.f32(float %x, float %y) 1025 ret float %pow 1026} 1027 1028declare half @llvm.pow.f16(half, half) 1029declare float @llvm.pow.f32(float, float) 1030declare double @llvm.pow.f64(double, double) 1031 1032declare half @llvm.fabs.f16(half) 1033declare float @llvm.fabs.f32(float) 1034 1035declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>) 1036declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) 1037