; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX78,GFX7 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GFX78,GFX8 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s

; Codegen test for the llvm.powi intrinsic with an i32 exponent, covering a
; half and a float base. llc is run for three CPUs (hawaii, fiji, gfx1100).
; hawaii and fiji share one check prefix and additionally have one prefix
; each, used by the functions whose expected output differs between them;
; gfx1100 has its own prefix. The assertions are autogenerated (see the note
; on the first line).

; Variable exponent, half base. The half value enters and leaves the function
; as i16 and is bitcast inside the body. The expected code converts the base
; to f32 and the exponent to f32, then emits v_log_f32, a multiply
; (v_mul_legacy_f32 on hawaii/fiji, v_mul_dx9_zero_f32 on gfx1100), v_exp_f32,
; and finally a conversion back to f16.
define i16 @v_powi_f16(i16 %l, i32 %r) {
; GFX78-LABEL: v_powi_f16:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX78-NEXT:    v_cvt_f32_i32_e32 v1, v1
; GFX78-NEXT:    v_log_f32_e32 v0, v0
; GFX78-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX78-NEXT:    v_exp_f32_e32 v0, v0
; GFX78-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_i32_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %l.cast = bitcast i16 %l to half
  %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
  %res.cast = bitcast half %res to i16
  ret i16 %res.cast
}

; Variable exponent, float base. Same expected shape as the half case but
; without the f16 conversions: v_log_f32, int-to-float conversion of the
; exponent, the multiply, then v_exp_f32.
define float @v_powi_f32(float %l, i32 %r) {
; GFX78-LABEL: v_powi_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_log_f32_e32 v0, v0
; GFX78-NEXT:    v_cvt_f32_i32_e32 v1, v1
; GFX78-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX78-NEXT:    v_exp_f32_e32 v0, v0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_i32_e32 v1, v1
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 %r)
  ret float %res
}

; Constant exponent 0: the expected code only moves the constant 1.0 into v0.
define float @v_powi_0_f32(float %l) {
; GFX78-LABEL: v_powi_0_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_0_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 0)
  ret float %res
}

; Constant exponent 1: the expected code contains no arithmetic, only the
; entry wait and the return.
define float @v_powi_1_f32(float %l) {
; GFX78-LABEL: v_powi_1_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_1_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 1)
  ret float %res
}

; Constant exponent -1: the expected code is a v_div_scale_f32 / v_rcp_f32 /
; fma / v_div_fmas_f32 / v_div_fixup_f32 sequence with 1.0 operands applied to
; the input. hawaii and fiji are checked separately here because the position
; of v_rcp_f32 relative to the second v_div_scale_f32 differs between them.
define float @v_powi_neg1_f32(float %l) {
; GFX7-LABEL: v_powi_neg1_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg1_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg1_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
  ret float %res
}

; Constant exponent 2: the expected code is a single v_mul_f32 of the input
; with itself.
define float @v_powi_2_f32(float %l) {
; GFX78-LABEL: v_powi_2_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_2_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 2)
  ret float %res
}

; Constant exponent -2: one squaring multiply followed by the same
; div_scale / rcp / fma / div_fmas / div_fixup sequence expected for -1.
define float @v_powi_neg2_f32(float %l) {
; GFX7-LABEL: v_powi_neg2_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg2_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg2_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
  ret float %res
}

; Constant exponent 4: two successive squaring multiplies are expected.
define float @v_powi_4_f32(float %l) {
; GFX78-LABEL: v_powi_4_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_4_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 4)
  ret float %res
}

; Constant exponent 8: three successive squaring multiplies are expected.
define float @v_powi_8_f32(float %l) {
; GFX78-LABEL: v_powi_8_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_8_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 8)
  ret float %res
}

; Constant exponent 16: four successive squaring multiplies are expected.
define float @v_powi_16_f32(float %l) {
; GFX78-LABEL: v_powi_16_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_16_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 16)
  ret float %res
}

; Constant exponent 128: seven successive squaring multiplies are expected.
define float @v_powi_128_f32(float %l) {
; GFX78-LABEL: v_powi_128_f32:
; GFX78:       ; %bb.0:
; GFX78-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_128_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 128)
  ret float %res
}

; Constant exponent -128: seven successive squaring multiplies followed by
; the div_scale / rcp / fma / div_fmas / div_fixup sequence with 1.0 operands.
define float @v_powi_neg128_f32(float %l) {
; GFX7-LABEL: v_powi_neg128_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT:    v_rcp_f32_e32 v2, v1
; GFX7-NEXT:    v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT:    v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT:    v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT:    v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT:    v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT:    v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT:    v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg128_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT:    v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT:    v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT:    v_rcp_f32_e32 v3, v1
; GFX8-NEXT:    v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT:    v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT:    v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT:    v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT:    v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT:    v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT:    v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg128_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-NEXT:    v_rcp_f32_e32 v2, v1
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v2, v3, v2
; GFX11-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
; GFX11-NEXT:    v_mul_f32_e32 v4, v3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v5, -v1, v4, v3
; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v1, -v1, v4, v3
; GFX11-NEXT:    v_div_fmas_f32 v1, v1, v2, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_div_fixup_f32 v0, v1, v0, 1.0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 -128)
  ret float %res
}

; FIXME: f64 broken
; define double @v_powi_f64(double %l, i32 %r) {
;   %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
;   ret double %res
; }

; Intrinsic declarations. The f64 variant is declared even though its only
; user in this file is the disabled test above.
declare half @llvm.powi.f16.i32(half, i32) #0
declare float @llvm.powi.f32.i32(float, i32) #0
declare double @llvm.powi.f64.i32(double, i32) #0

attributes #0 = { nounwind readnone speculatable willreturn }