; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX78,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GFX78,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s

; Checks GlobalISel code generation for the llvm.powi intrinsic with half and
; float operands and an i32 exponent. Both a variable exponent and a set of
; constant exponents (0, 1, -1, 2, -2, 4, 8, 16, 128, -128) are covered.
; Four llc configurations are exercised by the command lines above: hawaii,
; fiji, and gfx1100 with real-true16 switched on and off. Their output is
; matched through the GFX7, GFX8, GFX78 (shared hawaii/fiji), GFX11,
; GFX11-TRUE16 and GFX11-FAKE16 prefix families.
; The assertion comments come from utils/update_llc_test_checks.py, so
; regenerate them with that script instead of editing them by hand.

; Variable exponent, half operand. The half value travels through the function
; signature as i16 and is bitcast on both sides of the call. On every checked
; target the expected code converts the exponent to floating point and then
; goes through log, multiply and exp instructions.
define i16 @v_powi_f16(i16 %l, i32 %r) {
; GFX7-LABEL: v_powi_f16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX7-NEXT: v_mov_b32_e32 v2, 0xc2fc0000
; GFX7-NEXT: v_mov_b32_e32 v3, 0x42800000
; GFX7-NEXT: v_log_f32_e32 v0, v0
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc
; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
; GFX7-NEXT: v_exp_f32_e32 v0, v0
; GFX7-NEXT: v_not_b32_e32 v1, 63
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX8-NEXT: v_log_f16_e32 v0, v0
; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT: v_exp_f16_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-TRUE16-LABEL: v_powi_f16:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-TRUE16-NEXT: v_log_f16_e32 v0.l, v0.l
; GFX11-TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
; GFX11-TRUE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v1, v0.l
; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, v0.h
; GFX11-TRUE16-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
; GFX11-TRUE16-NEXT: v_exp_f16_e32 v0.l, v0.l
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-FAKE16-LABEL: v_powi_f16:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-FAKE16-NEXT: v_log_f16_e32 v0, v0
; GFX11-FAKE16-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX11-FAKE16-NEXT: s_waitcnt_depctr 0xfff
; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX11-FAKE16-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-FAKE16-NEXT: v_exp_f16_e32 v0, v0
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
  %l.cast = bitcast i16 %l to half
  %res = call half @llvm.powi.f16.i32(half %l.cast, i32 %r)
  %res.cast = bitcast half %res to i16
  ret i16 %res.cast
}

; Variable exponent, float operand. The expected code is again a log,
; multiply, exp chain, here wrapped in extra compare/select/ldexp instructions
; before the log and after the exp.
; NOTE(review): those extra instructions look like input/result range scaling;
; this file does not state their purpose - confirm against the lowering code.
define float @v_powi_f32(float %l, i32 %r) {
; GFX7-LABEL: v_powi_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, 0x800000
; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v2
; GFX7-NEXT: v_log_f32_e32 v0, v0
; GFX7-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX7-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX7-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX7-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX7-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; GFX7-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX7-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX7-NEXT: v_add_f32_e32 v0, v0, v1
; GFX7-NEXT: v_exp_f32_e32 v0, v0
; GFX7-NEXT: v_not_b32_e32 v1, 63
; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 0x800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2
; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX8-NEXT: v_ldexp_f32 v0, v0, v2
; GFX8-NEXT: v_log_f32_e32 v0, v0
; GFX8-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v1, 0xc2fc0000
; GFX8-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX8-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc
; GFX8-NEXT: v_add_f32_e32 v0, v0, v1
; GFX8-NEXT: v_exp_f32_e32 v0, v0
; GFX8-NEXT: v_not_b32_e32 v1, 63
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
; GFX11-NEXT: v_cvt_f32_i32_e32 v1, v1
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2
; GFX11-NEXT: v_ldexp_f32 v0, v0, v2
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_log_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42800000, vcc_lo
; GFX11-NEXT: v_add_f32_e32 v0, v0, v1
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_exp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 %r)
  ret float %res
}

; Constant exponent 0. The expected code only moves the constant 1.0 into v0
; and returns; the input is not read.
define float @v_powi_0_f32(float %l) {
; GFX78-LABEL: v_powi_0_f32:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_mov_b32_e32 v0, 1.0
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_0_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 0)
  ret float %res
}

; Constant exponent 1. The expected code contains no arithmetic at all, only
; the entry wait and the return, so the input in v0 is returned unchanged.
define float @v_powi_1_f32(float %l) {
; GFX78-LABEL: v_powi_1_f32:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 1)
  ret float %res
}

; Constant exponent -1. The expected code is a floating-point division
; sequence (div_scale, rcp, fma refinement steps, div_fmas, div_fixup) whose
; operands are the constant 1.0 and the input in v0 - presumably 1.0 / x.
define float @v_powi_neg1_f32(float %l) {
; GFX7-LABEL: v_powi_neg1_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg1_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_rcp_f32_e32 v2, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v3, v4, v2
; GFX11-NEXT: v_fma_f32 v5, -v1, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v2
; GFX11-NEXT: v_fma_f32 v1, -v1, v3, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v3
; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 -1)
  ret float %res
}

; Constant exponent 2. The expected code is a single v_mul_f32 of v0 with
; itself.
define float @v_powi_2_f32(float %l) {
; GFX78-LABEL: v_powi_2_f32:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_2_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 2)
  ret float %res
}

; Constant exponent -2. The expected code squares v0 with one v_mul_f32 and
; then runs the div_scale/rcp/fma/div_fmas/div_fixup division sequence with
; 1.0 and the squared value as operands.
define float @v_powi_neg2_f32(float %l) {
; GFX7-LABEL: v_powi_neg2_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg2_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg2_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX11-NEXT: v_rcp_f32_e32 v2, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
; GFX11-NEXT: v_mul_f32_e32 v3, v4, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_f32 v5, -v1, v3, v4
; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_f32 v1, -v1, v3, v4
; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 -2)
  ret float %res
}

; Constant exponent 4. The expected code is two successive v_mul_f32
; instructions, each multiplying v0 with itself.
define float @v_powi_4_f32(float %l) {
; GFX78-LABEL: v_powi_4_f32:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_4_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 4)
  ret float %res
}

; Constant exponent 8. The expected code is three successive self-multiplies
; of v0.
define float @v_powi_8_f32(float %l) {
; GFX78-LABEL: v_powi_8_f32:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_8_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 8)
  ret float %res
}

; Constant exponent 16. The expected code is four successive self-multiplies
; of v0.
define float @v_powi_16_f32(float %l) {
; GFX78-LABEL: v_powi_16_f32:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_16_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 16)
  ret float %res
}

; Constant exponent 128. The expected code is seven successive self-multiplies
; of v0.
define float @v_powi_128_f32(float %l) {
; GFX78-LABEL: v_powi_128_f32:
; GFX78: ; %bb.0:
; GFX78-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX78-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_128_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 128)
  ret float %res
}

; Constant exponent -128. The expected code is seven successive self-multiplies
; of v0 followed by the div_scale/rcp/fma/div_fmas/div_fixup division sequence
; with 1.0 and the multiplied value as operands.
define float @v_powi_neg128_f32(float %l) {
; GFX7-LABEL: v_powi_neg128_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX7-NEXT: v_rcp_f32_e32 v2, v1
; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0
; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0
; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2
; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2
; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3
; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4
; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3
; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4
; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_powi_neg128_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0
; GFX8-NEXT: v_div_scale_f32 v2, vcc, 1.0, v0, 1.0
; GFX8-NEXT: v_rcp_f32_e32 v3, v1
; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0
; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3
; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3
; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2
; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4
; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2
; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4
; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_powi_neg128_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 1.0
; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
; GFX11-NEXT: v_rcp_f32_e32 v2, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
; GFX11-NEXT: v_mul_f32_e32 v3, v4, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_f32 v5, -v1, v3, v4
; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fma_f32 v1, -v1, v3, v4
; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 1.0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %res = call float @llvm.powi.f32.i32(float %l, i32 -128)
  ret float %res
}

; The double-precision variant is kept commented out, so it has no assertions.
; This file does not record what exactly fails for f64.
; FIXME: f64 broken
; define double @v_powi_f64(double %l, i32 %r) {
;   %res = call double @llvm.powi.f64.i32(double %l, i32 %r)
;   ret double %res
; }

; Intrinsic declarations. The f64 declaration has no active caller in the
; functions above; it matches the commented-out f64 case.
declare half @llvm.powi.f16.i32(half, i32) #0
declare float @llvm.powi.f32.i32(float, i32) #0
declare double @llvm.powi.f64.i32(double, i32) #0

attributes #0 = { nounwind readnone speculatable willreturn }