; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-simplifylib,instcombine -amdgpu-prelink < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-prelink | FileCheck %s

declare hidden float @_Z3powff(float, float)
declare hidden double @_Z3powdd(double, double)
declare hidden half @_Z3powDhDh(half, half)

declare hidden float @_Z4powrff(float, float)
declare hidden double @_Z4powrdd(double, double)
declare hidden half @_Z4powrDhDh(half, half)

declare hidden float @_Z4pownfi(float, i32)
declare hidden double @_Z4powndi(double, i32)
declare hidden half @_Z4pownDhi(half, i32)

; --------------------------------------------------------------------
; test pow
; --------------------------------------------------------------------

define half @test_pow_fast_f16(half %x, half %y) {
; CHECK-LABEL: test_pow_fast_f16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_getpc_b64 s[16:17]
; CHECK-NEXT:    s_add_u32 s16, s16, _Z3powDhDh@rel32@lo+4
; CHECK-NEXT:    s_addc_u32 s17, s17, _Z3powDhDh@rel32@hi+12
; CHECK-NEXT:    s_setpc_b64 s[16:17]
  %pow = tail call fast half @_Z3powDhDh(half %x, half %y)
  ret half %pow
}

define float @test_pow_fast_f32(float %x, float %y) {
; CHECK-LABEL: test_pow_fast_f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_getpc_b64 s[16:17]
; CHECK-NEXT:    s_add_u32 s16, s16, _Z3powff@rel32@lo+4
; CHECK-NEXT:    s_addc_u32 s17, s17, _Z3powff@rel32@hi+12
; CHECK-NEXT:    s_setpc_b64 s[16:17]
  %pow = tail call fast float @_Z3powff(float %x, float %y)
  ret float %pow
}

define double @test_pow_fast_f64(double %x, double %y) {
; CHECK-LABEL: test_pow_fast_f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_getpc_b64 s[16:17]
; CHECK-NEXT:    s_add_u32 s16, s16, _Z3powdd@rel32@lo+4
; CHECK-NEXT:    s_addc_u32 s17, s17, _Z3powdd@rel32@hi+12
; CHECK-NEXT:    s_setpc_b64 s[16:17]
  %pow = tail call fast double @_Z3powdd(double %x, double %y)
  ret double %pow
}

define half @test_pow_fast_f16__integral_y(half %x, i32 %y.i) {
; CHECK-LABEL: test_pow_fast_f16__integral_y:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT:    v_log_f16_e64 v3, |v0|
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CHECK-NEXT:    v_cvt_i32_f32_e32 v1, v1
; CHECK-NEXT:    v_cvt_f32_i32_e32 v2, v1
; CHECK-NEXT:    v_lshlrev_b16_e32 v1, 15, v1
; CHECK-NEXT:    v_and_b32_e32 v0, v1, v0
; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_f16_e32 v2, v3, v2
; CHECK-NEXT:    v_exp_f16_e32 v2, v2
; CHECK-NEXT:    v_or_b32_e32 v0, v0, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = sitofp i32 %y.i to half
  %pow = tail call fast half @_Z3powDhDh(half %x, half %y)
  ret half %pow
}

define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) {
; CHECK-LABEL: test_pow_fast_f32__integral_y:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT:    s_mov_b32 s4, 0x800000
; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT:    v_cvt_i32_f32_e32 v1, v1
; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
; CHECK-NEXT:    v_ldexp_f32 v3, |v0|, v3
; CHECK-NEXT:    v_log_f32_e32 v3, v3
; CHECK-NEXT:    v_cvt_f32_i32_e32 v4, v1
; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT:    v_sub_f32_e32 v2, v3, v2
; CHECK-NEXT:    v_mul_f32_e32 v3, v2, v4
; CHECK-NEXT:    s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT:    v_mov_b32_e32 v5, 0x42800000
; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
; CHECK-NEXT:    v_fma_f32 v2, v2, v4, v3
; CHECK-NEXT:    v_exp_f32_e32 v2, v2
; CHECK-NEXT:    v_not_b32_e32 v3, 63
; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
; CHECK-NEXT:    v_ldexp_f32 v2, v2, v3
; CHECK-NEXT:    v_and_or_b32 v0, v1, v0, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = sitofp i32 %y.i to float
  %pow = tail call fast float @_Z3powff(float %x, float %y)
  ret float %pow
}

define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-LABEL: test_pow_fast_f64__integral_y:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s16, s33
; CHECK-NEXT:    s_mov_b32 s33, s32
; CHECK-NEXT:    s_or_saveexec_b64 s[18:19], -1
; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; CHECK-NEXT:    s_mov_b64 exec, s[18:19]
; CHECK-NEXT:    v_writelane_b32 v43, s16, 14
; CHECK-NEXT:    v_writelane_b32 v43, s30, 0
; CHECK-NEXT:    v_writelane_b32 v43, s31, 1
; CHECK-NEXT:    v_writelane_b32 v43, s34, 2
; CHECK-NEXT:    v_writelane_b32 v43, s35, 3
; CHECK-NEXT:    v_writelane_b32 v43, s36, 4
; CHECK-NEXT:    v_writelane_b32 v43, s37, 5
; CHECK-NEXT:    v_writelane_b32 v43, s38, 6
; CHECK-NEXT:    v_writelane_b32 v43, s39, 7
; CHECK-NEXT:    s_addk_i32 s32, 0x800
; CHECK-NEXT:    v_writelane_b32 v43, s40, 8
; CHECK-NEXT:    v_writelane_b32 v43, s41, 9
; CHECK-NEXT:    s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    v_writelane_b32 v43, s42, 10
; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT:    v_writelane_b32 v43, s43, 11
; CHECK-NEXT:    v_mov_b32_e32 v42, v1
; CHECK-NEXT:    v_writelane_b32 v43, s44, 12
; CHECK-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v42
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    v_writelane_b32 v43, s45, 13
; CHECK-NEXT:    v_mov_b32_e32 v40, v31
; CHECK-NEXT:    v_mov_b32_e32 v41, v2
; CHECK-NEXT:    s_mov_b32 s42, s15
; CHECK-NEXT:    s_mov_b32 s43, s14
; CHECK-NEXT:    s_mov_b32 s44, s13
; CHECK-NEXT:    s_mov_b32 s45, s12
; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_cvt_f64_i32_e32 v[2:3], v41
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT:    s_mov_b32 s12, s45
; CHECK-NEXT:    s_mov_b32 s13, s44
; CHECK-NEXT:    s_mov_b32 s14, s43
; CHECK-NEXT:    s_mov_b32 s15, s42
; CHECK-NEXT:    v_mov_b32_e32 v31, v40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 31, v41
; CHECK-NEXT:    v_and_b32_e32 v2, v2, v42
; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT:    v_or_b32_e32 v1, v2, v1
; CHECK-NEXT:    v_readlane_b32 s45, v43, 13
; CHECK-NEXT:    v_readlane_b32 s44, v43, 12
; CHECK-NEXT:    v_readlane_b32 s43, v43, 11
; CHECK-NEXT:    v_readlane_b32 s42, v43, 10
; CHECK-NEXT:    v_readlane_b32 s41, v43, 9
; CHECK-NEXT:    v_readlane_b32 s40, v43, 8
; CHECK-NEXT:    v_readlane_b32 s39, v43, 7
; CHECK-NEXT:    v_readlane_b32 s38, v43, 6
; CHECK-NEXT:    v_readlane_b32 s37, v43, 5
; CHECK-NEXT:    v_readlane_b32 s36, v43, 4
; CHECK-NEXT:    v_readlane_b32 s35, v43, 3
; CHECK-NEXT:    v_readlane_b32 s34, v43, 2
; CHECK-NEXT:    v_readlane_b32 s31, v43, 1
; CHECK-NEXT:    v_readlane_b32 s30, v43, 0
; CHECK-NEXT:    s_mov_b32 s32, s33
; CHECK-NEXT:    v_readlane_b32 s4, v43, 14
; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
; CHECK-NEXT:    s_mov_b32 s33, s4
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = sitofp i32 %y.i to double
  %pow = tail call fast double @_Z3powdd(double %x, double %y)
  ret double %pow
}

; --------------------------------------------------------------------
; test powr
; --------------------------------------------------------------------

define half @test_powr_fast_f16(half %x, half %y) {
; CHECK-LABEL: test_powr_fast_f16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_log_f16_e32 v0, v0
; CHECK-NEXT:    v_mul_f16_e32 v0, v1, v0
; CHECK-NEXT:    v_exp_f16_e32 v0, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %powr = tail call fast half @_Z4powrDhDh(half %x, half %y)
  ret half %powr
}

define float @test_powr_fast_f32(float %x, float %y) {
; CHECK-LABEL: test_powr_fast_f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x800000
; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
; CHECK-NEXT:    v_ldexp_f32 v0, v0, v3
; CHECK-NEXT:    v_log_f32_e32 v0, v0
; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT:    s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT:    v_sub_f32_e32 v0, v0, v2
; CHECK-NEXT:    v_mul_f32_e32 v2, v1, v0
; CHECK-NEXT:    v_mov_b32_e32 v3, 0x42800000
; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
; CHECK-NEXT:    v_fma_f32 v0, v1, v0, v2
; CHECK-NEXT:    v_exp_f32_e32 v0, v0
; CHECK-NEXT:    v_not_b32_e32 v1, 63
; CHECK-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; CHECK-NEXT:    v_ldexp_f32 v0, v0, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %powr = tail call fast float @_Z4powrff(float %x, float %y)
  ret float %powr
}

define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-LABEL: test_powr_fast_f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s16, s33
; CHECK-NEXT:    s_mov_b32 s33, s32
; CHECK-NEXT:    s_or_saveexec_b64 s[18:19], -1
; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; CHECK-NEXT:    s_mov_b64 exec, s[18:19]
; CHECK-NEXT:    v_writelane_b32 v43, s16, 14
; CHECK-NEXT:    v_writelane_b32 v43, s30, 0
; CHECK-NEXT:    v_writelane_b32 v43, s31, 1
; CHECK-NEXT:    v_writelane_b32 v43, s34, 2
; CHECK-NEXT:    v_writelane_b32 v43, s35, 3
; CHECK-NEXT:    v_writelane_b32 v43, s36, 4
; CHECK-NEXT:    v_writelane_b32 v43, s37, 5
; CHECK-NEXT:    v_writelane_b32 v43, s38, 6
; CHECK-NEXT:    v_writelane_b32 v43, s39, 7
; CHECK-NEXT:    s_addk_i32 s32, 0x800
; CHECK-NEXT:    v_writelane_b32 v43, s40, 8
; CHECK-NEXT:    v_writelane_b32 v43, s41, 9
; CHECK-NEXT:    s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    v_writelane_b32 v43, s42, 10
; CHECK-NEXT:    v_writelane_b32 v43, s43, 11
; CHECK-NEXT:    v_writelane_b32 v43, s44, 12
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT:    v_writelane_b32 v43, s45, 13
; CHECK-NEXT:    v_mov_b32_e32 v42, v31
; CHECK-NEXT:    v_mov_b32_e32 v41, v3
; CHECK-NEXT:    v_mov_b32_e32 v40, v2
; CHECK-NEXT:    s_mov_b32 s42, s15
; CHECK-NEXT:    s_mov_b32 s43, s14
; CHECK-NEXT:    s_mov_b32 s44, s13
; CHECK-NEXT:    s_mov_b32 s45, s12
; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_mul_f64 v[0:1], v[40:41], v[0:1]
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT:    s_mov_b32 s12, s45
; CHECK-NEXT:    s_mov_b32 s13, s44
; CHECK-NEXT:    s_mov_b32 s14, s43
; CHECK-NEXT:    s_mov_b32 s15, s42
; CHECK-NEXT:    v_mov_b32_e32 v31, v42
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT:    v_readlane_b32 s45, v43, 13
; CHECK-NEXT:    v_readlane_b32 s44, v43, 12
; CHECK-NEXT:    v_readlane_b32 s43, v43, 11
; CHECK-NEXT:    v_readlane_b32 s42, v43, 10
; CHECK-NEXT:    v_readlane_b32 s41, v43, 9
; CHECK-NEXT:    v_readlane_b32 s40, v43, 8
; CHECK-NEXT:    v_readlane_b32 s39, v43, 7
; CHECK-NEXT:    v_readlane_b32 s38, v43, 6
; CHECK-NEXT:    v_readlane_b32 s37, v43, 5
; CHECK-NEXT:    v_readlane_b32 s36, v43, 4
; CHECK-NEXT:    v_readlane_b32 s35, v43, 3
; CHECK-NEXT:    v_readlane_b32 s34, v43, 2
; CHECK-NEXT:    v_readlane_b32 s31, v43, 1
; CHECK-NEXT:    v_readlane_b32 s30, v43, 0
; CHECK-NEXT:    s_mov_b32 s32, s33
; CHECK-NEXT:    v_readlane_b32 s4, v43, 14
; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
; CHECK-NEXT:    s_mov_b32 s33, s4
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %powr = tail call fast double @_Z4powrdd(double %x, double %y)
  ret double %powr
}

; --------------------------------------------------------------------
; test pown
; --------------------------------------------------------------------

define half @test_pown_fast_f16(half %x, i32 %y) {
; CHECK-LABEL: test_pown_fast_f16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cvt_f32_i32_e32 v2, v1
; CHECK-NEXT:    v_log_f16_e64 v3, |v0|
; CHECK-NEXT:    v_lshlrev_b16_e32 v1, 15, v1
; CHECK-NEXT:    v_and_b32_e32 v0, v1, v0
; CHECK-NEXT:    v_cvt_f16_f32_e32 v2, v2
; CHECK-NEXT:    v_mul_f16_e32 v2, v3, v2
; CHECK-NEXT:    v_exp_f16_e32 v2, v2
; CHECK-NEXT:    v_or_b32_e32 v0, v0, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
  ret half %call
}

define float @test_pown_fast_f32(float %x, i32 %y) {
; CHECK-LABEL: test_pown_fast_f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x800000
; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
; CHECK-NEXT:    v_ldexp_f32 v3, |v0|, v3
; CHECK-NEXT:    v_log_f32_e32 v3, v3
; CHECK-NEXT:    v_cvt_f32_i32_e32 v4, v1
; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT:    v_sub_f32_e32 v2, v3, v2
; CHECK-NEXT:    v_mul_f32_e32 v3, v2, v4
; CHECK-NEXT:    s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT:    v_mov_b32_e32 v5, 0x42800000
; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
; CHECK-NEXT:    v_fma_f32 v2, v2, v4, v3
; CHECK-NEXT:    v_exp_f32_e32 v2, v2
; CHECK-NEXT:    v_not_b32_e32 v3, 63
; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 31, v1
; CHECK-NEXT:    v_ldexp_f32 v2, v2, v3
; CHECK-NEXT:    v_and_or_b32 v0, v1, v0, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %call = tail call fast float @_Z4pownfi(float %x, i32 %y)
  ret float %call
}

define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-LABEL: test_pown_fast_f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s16, s33
; CHECK-NEXT:    s_mov_b32 s33, s32
; CHECK-NEXT:    s_or_saveexec_b64 s[18:19], -1
; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; CHECK-NEXT:    s_mov_b64 exec, s[18:19]
; CHECK-NEXT:    v_writelane_b32 v43, s16, 14
; CHECK-NEXT:    v_writelane_b32 v43, s30, 0
; CHECK-NEXT:    v_writelane_b32 v43, s31, 1
; CHECK-NEXT:    v_writelane_b32 v43, s34, 2
; CHECK-NEXT:    v_writelane_b32 v43, s35, 3
; CHECK-NEXT:    v_writelane_b32 v43, s36, 4
; CHECK-NEXT:    v_writelane_b32 v43, s37, 5
; CHECK-NEXT:    v_writelane_b32 v43, s38, 6
; CHECK-NEXT:    v_writelane_b32 v43, s39, 7
; CHECK-NEXT:    s_addk_i32 s32, 0x800
; CHECK-NEXT:    v_writelane_b32 v43, s40, 8
; CHECK-NEXT:    v_writelane_b32 v43, s41, 9
; CHECK-NEXT:    s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    v_writelane_b32 v43, s42, 10
; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT:    v_writelane_b32 v43, s43, 11
; CHECK-NEXT:    v_mov_b32_e32 v42, v1
; CHECK-NEXT:    v_writelane_b32 v43, s44, 12
; CHECK-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v42
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    v_writelane_b32 v43, s45, 13
; CHECK-NEXT:    v_mov_b32_e32 v40, v31
; CHECK-NEXT:    v_mov_b32_e32 v41, v2
; CHECK-NEXT:    s_mov_b32 s42, s15
; CHECK-NEXT:    s_mov_b32 s43, s14
; CHECK-NEXT:    s_mov_b32 s44, s13
; CHECK-NEXT:    s_mov_b32 s45, s12
; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_cvt_f64_i32_e32 v[2:3], v41
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT:    s_mov_b32 s12, s45
; CHECK-NEXT:    s_mov_b32 s13, s44
; CHECK-NEXT:    s_mov_b32 s14, s43
; CHECK-NEXT:    s_mov_b32 s15, s42
; CHECK-NEXT:    v_mov_b32_e32 v31, v40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 31, v41
; CHECK-NEXT:    v_and_b32_e32 v2, v2, v42
; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT:    v_or_b32_e32 v1, v2, v1
; CHECK-NEXT:    v_readlane_b32 s45, v43, 13
; CHECK-NEXT:    v_readlane_b32 s44, v43, 12
; CHECK-NEXT:    v_readlane_b32 s43, v43, 11
; CHECK-NEXT:    v_readlane_b32 s42, v43, 10
; CHECK-NEXT:    v_readlane_b32 s41, v43, 9
; CHECK-NEXT:    v_readlane_b32 s40, v43, 8
; CHECK-NEXT:    v_readlane_b32 s39, v43, 7
; CHECK-NEXT:    v_readlane_b32 s38, v43, 6
; CHECK-NEXT:    v_readlane_b32 s37, v43, 5
; CHECK-NEXT:    v_readlane_b32 s36, v43, 4
; CHECK-NEXT:    v_readlane_b32 s35, v43, 3
; CHECK-NEXT:    v_readlane_b32 s34, v43, 2
; CHECK-NEXT:    v_readlane_b32 s31, v43, 1
; CHECK-NEXT:    v_readlane_b32 s30, v43, 0
; CHECK-NEXT:    s_mov_b32 s32, s33
; CHECK-NEXT:    v_readlane_b32 s4, v43, 14
; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
; CHECK-NEXT:    s_mov_b32 s33, s4
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %call = tail call fast double @_Z4powndi(double %x, i32 %y)
  ret double %call
}

define half @test_pown_fast_f16_known_even(half %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f16_known_even:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT:    v_log_f16_e64 v0, |v0|
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    v_mul_f16_e32 v0, v0, v1
; CHECK-NEXT:    v_exp_f16_e32 v0, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = shl i32 %y.arg, 1
  %call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
  ret half %call
}

define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f32_known_even:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x800000
; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
; CHECK-NEXT:    v_ldexp_f32 v0, |v0|, v3
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 1, v1
; CHECK-NEXT:    v_log_f32_e32 v0, v0
; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT:    v_sub_f32_e32 v0, v0, v2
; CHECK-NEXT:    v_mul_f32_e32 v2, v0, v1
; CHECK-NEXT:    s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT:    v_mov_b32_e32 v3, 0x42800000
; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v2
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
; CHECK-NEXT:    v_fma_f32 v0, v0, v1, v2
; CHECK-NEXT:    v_exp_f32_e32 v0, v0
; CHECK-NEXT:    v_not_b32_e32 v1, 63
; CHECK-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
; CHECK-NEXT:    v_ldexp_f32 v0, v0, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = shl i32 %y.arg, 1
  %call = tail call fast float @_Z4pownfi(float %x, i32 %y)
  ret float %call
}

define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f64_known_even:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s16, s33
; CHECK-NEXT:    s_mov_b32 s33, s32
; CHECK-NEXT:    s_or_saveexec_b64 s[18:19], -1
; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT:    s_mov_b64 exec, s[18:19]
; CHECK-NEXT:    v_writelane_b32 v42, s16, 14
; CHECK-NEXT:    v_writelane_b32 v42, s30, 0
; CHECK-NEXT:    v_writelane_b32 v42, s31, 1
; CHECK-NEXT:    v_writelane_b32 v42, s34, 2
; CHECK-NEXT:    v_writelane_b32 v42, s35, 3
; CHECK-NEXT:    v_writelane_b32 v42, s36, 4
; CHECK-NEXT:    v_writelane_b32 v42, s37, 5
; CHECK-NEXT:    v_writelane_b32 v42, s38, 6
; CHECK-NEXT:    v_writelane_b32 v42, s39, 7
; CHECK-NEXT:    s_addk_i32 s32, 0x400
; CHECK-NEXT:    v_writelane_b32 v42, s40, 8
; CHECK-NEXT:    v_writelane_b32 v42, s41, 9
; CHECK-NEXT:    s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    v_writelane_b32 v42, s42, 10
; CHECK-NEXT:    v_writelane_b32 v42, s43, 11
; CHECK-NEXT:    v_writelane_b32 v42, s44, 12
; CHECK-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT:    v_writelane_b32 v42, s45, 13
; CHECK-NEXT:    v_mov_b32_e32 v40, v31
; CHECK-NEXT:    s_mov_b32 s42, s15
; CHECK-NEXT:    s_mov_b32 s43, s14
; CHECK-NEXT:    s_mov_b32 s44, s13
; CHECK-NEXT:    s_mov_b32 s45, s12
; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT:    v_lshlrev_b32_e32 v41, 1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_cvt_f64_i32_e32 v[2:3], v41
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT:    s_mov_b32 s12, s45
; CHECK-NEXT:    s_mov_b32 s13, s44
; CHECK-NEXT:    s_mov_b32 s14, s43
; CHECK-NEXT:    s_mov_b32 s15, s42
; CHECK-NEXT:    v_mov_b32_e32 v31, v40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT:    v_readlane_b32 s45, v42, 13
; CHECK-NEXT:    v_readlane_b32 s44, v42, 12
; CHECK-NEXT:    v_readlane_b32 s43, v42, 11
; CHECK-NEXT:    v_readlane_b32 s42, v42, 10
; CHECK-NEXT:    v_readlane_b32 s41, v42, 9
; CHECK-NEXT:    v_readlane_b32 s40, v42, 8
; CHECK-NEXT:    v_readlane_b32 s39, v42, 7
; CHECK-NEXT:    v_readlane_b32 s38, v42, 6
; CHECK-NEXT:    v_readlane_b32 s37, v42, 5
; CHECK-NEXT:    v_readlane_b32 s36, v42, 4
; CHECK-NEXT:    v_readlane_b32 s35, v42, 3
; CHECK-NEXT:    v_readlane_b32 s34, v42, 2
; CHECK-NEXT:    v_readlane_b32 s31, v42, 1
; CHECK-NEXT:    v_readlane_b32 s30, v42, 0
; CHECK-NEXT:    s_mov_b32 s32, s33
; CHECK-NEXT:    v_readlane_b32 s4, v42, 14
; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
; CHECK-NEXT:    s_mov_b32 s33, s4
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = shl i32 %y.arg, 1
  %call = tail call fast double @_Z4powndi(double %x, i32 %y)
  ret double %call
}

define half @test_pown_fast_f16_known_odd(half %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f16_known_odd:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_or_b32_e32 v1, 1, v1
; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT:    v_log_f16_e64 v2, |v0|
; CHECK-NEXT:    s_movk_i32 s4, 0x7fff
; CHECK-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CHECK-NEXT:    v_mul_f16_e32 v1, v2, v1
; CHECK-NEXT:    v_exp_f16_e32 v1, v1
; CHECK-NEXT:    v_bfi_b32 v0, s4, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = or i32 %y.arg, 1
  %call = tail call fast half @_Z4pownDhi(half %x, i32 %y)
  ret half %call
}

define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f32_known_odd:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s4, 0x800000
; CHECK-NEXT:    v_cmp_lt_f32_e64 vcc, |v0|, s4
; CHECK-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 5, v3
; CHECK-NEXT:    v_ldexp_f32 v3, |v0|, v3
; CHECK-NEXT:    v_or_b32_e32 v1, 1, v1
; CHECK-NEXT:    v_log_f32_e32 v3, v3
; CHECK-NEXT:    v_cvt_f32_i32_e32 v1, v1
; CHECK-NEXT:    v_mov_b32_e32 v2, 0x42000000
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT:    v_sub_f32_e32 v2, v3, v2
; CHECK-NEXT:    v_mul_f32_e32 v3, v2, v1
; CHECK-NEXT:    s_mov_b32 s4, 0xc2fc0000
; CHECK-NEXT:    v_mov_b32_e32 v4, 0x42800000
; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v3
; CHECK-NEXT:    v_cndmask_b32_e32 v3, 0, v4, vcc
; CHECK-NEXT:    v_fma_f32 v1, v2, v1, v3
; CHECK-NEXT:    v_exp_f32_e32 v1, v1
; CHECK-NEXT:    v_not_b32_e32 v2, 63
; CHECK-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
; CHECK-NEXT:    s_brev_b32 s4, -2
; CHECK-NEXT:    v_ldexp_f32 v1, v1, v2
; CHECK-NEXT:    v_bfi_b32 v0, s4, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = or i32 %y.arg, 1
  %call = tail call fast float @_Z4pownfi(float %x, i32 %y)
  ret float %call
}

define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-LABEL: test_pown_fast_f64_known_odd:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 s16, s33
; CHECK-NEXT:    s_mov_b32 s33, s32
; CHECK-NEXT:    s_or_saveexec_b64 s[18:19], -1
; CHECK-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; CHECK-NEXT:    s_mov_b64 exec, s[18:19]
; CHECK-NEXT:    v_writelane_b32 v43, s16, 14
; CHECK-NEXT:    v_writelane_b32 v43, s30, 0
; CHECK-NEXT:    v_writelane_b32 v43, s31, 1
; CHECK-NEXT:    v_writelane_b32 v43, s34, 2
; CHECK-NEXT:    v_writelane_b32 v43, s35, 3
; CHECK-NEXT:    v_writelane_b32 v43, s36, 4
; CHECK-NEXT:    v_writelane_b32 v43, s37, 5
; CHECK-NEXT:    v_writelane_b32 v43, s38, 6
; CHECK-NEXT:    v_writelane_b32 v43, s39, 7
; CHECK-NEXT:    s_addk_i32 s32, 0x800
; CHECK-NEXT:    v_writelane_b32 v43, s40, 8
; CHECK-NEXT:    v_writelane_b32 v43, s41, 9
; CHECK-NEXT:    s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    v_writelane_b32 v43, s42, 10
; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT:    buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT:    v_writelane_b32 v43, s43, 11
; CHECK-NEXT:    v_mov_b32_e32 v41, v1
; CHECK-NEXT:    v_writelane_b32 v43, s44, 12
; CHECK-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v41
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    v_writelane_b32 v43, s45, 13
; CHECK-NEXT:    v_mov_b32_e32 v40, v31
; CHECK-NEXT:    s_mov_b32 s42, s15
; CHECK-NEXT:    s_mov_b32 s43, s14
; CHECK-NEXT:    s_mov_b32 s44, s13
; CHECK-NEXT:    s_mov_b32 s45, s12
; CHECK-NEXT:    s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT:    s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT:    s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT:    v_or_b32_e32 v42, 1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_cvt_f64_i32_e32 v[2:3], v42
; CHECK-NEXT:    s_getpc_b64 s[4:5]
; CHECK-NEXT:    s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT:    s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT:    s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT:    s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT:    s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT:    s_mov_b32 s12, s45
; CHECK-NEXT:    s_mov_b32 s13, s44
; CHECK-NEXT:    s_mov_b32 s14, s43
; CHECK-NEXT:    s_mov_b32 s15, s42
; CHECK-NEXT:    v_mov_b32_e32 v31, v40
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT:    v_and_b32_e32 v2, 0x80000000, v41
; CHECK-NEXT:    buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT:    v_or_b32_e32 v1, v2, v1
; CHECK-NEXT:    v_readlane_b32 s45, v43, 13
; CHECK-NEXT:    v_readlane_b32 s44, v43, 12
; CHECK-NEXT:    v_readlane_b32 s43, v43, 11
; CHECK-NEXT:    v_readlane_b32 s42, v43, 10
; CHECK-NEXT:    v_readlane_b32 s41, v43, 9
; CHECK-NEXT:    v_readlane_b32 s40, v43, 8
; CHECK-NEXT:    v_readlane_b32 s39, v43, 7
; CHECK-NEXT:    v_readlane_b32 s38, v43, 6
; CHECK-NEXT:    v_readlane_b32 s37, v43, 5
; CHECK-NEXT:    v_readlane_b32 s36, v43, 4
; CHECK-NEXT:    v_readlane_b32 s35, v43, 3
; CHECK-NEXT:    v_readlane_b32 s34, v43, 2
; CHECK-NEXT:    v_readlane_b32 s31, v43, 1
; CHECK-NEXT:    v_readlane_b32 s30, v43, 0
; CHECK-NEXT:    s_mov_b32 s32, s33
; CHECK-NEXT:    v_readlane_b32 s4, v43, 14
; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
; CHECK-NEXT:    s_mov_b32 s33, s4
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %y = or i32 %y.arg, 1
  %call = tail call fast double @_Z4powndi(double %x, i32 %y)
  ret double %call
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}