; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-SDAG,SI-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX689,SI,GFX689-GISEL,SI-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-SDAG,VI-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GFX689,VI,GFX689-GISEL,VI-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-SDAG,GFX900-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX689,GFX900,GFX689-GISEL,GFX900-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX1100,GFX1100-GISEL %s

; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
; RUN: llc -mtriple=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s

; s_log2_f32 - scalar case. The kernel takes a float argument, applies
; llvm.log2.f32 to it and stores the result through %out.
;
; What the expected code below shows for the amdgcn targets:
;   * the input is compared with 0x800000 (printed as 1.175494e-38 in the
;     r600/cayman literals);
;   * when the input is smaller, it is scaled with ldexp by an exponent of 32
;     (the 0/1 select result shifted left by 5) before v_log_f32;
;   * 0x42000000 (printed as 3.200000e+01) is selected under the same
;     condition, 0 otherwise, and subtracted from the v_log_f32 result.
; The r600 and cayman output follows the same pattern with SETGT/CNDE, but
; scales with MUL_IEEE by the literal 1333788672 (4.294967e+09) instead of
; ldexp; the cayman output issues LOG_IEEE four times with three of the
; results MASKED.
; NOTE(review): this looks like a denormal-input workaround around the
; hardware log instruction - confirm against the AMDGPU lowering code.
;
; The assertion lines inside the function are produced by
; update_llc_test_checks.py (see the note on the first line of this file);
; regenerate them with the script instead of editing them by hand.
define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) {
; SI-SDAG-LABEL: s_log2_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dword s2, s[4:5], 0xb
; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s2, v1
; SI-SDAG-NEXT: v_log_f32_e32 v1, v1
; SI-SDAG-NEXT: s_mov_b32 s2, -1
; SI-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_log2_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dword s2, s[4:5], 0xb
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s2, v0
; SI-GISEL-NEXT: v_log_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: s_log2_f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; VI-SDAG-NEXT: v_ldexp_f32 v1, s2, v1
; VI-SDAG-NEXT: v_log_f32_e32 v1, v1
; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v0
; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_log2_f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; VI-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
; VI-GISEL-NEXT: v_log_f32_e32 v0, v0
; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_log2_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s2, v1
; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_log2_f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
;
; GFX1100-SDAG-LABEL: s_log2_f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, s2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s0
; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s2, v1
; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1100-SDAG-NEXT: s_endpgm
;
; GFX1100-GISEL-LABEL: s_log2_f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_clause 0x1
; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3
; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s3
; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0
; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0
; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff
; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-GISEL-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1100-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_log2_f32:
; R600: ; %bb.0:
; R600-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; R600-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
; R600-NEXT: MUL_IEEE T1.W, KC0[2].Z, PV.W,
; R600-NEXT: CNDE * T0.W, T0.W, 0.0, literal.x,
; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
; R600-NEXT: LOG_IEEE * T0.X, PV.W,
; R600-NEXT: ADD T0.X, PS, -T0.W,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: s_log2_f32:
; CM: ; %bb.0:
; CM-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00)
; CM-NEXT: CNDE * T1.W, PV.W, 1.0, literal.x,
; CM-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00)
; CM-NEXT: CNDE T0.Z, T0.W, 0.0, literal.x,
; CM-NEXT: MUL_IEEE * T0.W, KC0[2].Z, PV.W,
; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00)
; CM-NEXT: LOG_IEEE T0.X, T0.W,
; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT: ADD * T0.X, PV.X, -T0.Z,
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %result = call float @llvm.log2.f32(float %in)
  store float %result, ptr addrspace(1) %out
  ret void
}

; FIXME: 
We should be able to merge these packets together on Cayman so we 213; have a maximum of 4 instructions. 214define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) { 215; SI-SDAG-LABEL: s_log2_v2f32: 216; SI-SDAG: ; %bb.0: 217; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 218; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 219; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 220; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 221; SI-SDAG-NEXT: s_mov_b32 s6, -1 222; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 223; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 224; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 225; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 226; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 227; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 228; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 229; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 230; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 231; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s3, v3 232; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s2, v1 233; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 234; SI-SDAG-NEXT: v_log_f32_e32 v4, v1 235; SI-SDAG-NEXT: s_mov_b32 s4, s0 236; SI-SDAG-NEXT: s_mov_b32 s5, s1 237; SI-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 238; SI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 239; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 240; SI-SDAG-NEXT: s_endpgm 241; 242; SI-GISEL-LABEL: s_log2_v2f32: 243; SI-GISEL: ; %bb.0: 244; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 245; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 246; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 247; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 248; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 249; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 250; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 251; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] 252; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 253; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 254; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s6, v2 255; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s7, v0 256; 
SI-GISEL-NEXT: v_log_f32_e32 v2, v2 257; SI-GISEL-NEXT: v_log_f32_e32 v3, v0 258; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 259; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] 260; SI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 261; SI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 262; SI-GISEL-NEXT: s_mov_b32 s6, -1 263; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 264; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 265; SI-GISEL-NEXT: s_endpgm 266; 267; VI-SDAG-LABEL: s_log2_v2f32: 268; VI-SDAG: ; %bb.0: 269; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 270; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 271; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 272; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 273; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 274; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 275; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 276; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 277; VI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 278; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 279; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 280; VI-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 281; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 282; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 283; VI-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 284; VI-SDAG-NEXT: v_log_f32_e32 v4, v1 285; VI-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 286; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 287; VI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 288; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 289; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 290; VI-SDAG-NEXT: s_endpgm 291; 292; VI-GISEL-LABEL: s_log2_v2f32: 293; VI-GISEL: ; %bb.0: 294; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 295; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 296; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 297; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 298; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 299; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s7, v0 300; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 301; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] 302; VI-GISEL-NEXT: 
v_lshlrev_b32_e32 v2, 5, v2 303; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 304; VI-GISEL-NEXT: v_ldexp_f32 v2, s6, v2 305; VI-GISEL-NEXT: v_ldexp_f32 v0, s7, v0 306; VI-GISEL-NEXT: v_log_f32_e32 v2, v2 307; VI-GISEL-NEXT: v_log_f32_e32 v3, v0 308; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 309; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] 310; VI-GISEL-NEXT: v_sub_f32_e32 v0, v2, v0 311; VI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 312; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 313; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 314; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 315; VI-GISEL-NEXT: s_endpgm 316; 317; GFX900-SDAG-LABEL: s_log2_v2f32: 318; GFX900-SDAG: ; %bb.0: 319; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 320; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 321; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 322; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0 323; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) 324; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 325; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 326; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 327; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 328; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 329; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 330; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 331; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 332; GFX900-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 333; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 334; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 335; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v1 336; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 337; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 338; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] 339; GFX900-SDAG-NEXT: s_endpgm 340; 341; GFX900-GISEL-LABEL: s_log2_v2f32: 342; GFX900-GISEL: ; %bb.0: 343; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 344; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 345; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 346; GFX900-GISEL-NEXT: v_mov_b32_e32 
v2, 0 347; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) 348; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 349; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0 350; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 351; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] 352; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 353; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 354; GFX900-GISEL-NEXT: v_ldexp_f32 v3, s10, v3 355; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s11, v0 356; GFX900-GISEL-NEXT: v_log_f32_e32 v3, v3 357; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v0 358; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 359; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v1, s[0:1] 360; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v3, v0 361; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 362; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] 363; GFX900-GISEL-NEXT: s_endpgm 364; 365; GFX1100-SDAG-LABEL: s_log2_v2f32: 366; GFX1100-SDAG: ; %bb.0: 367; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 368; GFX1100-SDAG-NEXT: v_mov_b32_e32 v4, 0 369; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 370; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 371; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2 372; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 373; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 374; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s5 375; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s4 376; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s5 377; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 378; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 379; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 380; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 381; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s3, v1 382; GFX1100-SDAG-NEXT: v_ldexp_f32 v3, s2, v3 383; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_1) 384; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 385; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 386; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 387; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v1, v0 :: v_dual_sub_f32 v0, v3, v2 388; GFX1100-SDAG-NEXT: global_store_b64 v4, v[0:1], s[0:1] 389; GFX1100-SDAG-NEXT: s_endpgm 390; 391; GFX1100-GISEL-LABEL: s_log2_v2f32: 392; GFX1100-GISEL: ; %bb.0: 393; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 394; GFX1100-GISEL-NEXT: v_mov_b32_e32 v4, 0 395; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 396; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s3 397; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s2 398; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 399; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 400; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 401; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s5 402; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s4 403; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 404; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 405; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s3, v1 406; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 407; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 408; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 409; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v3 :: v_dual_lshlrev_b32 v0, 5, v0 410; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s2, v0 411; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 412; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 413; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 414; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2 415; GFX1100-GISEL-NEXT: global_store_b64 v4, v[0:1], s[0:1] 416; GFX1100-GISEL-NEXT: s_endpgm 417; 418; R600-LABEL: s_log2_v2f32: 419; R600: ; %bb.0: 420; R600-NEXT: ALU 18, @4, KC0[CB0:0-32], KC1[] 421; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, 
T1.X, 1 422; R600-NEXT: CF_END 423; R600-NEXT: PAD 424; R600-NEXT: ALU clause starting at 4: 425; R600-NEXT: SETGT T0.W, literal.x, KC0[3].X, 426; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].W, 427; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) 428; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x, 429; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) 430; R600-NEXT: MUL_IEEE T2.W, KC0[3].X, PV.W, 431; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x, 432; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) 433; R600-NEXT: MUL_IEEE T0.Z, KC0[2].W, PS, 434; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.x, 435; R600-NEXT: LOG_IEEE * T0.X, PV.W, 436; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) 437; R600-NEXT: ADD T0.Y, PS, -PV.W, 438; R600-NEXT: CNDE T0.W, T1.W, 0.0, literal.x, 439; R600-NEXT: LOG_IEEE * T0.X, PV.Z, 440; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) 441; R600-NEXT: ADD T0.X, PS, -PV.W, 442; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 443; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 444; 445; CM-LABEL: s_log2_v2f32: 446; CM: ; %bb.0: 447; CM-NEXT: ALU 23, @4, KC0[CB0:0-32], KC1[] 448; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T0.X 449; CM-NEXT: CF_END 450; CM-NEXT: PAD 451; CM-NEXT: ALU clause starting at 4: 452; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].X, 453; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) 454; CM-NEXT: CNDE T0.Z, PV.W, 1.0, literal.x, 455; CM-NEXT: SETGT * T1.W, literal.y, KC0[2].W, 456; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) 457; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, 458; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y, 459; CM-NEXT: MUL_IEEE * T0.W, KC0[3].X, PV.Z, 460; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) 461; CM-NEXT: LOG_IEEE T0.X, T0.W, 462; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, 463; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, 464; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, 465; CM-NEXT: ADD T1.Y, PV.X, -T1.Z, 466; CM-NEXT: CNDE T0.Z, T1.W, 0.0, literal.x, 467; CM-NEXT: MUL_IEEE * 
T0.W, KC0[2].W, T0.Y, 468; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) 469; CM-NEXT: LOG_IEEE T0.X, T0.W, 470; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, 471; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, 472; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, 473; CM-NEXT: ADD * T1.X, PV.X, -T0.Z, 474; CM-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 475; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 476 %result = call <2 x float> @llvm.log2.v2f32(<2 x float> %in) 477 store <2 x float> %result, ptr addrspace(1) %out 478 ret void 479} 480 481define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) { 482; SI-SDAG-LABEL: s_log2_v3f32: 483; SI-SDAG: ; %bb.0: 484; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd 485; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 486; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 487; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 488; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 489; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 490; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 491; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 492; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 493; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 494; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc 495; SI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 496; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 497; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 498; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 499; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 500; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s1, v3 501; SI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 502; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s2, v0 503; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 504; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, s0, v5 505; SI-SDAG-NEXT: v_log_f32_e32 v7, v0 506; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 507; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc 508; SI-SDAG-NEXT: s_mov_b32 s6, -1 509; SI-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 510; SI-SDAG-NEXT: v_sub_f32_e32 v2, v7, v6 511; SI-SDAG-NEXT: v_sub_f32_e32 v0, v5, v4 512; SI-SDAG-NEXT: 
buffer_store_dword v2, off, s[4:7], 0 offset:8 513; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 514; SI-SDAG-NEXT: s_endpgm 515; 516; SI-GISEL-LABEL: s_log2_v3f32: 517; SI-GISEL: ; %bb.0: 518; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd 519; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 520; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 521; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 522; SI-GISEL-NEXT: s_mov_b32 s6, -1 523; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 524; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 525; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 526; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 527; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 528; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 529; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 530; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] 531; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc 532; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 533; SI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, v3 534; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 535; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, s9, v3 536; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 537; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 538; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s10, v1 539; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 540; SI-GISEL-NEXT: v_log_f32_e32 v4, v1 541; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[0:1] 542; SI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 543; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 544; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 545; SI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 546; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 547; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 548; SI-GISEL-NEXT: s_endpgm 549; 550; VI-SDAG-LABEL: s_log2_v3f32: 551; VI-SDAG: ; %bb.0: 552; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 553; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 554; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 555; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 556; VI-SDAG-NEXT: s_waitcnt 
lgkmcnt(0) 557; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 558; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 559; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 560; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 561; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc 562; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 563; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 564; VI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 565; VI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 566; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 567; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 568; VI-SDAG-NEXT: v_ldexp_f32 v3, s2, v3 569; VI-SDAG-NEXT: v_ldexp_f32 v5, s1, v5 570; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 571; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 572; VI-SDAG-NEXT: v_log_f32_e32 v5, v5 573; VI-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 574; VI-SDAG-NEXT: v_log_f32_e32 v6, v1 575; VI-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 576; VI-SDAG-NEXT: v_sub_f32_e32 v1, v5, v4 577; VI-SDAG-NEXT: v_mov_b32_e32 v3, s4 578; VI-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0 579; VI-SDAG-NEXT: v_mov_b32_e32 v4, s5 580; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 581; VI-SDAG-NEXT: s_endpgm 582; 583; VI-GISEL-LABEL: s_log2_v3f32: 584; VI-GISEL: ; %bb.0: 585; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 586; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 587; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 588; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 589; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 590; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v1 591; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 592; VI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 593; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 594; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 595; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v1 596; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc 597; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v1 598; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[0:1] 599; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 600; VI-GISEL-NEXT: v_lshlrev_b32_e32 v3, 5, 
v3 601; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 602; VI-GISEL-NEXT: v_ldexp_f32 v3, s9, v3 603; VI-GISEL-NEXT: v_ldexp_f32 v1, s10, v1 604; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 605; VI-GISEL-NEXT: v_log_f32_e32 v3, v3 606; VI-GISEL-NEXT: v_log_f32_e32 v4, v1 607; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, v2, s[0:1] 608; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 609; VI-GISEL-NEXT: v_sub_f32_e32 v1, v3, v1 610; VI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 611; VI-GISEL-NEXT: v_mov_b32_e32 v4, s3 612; VI-GISEL-NEXT: v_mov_b32_e32 v3, s2 613; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 614; VI-GISEL-NEXT: s_endpgm 615; 616; GFX900-SDAG-LABEL: s_log2_v3f32: 617; GFX900-SDAG: ; %bb.0: 618; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 619; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 620; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 621; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 622; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0 623; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) 624; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 625; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 626; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 627; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 628; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc 629; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 630; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 631; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 632; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 633; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 634; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 635; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 636; GFX900-SDAG-NEXT: v_ldexp_f32 v3, s2, v3 637; GFX900-SDAG-NEXT: v_ldexp_f32 v5, s1, v5 638; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 639; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 640; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v5 641; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v1 642; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 643; GFX900-SDAG-NEXT: 
v_sub_f32_e32 v1, v5, v4 644; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0 645; GFX900-SDAG-NEXT: global_store_dwordx3 v7, v[0:2], s[6:7] 646; GFX900-SDAG-NEXT: s_endpgm 647; 648; GFX900-GISEL-LABEL: s_log2_v3f32: 649; GFX900-GISEL: ; %bb.0: 650; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 651; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 652; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 653; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42000000 654; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 655; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) 656; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 657; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 658; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 659; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 660; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 661; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc 662; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v1 663; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 664; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 665; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4 666; GFX900-GISEL-NEXT: v_ldexp_f32 v4, s1, v4 667; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v1 668; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 669; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 670; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s2, v1 671; GFX900-GISEL-NEXT: v_log_f32_e32 v4, v4 672; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v1 673; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc 674; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] 675; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v4, v1 676; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 677; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] 678; GFX900-GISEL-NEXT: s_endpgm 679; 680; GFX1100-SDAG-LABEL: s_log2_v3f32: 681; GFX1100-SDAG: ; %bb.0: 682; GFX1100-SDAG-NEXT: s_clause 0x1 683; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 684; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 685; GFX1100-SDAG-NEXT: v_mov_b32_e32 v6, 
0 686; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 687; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 688; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 689; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s0 690; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 691; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s6 692; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s3 693; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) 694; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s7 695; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s6 696; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s3 697; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v4, 5, v4 698; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s7 699; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 700; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 701; GFX1100-SDAG-NEXT: v_ldexp_f32 v4, s1, v4 702; GFX1100-SDAG-NEXT: v_ldexp_f32 v5, s0, v5 703; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 704; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4 705; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 706; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5 707; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 708; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v4, v1 709; GFX1100-SDAG-NEXT: v_ldexp_f32 v2, s2, v2 710; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 711; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 712; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 713; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v2, v0 714; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v5, v3 715; GFX1100-SDAG-NEXT: global_store_b96 v6, v[0:2], s[4:5] 716; GFX1100-SDAG-NEXT: s_endpgm 717; 718; GFX1100-GISEL-LABEL: s_log2_v3f32: 719; GFX1100-GISEL: ; %bb.0: 720; GFX1100-GISEL-NEXT: s_clause 0x1 721; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 722; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 723; GFX1100-GISEL-NEXT: 
v_mov_b32_e32 v6, 0 724; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 725; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 726; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s0 727; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 728; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 729; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 730; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 731; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) 732; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7 733; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s6 734; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 0x42000000, s3 735; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 736; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 737; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7 738; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 739; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 740; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 741; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 742; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 743; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 744; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 745; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 746; GFX1100-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 747; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 748; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v3 749; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 750; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 751; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 752; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v5 753; GFX1100-GISEL-NEXT: global_store_b96 v6, v[0:2], s[4:5] 754; GFX1100-GISEL-NEXT: s_endpgm 755; 756; R600-LABEL: s_log2_v3f32: 757; R600: ; %bb.0: 758; R600-NEXT: ALU 29, @4, KC0[CB0:0-32], KC1[] 759; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 760; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T1.XY, T0.X, 1 761; R600-NEXT: CF_END 762; R600-NEXT: ALU clause starting at 4: 763; R600-NEXT: SETGT T0.W, literal.x, KC0[3].Z, 764; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].Y, 765; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) 766; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x, 767; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) 768; R600-NEXT: MUL_IEEE T2.W, KC0[3].Z, PV.W, 769; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.x, 770; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) 771; R600-NEXT: MUL_IEEE T0.Y, KC0[3].Y, PS, 772; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].W, 773; R600-NEXT: CNDE T0.W, T0.W, 0.0, literal.y, 774; R600-NEXT: LOG_IEEE * T0.X, PV.W, 775; R600-NEXT: 8388608(1.175494e-38), 1107296256(3.200000e+01) 776; R600-NEXT: ADD T1.Y, PS, -PV.W, 777; R600-NEXT: CNDE T1.Z, PV.Z, 1.0, literal.x, 778; R600-NEXT: CNDE T0.W, T1.W, 0.0, literal.y, 779; R600-NEXT: LOG_IEEE * T0.X, PV.Y, 780; R600-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) 781; R600-NEXT: ADD T1.X, PS, -PV.W, 782; R600-NEXT: MUL_IEEE T0.W, KC0[3].W, PV.Z, 783; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 784; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 785; R600-NEXT: CNDE T1.W, T0.Z, 0.0, literal.x, 786; R600-NEXT: LOG_IEEE * T0.Y, PV.W, 787; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) 788; R600-NEXT: ADD T2.X, PS, -PV.W, 789; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 790; R600-NEXT: 8(1.121039e-44), 0(0.000000e+00) 791; R600-NEXT: LSHR * T3.X, PV.W, literal.x, 792; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 793; 794; CM-LABEL: s_log2_v3f32: 795; CM: ; %bb.0: 796; CM-NEXT: ALU 35, @4, KC0[CB0:0-32], KC1[] 797; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X 798; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T2.X 799; CM-NEXT: CF_END 800; CM-NEXT: ALU clause starting at 4: 801; CM-NEXT: SETGT * T0.W, literal.x, KC0[3].W, 802; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) 803; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, 804; CM-NEXT: SETGT T0.Z, 
literal.y, KC0[3].Z, 805; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Y, 806; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) 807; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x, 808; CM-NEXT: CNDE T1.Y, PV.Z, 1.0, literal.x, 809; CM-NEXT: CNDE T1.Z, T0.W, 0.0, literal.y, 810; CM-NEXT: MUL_IEEE * T0.W, KC0[3].W, PV.Y, 811; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) 812; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, 813; CM-NEXT: LOG_IEEE T0.Y, T0.W, 814; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, 815; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, 816; CM-NEXT: ADD T1.X, PV.Y, -T1.Z, 817; CM-NEXT: CNDE T0.Y, T0.Z, 0.0, literal.x, 818; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 819; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Z, T1.Y, 820; CM-NEXT: 1107296256(3.200000e+01), 8(1.121039e-44) 821; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, 822; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, 823; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, 824; CM-NEXT: LOG_IEEE * T0.W, T0.W, 825; CM-NEXT: LSHR T2.X, T0.Z, literal.x, 826; CM-NEXT: ADD T0.Y, PV.W, -T0.Y, 827; CM-NEXT: CNDE T0.Z, T1.W, 0.0, literal.y, 828; CM-NEXT: MUL_IEEE * T0.W, KC0[3].Y, T0.X, 829; CM-NEXT: 2(2.802597e-45), 1107296256(3.200000e+01) 830; CM-NEXT: LOG_IEEE T0.X, T0.W, 831; CM-NEXT: LOG_IEEE T0.Y (MASKED), T0.W, 832; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, 833; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, 834; CM-NEXT: ADD * T0.X, PV.X, -T0.Z, 835; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, 836; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 837 %result = call <3 x float> @llvm.log2.v3f32(<3 x float> %in) 838 store <3 x float> %result, ptr addrspace(1) %out 839 ret void 840} 841 842; FIXME: We should be able to merge these packets together on Cayman so we 843; have a maximum of 4 instructions. 
844define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) { 845; SI-SDAG-LABEL: s_log2_v4f32: 846; SI-SDAG: ; %bb.0: 847; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 848; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0xd 849; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 850; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 851; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 852; SI-SDAG-NEXT: s_mov_b32 s2, -1 853; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 854; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 855; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 856; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 857; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 858; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc 859; SI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 860; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 861; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc 862; SI-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 863; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 864; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 865; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 866; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 867; SI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 868; SI-SDAG-NEXT: v_lshlrev_b32_e32 v7, 5, v7 869; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 870; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s7, v3 871; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, s6, v5 872; SI-SDAG-NEXT: v_ldexp_f32_e32 v7, s5, v7 873; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s4, v1 874; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 875; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 876; SI-SDAG-NEXT: v_log_f32_e32 v7, v7 877; SI-SDAG-NEXT: v_log_f32_e32 v8, v1 878; SI-SDAG-NEXT: v_sub_f32_e32 v3, v3, v2 879; SI-SDAG-NEXT: v_sub_f32_e32 v2, v5, v4 880; SI-SDAG-NEXT: v_sub_f32_e32 v1, v7, v6 881; SI-SDAG-NEXT: v_sub_f32_e32 v0, v8, v0 882; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 883; SI-SDAG-NEXT: s_endpgm 884; 885; SI-GISEL-LABEL: s_log2_v4f32: 886; SI-GISEL: ; %bb.0: 887; SI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd 888; 
SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 889; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 890; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 891; SI-GISEL-NEXT: s_mov_b32 s6, -1 892; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 893; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 894; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 895; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 896; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 897; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 898; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, s8, v0 899; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 900; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 901; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, s9, v1 902; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 903; SI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc 904; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 905; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v3, s[0:1] 906; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 907; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 908; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 909; SI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 910; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 911; SI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4 912; SI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 913; SI-GISEL-NEXT: v_ldexp_f32_e32 v4, s10, v4 914; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, s11, v2 915; SI-GISEL-NEXT: v_log_f32_e32 v4, v4 916; SI-GISEL-NEXT: v_log_f32_e32 v5, v2 917; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc 918; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] 919; SI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 920; SI-GISEL-NEXT: v_sub_f32_e32 v3, v5, v3 921; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 922; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 923; SI-GISEL-NEXT: s_endpgm 924; 925; VI-SDAG-LABEL: s_log2_v4f32: 926; VI-SDAG: ; %bb.0: 927; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 928; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 929; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 930; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 931; 
VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 932; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 933; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 934; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 935; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 936; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc 937; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 938; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 939; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc 940; VI-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc 941; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 942; VI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 943; VI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 944; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 945; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 946; VI-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 947; VI-SDAG-NEXT: v_ldexp_f32 v5, s2, v5 948; VI-SDAG-NEXT: v_lshlrev_b32_e32 v7, 5, v7 949; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 950; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 951; VI-SDAG-NEXT: v_log_f32_e32 v5, v5 952; VI-SDAG-NEXT: v_ldexp_f32 v7, s1, v7 953; VI-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 954; VI-SDAG-NEXT: v_log_f32_e32 v7, v7 955; VI-SDAG-NEXT: v_log_f32_e32 v8, v1 956; VI-SDAG-NEXT: v_sub_f32_e32 v3, v3, v2 957; VI-SDAG-NEXT: v_sub_f32_e32 v2, v5, v4 958; VI-SDAG-NEXT: v_mov_b32_e32 v4, s4 959; VI-SDAG-NEXT: v_sub_f32_e32 v1, v7, v6 960; VI-SDAG-NEXT: v_sub_f32_e32 v0, v8, v0 961; VI-SDAG-NEXT: v_mov_b32_e32 v5, s5 962; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 963; VI-SDAG-NEXT: s_endpgm 964; 965; VI-GISEL-LABEL: s_log2_v4f32: 966; VI-GISEL: ; %bb.0: 967; VI-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 968; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 969; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 970; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 971; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 972; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 973; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 974; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 975; VI-GISEL-NEXT: v_lshlrev_b32_e32 
v0, 5, v0 976; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 977; VI-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 978; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 979; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 980; VI-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 981; VI-GISEL-NEXT: v_log_f32_e32 v1, v1 982; VI-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc 983; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 984; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, v3, s[0:1] 985; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 986; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 987; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 988; VI-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 989; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 990; VI-GISEL-NEXT: v_lshlrev_b32_e32 v4, 5, v4 991; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 992; VI-GISEL-NEXT: v_ldexp_f32 v4, s10, v4 993; VI-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 994; VI-GISEL-NEXT: v_log_f32_e32 v4, v4 995; VI-GISEL-NEXT: v_log_f32_e32 v5, v2 996; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc 997; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] 998; VI-GISEL-NEXT: v_sub_f32_e32 v2, v4, v2 999; VI-GISEL-NEXT: v_sub_f32_e32 v3, v5, v3 1000; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3 1001; VI-GISEL-NEXT: v_mov_b32_e32 v4, s2 1002; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1003; VI-GISEL-NEXT: s_endpgm 1004; 1005; GFX900-SDAG-LABEL: s_log2_v4f32: 1006; GFX900-SDAG: ; %bb.0: 1007; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 1008; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1009; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 1010; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1011; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 1012; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1013; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 1014; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc 1015; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1016; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 1017; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc 1018; 
GFX900-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc 1019; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 1020; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v1, vcc 1021; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc 1022; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 1023; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 1024; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1025; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 1026; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v6, 5, v6 1027; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v8, 5, v8 1028; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1029; GFX900-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 1030; GFX900-SDAG-NEXT: v_ldexp_f32 v6, s2, v6 1031; GFX900-SDAG-NEXT: v_ldexp_f32 v8, s1, v8 1032; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 1033; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 1034; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 1035; GFX900-SDAG-NEXT: v_log_f32_e32 v8, v8 1036; GFX900-SDAG-NEXT: v_log_f32_e32 v9, v1 1037; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v3, v2 1038; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 1039; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v8, v7 1040; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v9, v0 1041; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] 1042; GFX900-SDAG-NEXT: s_endpgm 1043; 1044; GFX900-GISEL-LABEL: s_log2_v4f32: 1045; GFX900-GISEL: ; %bb.0: 1046; GFX900-GISEL-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 1047; GFX900-GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 1048; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x800000 1049; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42000000 1050; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 1051; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1052; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s8, v2 1053; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1054; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v2 1055; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v0, 5, v0 1056; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1] 1057; GFX900-GISEL-NEXT: v_ldexp_f32 v0, s8, v0 1058; GFX900-GISEL-NEXT: 
v_lshlrev_b32_e32 v1, 5, v1 1059; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 1060; GFX900-GISEL-NEXT: v_ldexp_f32 v1, s9, v1 1061; GFX900-GISEL-NEXT: v_log_f32_e32 v1, v1 1062; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc 1063; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 1064; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, v3, s[0:1] 1065; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s10, v2 1066; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v2 1067; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v5 1068; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 1069; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] 1070; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v5, 5, v5 1071; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1072; GFX900-GISEL-NEXT: v_ldexp_f32 v5, s10, v5 1073; GFX900-GISEL-NEXT: v_ldexp_f32 v2, s11, v2 1074; GFX900-GISEL-NEXT: v_log_f32_e32 v5, v5 1075; GFX900-GISEL-NEXT: v_log_f32_e32 v6, v2 1076; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc 1077; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] 1078; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v5, v2 1079; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v6, v3 1080; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] 1081; GFX900-GISEL-NEXT: s_endpgm 1082; 1083; GFX1100-SDAG-LABEL: s_log2_v4f32: 1084; GFX1100-SDAG: ; %bb.0: 1085; GFX1100-SDAG-NEXT: s_clause 0x1 1086; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 1087; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 1088; GFX1100-SDAG-NEXT: v_mov_b32_e32 v9, 0 1089; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1090; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s3 1091; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 1092; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s1 1093; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s0 1094; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1095; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s6 1096; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s7 1097; 
GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s7 1098; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, s8 1099; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, s9 1100; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1101; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 1102; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s6 1103; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s8 1104; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v7, 5, v7 1105; GFX1100-SDAG-NEXT: v_ldexp_f32 v2, s3, v2 1106; GFX1100-SDAG-NEXT: v_ldexp_f32 v3, s2, v3 1107; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s9 1108; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1109; GFX1100-SDAG-NEXT: v_ldexp_f32 v7, s0, v7 1110; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 1111; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v6, 5, v6 1112; GFX1100-SDAG-NEXT: v_log_f32_e32 v8, v3 1113; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_3) 1114; GFX1100-SDAG-NEXT: v_log_f32_e32 v7, v7 1115; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v2, v0 1116; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 1117; GFX1100-SDAG-NEXT: v_ldexp_f32 v6, s1, v6 1118; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 1119; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v8, v1 1120; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v7, v5 1121; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6 1122; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 1123; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v6, v4 1124; GFX1100-SDAG-NEXT: global_store_b128 v9, v[0:3], s[4:5] 1125; GFX1100-SDAG-NEXT: s_endpgm 1126; 1127; GFX1100-GISEL-LABEL: s_log2_v4f32: 1128; GFX1100-GISEL: ; %bb.0: 1129; GFX1100-GISEL-NEXT: s_clause 0x1 1130; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 1131; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 1132; GFX1100-GISEL-NEXT: v_mov_b32_e32 v8, 0 1133; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1134; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s1 1135; 
GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s0 1136; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s3 1137; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s2 1138; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1139; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 1140; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6 1141; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s7 1142; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9 1143; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s6 1144; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1145; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 1146; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 0x42000000, s9 1147; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 0x42000000, s8 1148; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 1149; GFX1100-GISEL-NEXT: v_ldexp_f32 v1, s1, v1 1150; GFX1100-GISEL-NEXT: v_log_f32_e32 v1, v1 1151; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1152; GFX1100-GISEL-NEXT: v_dual_sub_f32 v1, v1, v5 :: v_dual_lshlrev_b32 v0, 5, v0 1153; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1154; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, s0, v0 1155; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 1156; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1157; GFX1100-GISEL-NEXT: v_dual_sub_f32 v0, v0, v4 :: v_dual_lshlrev_b32 v3, 5, v3 1158; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1159; GFX1100-GISEL-NEXT: v_ldexp_f32 v3, s3, v3 1160; GFX1100-GISEL-NEXT: v_log_f32_e32 v3, v3 1161; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1162; GFX1100-GISEL-NEXT: v_dual_sub_f32 v3, v3, v7 :: v_dual_lshlrev_b32 v2, 5, v2 1163; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1164; GFX1100-GISEL-NEXT: v_ldexp_f32 v2, s2, v2 1165; GFX1100-GISEL-NEXT: v_log_f32_e32 v2, v2 1166; 
GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1167; GFX1100-GISEL-NEXT: v_sub_f32_e32 v2, v2, v6 1168; GFX1100-GISEL-NEXT: global_store_b128 v8, v[0:3], s[4:5] 1169; GFX1100-GISEL-NEXT: s_endpgm 1170; 1171; R600-LABEL: s_log2_v4f32: 1172; R600: ; %bb.0: 1173; R600-NEXT: ALU 33, @4, KC0[CB0:0-32], KC1[] 1174; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 1175; R600-NEXT: CF_END 1176; R600-NEXT: PAD 1177; R600-NEXT: ALU clause starting at 4: 1178; R600-NEXT: SETGT T0.W, literal.x, KC0[4].X, 1179; R600-NEXT: SETGT * T1.W, literal.x, KC0[3].W, 1180; R600-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) 1181; R600-NEXT: CNDE * T2.W, PV.W, 1.0, literal.x, 1182; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) 1183; R600-NEXT: MUL_IEEE T0.Z, KC0[4].X, PV.W, 1184; R600-NEXT: SETGT T2.W, literal.x, KC0[3].Z, 1185; R600-NEXT: CNDE * T3.W, T1.W, 1.0, literal.y, 1186; R600-NEXT: 8388608(1.175494e-38), 1333788672(4.294967e+09) 1187; R600-NEXT: MUL_IEEE T0.X, KC0[3].W, PS, 1188; R600-NEXT: CNDE T0.Y, T0.W, 0.0, literal.x, 1189; R600-NEXT: SETGT T1.Z, literal.y, KC0[3].Y, 1190; R600-NEXT: CNDE T0.W, PV.W, 1.0, literal.z, 1191; R600-NEXT: LOG_IEEE * T0.Z, PV.Z, 1192; R600-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38) 1193; R600-NEXT: 1333788672(4.294967e+09), 0(0.000000e+00) 1194; R600-NEXT: MUL_IEEE T1.X, KC0[3].Z, PV.W, 1195; R600-NEXT: CNDE T1.Y, T1.W, 0.0, literal.x, 1196; R600-NEXT: CNDE T2.Z, PV.Z, 1.0, literal.y, 1197; R600-NEXT: ADD T0.W, PS, -PV.Y, 1198; R600-NEXT: LOG_IEEE * T0.X, PV.X, 1199; R600-NEXT: 1107296256(3.200000e+01), 1333788672(4.294967e+09) 1200; R600-NEXT: MUL_IEEE T2.Y, KC0[3].Y, PV.Z, 1201; R600-NEXT: ADD T0.Z, PS, -PV.Y, 1202; R600-NEXT: CNDE T1.W, T2.W, 0.0, literal.x, 1203; R600-NEXT: LOG_IEEE * T0.X, PV.X, 1204; R600-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) 1205; R600-NEXT: ADD T0.Y, PS, -PV.W, 1206; R600-NEXT: CNDE T1.W, T1.Z, 0.0, literal.x, 1207; R600-NEXT: LOG_IEEE * T0.X, PV.Y, 1208; R600-NEXT: 1107296256(3.200000e+01), 
0(0.000000e+00) 1209; R600-NEXT: ADD T0.X, PS, -PV.W, 1210; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1211; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1212; 1213; CM-LABEL: s_log2_v4f32: 1214; CM: ; %bb.0: 1215; CM-NEXT: ALU 43, @4, KC0[CB0:0-32], KC1[] 1216; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 1217; CM-NEXT: CF_END 1218; CM-NEXT: PAD 1219; CM-NEXT: ALU clause starting at 4: 1220; CM-NEXT: SETGT * T0.W, literal.x, KC0[4].X, 1221; CM-NEXT: 8388608(1.175494e-38), 0(0.000000e+00) 1222; CM-NEXT: CNDE T0.Y, PV.W, 1.0, literal.x, 1223; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W, 1224; CM-NEXT: SETGT * T1.W, literal.y, KC0[3].Z, 1225; CM-NEXT: 1333788672(4.294967e+09), 8388608(1.175494e-38) 1226; CM-NEXT: CNDE T0.X, PV.W, 1.0, literal.x, 1227; CM-NEXT: CNDE T1.Y, T0.W, 0.0, literal.y, 1228; CM-NEXT: CNDE T1.Z, PV.Z, 1.0, literal.x, 1229; CM-NEXT: MUL_IEEE * T0.W, KC0[4].X, PV.Y, 1230; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) 1231; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.W, 1232; CM-NEXT: LOG_IEEE T0.Y, T0.W, 1233; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.W, 1234; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.W, 1235; CM-NEXT: CNDE T1.X, T0.Z, 0.0, literal.x, 1236; CM-NEXT: SETGT T2.Y, literal.y, KC0[3].Y, 1237; CM-NEXT: MUL_IEEE T0.Z, KC0[3].W, T1.Z, 1238; CM-NEXT: ADD * T0.W, PV.Y, -T1.Y, 1239; CM-NEXT: 1107296256(3.200000e+01), 8388608(1.175494e-38) 1240; CM-NEXT: LOG_IEEE T0.X (MASKED), T0.Z, 1241; CM-NEXT: LOG_IEEE T0.Y, T0.Z, 1242; CM-NEXT: LOG_IEEE T0.Z (MASKED), T0.Z, 1243; CM-NEXT: LOG_IEEE * T0.W (MASKED), T0.Z, 1244; CM-NEXT: CNDE T2.X, T2.Y, 1.0, literal.x, 1245; CM-NEXT: CNDE T1.Y, T1.W, 0.0, literal.y, 1246; CM-NEXT: ADD T0.Z, PV.Y, -T1.X, 1247; CM-NEXT: MUL_IEEE * T1.W, KC0[3].Z, T0.X, BS:VEC_021/SCL_122 1248; CM-NEXT: 1333788672(4.294967e+09), 1107296256(3.200000e+01) 1249; CM-NEXT: LOG_IEEE T0.X, T1.W, 1250; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, 1251; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, 1252; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, 
1253; CM-NEXT: ADD T0.Y, PV.X, -T1.Y, 1254; CM-NEXT: CNDE T1.Z, T2.Y, 0.0, literal.x, 1255; CM-NEXT: MUL_IEEE * T1.W, KC0[3].Y, T2.X, 1256; CM-NEXT: 1107296256(3.200000e+01), 0(0.000000e+00) 1257; CM-NEXT: LOG_IEEE T0.X, T1.W, 1258; CM-NEXT: LOG_IEEE T0.Y (MASKED), T1.W, 1259; CM-NEXT: LOG_IEEE T0.Z (MASKED), T1.W, 1260; CM-NEXT: LOG_IEEE * T0.W (MASKED), T1.W, 1261; CM-NEXT: ADD * T0.X, PV.X, -T1.Z, 1262; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1263; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1264 %result = call <4 x float> @llvm.log2.v4f32(<4 x float> %in) 1265 store <4 x float> %result, ptr addrspace(1) %out 1266 ret void 1267} 1268 1269define float @v_log2_f32(float %in) { 1270; SI-SDAG-LABEL: v_log2_f32: 1271; SI-SDAG: ; %bb.0: 1272; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1273; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1274; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1275; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1276; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1277; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 1278; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 1279; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1280; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1281; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1282; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 1283; 1284; SI-GISEL-LABEL: v_log2_f32: 1285; SI-GISEL: ; %bb.0: 1286; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1287; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1288; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1289; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1290; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1291; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 1292; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 1293; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1294; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1295; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1296; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 1297; 1298; VI-SDAG-LABEL: v_log2_f32: 1299; VI-SDAG: ; %bb.0: 1300; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1301; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1302; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1303; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1304; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1305; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1306; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 1307; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1308; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1309; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1310; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 1311; 1312; VI-GISEL-LABEL: v_log2_f32: 1313; VI-GISEL: ; %bb.0: 1314; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1315; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1316; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1317; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1318; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1319; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1320; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 1321; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1322; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1323; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1324; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 1325; 1326; GFX900-SDAG-LABEL: v_log2_f32: 1327; GFX900-SDAG: ; %bb.0: 1328; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1329; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 1330; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1331; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1332; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1333; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1334; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 1335; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1336; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1337; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1338; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 1339; 1340; GFX900-GISEL-LABEL: v_log2_f32: 1341; GFX900-GISEL: ; %bb.0: 1342; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1344; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1345; GFX900-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc 1346; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1347; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1348; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 1349; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1350; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1351; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1352; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 1353; 1354; GFX1100-SDAG-LABEL: v_log2_f32: 1355; GFX1100-SDAG: ; %bb.0: 1356; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1357; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 1358; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 1359; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 1360; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1361; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1362; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1363; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1364; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 1365; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 1366; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1367; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 1368; 1369; GFX1100-GISEL-LABEL: v_log2_f32: 1370; GFX1100-GISEL: ; %bb.0: 1371; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1372; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 1373; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 1374; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1375; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1376; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1377; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 1378; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 1379; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 1380; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1381; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1382; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 1383; 1384; R600-LABEL: v_log2_f32: 1385; R600: ; 
%bb.0: 1386; R600-NEXT: CF_END 1387; R600-NEXT: PAD 1388; 1389; CM-LABEL: v_log2_f32: 1390; CM: ; %bb.0: 1391; CM-NEXT: CF_END 1392; CM-NEXT: PAD 1393 %result = call float @llvm.log2.f32(float %in) 1394 ret float %result 1395} 1396 1397define float @v_log2_fabs_f32(float %in) { 1398; SI-SDAG-LABEL: v_log2_fabs_f32: 1399; SI-SDAG: ; %bb.0: 1400; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1401; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1402; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 1403; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1404; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1405; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 1406; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 1407; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1408; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1409; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1410; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 1411; 1412; SI-GISEL-LABEL: v_log2_fabs_f32: 1413; SI-GISEL: ; %bb.0: 1414; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1415; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1416; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 1417; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1418; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1419; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 1420; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 1421; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1422; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1423; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1424; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 1425; 1426; VI-SDAG-LABEL: v_log2_fabs_f32: 1427; VI-SDAG: ; %bb.0: 1428; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1429; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1430; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 1431; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1432; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1433; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 1434; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 1435; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1436; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 
1437; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1438; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 1439; 1440; VI-GISEL-LABEL: v_log2_fabs_f32: 1441; VI-GISEL: ; %bb.0: 1442; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1443; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1444; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 1445; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1446; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1447; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 1448; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 1449; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1450; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1451; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1452; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 1453; 1454; GFX900-SDAG-LABEL: v_log2_fabs_f32: 1455; GFX900-SDAG: ; %bb.0: 1456; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1457; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 1458; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 1459; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1460; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1461; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 1462; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 1463; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1464; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1465; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1466; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 1467; 1468; GFX900-GISEL-LABEL: v_log2_fabs_f32: 1469; GFX900-GISEL: ; %bb.0: 1470; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1471; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1472; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 1473; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1474; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1475; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 1476; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 1477; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1478; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1479; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1480; GFX900-GISEL-NEXT: 
s_setpc_b64 s[30:31] 1481; 1482; GFX1100-SDAG-LABEL: v_log2_fabs_f32: 1483; GFX1100-SDAG: ; %bb.0: 1484; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1485; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| 1486; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1487; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 1488; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 1489; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1490; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1491; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 1492; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 1493; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 1494; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1495; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 1496; 1497; GFX1100-GISEL-LABEL: v_log2_fabs_f32: 1498; GFX1100-GISEL: ; %bb.0: 1499; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1500; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| 1501; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1502; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 1503; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1504; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1505; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 1506; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 1507; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 1508; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1509; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1510; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 1511; 1512; R600-LABEL: v_log2_fabs_f32: 1513; R600: ; %bb.0: 1514; R600-NEXT: CF_END 1515; R600-NEXT: PAD 1516; 1517; CM-LABEL: v_log2_fabs_f32: 1518; CM: ; %bb.0: 1519; CM-NEXT: CF_END 1520; CM-NEXT: PAD 1521 %fabs = call float @llvm.fabs.f32(float %in) 1522 %result = call float @llvm.log2.f32(float %fabs) 1523 ret float %result 1524} 
1525 1526define float @v_log2_fneg_fabs_f32(float %in) { 1527; SI-SDAG-LABEL: v_log2_fneg_fabs_f32: 1528; SI-SDAG: ; %bb.0: 1529; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1530; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 1531; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 1532; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1533; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1534; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -|v0|, v2 1535; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 1536; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1537; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1538; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1539; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 1540; 1541; SI-GISEL-LABEL: v_log2_fneg_fabs_f32: 1542; SI-GISEL: ; %bb.0: 1543; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1544; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1545; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 1546; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1547; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1548; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1 1549; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 1550; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1551; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1552; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1553; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 1554; 1555; VI-SDAG-LABEL: v_log2_fneg_fabs_f32: 1556; VI-SDAG: ; %bb.0: 1557; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1558; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 1559; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 1560; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1561; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1562; VI-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v2 1563; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 1564; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1565; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1566; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1567; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 1568; 1569; VI-GISEL-LABEL: v_log2_fneg_fabs_f32: 1570; VI-GISEL: ; %bb.0: 1571; VI-GISEL-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1572; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1573; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 1574; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1575; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1576; VI-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 1577; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 1578; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1579; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1580; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1581; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 1582; 1583; GFX900-SDAG-LABEL: v_log2_fneg_fabs_f32: 1584; GFX900-SDAG: ; %bb.0: 1585; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1586; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 1587; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 1588; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1589; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1590; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v2 1591; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 1592; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1593; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1594; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1595; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 1596; 1597; GFX900-GISEL-LABEL: v_log2_fneg_fabs_f32: 1598; GFX900-GISEL: ; %bb.0: 1599; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1600; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1601; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 1602; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1603; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1604; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 1605; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 1606; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1607; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1608; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1609; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 1610; 1611; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_f32: 1612; GFX1100-SDAG: ; %bb.0: 1613; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1614; 
GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0| 1615; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1616; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 1617; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 1618; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1619; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1620; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v2 1621; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 1622; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 1623; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1624; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 1625; 1626; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_f32: 1627; GFX1100-GISEL: ; %bb.0: 1628; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1629; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -|v0| 1630; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1631; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 1632; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1633; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1634; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -|v0|, v1 1635; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 1636; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 1637; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1638; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1639; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 1640; 1641; R600-LABEL: v_log2_fneg_fabs_f32: 1642; R600: ; %bb.0: 1643; R600-NEXT: CF_END 1644; R600-NEXT: PAD 1645; 1646; CM-LABEL: v_log2_fneg_fabs_f32: 1647; CM: ; %bb.0: 1648; CM-NEXT: CF_END 1649; CM-NEXT: PAD 1650 %fabs = call float @llvm.fabs.f32(float %in) 1651 %fneg.fabs = fneg float %fabs 1652 %result = call float @llvm.log2.f32(float %fneg.fabs) 1653 ret float %result 1654} 1655 1656define float @v_log2_fneg_f32(float %in) { 1657; SI-SDAG-LABEL: v_log2_fneg_f32: 1658; SI-SDAG: ; 
%bb.0: 1659; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1660; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 1661; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 1662; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1663; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1664; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -v0, v2 1665; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 1666; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1667; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1668; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1669; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 1670; 1671; SI-GISEL-LABEL: v_log2_fneg_f32: 1672; SI-GISEL: ; %bb.0: 1673; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1674; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1675; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 1676; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1677; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1678; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, -v0, v1 1679; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 1680; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1681; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1682; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1683; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 1684; 1685; VI-SDAG-LABEL: v_log2_fneg_f32: 1686; VI-SDAG: ; %bb.0: 1687; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1688; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 1689; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 1690; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1691; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1692; VI-SDAG-NEXT: v_ldexp_f32 v0, -v0, v2 1693; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 1694; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1695; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1696; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1697; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 1698; 1699; VI-GISEL-LABEL: v_log2_fneg_f32: 1700; VI-GISEL: ; %bb.0: 1701; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1702; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1703; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 1704; VI-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc 1705; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1706; VI-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 1707; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 1708; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1709; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1710; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1711; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 1712; 1713; GFX900-SDAG-LABEL: v_log2_fneg_f32: 1714; GFX900-SDAG: ; %bb.0: 1715; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1716; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 1717; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 1718; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1719; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1720; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -v0, v2 1721; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 1722; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1723; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1724; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1725; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 1726; 1727; GFX900-GISEL-LABEL: v_log2_fneg_f32: 1728; GFX900-GISEL: ; %bb.0: 1729; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1730; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1731; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 1732; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1733; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1734; GFX900-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 1735; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 1736; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1737; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1738; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1739; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 1740; 1741; GFX1100-SDAG-LABEL: v_log2_fneg_f32: 1742; GFX1100-SDAG: ; %bb.0: 1743; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1744; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0 1745; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 1746; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, 
vcc_lo 1747; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1748; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1749; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -v0, v2 1750; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1751; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 1752; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 1753; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1754; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 1755; 1756; GFX1100-GISEL-LABEL: v_log2_fneg_f32: 1757; GFX1100-GISEL: ; %bb.0: 1758; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1759; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, -v0 1760; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1761; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 1762; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1763; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1764; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, -v0, v1 1765; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 1766; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 1767; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1768; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1769; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 1770; 1771; R600-LABEL: v_log2_fneg_f32: 1772; R600: ; %bb.0: 1773; R600-NEXT: CF_END 1774; R600-NEXT: PAD 1775; 1776; CM-LABEL: v_log2_fneg_f32: 1777; CM: ; %bb.0: 1778; CM-NEXT: CF_END 1779; CM-NEXT: PAD 1780 %fneg = fneg float %in 1781 %result = call float @llvm.log2.f32(float %fneg) 1782 ret float %result 1783} 1784 1785define float @v_log2_f32_fast(float %in) { 1786; SI-SDAG-LABEL: v_log2_f32_fast: 1787; SI-SDAG: ; %bb.0: 1788; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1789; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1790; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1791; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1792; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1793; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, 
v0, v2 1794; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 1795; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1796; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1797; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1798; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 1799; 1800; SI-GISEL-LABEL: v_log2_f32_fast: 1801; SI-GISEL: ; %bb.0: 1802; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1803; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1804; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1805; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1806; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1807; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 1808; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 1809; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1810; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1811; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1812; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 1813; 1814; VI-SDAG-LABEL: v_log2_f32_fast: 1815; VI-SDAG: ; %bb.0: 1816; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1817; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1818; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1819; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1820; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1821; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1822; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 1823; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1824; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1825; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1826; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 1827; 1828; VI-GISEL-LABEL: v_log2_f32_fast: 1829; VI-GISEL: ; %bb.0: 1830; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1831; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1832; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1833; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1834; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1835; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1836; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 1837; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1838; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1839; VI-GISEL-NEXT: v_sub_f32_e32 
v0, v0, v1 1840; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 1841; 1842; GFX900-SDAG-LABEL: v_log2_f32_fast: 1843; GFX900-SDAG: ; %bb.0: 1844; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1845; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 1846; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1847; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1848; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1849; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1850; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 1851; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1852; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1853; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1854; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 1855; 1856; GFX900-GISEL-LABEL: v_log2_f32_fast: 1857; GFX900-GISEL: ; %bb.0: 1858; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1859; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1860; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1861; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1862; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1863; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1864; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 1865; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1866; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1867; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1868; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 1869; 1870; GFX1100-SDAG-LABEL: v_log2_f32_fast: 1871; GFX1100-SDAG: ; %bb.0: 1872; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1873; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 1874; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 1875; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 1876; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1877; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1878; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1879; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1880; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 1881; 
GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 1882; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1883; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 1884; 1885; GFX1100-GISEL-LABEL: v_log2_f32_fast: 1886; GFX1100-GISEL: ; %bb.0: 1887; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1888; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 1889; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 1890; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1891; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1892; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1893; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 1894; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 1895; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 1896; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 1897; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1898; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 1899; 1900; R600-LABEL: v_log2_f32_fast: 1901; R600: ; %bb.0: 1902; R600-NEXT: CF_END 1903; R600-NEXT: PAD 1904; 1905; CM-LABEL: v_log2_f32_fast: 1906; CM: ; %bb.0: 1907; CM-NEXT: CF_END 1908; CM-NEXT: PAD 1909 %result = call fast float @llvm.log2.f32(float %in) 1910 ret float %result 1911} 1912 1913define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { 1914; SI-SDAG-LABEL: v_log2_f32_unsafe_math_attr: 1915; SI-SDAG: ; %bb.0: 1916; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1917; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1918; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1919; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1920; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1921; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 1922; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 1923; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1924; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1925; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1926; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 1927; 1928; SI-GISEL-LABEL: v_log2_f32_unsafe_math_attr: 1929; SI-GISEL: ; %bb.0: 
1930; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1931; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1932; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1933; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1934; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1935; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 1936; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 1937; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1938; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1939; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1940; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 1941; 1942; VI-SDAG-LABEL: v_log2_f32_unsafe_math_attr: 1943; VI-SDAG: ; %bb.0: 1944; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1945; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 1946; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1947; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1948; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1949; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1950; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 1951; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1952; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1953; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1954; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 1955; 1956; VI-GISEL-LABEL: v_log2_f32_unsafe_math_attr: 1957; VI-GISEL: ; %bb.0: 1958; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1959; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1960; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1961; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1962; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1963; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1964; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 1965; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1966; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1967; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1968; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 1969; 1970; GFX900-SDAG-LABEL: v_log2_f32_unsafe_math_attr: 1971; GFX900-SDAG: ; %bb.0: 1972; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1973; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 1974; GFX900-SDAG-NEXT: 
v_cmp_gt_f32_e32 vcc, s4, v0 1975; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1976; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 1977; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 1978; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 1979; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 1980; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1981; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 1982; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 1983; 1984; GFX900-GISEL-LABEL: v_log2_f32_unsafe_math_attr: 1985; GFX900-GISEL: ; %bb.0: 1986; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1987; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 1988; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1989; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1990; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1991; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1992; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 1993; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 1994; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1995; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 1996; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 1997; 1998; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr: 1999; GFX1100-SDAG: ; %bb.0: 2000; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2001; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2002; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 2003; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2004; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2005; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2006; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2007; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2008; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2009; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2010; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2011; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2012; 2013; GFX1100-GISEL-LABEL: v_log2_f32_unsafe_math_attr: 2014; GFX1100-GISEL: ; %bb.0: 2015; 
GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2016; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2017; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 2018; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2019; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2020; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2021; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2022; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2023; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2024; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 2025; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2026; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2027; 2028; R600-LABEL: v_log2_f32_unsafe_math_attr: 2029; R600: ; %bb.0: 2030; R600-NEXT: CF_END 2031; R600-NEXT: PAD 2032; 2033; CM-LABEL: v_log2_f32_unsafe_math_attr: 2034; CM: ; %bb.0: 2035; CM-NEXT: CF_END 2036; CM-NEXT: PAD 2037 %result = call float @llvm.log2.f32(float %in) 2038 ret float %result 2039} 2040 2041define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { 2042; SI-SDAG-LABEL: v_log2_f32_approx_fn_attr: 2043; SI-SDAG: ; %bb.0: 2044; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2045; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2046; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2047; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2048; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2049; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 2050; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 2051; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2052; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2053; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2054; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2055; 2056; SI-GISEL-LABEL: v_log2_f32_approx_fn_attr: 2057; SI-GISEL: ; %bb.0: 2058; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2059; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2060; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2061; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, 
vcc 2062; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2063; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 2064; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 2065; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2066; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2067; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2068; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2069; 2070; VI-SDAG-LABEL: v_log2_f32_approx_fn_attr: 2071; VI-SDAG: ; %bb.0: 2072; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2073; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2074; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2075; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2076; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2077; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2078; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 2079; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2080; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2081; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2082; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2083; 2084; VI-GISEL-LABEL: v_log2_f32_approx_fn_attr: 2085; VI-GISEL: ; %bb.0: 2086; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2087; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2088; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2089; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2090; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2091; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2092; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 2093; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2094; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2095; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2096; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2097; 2098; GFX900-SDAG-LABEL: v_log2_f32_approx_fn_attr: 2099; GFX900-SDAG: ; %bb.0: 2100; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2101; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 2102; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2103; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2104; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2105; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2106; GFX900-SDAG-NEXT: v_log_f32_e32 
v0, v0 2107; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2108; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2109; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2110; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2111; 2112; GFX900-GISEL-LABEL: v_log2_f32_approx_fn_attr: 2113; GFX900-GISEL: ; %bb.0: 2114; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2115; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2116; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2117; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2118; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2119; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2120; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 2121; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2122; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2123; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2124; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2125; 2126; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr: 2127; GFX1100-SDAG: ; %bb.0: 2128; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2129; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2130; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 2131; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2132; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2133; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2134; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2135; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2136; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2137; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2138; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2139; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2140; 2141; GFX1100-GISEL-LABEL: v_log2_f32_approx_fn_attr: 2142; GFX1100-GISEL: ; %bb.0: 2143; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2144; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2145; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 2146; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2147; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2148; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2149; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2150; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2151; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2152; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 2153; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2154; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2155; 2156; R600-LABEL: v_log2_f32_approx_fn_attr: 2157; R600: ; %bb.0: 2158; R600-NEXT: CF_END 2159; R600-NEXT: PAD 2160; 2161; CM-LABEL: v_log2_f32_approx_fn_attr: 2162; CM: ; %bb.0: 2163; CM-NEXT: CF_END 2164; CM-NEXT: PAD 2165 %result = call float @llvm.log2.f32(float %in) 2166 ret float %result 2167} 2168 2169define float @v_log2_f32_ninf(float %in) { 2170; SI-SDAG-LABEL: v_log2_f32_ninf: 2171; SI-SDAG: ; %bb.0: 2172; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2173; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2174; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2175; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2176; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2177; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 2178; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 2179; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2180; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2181; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2182; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2183; 2184; SI-GISEL-LABEL: v_log2_f32_ninf: 2185; SI-GISEL: ; %bb.0: 2186; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2187; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2188; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2189; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2190; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2191; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 2192; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 2193; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2194; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2195; SI-GISEL-NEXT: v_sub_f32_e32 v0, 
v0, v1 2196; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2197; 2198; VI-SDAG-LABEL: v_log2_f32_ninf: 2199; VI-SDAG: ; %bb.0: 2200; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2201; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2202; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2203; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2204; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2205; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2206; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 2207; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2208; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2209; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2210; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2211; 2212; VI-GISEL-LABEL: v_log2_f32_ninf: 2213; VI-GISEL: ; %bb.0: 2214; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2215; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2216; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2217; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2218; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2219; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2220; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 2221; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2222; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2223; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2224; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2225; 2226; GFX900-SDAG-LABEL: v_log2_f32_ninf: 2227; GFX900-SDAG: ; %bb.0: 2228; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2229; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 2230; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2231; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2232; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2233; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2234; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 2235; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2236; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2237; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2238; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2239; 2240; GFX900-GISEL-LABEL: v_log2_f32_ninf: 2241; GFX900-GISEL: ; %bb.0: 2242; GFX900-GISEL-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2243; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2244; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2245; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2246; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2247; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2248; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 2249; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2250; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2251; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2252; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2253; 2254; GFX1100-SDAG-LABEL: v_log2_f32_ninf: 2255; GFX1100-SDAG: ; %bb.0: 2256; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2257; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2258; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 2259; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2260; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2261; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2262; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2263; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2264; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2265; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2266; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2267; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2268; 2269; GFX1100-GISEL-LABEL: v_log2_f32_ninf: 2270; GFX1100-GISEL: ; %bb.0: 2271; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2272; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2273; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 2274; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2275; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2276; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2277; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2278; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2279; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2280; 
GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 2281; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2282; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2283; 2284; R600-LABEL: v_log2_f32_ninf: 2285; R600: ; %bb.0: 2286; R600-NEXT: CF_END 2287; R600-NEXT: PAD 2288; 2289; CM-LABEL: v_log2_f32_ninf: 2290; CM: ; %bb.0: 2291; CM-NEXT: CF_END 2292; CM-NEXT: PAD 2293 %result = call ninf float @llvm.log2.f32(float %in) 2294 ret float %result 2295} 2296 2297define float @v_log2_f32_afn(float %in) { 2298; SI-SDAG-LABEL: v_log2_f32_afn: 2299; SI-SDAG: ; %bb.0: 2300; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2301; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2302; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2303; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2304; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2305; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 2306; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 2307; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2308; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2309; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2310; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2311; 2312; SI-GISEL-LABEL: v_log2_f32_afn: 2313; SI-GISEL: ; %bb.0: 2314; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2315; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2316; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2317; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2318; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2319; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 2320; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 2321; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2322; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2323; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2324; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2325; 2326; VI-SDAG-LABEL: v_log2_f32_afn: 2327; VI-SDAG: ; %bb.0: 2328; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2329; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2330; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2331; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2332; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 
5, v2 2333; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2334; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 2335; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2336; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2337; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2338; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2339; 2340; VI-GISEL-LABEL: v_log2_f32_afn: 2341; VI-GISEL: ; %bb.0: 2342; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2343; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2344; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2345; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2346; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2347; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2348; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 2349; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2350; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2351; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2352; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2353; 2354; GFX900-SDAG-LABEL: v_log2_f32_afn: 2355; GFX900-SDAG: ; %bb.0: 2356; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2357; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 2358; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2359; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2360; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2361; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2362; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 2363; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2364; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2365; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2366; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2367; 2368; GFX900-GISEL-LABEL: v_log2_f32_afn: 2369; GFX900-GISEL: ; %bb.0: 2370; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2371; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2372; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2373; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2374; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2375; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2376; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 2377; 
GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2378; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2379; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2380; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2381; 2382; GFX1100-SDAG-LABEL: v_log2_f32_afn: 2383; GFX1100-SDAG: ; %bb.0: 2384; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2385; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2386; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 2387; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2388; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2389; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2390; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2391; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2392; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2393; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2394; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2395; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2396; 2397; GFX1100-GISEL-LABEL: v_log2_f32_afn: 2398; GFX1100-GISEL: ; %bb.0: 2399; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2400; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2401; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 2402; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2403; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2404; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2405; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2406; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2407; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2408; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 2409; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2410; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2411; 2412; R600-LABEL: v_log2_f32_afn: 2413; R600: ; %bb.0: 2414; R600-NEXT: CF_END 2415; R600-NEXT: PAD 2416; 2417; CM-LABEL: v_log2_f32_afn: 2418; CM: ; %bb.0: 2419; CM-NEXT: CF_END 2420; CM-NEXT: PAD 2421 %result = call afn 
float @llvm.log2.f32(float %in) 2422 ret float %result 2423} 2424 2425define float @v_log2_f32_afn_daz(float %in) #0 { 2426; GFX689-LABEL: v_log2_f32_afn_daz: 2427; GFX689: ; %bb.0: 2428; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2429; GFX689-NEXT: v_log_f32_e32 v0, v0 2430; GFX689-NEXT: s_setpc_b64 s[30:31] 2431; 2432; GFX1100-LABEL: v_log2_f32_afn_daz: 2433; GFX1100: ; %bb.0: 2434; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2435; GFX1100-NEXT: v_log_f32_e32 v0, v0 2436; GFX1100-NEXT: s_setpc_b64 s[30:31] 2437; 2438; R600-LABEL: v_log2_f32_afn_daz: 2439; R600: ; %bb.0: 2440; R600-NEXT: CF_END 2441; R600-NEXT: PAD 2442; 2443; CM-LABEL: v_log2_f32_afn_daz: 2444; CM: ; %bb.0: 2445; CM-NEXT: CF_END 2446; CM-NEXT: PAD 2447 %result = call afn float @llvm.log2.f32(float %in) 2448 ret float %result 2449} 2450 2451define float @v_log2_f32_afn_dynamic(float %in) #1 { 2452; SI-SDAG-LABEL: v_log2_f32_afn_dynamic: 2453; SI-SDAG: ; %bb.0: 2454; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2455; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2456; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2457; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2458; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2459; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 2460; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 2461; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2462; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2463; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2464; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2465; 2466; SI-GISEL-LABEL: v_log2_f32_afn_dynamic: 2467; SI-GISEL: ; %bb.0: 2468; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2469; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2470; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2471; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2472; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2473; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 2474; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 2475; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2476; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc 2477; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2478; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2479; 2480; VI-SDAG-LABEL: v_log2_f32_afn_dynamic: 2481; VI-SDAG: ; %bb.0: 2482; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2483; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2484; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2485; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2486; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2487; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2488; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 2489; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2490; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2491; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2492; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2493; 2494; VI-GISEL-LABEL: v_log2_f32_afn_dynamic: 2495; VI-GISEL: ; %bb.0: 2496; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2497; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2498; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2499; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2500; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2501; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2502; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 2503; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2504; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2505; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2506; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2507; 2508; GFX900-SDAG-LABEL: v_log2_f32_afn_dynamic: 2509; GFX900-SDAG: ; %bb.0: 2510; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2511; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 2512; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2513; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2514; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2515; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2516; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 2517; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2518; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2519; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2520; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2521; 2522; 
GFX900-GISEL-LABEL: v_log2_f32_afn_dynamic: 2523; GFX900-GISEL: ; %bb.0: 2524; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2525; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2526; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2527; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2528; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2529; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2530; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 2531; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2532; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2533; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2534; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2535; 2536; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic: 2537; GFX1100-SDAG: ; %bb.0: 2538; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2539; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2540; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 2541; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2542; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2543; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2544; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2545; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2546; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2547; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2548; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2549; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2550; 2551; GFX1100-GISEL-LABEL: v_log2_f32_afn_dynamic: 2552; GFX1100-GISEL: ; %bb.0: 2553; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2554; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2555; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 2556; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2557; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2558; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2559; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2560; 
GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2561; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2562; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 2563; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2564; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2565; 2566; R600-LABEL: v_log2_f32_afn_dynamic: 2567; R600: ; %bb.0: 2568; R600-NEXT: CF_END 2569; R600-NEXT: PAD 2570; 2571; CM-LABEL: v_log2_f32_afn_dynamic: 2572; CM: ; %bb.0: 2573; CM-NEXT: CF_END 2574; CM-NEXT: PAD 2575 %result = call afn float @llvm.log2.f32(float %in) 2576 ret float %result 2577} 2578 2579define float @v_fabs_log2_f32_afn(float %in) { 2580; SI-SDAG-LABEL: v_fabs_log2_f32_afn: 2581; SI-SDAG: ; %bb.0: 2582; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2583; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2584; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2585; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2586; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2587; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 2588; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 2589; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2590; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2591; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2592; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2593; 2594; SI-GISEL-LABEL: v_fabs_log2_f32_afn: 2595; SI-GISEL: ; %bb.0: 2596; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2597; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2598; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 2599; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2600; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2601; SI-GISEL-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 2602; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 2603; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2604; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2605; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2606; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2607; 2608; VI-SDAG-LABEL: v_fabs_log2_f32_afn: 2609; VI-SDAG: ; %bb.0: 2610; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2611; VI-SDAG-NEXT: s_mov_b32 s4, 
0x800000 2612; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2613; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2614; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2615; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 2616; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 2617; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2618; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2619; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2620; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2621; 2622; VI-GISEL-LABEL: v_fabs_log2_f32_afn: 2623; VI-GISEL: ; %bb.0: 2624; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2625; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2626; VI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 2627; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2628; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2629; VI-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 2630; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 2631; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2632; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2633; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2634; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2635; 2636; GFX900-SDAG-LABEL: v_fabs_log2_f32_afn: 2637; GFX900-SDAG: ; %bb.0: 2638; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2639; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 2640; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2641; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2642; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2643; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 2644; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 2645; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2646; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2647; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2648; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2649; 2650; GFX900-GISEL-LABEL: v_fabs_log2_f32_afn: 2651; GFX900-GISEL: ; %bb.0: 2652; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2653; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2654; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 2655; GFX900-GISEL-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc 2656; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2657; GFX900-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 2658; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 2659; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2660; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2661; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2662; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2663; 2664; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn: 2665; GFX1100-SDAG: ; %bb.0: 2666; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2667; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| 2668; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2669; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 2670; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 2671; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2672; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2673; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 2674; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2675; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2676; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2677; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2678; 2679; GFX1100-GISEL-LABEL: v_fabs_log2_f32_afn: 2680; GFX1100-GISEL: ; %bb.0: 2681; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2682; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| 2683; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2684; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 2685; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2686; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2687; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, |v0|, v1 2688; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 2689; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2690; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 2691; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2692; 
GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2693; 2694; R600-LABEL: v_fabs_log2_f32_afn: 2695; R600: ; %bb.0: 2696; R600-NEXT: CF_END 2697; R600-NEXT: PAD 2698; 2699; CM-LABEL: v_fabs_log2_f32_afn: 2700; CM: ; %bb.0: 2701; CM-NEXT: CF_END 2702; CM-NEXT: PAD 2703 %fabs = call float @llvm.fabs.f32(float %in) 2704 %result = call afn float @llvm.log2.f32(float %fabs) 2705 ret float %result 2706} 2707 2708define float @v_log2_f32_daz(float %in) #0 { 2709; GFX689-LABEL: v_log2_f32_daz: 2710; GFX689: ; %bb.0: 2711; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2712; GFX689-NEXT: v_log_f32_e32 v0, v0 2713; GFX689-NEXT: s_setpc_b64 s[30:31] 2714; 2715; GFX1100-LABEL: v_log2_f32_daz: 2716; GFX1100: ; %bb.0: 2717; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2718; GFX1100-NEXT: v_log_f32_e32 v0, v0 2719; GFX1100-NEXT: s_setpc_b64 s[30:31] 2720; 2721; R600-LABEL: v_log2_f32_daz: 2722; R600: ; %bb.0: 2723; R600-NEXT: CF_END 2724; R600-NEXT: PAD 2725; 2726; CM-LABEL: v_log2_f32_daz: 2727; CM: ; %bb.0: 2728; CM-NEXT: CF_END 2729; CM-NEXT: PAD 2730 %result = call float @llvm.log2.f32(float %in) 2731 ret float %result 2732} 2733 2734define float @v_log2_f32_nnan(float %in) { 2735; SI-SDAG-LABEL: v_log2_f32_nnan: 2736; SI-SDAG: ; %bb.0: 2737; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2738; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2739; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2740; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2741; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2742; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 2743; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 2744; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2745; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2746; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2747; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2748; 2749; SI-GISEL-LABEL: v_log2_f32_nnan: 2750; SI-GISEL: ; %bb.0: 2751; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2752; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2753; SI-GISEL-NEXT: v_cmp_lt_f32_e32 
vcc, v0, v1 2754; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2755; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2756; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 2757; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 2758; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2759; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2760; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2761; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2762; 2763; VI-SDAG-LABEL: v_log2_f32_nnan: 2764; VI-SDAG: ; %bb.0: 2765; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2766; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2767; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2768; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2769; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2770; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2771; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 2772; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2773; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2774; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2775; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2776; 2777; VI-GISEL-LABEL: v_log2_f32_nnan: 2778; VI-GISEL: ; %bb.0: 2779; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2780; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2781; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2782; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2783; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2784; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2785; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 2786; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2787; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2788; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2789; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2790; 2791; GFX900-SDAG-LABEL: v_log2_f32_nnan: 2792; GFX900-SDAG: ; %bb.0: 2793; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2794; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 2795; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2796; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2797; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2798; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2799; 
GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 2800; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2801; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2802; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2803; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2804; 2805; GFX900-GISEL-LABEL: v_log2_f32_nnan: 2806; GFX900-GISEL: ; %bb.0: 2807; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2808; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2809; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2810; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2811; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2812; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2813; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 2814; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2815; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2816; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2817; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2818; 2819; GFX1100-SDAG-LABEL: v_log2_f32_nnan: 2820; GFX1100-SDAG: ; %bb.0: 2821; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2822; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2823; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 2824; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2825; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2826; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2827; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2828; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2829; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2830; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2831; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2832; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2833; 2834; GFX1100-GISEL-LABEL: v_log2_f32_nnan: 2835; GFX1100-GISEL: ; %bb.0: 2836; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2837; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2838; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 2839; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2840; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2841; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2842; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2843; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2844; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2845; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 2846; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2847; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2848; 2849; R600-LABEL: v_log2_f32_nnan: 2850; R600: ; %bb.0: 2851; R600-NEXT: CF_END 2852; R600-NEXT: PAD 2853; 2854; CM-LABEL: v_log2_f32_nnan: 2855; CM: ; %bb.0: 2856; CM-NEXT: CF_END 2857; CM-NEXT: PAD 2858 %result = call nnan float @llvm.log2.f32(float %in) 2859 ret float %result 2860} 2861 2862define float @v_log2_f32_nnan_daz(float %in) #0 { 2863; GFX689-LABEL: v_log2_f32_nnan_daz: 2864; GFX689: ; %bb.0: 2865; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2866; GFX689-NEXT: v_log_f32_e32 v0, v0 2867; GFX689-NEXT: s_setpc_b64 s[30:31] 2868; 2869; GFX1100-LABEL: v_log2_f32_nnan_daz: 2870; GFX1100: ; %bb.0: 2871; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2872; GFX1100-NEXT: v_log_f32_e32 v0, v0 2873; GFX1100-NEXT: s_setpc_b64 s[30:31] 2874; 2875; R600-LABEL: v_log2_f32_nnan_daz: 2876; R600: ; %bb.0: 2877; R600-NEXT: CF_END 2878; R600-NEXT: PAD 2879; 2880; CM-LABEL: v_log2_f32_nnan_daz: 2881; CM: ; %bb.0: 2882; CM-NEXT: CF_END 2883; CM-NEXT: PAD 2884 %result = call nnan float @llvm.log2.f32(float %in) 2885 ret float %result 2886} 2887 2888define float @v_log2_f32_nnan_dynamic(float %in) #1 { 2889; SI-SDAG-LABEL: v_log2_f32_nnan_dynamic: 2890; SI-SDAG: ; %bb.0: 2891; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2892; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2893; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2894; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2895; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2896; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 2897; 
SI-SDAG-NEXT: v_log_f32_e32 v0, v0 2898; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2899; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2900; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2901; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2902; 2903; SI-GISEL-LABEL: v_log2_f32_nnan_dynamic: 2904; SI-GISEL: ; %bb.0: 2905; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2906; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2907; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2908; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2909; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2910; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 2911; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 2912; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2913; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2914; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2915; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2916; 2917; VI-SDAG-LABEL: v_log2_f32_nnan_dynamic: 2918; VI-SDAG: ; %bb.0: 2919; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2920; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 2921; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2922; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2923; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2924; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2925; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 2926; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2927; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2928; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2929; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2930; 2931; VI-GISEL-LABEL: v_log2_f32_nnan_dynamic: 2932; VI-GISEL: ; %bb.0: 2933; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2934; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2935; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2936; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2937; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2938; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2939; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 2940; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2941; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2942; VI-GISEL-NEXT: 
v_sub_f32_e32 v0, v0, v1 2943; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2944; 2945; GFX900-SDAG-LABEL: v_log2_f32_nnan_dynamic: 2946; GFX900-SDAG: ; %bb.0: 2947; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2948; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 2949; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2950; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2951; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2952; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2953; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 2954; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 2955; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2956; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2957; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2958; 2959; GFX900-GISEL-LABEL: v_log2_f32_nnan_dynamic: 2960; GFX900-GISEL: ; %bb.0: 2961; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2962; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 2963; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2964; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2965; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2966; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2967; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 2968; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 2969; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2970; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 2971; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2972; 2973; GFX1100-SDAG-LABEL: v_log2_f32_nnan_dynamic: 2974; GFX1100-SDAG: ; %bb.0: 2975; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2976; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2977; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 2978; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2979; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2980; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2981; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 2982; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2983; 
GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 2984; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 2985; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 2986; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2987; 2988; GFX1100-GISEL-LABEL: v_log2_f32_nnan_dynamic: 2989; GFX1100-GISEL: ; %bb.0: 2990; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2991; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 2992; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 2993; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2994; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2995; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 2996; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 2997; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2998; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 2999; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 3000; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3001; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 3002; 3003; R600-LABEL: v_log2_f32_nnan_dynamic: 3004; R600: ; %bb.0: 3005; R600-NEXT: CF_END 3006; R600-NEXT: PAD 3007; 3008; CM-LABEL: v_log2_f32_nnan_dynamic: 3009; CM: ; %bb.0: 3010; CM-NEXT: CF_END 3011; CM-NEXT: PAD 3012 %result = call nnan float @llvm.log2.f32(float %in) 3013 ret float %result 3014} 3015 3016define float @v_log2_f32_ninf_daz(float %in) #0 { 3017; GFX689-LABEL: v_log2_f32_ninf_daz: 3018; GFX689: ; %bb.0: 3019; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3020; GFX689-NEXT: v_log_f32_e32 v0, v0 3021; GFX689-NEXT: s_setpc_b64 s[30:31] 3022; 3023; GFX1100-LABEL: v_log2_f32_ninf_daz: 3024; GFX1100: ; %bb.0: 3025; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3026; GFX1100-NEXT: v_log_f32_e32 v0, v0 3027; GFX1100-NEXT: s_setpc_b64 s[30:31] 3028; 3029; R600-LABEL: v_log2_f32_ninf_daz: 3030; R600: ; %bb.0: 3031; R600-NEXT: CF_END 3032; R600-NEXT: PAD 3033; 3034; CM-LABEL: v_log2_f32_ninf_daz: 3035; CM: ; %bb.0: 3036; CM-NEXT: CF_END 3037; CM-NEXT: PAD 3038 %result = 
call ninf float @llvm.log2.f32(float %in) 3039 ret float %result 3040} 3041 3042define float @v_log2_f32_ninf_dynamic(float %in) #1 { 3043; SI-SDAG-LABEL: v_log2_f32_ninf_dynamic: 3044; SI-SDAG: ; %bb.0: 3045; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3046; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3047; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3048; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3049; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3050; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 3051; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3052; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3053; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3054; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3055; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3056; 3057; SI-GISEL-LABEL: v_log2_f32_ninf_dynamic: 3058; SI-GISEL: ; %bb.0: 3059; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3060; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3061; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3062; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3063; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3064; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 3065; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3066; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3067; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3068; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3069; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3070; 3071; VI-SDAG-LABEL: v_log2_f32_ninf_dynamic: 3072; VI-SDAG: ; %bb.0: 3073; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3074; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3075; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3076; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3077; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3078; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3079; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 3080; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3081; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3082; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3083; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3084; 3085; VI-GISEL-LABEL: 
v_log2_f32_ninf_dynamic: 3086; VI-GISEL: ; %bb.0: 3087; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3088; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3089; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3090; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3091; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3092; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3093; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 3094; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3095; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3096; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3097; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3098; 3099; GFX900-SDAG-LABEL: v_log2_f32_ninf_dynamic: 3100; GFX900-SDAG: ; %bb.0: 3101; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3102; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 3103; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3104; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3105; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3106; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3107; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 3108; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3109; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3110; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3111; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3112; 3113; GFX900-GISEL-LABEL: v_log2_f32_ninf_dynamic: 3114; GFX900-GISEL: ; %bb.0: 3115; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3116; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3117; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3118; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3119; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3120; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3121; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 3122; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3123; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3124; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3125; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3126; 3127; GFX1100-SDAG-LABEL: v_log2_f32_ninf_dynamic: 3128; GFX1100-SDAG: ; %bb.0: 
3129; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3130; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3131; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 3132; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3133; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3134; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3135; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3136; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3137; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 3138; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 3139; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3140; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 3141; 3142; GFX1100-GISEL-LABEL: v_log2_f32_ninf_dynamic: 3143; GFX1100-GISEL: ; %bb.0: 3144; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3145; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3146; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 3147; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3148; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3149; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3150; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3151; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 3152; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 3153; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 3154; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3155; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 3156; 3157; R600-LABEL: v_log2_f32_ninf_dynamic: 3158; R600: ; %bb.0: 3159; R600-NEXT: CF_END 3160; R600-NEXT: PAD 3161; 3162; CM-LABEL: v_log2_f32_ninf_dynamic: 3163; CM: ; %bb.0: 3164; CM-NEXT: CF_END 3165; CM-NEXT: PAD 3166 %result = call ninf float @llvm.log2.f32(float %in) 3167 ret float %result 3168} 3169 3170define float @v_log2_f32_nnan_ninf(float %in) { 3171; SI-SDAG-LABEL: v_log2_f32_nnan_ninf: 3172; SI-SDAG: ; %bb.0: 3173; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3174; 
SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3175; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3176; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3177; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3178; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 3179; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3180; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3181; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3182; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3183; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3184; 3185; SI-GISEL-LABEL: v_log2_f32_nnan_ninf: 3186; SI-GISEL: ; %bb.0: 3187; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3188; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3189; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3190; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3191; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3192; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 3193; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3194; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3195; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3196; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3197; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3198; 3199; VI-SDAG-LABEL: v_log2_f32_nnan_ninf: 3200; VI-SDAG: ; %bb.0: 3201; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3202; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3203; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3204; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3205; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3206; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3207; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 3208; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3209; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3210; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3211; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3212; 3213; VI-GISEL-LABEL: v_log2_f32_nnan_ninf: 3214; VI-GISEL: ; %bb.0: 3215; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3216; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3217; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3218; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3219; VI-GISEL-NEXT: 
v_lshlrev_b32_e32 v1, 5, v1 3220; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3221; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 3222; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3223; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3224; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3225; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3226; 3227; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf: 3228; GFX900-SDAG: ; %bb.0: 3229; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3230; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 3231; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3232; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3233; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3234; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3235; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 3236; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3237; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3238; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3239; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3240; 3241; GFX900-GISEL-LABEL: v_log2_f32_nnan_ninf: 3242; GFX900-GISEL: ; %bb.0: 3243; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3244; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3245; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3246; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3247; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3248; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3249; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 3250; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3251; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3252; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3253; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3254; 3255; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf: 3256; GFX1100-SDAG: ; %bb.0: 3257; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3258; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3259; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 3260; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3261; GFX1100-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3262; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3263; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3264; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3265; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 3266; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 3267; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3268; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 3269; 3270; GFX1100-GISEL-LABEL: v_log2_f32_nnan_ninf: 3271; GFX1100-GISEL: ; %bb.0: 3272; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3273; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3274; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 3275; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3276; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3277; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3278; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3279; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 3280; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 3281; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 3282; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3283; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 3284; 3285; R600-LABEL: v_log2_f32_nnan_ninf: 3286; R600: ; %bb.0: 3287; R600-NEXT: CF_END 3288; R600-NEXT: PAD 3289; 3290; CM-LABEL: v_log2_f32_nnan_ninf: 3291; CM: ; %bb.0: 3292; CM-NEXT: CF_END 3293; CM-NEXT: PAD 3294 %result = call nnan ninf float @llvm.log2.f32(float %in) 3295 ret float %result 3296} 3297 3298define float @v_log2_f32_nnan_ninf_daz(float %in) #0 { 3299; GFX689-LABEL: v_log2_f32_nnan_ninf_daz: 3300; GFX689: ; %bb.0: 3301; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3302; GFX689-NEXT: v_log_f32_e32 v0, v0 3303; GFX689-NEXT: s_setpc_b64 s[30:31] 3304; 3305; GFX1100-LABEL: v_log2_f32_nnan_ninf_daz: 3306; GFX1100: ; %bb.0: 3307; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3308; GFX1100-NEXT: v_log_f32_e32 v0, v0 3309; GFX1100-NEXT: s_setpc_b64 
s[30:31] 3310; 3311; R600-LABEL: v_log2_f32_nnan_ninf_daz: 3312; R600: ; %bb.0: 3313; R600-NEXT: CF_END 3314; R600-NEXT: PAD 3315; 3316; CM-LABEL: v_log2_f32_nnan_ninf_daz: 3317; CM: ; %bb.0: 3318; CM-NEXT: CF_END 3319; CM-NEXT: PAD 3320 %result = call nnan ninf float @llvm.log2.f32(float %in) 3321 ret float %result 3322} 3323 3324define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { 3325; SI-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: 3326; SI-SDAG: ; %bb.0: 3327; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3328; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3329; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3330; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3331; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3332; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 3333; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3334; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3335; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3336; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3337; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3338; 3339; SI-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: 3340; SI-GISEL: ; %bb.0: 3341; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3342; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3343; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3344; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3345; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3346; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 3347; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3348; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3349; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3350; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3351; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3352; 3353; VI-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: 3354; VI-SDAG: ; %bb.0: 3355; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3356; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3357; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3358; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3359; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3360; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 
3361; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 3362; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3363; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3364; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3365; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3366; 3367; VI-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: 3368; VI-GISEL: ; %bb.0: 3369; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3370; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3371; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3372; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3373; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3374; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3375; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 3376; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3377; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3378; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3379; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3380; 3381; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: 3382; GFX900-SDAG: ; %bb.0: 3383; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3384; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 3385; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3386; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3387; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3388; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3389; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 3390; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3391; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3392; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3393; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3394; 3395; GFX900-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: 3396; GFX900-GISEL: ; %bb.0: 3397; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3398; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3399; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3400; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3401; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3402; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3403; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 3404; GFX900-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x42000000 3405; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3406; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3407; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3408; 3409; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: 3410; GFX1100-SDAG: ; %bb.0: 3411; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3412; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3413; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 3414; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3415; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3416; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3417; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3418; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3419; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 3420; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 3421; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3422; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 3423; 3424; GFX1100-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: 3425; GFX1100-GISEL: ; %bb.0: 3426; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3427; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3428; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 3429; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3430; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3431; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3432; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3433; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 3434; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 3435; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 3436; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3437; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 3438; 3439; R600-LABEL: v_log2_f32_nnan_ninf_dynamic: 3440; R600: ; %bb.0: 3441; R600-NEXT: CF_END 3442; R600-NEXT: PAD 3443; 3444; CM-LABEL: v_log2_f32_nnan_ninf_dynamic: 3445; CM: ; %bb.0: 3446; CM-NEXT: CF_END 3447; 
CM-NEXT: PAD 3448 %result = call nnan ninf float @llvm.log2.f32(float %in) 3449 ret float %result 3450} 3451 3452define float @v_log2_f32_fast_daz(float %in) #0 { 3453; GFX689-LABEL: v_log2_f32_fast_daz: 3454; GFX689: ; %bb.0: 3455; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3456; GFX689-NEXT: v_log_f32_e32 v0, v0 3457; GFX689-NEXT: s_setpc_b64 s[30:31] 3458; 3459; GFX1100-LABEL: v_log2_f32_fast_daz: 3460; GFX1100: ; %bb.0: 3461; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3462; GFX1100-NEXT: v_log_f32_e32 v0, v0 3463; GFX1100-NEXT: s_setpc_b64 s[30:31] 3464; 3465; R600-LABEL: v_log2_f32_fast_daz: 3466; R600: ; %bb.0: 3467; R600-NEXT: CF_END 3468; R600-NEXT: PAD 3469; 3470; CM-LABEL: v_log2_f32_fast_daz: 3471; CM: ; %bb.0: 3472; CM-NEXT: CF_END 3473; CM-NEXT: PAD 3474 %result = call fast float @llvm.log2.f32(float %in) 3475 ret float %result 3476} 3477 3478define float @v_log2_f32_dynamic_mode(float %in) #1 { 3479; SI-SDAG-LABEL: v_log2_f32_dynamic_mode: 3480; SI-SDAG: ; %bb.0: 3481; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3482; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3483; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3484; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3485; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3486; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 3487; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3488; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3489; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3490; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3491; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3492; 3493; SI-GISEL-LABEL: v_log2_f32_dynamic_mode: 3494; SI-GISEL: ; %bb.0: 3495; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3496; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3497; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3498; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3499; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3500; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 3501; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3502; SI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0x42000000 3503; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3504; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3505; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3506; 3507; VI-SDAG-LABEL: v_log2_f32_dynamic_mode: 3508; VI-SDAG: ; %bb.0: 3509; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3510; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3511; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3512; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3513; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3514; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3515; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 3516; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3517; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3518; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3519; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3520; 3521; VI-GISEL-LABEL: v_log2_f32_dynamic_mode: 3522; VI-GISEL: ; %bb.0: 3523; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3524; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3525; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3526; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3527; VI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3528; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3529; VI-GISEL-NEXT: v_log_f32_e32 v0, v0 3530; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3531; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3532; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3533; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3534; 3535; GFX900-SDAG-LABEL: v_log2_f32_dynamic_mode: 3536; GFX900-SDAG: ; %bb.0: 3537; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3538; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 3539; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3540; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3541; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3542; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3543; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 3544; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3545; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3546; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 
3547; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3548; 3549; GFX900-GISEL-LABEL: v_log2_f32_dynamic_mode: 3550; GFX900-GISEL: ; %bb.0: 3551; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3552; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x800000 3553; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3554; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3555; GFX900-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3556; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3557; GFX900-GISEL-NEXT: v_log_f32_e32 v0, v0 3558; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3559; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3560; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3561; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3562; 3563; GFX1100-SDAG-LABEL: v_log2_f32_dynamic_mode: 3564; GFX1100-SDAG: ; %bb.0: 3565; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3566; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3567; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 3568; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3569; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3570; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3571; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 3572; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3573; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 3574; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 3575; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3576; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 3577; 3578; GFX1100-GISEL-LABEL: v_log2_f32_dynamic_mode: 3579; GFX1100-GISEL: ; %bb.0: 3580; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3581; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3582; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 3583; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3584; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3585; GFX1100-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 3586; 
GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3587; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 3588; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 3589; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 3590; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3591; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 3592; 3593; R600-LABEL: v_log2_f32_dynamic_mode: 3594; R600: ; %bb.0: 3595; R600-NEXT: CF_END 3596; R600-NEXT: PAD 3597; 3598; CM-LABEL: v_log2_f32_dynamic_mode: 3599; CM: ; %bb.0: 3600; CM-NEXT: CF_END 3601; CM-NEXT: PAD 3602 %result = call float @llvm.log2.f32(float %in) 3603 ret float %result 3604} 3605 3606define float @v_log2_f32_undef() { 3607; GFX689-SDAG-LABEL: v_log2_f32_undef: 3608; GFX689-SDAG: ; %bb.0: 3609; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3610; GFX689-SDAG-NEXT: v_log_f32_e32 v0, s4 3611; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] 3612; 3613; GFX689-GISEL-LABEL: v_log2_f32_undef: 3614; GFX689-GISEL: ; %bb.0: 3615; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3616; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0x800000 3617; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x4f800000 3618; GFX689-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 3619; GFX689-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 3620; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3621; GFX689-GISEL-NEXT: v_log_f32_e32 v0, v0 3622; GFX689-GISEL-NEXT: v_mov_b32_e32 v1, 0x42000000 3623; GFX689-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3624; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3625; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] 3626; 3627; GFX1100-SDAG-LABEL: v_log2_f32_undef: 3628; GFX1100-SDAG: ; %bb.0: 3629; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3630; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, s0 3631; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 3632; 3633; GFX1100-GISEL-LABEL: v_log2_f32_undef: 3634; GFX1100-GISEL: ; %bb.0: 3635; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3636; GFX1100-GISEL-NEXT: v_mul_f32_e64 v0, 0x4f800000, s0 
3637; GFX1100-GISEL-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0 3638; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3639; GFX1100-GISEL-NEXT: v_cndmask_b32_e32 v0, s0, v0, vcc_lo 3640; GFX1100-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3641; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 3642; GFX1100-GISEL-NEXT: v_log_f32_e32 v0, v0 3643; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 3644; GFX1100-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 3645; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 3646; 3647; R600-LABEL: v_log2_f32_undef: 3648; R600: ; %bb.0: 3649; R600-NEXT: CF_END 3650; R600-NEXT: PAD 3651; 3652; CM-LABEL: v_log2_f32_undef: 3653; CM: ; %bb.0: 3654; CM-NEXT: CF_END 3655; CM-NEXT: PAD 3656 %result = call float @llvm.log2.f32(float undef) 3657 ret float %result 3658} 3659 3660define float @v_log2_f32_0() { 3661; GFX689-SDAG-LABEL: v_log2_f32_0: 3662; GFX689-SDAG: ; %bb.0: 3663; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3664; GFX689-SDAG-NEXT: v_log_f32_e32 v0, 0 3665; GFX689-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 3666; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] 3667; 3668; GFX689-GISEL-LABEL: v_log2_f32_0: 3669; GFX689-GISEL: ; %bb.0: 3670; GFX689-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3671; GFX689-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 3672; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] 3673; 3674; GFX1100-SDAG-LABEL: v_log2_f32_0: 3675; GFX1100-SDAG: ; %bb.0: 3676; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3677; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, 0 3678; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 3679; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, 0xc2000000, v0 3680; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 3681; 3682; GFX1100-GISEL-LABEL: v_log2_f32_0: 3683; GFX1100-GISEL: ; %bb.0: 3684; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3685; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0xff800000 3686; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 3687; 3688; R600-LABEL: 
v_log2_f32_0: 3689; R600: ; %bb.0: 3690; R600-NEXT: CF_END 3691; R600-NEXT: PAD 3692; 3693; CM-LABEL: v_log2_f32_0: 3694; CM: ; %bb.0: 3695; CM-NEXT: CF_END 3696; CM-NEXT: PAD 3697 %result = call float @llvm.log2.f32(float 0.0) 3698 ret float %result 3699} 3700 3701define float @v_log2_f32_from_fpext_f16(i16 %src.i) { 3702; GFX689-LABEL: v_log2_f32_from_fpext_f16: 3703; GFX689: ; %bb.0: 3704; GFX689-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3705; GFX689-NEXT: v_cvt_f32_f16_e32 v0, v0 3706; GFX689-NEXT: v_log_f32_e32 v0, v0 3707; GFX689-NEXT: s_setpc_b64 s[30:31] 3708; 3709; GFX1100-LABEL: v_log2_f32_from_fpext_f16: 3710; GFX1100: ; %bb.0: 3711; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3712; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 3713; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 3714; GFX1100-NEXT: v_log_f32_e32 v0, v0 3715; GFX1100-NEXT: s_setpc_b64 s[30:31] 3716; 3717; R600-LABEL: v_log2_f32_from_fpext_f16: 3718; R600: ; %bb.0: 3719; R600-NEXT: CF_END 3720; R600-NEXT: PAD 3721; 3722; CM-LABEL: v_log2_f32_from_fpext_f16: 3723; CM: ; %bb.0: 3724; CM-NEXT: CF_END 3725; CM-NEXT: PAD 3726 %src = bitcast i16 %src.i to half 3727 %fpext = fpext half %src to float 3728 %result = call float @llvm.log2.f32(float %fpext) 3729 ret float %result 3730} 3731 3732define float @v_log2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { 3733; SI-SDAG-LABEL: v_log2_f32_from_fpext_math_f16: 3734; SI-SDAG: ; %bb.0: 3735; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3736; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 3737; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 3738; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 3739; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 3740; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3741; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3742; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3743; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 3744; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3745; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 3746; SI-SDAG-NEXT: v_cndmask_b32_e32 
v1, 0, v1, vcc 3747; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 3748; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3749; 3750; SI-GISEL-LABEL: v_log2_f32_from_fpext_math_f16: 3751; SI-GISEL: ; %bb.0: 3752; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3753; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 3754; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 3755; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 3756; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 3757; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 3758; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3759; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3760; 3761; VI-LABEL: v_log2_f32_from_fpext_math_f16: 3762; VI: ; %bb.0: 3763; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3764; VI-NEXT: v_add_f16_e32 v0, v0, v1 3765; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 3766; VI-NEXT: v_log_f32_e32 v0, v0 3767; VI-NEXT: s_setpc_b64 s[30:31] 3768; 3769; GFX900-LABEL: v_log2_f32_from_fpext_math_f16: 3770; GFX900: ; %bb.0: 3771; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3772; GFX900-NEXT: v_add_f16_e32 v0, v0, v1 3773; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 3774; GFX900-NEXT: v_log_f32_e32 v0, v0 3775; GFX900-NEXT: s_setpc_b64 s[30:31] 3776; 3777; GFX1100-LABEL: v_log2_f32_from_fpext_math_f16: 3778; GFX1100: ; %bb.0: 3779; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3780; GFX1100-NEXT: v_add_f16_e32 v0, v0, v1 3781; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3782; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 3783; GFX1100-NEXT: v_log_f32_e32 v0, v0 3784; GFX1100-NEXT: s_setpc_b64 s[30:31] 3785; 3786; R600-LABEL: v_log2_f32_from_fpext_math_f16: 3787; R600: ; %bb.0: 3788; R600-NEXT: CF_END 3789; R600-NEXT: PAD 3790; 3791; CM-LABEL: v_log2_f32_from_fpext_math_f16: 3792; CM: ; %bb.0: 3793; CM-NEXT: CF_END 3794; CM-NEXT: PAD 3795 %src0 = bitcast i16 %src0.i to half 3796 %src1 = bitcast i16 %src1.i to half 3797 %fadd = fadd half %src0, %src1 3798 %fpext = fpext half %fadd to float 3799 %result = call float @llvm.log2.f32(float 
%fpext) 3800 ret float %result 3801} 3802 3803define float @v_log2_f32_from_fpext_bf16(bfloat %src) { 3804; SI-LABEL: v_log2_f32_from_fpext_bf16: 3805; SI: ; %bb.0: 3806; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3807; SI-NEXT: s_mov_b32 s4, 0x800000 3808; SI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3809; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3810; SI-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3811; SI-NEXT: v_ldexp_f32_e32 v0, v0, v2 3812; SI-NEXT: v_log_f32_e32 v0, v0 3813; SI-NEXT: v_mov_b32_e32 v1, 0x42000000 3814; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3815; SI-NEXT: v_sub_f32_e32 v0, v0, v1 3816; SI-NEXT: s_setpc_b64 s[30:31] 3817; 3818; VI-LABEL: v_log2_f32_from_fpext_bf16: 3819; VI: ; %bb.0: 3820; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3821; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3822; VI-NEXT: s_mov_b32 s4, 0x800000 3823; VI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3824; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3825; VI-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3826; VI-NEXT: v_ldexp_f32 v0, v0, v1 3827; VI-NEXT: v_log_f32_e32 v0, v0 3828; VI-NEXT: v_mov_b32_e32 v1, 0x42000000 3829; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3830; VI-NEXT: v_sub_f32_e32 v0, v0, v1 3831; VI-NEXT: s_setpc_b64 s[30:31] 3832; 3833; GFX900-LABEL: v_log2_f32_from_fpext_bf16: 3834; GFX900: ; %bb.0: 3835; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3836; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3837; GFX900-NEXT: s_mov_b32 s4, 0x800000 3838; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3839; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 3840; GFX900-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3841; GFX900-NEXT: v_ldexp_f32 v0, v0, v2 3842; GFX900-NEXT: v_log_f32_e32 v0, v0 3843; GFX900-NEXT: v_mov_b32_e32 v1, 0x42000000 3844; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3845; GFX900-NEXT: v_sub_f32_e32 v0, v0, v1 3846; GFX900-NEXT: s_setpc_b64 s[30:31] 3847; 3848; GFX1100-LABEL: v_log2_f32_from_fpext_bf16: 3849; GFX1100: ; %bb.0: 3850; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
3851; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3852; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 3853; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 3854; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 3855; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo 3856; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 5, v2 3857; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3858; GFX1100-NEXT: v_ldexp_f32 v0, v0, v2 3859; GFX1100-NEXT: v_log_f32_e32 v0, v0 3860; GFX1100-NEXT: s_waitcnt_depctr 0xfff 3861; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 3862; GFX1100-NEXT: s_setpc_b64 s[30:31] 3863; 3864; R600-LABEL: v_log2_f32_from_fpext_bf16: 3865; R600: ; %bb.0: 3866; R600-NEXT: CF_END 3867; R600-NEXT: PAD 3868; 3869; CM-LABEL: v_log2_f32_from_fpext_bf16: 3870; CM: ; %bb.0: 3871; CM-NEXT: CF_END 3872; CM-NEXT: PAD 3873 %fpext = fpext bfloat %src to float 3874 %result = call float @llvm.log2.f32(float %fpext) 3875 ret float %result 3876} 3877 3878define half @v_log2_f16(half %in) { 3879; SI-SDAG-LABEL: v_log2_f16: 3880; SI-SDAG: ; %bb.0: 3881; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3882; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 3883; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 3884; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3885; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 3886; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 3887; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3888; 3889; SI-GISEL-LABEL: v_log2_f16: 3890; SI-GISEL: ; %bb.0: 3891; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3892; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 3893; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3894; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 3895; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3896; 3897; VI-LABEL: v_log2_f16: 3898; VI: ; %bb.0: 3899; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3900; VI-NEXT: v_log_f16_e32 v0, v0 3901; VI-NEXT: s_setpc_b64 s[30:31] 3902; 3903; GFX900-LABEL: v_log2_f16: 3904; GFX900: ; %bb.0: 3905; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3906; GFX900-NEXT: v_log_f16_e32 v0, v0 3907; GFX900-NEXT: s_setpc_b64 s[30:31] 3908; 3909; GFX1100-LABEL: v_log2_f16: 3910; GFX1100: ; %bb.0: 3911; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3912; GFX1100-NEXT: v_log_f16_e32 v0, v0 3913; GFX1100-NEXT: s_setpc_b64 s[30:31] 3914; 3915; R600-LABEL: v_log2_f16: 3916; R600: ; %bb.0: 3917; R600-NEXT: CF_END 3918; R600-NEXT: PAD 3919; 3920; CM-LABEL: v_log2_f16: 3921; CM: ; %bb.0: 3922; CM-NEXT: CF_END 3923; CM-NEXT: PAD 3924 %result = call half @llvm.log2.f16(half %in) 3925 ret half %result 3926} 3927 3928define half @v_log2_fabs_f16(half %in) { 3929; SI-SDAG-LABEL: v_log2_fabs_f16: 3930; SI-SDAG: ; %bb.0: 3931; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3932; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 3933; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| 3934; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3935; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 3936; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 3937; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3938; 3939; SI-GISEL-LABEL: v_log2_fabs_f16: 3940; SI-GISEL: ; %bb.0: 3941; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3942; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| 3943; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3944; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 3945; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3946; 3947; VI-LABEL: v_log2_fabs_f16: 3948; VI: ; %bb.0: 3949; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3950; VI-NEXT: v_log_f16_e64 v0, |v0| 3951; VI-NEXT: s_setpc_b64 s[30:31] 3952; 3953; GFX900-LABEL: v_log2_fabs_f16: 3954; GFX900: ; %bb.0: 3955; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3956; GFX900-NEXT: v_log_f16_e64 v0, |v0| 3957; GFX900-NEXT: s_setpc_b64 s[30:31] 3958; 3959; GFX1100-LABEL: v_log2_fabs_f16: 3960; GFX1100: ; %bb.0: 3961; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3962; GFX1100-NEXT: v_log_f16_e64 v0, |v0| 3963; GFX1100-NEXT: s_setpc_b64 s[30:31] 3964; 3965; R600-LABEL: 
v_log2_fabs_f16: 3966; R600: ; %bb.0: 3967; R600-NEXT: CF_END 3968; R600-NEXT: PAD 3969; 3970; CM-LABEL: v_log2_fabs_f16: 3971; CM: ; %bb.0: 3972; CM-NEXT: CF_END 3973; CM-NEXT: PAD 3974 %fabs = call half @llvm.fabs.f16(half %in) 3975 %result = call half @llvm.log2.f16(half %fabs) 3976 ret half %result 3977} 3978 3979define half @v_log2_fneg_fabs_f16(half %in) { 3980; SI-SDAG-LABEL: v_log2_fneg_fabs_f16: 3981; SI-SDAG: ; %bb.0: 3982; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3983; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 3984; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| 3985; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 3986; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 3987; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 3988; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3989; 3990; SI-GISEL-LABEL: v_log2_fneg_fabs_f16: 3991; SI-GISEL: ; %bb.0: 3992; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3993; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| 3994; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 3995; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 3996; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3997; 3998; VI-LABEL: v_log2_fneg_fabs_f16: 3999; VI: ; %bb.0: 4000; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4001; VI-NEXT: v_log_f16_e64 v0, -|v0| 4002; VI-NEXT: s_setpc_b64 s[30:31] 4003; 4004; GFX900-LABEL: v_log2_fneg_fabs_f16: 4005; GFX900: ; %bb.0: 4006; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4007; GFX900-NEXT: v_log_f16_e64 v0, -|v0| 4008; GFX900-NEXT: s_setpc_b64 s[30:31] 4009; 4010; GFX1100-LABEL: v_log2_fneg_fabs_f16: 4011; GFX1100: ; %bb.0: 4012; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4013; GFX1100-NEXT: v_log_f16_e64 v0, -|v0| 4014; GFX1100-NEXT: s_setpc_b64 s[30:31] 4015; 4016; R600-LABEL: v_log2_fneg_fabs_f16: 4017; R600: ; %bb.0: 4018; R600-NEXT: CF_END 4019; R600-NEXT: PAD 4020; 4021; CM-LABEL: v_log2_fneg_fabs_f16: 4022; CM: ; %bb.0: 4023; CM-NEXT: CF_END 4024; CM-NEXT: PAD 4025 %fabs = call half @llvm.fabs.f16(half %in) 4026 %fneg.fabs = fneg half %fabs 4027 
%result = call half @llvm.log2.f16(half %fneg.fabs) 4028 ret half %result 4029} 4030 4031define half @v_log2_fneg_f16(half %in) { 4032; SI-SDAG-LABEL: v_log2_fneg_f16: 4033; SI-SDAG: ; %bb.0: 4034; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4035; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 4036; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4037; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4038; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4039; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4040; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4041; 4042; SI-GISEL-LABEL: v_log2_fneg_f16: 4043; SI-GISEL: ; %bb.0: 4044; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4045; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 4046; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4047; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4048; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4049; 4050; VI-LABEL: v_log2_fneg_f16: 4051; VI: ; %bb.0: 4052; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4053; VI-NEXT: v_log_f16_e64 v0, -v0 4054; VI-NEXT: s_setpc_b64 s[30:31] 4055; 4056; GFX900-LABEL: v_log2_fneg_f16: 4057; GFX900: ; %bb.0: 4058; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4059; GFX900-NEXT: v_log_f16_e64 v0, -v0 4060; GFX900-NEXT: s_setpc_b64 s[30:31] 4061; 4062; GFX1100-LABEL: v_log2_fneg_f16: 4063; GFX1100: ; %bb.0: 4064; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4065; GFX1100-NEXT: v_log_f16_e64 v0, -v0 4066; GFX1100-NEXT: s_setpc_b64 s[30:31] 4067; 4068; R600-LABEL: v_log2_fneg_f16: 4069; R600: ; %bb.0: 4070; R600-NEXT: CF_END 4071; R600-NEXT: PAD 4072; 4073; CM-LABEL: v_log2_fneg_f16: 4074; CM: ; %bb.0: 4075; CM-NEXT: CF_END 4076; CM-NEXT: PAD 4077 %fneg = fneg half %in 4078 %result = call half @llvm.log2.f16(half %fneg) 4079 ret half %result 4080} 4081 4082define half @v_log2_f16_fast(half %in) { 4083; SI-SDAG-LABEL: v_log2_f16_fast: 4084; SI-SDAG: ; %bb.0: 4085; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4086; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4087; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, 
v0 4088; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4089; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4090; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4091; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4092; 4093; SI-GISEL-LABEL: v_log2_f16_fast: 4094; SI-GISEL: ; %bb.0: 4095; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4096; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4097; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4098; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4099; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4100; 4101; VI-LABEL: v_log2_f16_fast: 4102; VI: ; %bb.0: 4103; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4104; VI-NEXT: v_log_f16_e32 v0, v0 4105; VI-NEXT: s_setpc_b64 s[30:31] 4106; 4107; GFX900-LABEL: v_log2_f16_fast: 4108; GFX900: ; %bb.0: 4109; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4110; GFX900-NEXT: v_log_f16_e32 v0, v0 4111; GFX900-NEXT: s_setpc_b64 s[30:31] 4112; 4113; GFX1100-LABEL: v_log2_f16_fast: 4114; GFX1100: ; %bb.0: 4115; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4116; GFX1100-NEXT: v_log_f16_e32 v0, v0 4117; GFX1100-NEXT: s_setpc_b64 s[30:31] 4118; 4119; R600-LABEL: v_log2_f16_fast: 4120; R600: ; %bb.0: 4121; R600-NEXT: CF_END 4122; R600-NEXT: PAD 4123; 4124; CM-LABEL: v_log2_f16_fast: 4125; CM: ; %bb.0: 4126; CM-NEXT: CF_END 4127; CM-NEXT: PAD 4128 %result = call fast half @llvm.log2.f16(half %in) 4129 ret half %result 4130} 4131 4132define <2 x half> @v_log2_v2f16(<2 x half> %in) { 4133; SI-SDAG-LABEL: v_log2_v2f16: 4134; SI-SDAG: ; %bb.0: 4135; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4136; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4137; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4138; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4139; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4140; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4141; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4142; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4143; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4144; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4145; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4146; 
SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4147; 4148; SI-GISEL-LABEL: v_log2_v2f16: 4149; SI-GISEL: ; %bb.0: 4150; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4151; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4152; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4153; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4154; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4155; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4156; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4157; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4158; 4159; VI-SDAG-LABEL: v_log2_v2f16: 4160; VI-SDAG: ; %bb.0: 4161; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4162; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4163; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 4164; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 4165; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4166; 4167; VI-GISEL-LABEL: v_log2_v2f16: 4168; VI-GISEL: ; %bb.0: 4169; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4170; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 4171; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4172; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4173; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4174; 4175; GFX900-SDAG-LABEL: v_log2_v2f16: 4176; GFX900-SDAG: ; %bb.0: 4177; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4178; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4179; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 4180; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4181; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4182; 4183; GFX900-GISEL-LABEL: v_log2_v2f16: 4184; GFX900-GISEL: ; %bb.0: 4185; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4186; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 4187; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4188; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 4189; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4190; 4191; GFX1100-LABEL: v_log2_v2f16: 4192; GFX1100: ; %bb.0: 4193; 
GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4194; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4195; GFX1100-NEXT: v_log_f16_e32 v0, v0 4196; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 4197; GFX1100-NEXT: v_log_f16_e32 v1, v1 4198; GFX1100-NEXT: s_waitcnt_depctr 0xfff 4199; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 4200; GFX1100-NEXT: s_setpc_b64 s[30:31] 4201; 4202; R600-LABEL: v_log2_v2f16: 4203; R600: ; %bb.0: 4204; R600-NEXT: CF_END 4205; R600-NEXT: PAD 4206; 4207; CM-LABEL: v_log2_v2f16: 4208; CM: ; %bb.0: 4209; CM-NEXT: CF_END 4210; CM-NEXT: PAD 4211 %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %in) 4212 ret <2 x half> %result 4213} 4214 4215define <2 x half> @v_log2_fabs_v2f16(<2 x half> %in) { 4216; SI-SDAG-LABEL: v_log2_fabs_v2f16: 4217; SI-SDAG: ; %bb.0: 4218; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4219; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4220; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4221; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| 4222; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| 4223; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4224; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4225; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4226; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4227; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4228; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4229; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4230; 4231; SI-GISEL-LABEL: v_log2_fabs_v2f16: 4232; SI-GISEL: ; %bb.0: 4233; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4234; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4235; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 4236; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4237; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 4238; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4239; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4240; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4241; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4242; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4243; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4244; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4245; 
SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4246; 4247; VI-SDAG-LABEL: v_log2_fabs_v2f16: 4248; VI-SDAG: ; %bb.0: 4249; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4250; VI-SDAG-NEXT: v_log_f16_sdwa v1, |v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4251; VI-SDAG-NEXT: v_log_f16_e64 v0, |v0| 4252; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 4253; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4254; 4255; VI-GISEL-LABEL: v_log2_fabs_v2f16: 4256; VI-GISEL: ; %bb.0: 4257; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4258; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 4259; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 4260; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4261; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4262; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4263; 4264; GFX900-SDAG-LABEL: v_log2_fabs_v2f16: 4265; GFX900-SDAG: ; %bb.0: 4266; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4267; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4268; GFX900-SDAG-NEXT: v_log_f16_e64 v0, |v0| 4269; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4270; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4271; 4272; GFX900-GISEL-LABEL: v_log2_fabs_v2f16: 4273; GFX900-GISEL: ; %bb.0: 4274; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4275; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 4276; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 4277; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4278; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 4279; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4280; 4281; GFX1100-SDAG-LABEL: v_log2_fabs_v2f16: 4282; GFX1100-SDAG: ; %bb.0: 4283; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4284; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4285; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, |v0| 4286; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 4287; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, |v1| 4288; 
GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 4289; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4290; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 4291; 4292; GFX1100-GISEL-LABEL: v_log2_fabs_v2f16: 4293; GFX1100-GISEL: ; %bb.0: 4294; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4295; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 4296; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 4297; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4298; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 4299; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 4300; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 4301; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 4302; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 4303; 4304; R600-LABEL: v_log2_fabs_v2f16: 4305; R600: ; %bb.0: 4306; R600-NEXT: CF_END 4307; R600-NEXT: PAD 4308; 4309; CM-LABEL: v_log2_fabs_v2f16: 4310; CM: ; %bb.0: 4311; CM-NEXT: CF_END 4312; CM-NEXT: PAD 4313 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) 4314 %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %fabs) 4315 ret <2 x half> %result 4316} 4317 4318define <2 x half> @v_log2_fneg_fabs_v2f16(<2 x half> %in) { 4319; SI-SDAG-LABEL: v_log2_fneg_fabs_v2f16: 4320; SI-SDAG: ; %bb.0: 4321; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4322; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4323; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4324; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4325; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 4326; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 4327; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4328; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4329; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4330; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4331; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4332; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4333; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4334; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4335; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4336; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4337; 
4338; SI-GISEL-LABEL: v_log2_fneg_fabs_v2f16: 4339; SI-GISEL: ; %bb.0: 4340; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4341; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4342; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 4343; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4344; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 4345; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4346; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4347; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4348; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4349; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4350; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4351; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4352; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4353; 4354; VI-SDAG-LABEL: v_log2_fneg_fabs_v2f16: 4355; VI-SDAG: ; %bb.0: 4356; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4357; VI-SDAG-NEXT: v_log_f16_sdwa v1, -|v0| dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4358; VI-SDAG-NEXT: v_log_f16_e64 v0, -|v0| 4359; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 4360; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4361; 4362; VI-GISEL-LABEL: v_log2_fneg_fabs_v2f16: 4363; VI-GISEL: ; %bb.0: 4364; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4365; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 4366; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 4367; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4368; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4369; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4370; 4371; GFX900-SDAG-LABEL: v_log2_fneg_fabs_v2f16: 4372; GFX900-SDAG: ; %bb.0: 4373; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4374; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4375; GFX900-SDAG-NEXT: v_log_f16_e64 v0, -|v0| 4376; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4377; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4378; 4379; GFX900-GISEL-LABEL: v_log2_fneg_fabs_v2f16: 4380; GFX900-GISEL: ; %bb.0: 4381; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 4382; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 4383; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 4384; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4385; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 4386; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4387; 4388; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_v2f16: 4389; GFX1100-SDAG: ; %bb.0: 4390; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4391; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4392; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -|v0| 4393; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 4394; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -|v1| 4395; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 4396; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4397; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 4398; 4399; GFX1100-GISEL-LABEL: v_log2_fneg_fabs_v2f16: 4400; GFX1100-GISEL: ; %bb.0: 4401; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4402; GFX1100-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 4403; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 4404; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4405; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 4406; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 4407; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 4408; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 4409; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 4410; 4411; R600-LABEL: v_log2_fneg_fabs_v2f16: 4412; R600: ; %bb.0: 4413; R600-NEXT: CF_END 4414; R600-NEXT: PAD 4415; 4416; CM-LABEL: v_log2_fneg_fabs_v2f16: 4417; CM: ; %bb.0: 4418; CM-NEXT: CF_END 4419; CM-NEXT: PAD 4420 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) 4421 %fneg.fabs = fneg <2 x half> %fabs 4422 %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %fneg.fabs) 4423 ret <2 x half> %result 4424} 4425 4426define <2 x half> @v_log2_fneg_v2f16(<2 x half> %in) { 4427; SI-SDAG-LABEL: v_log2_fneg_v2f16: 4428; SI-SDAG: ; %bb.0: 4429; SI-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4430; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4431; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4432; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4433; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 4434; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 4435; SI-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4436; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4437; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4438; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4439; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4440; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4441; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4442; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4443; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4444; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4445; 4446; SI-GISEL-LABEL: v_log2_fneg_v2f16: 4447; SI-GISEL: ; %bb.0: 4448; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4449; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4450; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 4451; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4452; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 4453; SI-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4454; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4455; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4456; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4457; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4458; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4459; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4460; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4461; 4462; VI-SDAG-LABEL: v_log2_fneg_v2f16: 4463; VI-SDAG: ; %bb.0: 4464; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4465; VI-SDAG-NEXT: v_log_f16_sdwa v1, -v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4466; VI-SDAG-NEXT: v_log_f16_e64 v0, -v0 4467; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 4468; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4469; 4470; VI-GISEL-LABEL: v_log2_fneg_v2f16: 4471; VI-GISEL: ; %bb.0: 4472; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4473; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 4474; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 4475; 
VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4476; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4477; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4478; 4479; GFX900-SDAG-LABEL: v_log2_fneg_v2f16: 4480; GFX900-SDAG: ; %bb.0: 4481; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4482; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4483; GFX900-SDAG-NEXT: v_log_f16_e64 v0, -v0 4484; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4485; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4486; 4487; GFX900-GISEL-LABEL: v_log2_fneg_v2f16: 4488; GFX900-GISEL: ; %bb.0: 4489; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4490; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 4491; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 4492; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4493; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 4494; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4495; 4496; GFX1100-SDAG-LABEL: v_log2_fneg_v2f16: 4497; GFX1100-SDAG: ; %bb.0: 4498; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4499; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4500; GFX1100-SDAG-NEXT: v_log_f16_e64 v0, -v0 4501; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 4502; GFX1100-SDAG-NEXT: v_log_f16_e64 v1, -v1 4503; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 4504; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4505; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 4506; 4507; GFX1100-GISEL-LABEL: v_log2_fneg_v2f16: 4508; GFX1100-GISEL: ; %bb.0: 4509; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4510; GFX1100-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 4511; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 4512; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4513; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 4514; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 4515; GFX1100-GISEL-NEXT: s_waitcnt_depctr 
0xfff 4516; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 4517; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 4518; 4519; R600-LABEL: v_log2_fneg_v2f16: 4520; R600: ; %bb.0: 4521; R600-NEXT: CF_END 4522; R600-NEXT: PAD 4523; 4524; CM-LABEL: v_log2_fneg_v2f16: 4525; CM: ; %bb.0: 4526; CM-NEXT: CF_END 4527; CM-NEXT: PAD 4528 %fneg = fneg <2 x half> %in 4529 %result = call <2 x half> @llvm.log2.v2f16(<2 x half> %fneg) 4530 ret <2 x half> %result 4531} 4532 4533define <2 x half> @v_log2_v2f16_fast(<2 x half> %in) { 4534; SI-SDAG-LABEL: v_log2_v2f16_fast: 4535; SI-SDAG: ; %bb.0: 4536; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4537; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4538; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4539; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4540; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4541; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4542; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4543; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4544; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4545; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4546; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4547; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4548; 4549; SI-GISEL-LABEL: v_log2_v2f16_fast: 4550; SI-GISEL: ; %bb.0: 4551; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4552; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4553; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4554; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4555; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4556; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4557; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4558; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4559; 4560; VI-SDAG-LABEL: v_log2_v2f16_fast: 4561; VI-SDAG: ; %bb.0: 4562; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4563; VI-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4564; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 4565; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 4566; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4567; 4568; VI-GISEL-LABEL: v_log2_v2f16_fast: 4569; VI-GISEL: ; %bb.0: 4570; 
VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4571; VI-GISEL-NEXT: v_log_f16_e32 v1, v0 4572; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4573; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 4574; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4575; 4576; GFX900-SDAG-LABEL: v_log2_v2f16_fast: 4577; GFX900-SDAG: ; %bb.0: 4578; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4579; GFX900-SDAG-NEXT: v_log_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4580; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 4581; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 4582; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4583; 4584; GFX900-GISEL-LABEL: v_log2_v2f16_fast: 4585; GFX900-GISEL: ; %bb.0: 4586; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4587; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v0 4588; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4589; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 4590; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4591; 4592; GFX1100-LABEL: v_log2_v2f16_fast: 4593; GFX1100: ; %bb.0: 4594; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4595; GFX1100-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4596; GFX1100-NEXT: v_log_f16_e32 v0, v0 4597; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 4598; GFX1100-NEXT: v_log_f16_e32 v1, v1 4599; GFX1100-NEXT: s_waitcnt_depctr 0xfff 4600; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v1 4601; GFX1100-NEXT: s_setpc_b64 s[30:31] 4602; 4603; R600-LABEL: v_log2_v2f16_fast: 4604; R600: ; %bb.0: 4605; R600-NEXT: CF_END 4606; R600-NEXT: PAD 4607; 4608; CM-LABEL: v_log2_v2f16_fast: 4609; CM: ; %bb.0: 4610; CM-NEXT: CF_END 4611; CM-NEXT: PAD 4612 %result = call fast <2 x half> @llvm.log2.v2f16(<2 x half> %in) 4613 ret <2 x half> %result 4614} 4615 4616define <3 x half> @v_log2_v3f16(<3 x half> %in) { 4617; SI-SDAG-LABEL: v_log2_v3f16: 4618; SI-SDAG: ; %bb.0: 4619; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4620; 
SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4621; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4622; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 4623; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4624; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4625; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4626; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4627; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4628; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 4629; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4630; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4631; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 4632; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4633; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4634; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4635; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4636; 4637; SI-GISEL-LABEL: v_log2_v3f16: 4638; SI-GISEL: ; %bb.0: 4639; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4640; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4641; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4642; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 4643; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4644; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4645; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 4646; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4647; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4648; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 4649; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4650; 4651; VI-SDAG-LABEL: v_log2_v3f16: 4652; VI-SDAG: ; %bb.0: 4653; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4654; VI-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4655; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 4656; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 4657; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 4658; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4659; 4660; VI-GISEL-LABEL: v_log2_v3f16: 4661; VI-GISEL: ; %bb.0: 4662; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4663; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 4664; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4665; VI-GISEL-NEXT: v_log_f16_e32 v1, v1 4666; VI-GISEL-NEXT: v_or_b32_e32 
v0, v2, v0 4667; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4668; 4669; GFX900-SDAG-LABEL: v_log2_v3f16: 4670; GFX900-SDAG: ; %bb.0: 4671; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4672; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4673; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 4674; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 4675; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 4676; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4677; 4678; GFX900-GISEL-LABEL: v_log2_v3f16: 4679; GFX900-GISEL: ; %bb.0: 4680; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4681; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 4682; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4683; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v1 4684; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 4685; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4686; 4687; GFX1100-LABEL: v_log2_v3f16: 4688; GFX1100: ; %bb.0: 4689; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4690; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0 4691; GFX1100-NEXT: v_log_f16_e32 v0, v0 4692; GFX1100-NEXT: v_log_f16_e32 v1, v1 4693; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 4694; GFX1100-NEXT: v_log_f16_e32 v2, v2 4695; GFX1100-NEXT: s_waitcnt_depctr 0xfff 4696; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v2 4697; GFX1100-NEXT: s_setpc_b64 s[30:31] 4698; 4699; R600-LABEL: v_log2_v3f16: 4700; R600: ; %bb.0: 4701; R600-NEXT: CF_END 4702; R600-NEXT: PAD 4703; 4704; CM-LABEL: v_log2_v3f16: 4705; CM: ; %bb.0: 4706; CM-NEXT: CF_END 4707; CM-NEXT: PAD 4708 %result = call <3 x half> @llvm.log2.v3f16(<3 x half> %in) 4709 ret <3 x half> %result 4710} 4711; Variant of v_log2_v3f16: the llvm.log2.v3f16 call below carries the fast flag. 4712define <3 x half> @v_log2_v3f16_fast(<3 x half> %in) { 4713; SI-SDAG-LABEL: v_log2_v3f16_fast: 4714; SI-SDAG: ; %bb.0: 4715; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4716; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4717; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4718; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, 
v2 4719; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4720; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4721; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4722; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4723; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4724; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 4725; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4726; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4727; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 4728; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4729; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4730; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4731; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4732; 4733; SI-GISEL-LABEL: v_log2_v3f16_fast: 4734; SI-GISEL: ; %bb.0: 4735; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4736; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4737; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4738; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 4739; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4740; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4741; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 4742; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4743; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4744; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 4745; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4746; 4747; VI-SDAG-LABEL: v_log2_v3f16_fast: 4748; VI-SDAG: ; %bb.0: 4749; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4750; VI-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4751; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 4752; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 4753; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 4754; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4755; 4756; VI-GISEL-LABEL: v_log2_v3f16_fast: 4757; VI-GISEL: ; %bb.0: 4758; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4759; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 4760; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4761; VI-GISEL-NEXT: v_log_f16_e32 v1, v1 4762; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 4763; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4764; 4765; GFX900-SDAG-LABEL: v_log2_v3f16_fast: 4766; 
GFX900-SDAG: ; %bb.0: 4767; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4768; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4769; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 4770; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 4771; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 4772; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4773; 4774; GFX900-GISEL-LABEL: v_log2_v3f16_fast: 4775; GFX900-GISEL: ; %bb.0: 4776; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4777; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 4778; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4779; GFX900-GISEL-NEXT: v_log_f16_e32 v1, v1 4780; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 4781; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4782; 4783; GFX1100-LABEL: v_log2_v3f16_fast: 4784; GFX1100: ; %bb.0: 4785; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4786; GFX1100-NEXT: v_lshrrev_b32_e32 v2, 16, v0 4787; GFX1100-NEXT: v_log_f16_e32 v0, v0 4788; GFX1100-NEXT: v_log_f16_e32 v1, v1 4789; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 4790; GFX1100-NEXT: v_log_f16_e32 v2, v2 4791; GFX1100-NEXT: s_waitcnt_depctr 0xfff 4792; GFX1100-NEXT: v_pack_b32_f16 v0, v0, v2 4793; GFX1100-NEXT: s_setpc_b64 s[30:31] 4794; 4795; R600-LABEL: v_log2_v3f16_fast: 4796; R600: ; %bb.0: 4797; R600-NEXT: CF_END 4798; R600-NEXT: PAD 4799; 4800; CM-LABEL: v_log2_v3f16_fast: 4801; CM: ; %bb.0: 4802; CM-NEXT: CF_END 4803; CM-NEXT: PAD 4804 %result = call fast <3 x half> @llvm.log2.v3f16(<3 x half> %in) 4805 ret <3 x half> %result 4806} 4807; llvm.log2 on <4 x half> with no fast-math flags on the call; GFX1100 output is checked separately for SDAG and GISEL. 4808define <4 x half> @v_log2_v4f16(<4 x half> %in) { 4809; SI-SDAG-LABEL: v_log2_v4f16: 4810; SI-SDAG: ; %bb.0: 4811; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4812; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4813; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4814; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 4815; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 4816; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, 
v0 4817; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4818; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4819; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 4820; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4821; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 4822; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 4823; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 4824; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4825; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4826; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 4827; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 4828; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4829; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4830; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4831; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 4832; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4833; 4834; SI-GISEL-LABEL: v_log2_v4f16: 4835; SI-GISEL: ; %bb.0: 4836; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4837; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4838; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4839; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 4840; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 4841; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4842; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4843; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 4844; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 4845; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4846; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4847; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 4848; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 4849; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4850; 4851; VI-SDAG-LABEL: v_log2_v4f16: 4852; VI-SDAG: ; %bb.0: 4853; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4854; VI-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4855; VI-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4856; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 4857; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 4858; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 4859; VI-SDAG-NEXT: v_or_b32_e32 v1, v1, v2 4860; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4861; 4862; VI-GISEL-LABEL: v_log2_v4f16: 4863; VI-GISEL: ; 
%bb.0: 4864; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4865; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 4866; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4867; VI-GISEL-NEXT: v_log_f16_e32 v3, v1 4868; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4869; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 4870; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 4871; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4872; 4873; GFX900-SDAG-LABEL: v_log2_v4f16: 4874; GFX900-SDAG: ; %bb.0: 4875; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4876; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4877; GFX900-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4878; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 4879; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 4880; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3 4881; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 4882; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4883; 4884; GFX900-GISEL-LABEL: v_log2_v4f16: 4885; GFX900-GISEL: ; %bb.0: 4886; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4887; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 4888; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4889; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1 4890; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 4891; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 4892; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1 4893; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4894; 4895; GFX1100-SDAG-LABEL: v_log2_v4f16: 4896; GFX1100-SDAG: ; %bb.0: 4897; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4898; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 4899; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1 4900; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1 4901; GFX1100-SDAG-NEXT: v_log_f16_e32 v0, v0 4902; GFX1100-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4903; GFX1100-SDAG-NEXT: v_log_f16_e32 v2, v2 4904; GFX1100-SDAG-NEXT: v_log_f16_e32 v3, v3 4905; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 4906; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 4907; GFX1100-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3 4908; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 4909; 4910; GFX1100-GISEL-LABEL: v_log2_v4f16: 4911; GFX1100-GISEL: ; %bb.0: 4912; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4913; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 4914; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 4915; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 4916; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 4917; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4918; GFX1100-GISEL-NEXT: v_log_f16_e32 v2, v2 4919; GFX1100-GISEL-NEXT: v_log_f16_e32 v3, v3 4920; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 4921; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v2 4922; GFX1100-GISEL-NEXT: v_pack_b32_f16 v1, v1, v3 4923; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 4924; 4925; R600-LABEL: v_log2_v4f16: 4926; R600: ; %bb.0: 4927; R600-NEXT: CF_END 4928; R600-NEXT: PAD 4929; 4930; CM-LABEL: v_log2_v4f16: 4931; CM: ; %bb.0: 4932; CM-NEXT: CF_END 4933; CM-NEXT: PAD 4934 %result = call <4 x half> @llvm.log2.v4f16(<4 x half> %in) 4935 ret <4 x half> %result 4936} 4937; Variant of v_log2_v4f16: the llvm.log2.v4f16 call below carries the fast flag. 4938define <4 x half> @v_log2_v4f16_fast(<4 x half> %in) { 4939; SI-SDAG-LABEL: v_log2_v4f16_fast: 4940; SI-SDAG: ; %bb.0: 4941; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4942; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4943; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4944; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 4945; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 4946; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4947; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4948; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4949; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 4950; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 4951; SI-SDAG-NEXT: 
v_log_f32_e32 v1, v1 4952; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 4953; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 4954; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 4955; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 4956; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 4957; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3 4958; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 4959; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 4960; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 4961; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 4962; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4963; 4964; SI-GISEL-LABEL: v_log2_v4f16_fast: 4965; SI-GISEL: ; %bb.0: 4966; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4967; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 4968; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 4969; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 4970; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 4971; SI-GISEL-NEXT: v_log_f32_e32 v0, v0 4972; SI-GISEL-NEXT: v_log_f32_e32 v1, v1 4973; SI-GISEL-NEXT: v_log_f32_e32 v2, v2 4974; SI-GISEL-NEXT: v_log_f32_e32 v3, v3 4975; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 4976; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 4977; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 4978; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 4979; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4980; 4981; VI-SDAG-LABEL: v_log2_v4f16_fast: 4982; VI-SDAG: ; %bb.0: 4983; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4984; VI-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4985; VI-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4986; VI-SDAG-NEXT: v_log_f16_e32 v0, v0 4987; VI-SDAG-NEXT: v_log_f16_e32 v1, v1 4988; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v3 4989; VI-SDAG-NEXT: v_or_b32_e32 v1, v1, v2 4990; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4991; 4992; VI-GISEL-LABEL: v_log2_v4f16_fast: 4993; VI-GISEL: ; %bb.0: 4994; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4995; VI-GISEL-NEXT: v_log_f16_e32 v2, v0 4996; VI-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:WORD_1 
dst_unused:UNUSED_PAD src0_sel:WORD_1 4997; VI-GISEL-NEXT: v_log_f16_e32 v3, v1 4998; VI-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 4999; VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 5000; VI-GISEL-NEXT: v_or_b32_e32 v1, v3, v1 5001; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 5002; 5003; GFX900-SDAG-LABEL: v_log2_v4f16_fast: 5004; GFX900-SDAG: ; %bb.0: 5005; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5006; GFX900-SDAG-NEXT: v_log_f16_sdwa v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 5007; GFX900-SDAG-NEXT: v_log_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 5008; GFX900-SDAG-NEXT: v_log_f16_e32 v0, v0 5009; GFX900-SDAG-NEXT: v_log_f16_e32 v1, v1 5010; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3 5011; GFX900-SDAG-NEXT: v_pack_b32_f16 v1, v1, v2 5012; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 5013; 5014; GFX900-GISEL-LABEL: v_log2_v4f16_fast: 5015; GFX900-GISEL: ; %bb.0: 5016; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5017; GFX900-GISEL-NEXT: v_log_f16_e32 v2, v0 5018; GFX900-GISEL-NEXT: v_log_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 5019; GFX900-GISEL-NEXT: v_log_f16_e32 v3, v1 5020; GFX900-GISEL-NEXT: v_log_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 5021; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v2, v0 5022; GFX900-GISEL-NEXT: v_pack_b32_f16 v1, v3, v1 5023; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 5024; 5025; GFX1100-SDAG-LABEL: v_log2_v4f16_fast: 5026; GFX1100-SDAG: ; %bb.0: 5027; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5028; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5029; GFX1100-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1 5030; GFX1100-SDAG-NEXT: v_log_f16_e32 v1, v1 5031; GFX1100-SDAG-NEXT: v_log_f16_e32 v0, v0 5032; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5033; GFX1100-SDAG-NEXT: v_log_f16_e32 v2, v2 5034; GFX1100-SDAG-NEXT: v_log_f16_e32 v3, v3 
5035; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff 5036; GFX1100-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2 5037; GFX1100-SDAG-NEXT: v_pack_b32_f16 v1, v1, v3 5038; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 5039; 5040; GFX1100-GISEL-LABEL: v_log2_v4f16_fast: 5041; GFX1100-GISEL: ; %bb.0: 5042; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5043; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5044; GFX1100-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 5045; GFX1100-GISEL-NEXT: v_log_f16_e32 v0, v0 5046; GFX1100-GISEL-NEXT: v_log_f16_e32 v1, v1 5047; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5048; GFX1100-GISEL-NEXT: v_log_f16_e32 v2, v2 5049; GFX1100-GISEL-NEXT: v_log_f16_e32 v3, v3 5050; GFX1100-GISEL-NEXT: s_waitcnt_depctr 0xfff 5051; GFX1100-GISEL-NEXT: v_pack_b32_f16 v0, v0, v2 5052; GFX1100-GISEL-NEXT: v_pack_b32_f16 v1, v1, v3 5053; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 5054; 5055; R600-LABEL: v_log2_v4f16_fast: 5056; R600: ; %bb.0: 5057; R600-NEXT: CF_END 5058; R600-NEXT: PAD 5059; 5060; CM-LABEL: v_log2_v4f16_fast: 5061; CM: ; %bb.0: 5062; CM-NEXT: CF_END 5063; CM-NEXT: PAD 5064 %result = call fast <4 x half> @llvm.log2.v4f16(<4 x half> %in) 5065 ret <4 x half> %result 5066} 5067; Declarations of the fabs and log2 intrinsics for float and half types; each one uses attribute group #2. 5068declare float @llvm.fabs.f32(float) #2 5069declare float @llvm.log2.f32(float) #2 5070declare <2 x float> @llvm.log2.v2f32(<2 x float>) #2 5071declare <3 x float> @llvm.log2.v3f32(<3 x float>) #2 5072declare <4 x float> @llvm.log2.v4f32(<4 x float>) #2 5073declare half @llvm.fabs.f16(half) #2 5074declare half @llvm.log2.f16(half) #2 5075declare <2 x half> @llvm.log2.v2f16(<2 x half>) #2 5076declare <3 x half> @llvm.log2.v3f16(<3 x half>) #2 5077declare <4 x half> @llvm.log2.v4f16(<4 x half>) #2 5078declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 5079; Attribute groups: #0 and #1 each set a denormal-fp-math-f32 mode; #2 is the group attached to the intrinsic declarations. 5080attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } 5081attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } 5082attributes #2 = { nocallback nofree 
nosync nounwind speculatable willreturn memory(none) } 5083