; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,VI,VI-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,VI,VI-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG,GFX900,GFX900-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL,GFX900,GFX900-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=SI,SI-GISEL %s

; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
; RUN: llc -mtriple=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM %s

; Kernel that takes a single f32 argument, evaluates llvm.exp.f32 on it and
; stores the result through the addrspace(1) pointer %out. The check lines
; inside the function are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.
define amdgpu_kernel void @s_exp_f32(ptr addrspace(1) %out, float %in) {
; VI-SDAG-LABEL: s_exp_f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: s_and_b32 s0, s2, 0xfffff000
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s0
; VI-SDAG-NEXT: v_sub_f32_e32 v1, s2, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1
; VI-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0
; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3
; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x39a3b295
; VI-SDAG-NEXT: v_rndne_f32_e32 v2, v0
; VI-SDAG-NEXT: v_mul_f32_e32 v3, s0, v3
; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2
; VI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1
; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1
; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v2
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: s_exp_f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2
; VI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v0
; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3
; VI-GISEL-NEXT: v_mul_f32_e32 v1, s0, v1
; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2
; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v0
; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v2
; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1
; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2
; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
; GFX900-SDAG-LABEL: s_exp_f32:
; GFX900-SDAG: ; %bb.0:
; GFX900-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0
; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2
; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v2
; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0
; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0
; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x7f800000
; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0
; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v1
; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v1
; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc
; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1]
; GFX900-SDAG-NEXT: s_endpgm
;
; GFX900-GISEL-LABEL: s_exp_f32:
; GFX900-GISEL: ; %bb.0:
; GFX900-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX900-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0
; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v0, -v2
; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2
; GFX900-GISEL-NEXT: v_fma_f32 v0, s2, v1, v0
; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX900-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX900-GISEL-NEXT: s_endpgm
;
; SI-SDAG-LABEL: s_exp_f32:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f
; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0
; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2
; SI-SDAG-NEXT: v_fma_f32 v0, s6, v0, -v2
; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3
; SI-SDAG-NEXT: v_fma_f32 v0, s6, v1, v0
; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0
; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0
; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v3
; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-SDAG-NEXT: s_mov_b32 s2, -1
; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0
; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s6, v1
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42b17218
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s6, v1
; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: s_exp_f32:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dword s2, s[4:5], 0xb
; SI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0
; SI-GISEL-NEXT: v_fma_f32 v0, s2, v0, -v2
; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2
; SI-GISEL-NEXT: v_fma_f32 v0, s2, v1, v0
; SI-GISEL-NEXT: v_sub_f32_e32 v1, v2, v3
; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0
; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v3
; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0
; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v2
; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000
; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218
; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v1
; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; R600-LABEL: s_exp_f32:
; R600: ; %bb.0:
; R600-NEXT: ALU 59, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT: CF_END
; R600-NEXT: PAD
; R600-NEXT: ALU clause starting at 4:
; R600-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
; R600-NEXT: -4096(nan), 0(0.000000e+00)
; R600-NEXT: ADD T1.W, KC0[2].Z, -PV.W,
; R600-NEXT: MUL_IEEE * T2.W, PV.W, literal.x,
; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
; R600-NEXT: RNDNE T3.W, PS,
; R600-NEXT: MUL_IEEE * T4.W, PV.W, literal.x,
; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PS,
; R600-NEXT: TRUNC * T4.W, PV.W,
; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
; R600-NEXT: FLT_TO_INT T0.Z, PS,
; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W,
; R600-NEXT: ADD * T1.W, T2.W, -T3.W,
; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
; R600-NEXT: ADD T1.Z, PS, PV.W,
; R600-NEXT: MAX_INT T0.W, PV.Z, literal.x,
; R600-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
; R600-NEXT: -330(nan), 381(5.338947e-43)
; R600-NEXT: ADD_INT T0.X, PS, literal.x,
; R600-NEXT: ADD_INT T0.Y, PV.W, literal.y,
; R600-NEXT: ADD_INT T2.Z, T0.Z, literal.z,
; R600-NEXT: SETGT_UINT T0.W, T0.Z, literal.w,
; R600-NEXT: EXP_IEEE * T1.X, PV.Z,
; R600-NEXT: -254(nan), 204(2.858649e-43)
; R600-NEXT: 102(1.429324e-43), -229(nan)
; R600-NEXT: ADD_INT T2.X, T0.Z, literal.x,
; R600-NEXT: SETGT_UINT T1.Y, T0.Z, literal.y,
; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
; R600-NEXT: SETGT_INT T1.W, T0.Z, literal.x,
; R600-NEXT: MUL_IEEE * T2.W, PS, literal.z,
; R600-NEXT: -127(nan), 254(3.559298e-43)
; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
; R600-NEXT: MUL_IEEE T3.X, PS, literal.x,
; R600-NEXT: MUL_IEEE T0.Y, T1.X, literal.y,
; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T0.Z,
; R600-NEXT: CNDE_INT T3.W, PV.Y, PV.X, T0.X,
; R600-NEXT: SETGT_INT * T4.W, T0.Z, literal.z,
; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38)
; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00)
; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W,
; R600-NEXT: MUL_IEEE T3.W, PV.Y, literal.x,
; R600-NEXT: CNDE_INT * T0.W, T0.W, PV.X, T2.W,
; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
; R600-NEXT: CNDE_INT T1.Z, T1.W, PS, T1.X,
; R600-NEXT: CNDE_INT T0.W, T1.Y, T0.Y, PV.W,
; R600-NEXT: LSHL * T1.W, PV.Z, literal.x,
; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; R600-NEXT: ADD_INT T1.W, PS, literal.x,
; R600-NEXT: CNDE_INT * T0.W, T4.W, PV.Z, PV.W,
; R600-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; R600-NEXT: MUL_IEEE T0.W, PS, PV.W,
; R600-NEXT: SETGT * T1.W, literal.x, KC0[2].Z,
; R600-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0,
; R600-NEXT: SETGT * T1.W, KC0[2].Z, literal.x,
; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x,
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; R600-NEXT: 2139095040(INF), 2(2.802597e-45)
;
; CM-LABEL: s_exp_f32:
; CM: ; %bb.0:
; CM-NEXT: ALU 62, @4, KC0[CB0:0-32], KC1[]
; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
; CM-NEXT: -4096(nan), 0(0.000000e+00)
; CM-NEXT: ADD * T1.W, KC0[2].Z, -PV.W,
; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x,
; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y,
; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00)
; CM-NEXT: RNDNE T1.Z, PV.W,
; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z,
; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00)
; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W,
; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212
; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00)
; CM-NEXT: TRUNC T1.Z, T1.Z,
; CM-NEXT: ADD * T0.W, PV.W, PV.Z,
; CM-NEXT: EXP_IEEE T0.X, T0.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT: FLT_TO_INT T0.Z, T1.Z,
; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x,
; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00)
; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x,
; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y,
; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z,
; CM-NEXT: 209715200(1.972152e-31), -330(nan)
; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00)
; CM-NEXT: ADD_INT T1.X, PV.W, literal.x,
; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y,
; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z,
; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w,
; CM-NEXT: -254(nan), 204(2.858649e-43)
; CM-NEXT: 102(1.429324e-43), -229(nan)
; CM-NEXT: ADD_INT T2.X, T0.Z, literal.x,
; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y,
; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z,
; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x,
; CM-NEXT: -127(nan), 254(3.559298e-43)
; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x,
; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z,
; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X,
; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y,
; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43)
; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z,
; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x,
; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W,
; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00)
; CM-NEXT: CNDE_INT T0.Y, T2.W, PV.W, T0.X,
; CM-NEXT: CNDE_INT T0.Z, T2.Y, T3.X, PV.Z,
; CM-NEXT: LSHL * T0.W, PV.Y, literal.x,
; CM-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; CM-NEXT: ADD_INT T1.Z, PV.W, literal.x,
; CM-NEXT: CNDE_INT * T0.W, T3.W, PV.Y, PV.Z,
; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00)
; CM-NEXT: MUL_IEEE T0.Z, PV.W, PV.Z,
; CM-NEXT: SETGT * T0.W, literal.x, KC0[2].Z,
; CM-NEXT: -1026650416(-1.032789e+02), 0(0.000000e+00)
; CM-NEXT: CNDE T0.Z, PV.W, PV.Z, 0.0,
; CM-NEXT: SETGT * T0.W, KC0[2].Z, literal.x,
; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00)
; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x,
; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %exp = call float @llvm.exp.f32(float %in)
  store float %exp, ptr addrspace(1) %out
  ret void
}

; FIXME: We should be able to merge these packets together on Cayman so we
; have a maximum of 4 instructions.
338define amdgpu_kernel void @s_exp_v2f32(ptr addrspace(1) %out, <2 x float> %in) { 339; VI-SDAG-LABEL: s_exp_v2f32: 340; VI-SDAG: ; %bb.0: 341; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 342; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 343; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 344; VI-SDAG-NEXT: s_and_b32 s4, s3, 0xfffff000 345; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4 346; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2 347; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 348; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 349; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 350; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295 351; VI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v0 352; VI-SDAG-NEXT: v_mul_f32_e32 v5, s4, v4 353; VI-SDAG-NEXT: s_and_b32 s4, s2, 0xfffff000 354; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 355; VI-SDAG-NEXT: v_mov_b32_e32 v6, s4 356; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 357; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 358; VI-SDAG-NEXT: v_sub_f32_e32 v6, s2, v6 359; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 360; VI-SDAG-NEXT: v_mul_f32_e32 v0, s4, v0 361; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v6 362; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6 363; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 364; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 365; VI-SDAG-NEXT: v_rndne_f32_e32 v5, v0 366; VI-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 367; VI-SDAG-NEXT: v_mul_f32_e32 v4, s4, v4 368; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5 369; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 370; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 371; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 372; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v5 373; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 374; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 375; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v2 376; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218 377; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 378; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x7f800000 379; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3 380; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 381; VI-SDAG-NEXT: 
v_ldexp_f32 v0, v0, v4 382; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v2 383; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 384; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3 385; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 386; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc 387; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 388; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 389; VI-SDAG-NEXT: s_endpgm 390; 391; VI-GISEL-LABEL: s_exp_v2f32: 392; VI-GISEL: ; %bb.0: 393; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 394; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 395; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x39a3b295 396; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 397; VI-GISEL-NEXT: s_and_b32 s4, s2, 0xfffff000 398; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 399; VI-GISEL-NEXT: v_sub_f32_e32 v2, s2, v2 400; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 401; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 402; VI-GISEL-NEXT: v_mul_f32_e32 v3, s4, v0 403; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 404; VI-GISEL-NEXT: v_mul_f32_e32 v4, s4, v1 405; VI-GISEL-NEXT: s_and_b32 s4, s3, 0xfffff000 406; VI-GISEL-NEXT: v_mov_b32_e32 v5, s4 407; VI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2 408; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 409; VI-GISEL-NEXT: v_sub_f32_e32 v5, s3, v5 410; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 411; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x39a3b295, v5 412; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5 413; VI-GISEL-NEXT: v_add_f32_e32 v2, v3, v2 414; VI-GISEL-NEXT: v_mul_f32_e32 v0, s4, v0 415; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 416; VI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v1 417; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 418; VI-GISEL-NEXT: v_exp_f32_e32 v2, v2 419; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v5 420; VI-GISEL-NEXT: v_rndne_f32_e32 v5, v0 421; VI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v5 422; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 423; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v5 424; VI-GISEL-NEXT: v_exp_f32_e32 v5, v0 425; VI-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 426; VI-GISEL-NEXT: 
v_mov_b32_e32 v3, 0xc2ce8ed0 427; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 428; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 429; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc 430; VI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 431; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 432; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc 433; VI-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 434; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v3 435; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 436; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v4 437; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 438; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 439; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 440; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 441; VI-GISEL-NEXT: s_endpgm 442; 443; GFX900-SDAG-LABEL: s_exp_v2f32: 444; GFX900-SDAG: ; %bb.0: 445; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 446; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 447; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f 448; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0 449; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) 450; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 451; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 452; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 453; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 454; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 455; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0 456; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 457; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6 458; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6 459; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 460; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 461; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7 462; GFX900-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 463; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v8, v0 464; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 465; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v7 466; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 467; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5 468; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42b17218 469; 
GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 470; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000 471; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v3 472; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v2, vcc 473; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v6 474; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5 475; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 476; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v3 477; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 478; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc 479; GFX900-SDAG-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] 480; GFX900-SDAG-NEXT: s_endpgm 481; 482; GFX900-GISEL-LABEL: s_exp_v2f32: 483; GFX900-GISEL: ; %bb.0: 484; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 485; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 486; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f 487; GFX900-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 488; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) 489; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0 490; GFX900-GISEL-NEXT: v_fma_f32 v3, s2, v0, -v2 491; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v2 492; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s3, v0 493; GFX900-GISEL-NEXT: v_fma_f32 v3, s2, v1, v3 494; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 495; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v0, -v5 496; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 497; GFX900-GISEL-NEXT: v_fma_f32 v0, s3, v1, v0 498; GFX900-GISEL-NEXT: v_rndne_f32_e32 v1, v5 499; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 500; GFX900-GISEL-NEXT: v_exp_f32_e32 v2, v2 501; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v1 502; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v5, v0 503; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 504; GFX900-GISEL-NEXT: v_exp_f32_e32 v5, v0 505; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 506; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v2, v3 507; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 508; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 509; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc 510; 
GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 511; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc 512; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v5, v1 513; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 514; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 515; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 516; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 517; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0 518; GFX900-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 519; GFX900-GISEL-NEXT: s_endpgm 520; 521; SI-SDAG-LABEL: s_exp_v2f32: 522; SI-SDAG: ; %bb.0: 523; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 524; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 525; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f 526; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 527; SI-SDAG-NEXT: s_mov_b32 s6, -1 528; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 529; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 530; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 531; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 532; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 533; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 534; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 535; SI-SDAG-NEXT: v_mul_f32_e32 v5, s2, v0 536; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 537; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 538; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5 539; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v5 540; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6 541; SI-SDAG-NEXT: v_fma_f32 v0, s2, v1, v0 542; SI-SDAG-NEXT: v_add_f32_e32 v0, v7, v0 543; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 544; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v5, v6 545; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3 546; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 547; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v3 548; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218 549; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 550; SI-SDAG-NEXT: v_mov_b32_e32 v6, 0x7f800000 551; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v4 552; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc 553; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v5 554; SI-SDAG-NEXT: 
v_cmp_nlt_f32_e32 vcc, s2, v3 555; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 556; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4 557; SI-SDAG-NEXT: s_mov_b32 s4, s0 558; SI-SDAG-NEXT: s_mov_b32 s5, s1 559; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc 560; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 561; SI-SDAG-NEXT: s_endpgm 562; 563; SI-GISEL-LABEL: s_exp_v2f32: 564; SI-GISEL: ; %bb.0: 565; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 566; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 567; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x32a5705f 568; SI-GISEL-NEXT: v_mov_b32_e32 v6, 0x7f800000 569; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 570; SI-GISEL-NEXT: v_mul_f32_e32 v2, s2, v0 571; SI-GISEL-NEXT: v_fma_f32 v3, s2, v0, -v2 572; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v2 573; SI-GISEL-NEXT: v_mul_f32_e32 v5, s3, v0 574; SI-GISEL-NEXT: v_fma_f32 v3, s2, v1, v3 575; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v4 576; SI-GISEL-NEXT: v_fma_f32 v0, s3, v0, -v5 577; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 578; SI-GISEL-NEXT: v_fma_f32 v0, s3, v1, v0 579; SI-GISEL-NEXT: v_rndne_f32_e32 v1, v5 580; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 581; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 582; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v1 583; SI-GISEL-NEXT: v_add_f32_e32 v0, v5, v0 584; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 585; SI-GISEL-NEXT: v_exp_f32_e32 v5, v0 586; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 587; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3 588; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 589; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 590; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc 591; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 592; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v6, vcc 593; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v5, v1 594; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 595; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 596; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v3 597; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 598; SI-GISEL-NEXT: s_mov_b32 s2, 
-1 599; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 600; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 601; SI-GISEL-NEXT: s_endpgm 602; 603; R600-LABEL: s_exp_v2f32: 604; R600: ; %bb.0: 605; R600-NEXT: ALU 96, @4, KC0[CB0:0-32], KC1[] 606; R600-NEXT: ALU 12, @101, KC0[CB0:0-32], KC1[] 607; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1 608; R600-NEXT: CF_END 609; R600-NEXT: ALU clause starting at 4: 610; R600-NEXT: AND_INT * T0.W, KC0[3].X, literal.x, 611; R600-NEXT: -4096(nan), 0(0.000000e+00) 612; R600-NEXT: ADD * T1.W, KC0[3].X, -PV.W, 613; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x, 614; R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y, 615; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 616; R600-NEXT: RNDNE T0.Z, PS, 617; R600-NEXT: MULADD_IEEE T1.W, T1.W, literal.x, PV.W, 618; R600-NEXT: AND_INT * T2.W, KC0[2].W, literal.y, 619; R600-NEXT: 1069064192(1.442383e+00), -4096(nan) 620; R600-NEXT: ADD T1.Z, KC0[2].W, -PS, 621; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PV.W, 622; R600-NEXT: ADD * T1.W, T3.W, -PV.Z, 623; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) 624; R600-NEXT: ADD T2.Z, PS, PV.W, 625; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x, 626; R600-NEXT: MUL_IEEE * T1.W, T2.W, literal.y, 627; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 628; R600-NEXT: RNDNE T0.Y, PS, 629; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.x, PV.W, 630; R600-NEXT: TRUNC T0.W, T0.Z, BS:VEC_120/SCL_212 631; R600-NEXT: EXP_IEEE * T0.X, PV.Z, 632; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 633; R600-NEXT: FLT_TO_INT T1.Y, PV.W, 634; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x, 635; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.y, PV.Z, 636; R600-NEXT: ADD * T1.W, T1.W, -PV.Y, 637; R600-NEXT: 209715200(1.972152e-31), 967029397(3.122284e-04) 638; R600-NEXT: ADD T1.Z, PS, PV.W, 639; R600-NEXT: MUL_IEEE T0.W, PV.Z, literal.x, 640; R600-NEXT: SETGT_UINT * T1.W, PV.Y, literal.y, 641; R600-NEXT: 209715200(1.972152e-31), -229(nan) 642; 
R600-NEXT: CNDE_INT T0.Z, PS, PV.W, T0.Z, 643; R600-NEXT: SETGT_INT T0.W, T1.Y, literal.x, 644; R600-NEXT: EXP_IEEE * T1.X, PV.Z, 645; R600-NEXT: -127(nan), 0(0.000000e+00) 646; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.X, 647; R600-NEXT: MAX_INT T2.W, T1.Y, literal.x, 648; R600-NEXT: MUL_IEEE * T3.W, PS, literal.y, 649; R600-NEXT: -330(nan), 209715200(1.972152e-31) 650; R600-NEXT: MUL_IEEE T2.X, PS, literal.x, 651; R600-NEXT: ADD_INT T2.Y, PV.W, literal.y, 652; R600-NEXT: ADD_INT T1.Z, T1.Y, literal.z, 653; R600-NEXT: MIN_INT T2.W, T1.Y, literal.w, 654; R600-NEXT: TRUNC * T4.W, T0.Y, 655; R600-NEXT: 209715200(1.972152e-31), 204(2.858649e-43) 656; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43) 657; R600-NEXT: FLT_TO_INT T3.X, PS, 658; R600-NEXT: ADD_INT T0.Y, PV.W, literal.x, 659; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y, 660; R600-NEXT: SETGT_UINT T2.W, T1.Y, literal.z, 661; R600-NEXT: CNDE_INT * T1.W, T1.W, PV.Y, PV.Z, 662; R600-NEXT: -254(nan), -127(nan) 663; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) 664; R600-NEXT: MUL_IEEE T4.X, T1.X, literal.x, 665; R600-NEXT: MUL_IEEE T2.Y, T0.X, literal.x, BS:VEC_120/SCL_212 666; R600-NEXT: CNDE_INT T1.Z, T0.W, PS, T1.Y, 667; R600-NEXT: CNDE_INT T0.W, PV.W, PV.Z, PV.Y, 668; R600-NEXT: MAX_INT * T1.W, PV.X, literal.y, 669; R600-NEXT: 2130706432(1.701412e+38), -330(nan) 670; R600-NEXT: SETGT_INT T0.X, T1.Y, literal.x, 671; R600-NEXT: ADD_INT T0.Y, PS, literal.y, 672; R600-NEXT: ADD_INT T2.Z, T3.X, literal.z, 673; R600-NEXT: SETGT_UINT * T1.W, T3.X, literal.w, 674; R600-NEXT: 127(1.779649e-43), 204(2.858649e-43) 675; R600-NEXT: 102(1.429324e-43), -229(nan) 676; R600-NEXT: MIN_INT * T4.W, T3.X, literal.x, 677; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) 678; R600-NEXT: ADD_INT T5.X, PV.W, literal.x, 679; R600-NEXT: ADD_INT T1.Y, T3.X, literal.y, 680; R600-NEXT: SETGT_UINT T3.Z, T3.X, literal.z, 681; R600-NEXT: CNDE_INT T4.W, T1.W, T0.Y, T2.Z, 682; R600-NEXT: SETGT_INT * T5.W, T3.X, literal.y, 683; R600-NEXT: -254(nan), 
-127(nan) 684; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) 685; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T3.X, 686; R600-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, PV.X, 687; R600-NEXT: SETGT_INT T2.Z, T3.X, literal.x, 688; R600-NEXT: CNDE_INT T0.W, T0.X, T1.Z, T0.W, BS:VEC_120/SCL_212 689; R600-NEXT: MUL_IEEE * T4.W, T2.Y, literal.y, 690; R600-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38) 691; R600-NEXT: CNDE_INT T3.X, T2.W, T2.Y, PS, BS:VEC_120/SCL_212 692; R600-NEXT: LSHL T1.Y, PV.W, literal.x, 693; R600-NEXT: CNDE_INT T1.Z, PV.Z, PV.X, PV.Y, 694; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y, 695; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W, 696; R600-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38) 697; R600-NEXT: CNDE_INT T1.X, T5.W, PS, T1.X, BS:VEC_021/SCL_122 698; R600-NEXT: CNDE_INT T0.Y, T3.Z, T4.X, PV.W, BS:VEC_201 699; R600-NEXT: LSHL T1.Z, PV.Z, literal.x, 700; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y, 701; R600-NEXT: CNDE_INT * T1.W, T0.X, T0.Z, PV.X, 702; R600-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) 703; R600-NEXT: MUL_IEEE T1.Y, PS, PV.W, 704; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].X, 705; R600-NEXT: ADD_INT * T0.W, PV.Z, literal.y, 706; R600-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00) 707; R600-NEXT: ALU clause starting at 101: 708; R600-NEXT: CNDE_INT * T1.W, T2.Z, T1.X, T0.Y, 709; R600-NEXT: MUL_IEEE T0.Y, PV.W, T0.W, 710; R600-NEXT: SETGT T1.Z, literal.x, KC0[2].W, 711; R600-NEXT: CNDE T0.W, T0.Z, T1.Y, 0.0, 712; R600-NEXT: SETGT * T1.W, KC0[3].X, literal.y, 713; R600-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01) 714; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x, 715; R600-NEXT: CNDE T0.W, PV.Z, PV.Y, 0.0, 716; R600-NEXT: SETGT * T1.W, KC0[2].W, literal.y, 717; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01) 718; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x, 719; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y, 720; R600-NEXT: 2139095040(INF), 2(2.802597e-45) 721; 722; CM-LABEL: s_exp_v2f32: 723; CM: ; %bb.0: 724; CM-NEXT: 
ALU 98, @4, KC0[CB0:0-32], KC1[] 725; CM-NEXT: ALU 18, @103, KC0[CB0:0-32], KC1[] 726; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 727; CM-NEXT: CF_END 728; CM-NEXT: ALU clause starting at 4: 729; CM-NEXT: AND_INT * T0.W, KC0[2].W, literal.x, 730; CM-NEXT: -4096(nan), 0(0.000000e+00) 731; CM-NEXT: ADD * T1.W, KC0[2].W, -PV.W, 732; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, 733; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y, 734; CM-NEXT: AND_INT * T2.W, KC0[3].X, literal.z, 735; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 736; CM-NEXT: -4096(nan), 0(0.000000e+00) 737; CM-NEXT: ADD T1.Y, KC0[3].X, -PV.W, 738; CM-NEXT: RNDNE T1.Z, PV.Z, 739; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y, 740; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 741; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W, 742; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z, 743; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x, 744; CM-NEXT: MUL_IEEE * T0.W, T2.W, literal.y, BS:VEC_120/SCL_212 745; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 746; CM-NEXT: TRUNC T1.X, T1.Z, 747; CM-NEXT: RNDNE T2.Y, PV.W, 748; CM-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, PV.Z, 749; CM-NEXT: ADD * T1.W, PV.Y, PV.X, 750; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 751; CM-NEXT: EXP_IEEE T0.X, T1.W, 752; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, 753; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, 754; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, 755; CM-NEXT: MULADD_IEEE T2.X, T2.W, literal.x, T0.Z, 756; CM-NEXT: ADD T0.Y, T0.W, -T2.Y, BS:VEC_120/SCL_212 757; CM-NEXT: FLT_TO_INT T0.Z, T1.X, 758; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y, 759; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31) 760; CM-NEXT: MUL_IEEE T1.X, PV.W, literal.x, 761; CM-NEXT: SETGT_UINT T1.Y, PV.Z, literal.y, 762; CM-NEXT: TRUNC T1.Z, T2.Y, 763; CM-NEXT: ADD * T1.W, PV.Y, PV.X, 764; CM-NEXT: 209715200(1.972152e-31), -229(nan) 765; CM-NEXT: EXP_IEEE T0.X (MASKED), T1.W, 766; CM-NEXT: EXP_IEEE T0.Y, T1.W, 767; CM-NEXT: EXP_IEEE T0.Z 
(MASKED), T1.W, 768; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, 769; CM-NEXT: FLT_TO_INT T2.X, T1.Z, 770; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x, 771; CM-NEXT: CNDE_INT T1.Z, T1.Y, T1.X, T0.W, 772; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212 773; CM-NEXT: 209715200(1.972152e-31), -127(nan) 774; CM-NEXT: CNDE_INT T1.X, PV.W, PV.Z, T0.X, 775; CM-NEXT: MUL_IEEE T3.Y, PV.Y, literal.x, 776; CM-NEXT: SETGT_UINT T1.Z, PV.X, literal.y, 777; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.z, 778; CM-NEXT: 209715200(1.972152e-31), -229(nan) 779; CM-NEXT: -330(nan), 0(0.000000e+00) 780; CM-NEXT: ADD_INT T3.X, PV.W, literal.x, 781; CM-NEXT: ADD_INT T4.Y, T0.Z, literal.y, 782; CM-NEXT: CNDE_INT T2.Z, PV.Z, PV.Y, T2.Y, 783; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z, 784; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) 785; CM-NEXT: -127(nan), 0(0.000000e+00) 786; CM-NEXT: CNDE_INT T4.X, PV.W, PV.Z, T0.Y, 787; CM-NEXT: MUL_IEEE T2.Y, T0.X, literal.x, 788; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y, BS:VEC_120/SCL_212 789; CM-NEXT: CNDE_INT * T2.W, T1.Y, PV.X, PV.Y, 790; CM-NEXT: 2130706432(1.701412e+38), -330(nan) 791; CM-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.Z, 792; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.x, 793; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y, 794; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z, 795; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) 796; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) 797; CM-NEXT: ADD_INT T3.X, PV.W, literal.x, 798; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y, 799; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z, 800; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z, 801; CM-NEXT: -254(nan), -127(nan) 802; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00) 803; CM-NEXT: MUL_IEEE T5.X, T0.Y, literal.x, 804; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T2.X, 805; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X, 806; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y, 807; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43) 808; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x, 809; CM-NEXT: ADD_INT T1.Y, 
PV.W, literal.y, 810; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z, 811; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w, 812; CM-NEXT: 127(1.779649e-43), -254(nan) 813; CM-NEXT: -127(nan), 254(3.559298e-43) 814; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, PV.Y, 815; CM-NEXT: SETGT_INT T1.Y, T0.Z, literal.x, 816; CM-NEXT: CNDE_INT T0.Z, PV.X, T0.Y, T1.Z, 817; CM-NEXT: MUL_IEEE * T1.W, T5.X, literal.y, 818; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38) 819; CM-NEXT: CNDE_INT T5.X, T3.Z, T5.X, PV.W, 820; CM-NEXT: LSHL T0.Y, PV.Z, literal.x, 821; CM-NEXT: CNDE_INT T0.Z, PV.Y, T0.X, PV.X, BS:VEC_021/SCL_122 822; CM-NEXT: MUL_IEEE * T1.W, T2.Y, literal.y, 823; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38) 824; CM-NEXT: CNDE_INT T0.X, T0.W, T2.Y, PV.W, 825; CM-NEXT: LSHL T2.Y, PV.Z, literal.x, 826; CM-NEXT: ADD_INT * T0.Z, PV.Y, literal.y, 827; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) 828; CM-NEXT: ALU clause starting at 103: 829; CM-NEXT: CNDE_INT * T0.W, T2.X, T4.X, T5.X, 830; CM-NEXT: MUL_IEEE T2.X, PV.W, T0.Z, 831; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].X, 832; CM-NEXT: ADD_INT T0.Z, T2.Y, literal.y, 833; CM-NEXT: CNDE_INT * T0.W, T1.Y, T1.X, T0.X, BS:VEC_120/SCL_212 834; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00) 835; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, 836; CM-NEXT: SETGT T1.Y, literal.x, KC0[2].W, 837; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, 838; CM-NEXT: SETGT * T0.W, KC0[3].X, literal.y, 839; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01) 840; CM-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.x, 841; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, 842; CM-NEXT: SETGT * T0.W, KC0[2].W, literal.y, 843; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01) 844; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, 845; CM-NEXT: 2139095040(INF), 0(0.000000e+00) 846; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 847; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 848 %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %in) 849 store <2 x float> %result, ptr 
addrspace(1) %out 850 ret void 851} 852 853define amdgpu_kernel void @s_exp_v3f32(ptr addrspace(1) %out, <3 x float> %in) { 854; VI-SDAG-LABEL: s_exp_v3f32: 855; VI-SDAG: ; %bb.0: 856; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 857; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 858; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 859; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 860; VI-SDAG-NEXT: s_and_b32 s3, s2, 0xfffff000 861; VI-SDAG-NEXT: v_mov_b32_e32 v2, s3 862; VI-SDAG-NEXT: v_sub_f32_e32 v2, s2, v2 863; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 864; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 865; VI-SDAG-NEXT: v_mul_f32_e32 v1, s3, v0 866; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 867; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295 868; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 869; VI-SDAG-NEXT: v_mul_f32_e32 v5, s3, v4 870; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 871; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 872; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 873; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 874; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 875; VI-SDAG-NEXT: s_and_b32 s3, s1, 0xfffff000 876; VI-SDAG-NEXT: v_mov_b32_e32 v7, s3 877; VI-SDAG-NEXT: v_sub_f32_e32 v7, s1, v7 878; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 879; VI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 880; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7 881; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7 882; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v2 883; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8 884; VI-SDAG-NEXT: v_mul_f32_e32 v8, s3, v4 885; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 886; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 887; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 888; VI-SDAG-NEXT: v_exp_f32_e32 v7, v2 889; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 890; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 891; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 892; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218 893; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 894; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 895; VI-SDAG-NEXT: 
v_cmp_ngt_f32_e32 vcc, s2, v5 896; VI-SDAG-NEXT: s_and_b32 s2, s0, 0xfffff000 897; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc 898; VI-SDAG-NEXT: v_ldexp_f32 v1, v7, v6 899; VI-SDAG-NEXT: v_mov_b32_e32 v7, s2 900; VI-SDAG-NEXT: v_sub_f32_e32 v7, s0, v7 901; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 902; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v7 903; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7 904; VI-SDAG-NEXT: v_rndne_f32_e32 v6, v0 905; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v9 906; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 907; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v6 908; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 909; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 910; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 911; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v6 912; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3 913; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 914; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v5 915; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 916; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 917; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3 918; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 919; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v5 920; VI-SDAG-NEXT: v_mov_b32_e32 v3, s4 921; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 922; VI-SDAG-NEXT: v_mov_b32_e32 v4, s5 923; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 924; VI-SDAG-NEXT: s_endpgm 925; 926; VI-GISEL-LABEL: s_exp_v3f32: 927; VI-GISEL: ; %bb.0: 928; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 929; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000 930; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295 931; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 932; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 933; VI-GISEL-NEXT: s_and_b32 s3, s0, 0xfffff000 934; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3 935; VI-GISEL-NEXT: v_sub_f32_e32 v0, s0, v0 936; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0 937; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 938; VI-GISEL-NEXT: v_mul_f32_e32 v3, s3, v1 939; VI-GISEL-NEXT: v_add_f32_e32 v0, 
v0, v4 940; VI-GISEL-NEXT: v_mul_f32_e32 v4, s3, v2 941; VI-GISEL-NEXT: s_and_b32 s3, s1, 0xfffff000 942; VI-GISEL-NEXT: v_mov_b32_e32 v5, s3 943; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0 944; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 945; VI-GISEL-NEXT: v_sub_f32_e32 v5, s1, v5 946; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v4 947; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v5 948; VI-GISEL-NEXT: v_mul_f32_e32 v5, 0x3fb8a000, v5 949; VI-GISEL-NEXT: v_add_f32_e32 v0, v3, v0 950; VI-GISEL-NEXT: v_mul_f32_e32 v6, s3, v1 951; VI-GISEL-NEXT: v_add_f32_e32 v5, v5, v7 952; VI-GISEL-NEXT: v_mul_f32_e32 v7, s3, v2 953; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 954; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 955; VI-GISEL-NEXT: v_add_f32_e32 v5, v7, v5 956; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6 957; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7 958; VI-GISEL-NEXT: v_add_f32_e32 v5, v6, v5 959; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 960; VI-GISEL-NEXT: v_exp_f32_e32 v5, v5 961; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 962; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 963; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v3 964; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 965; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 966; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v4 967; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000 968; VI-GISEL-NEXT: v_ldexp_f32 v5, v5, v6 969; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0 970; VI-GISEL-NEXT: v_sub_f32_e32 v6, s2, v6 971; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v6 972; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6 973; VI-GISEL-NEXT: v_mul_f32_e32 v1, s0, v1 974; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 975; VI-GISEL-NEXT: v_mul_f32_e32 v2, s0, v2 976; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v6 977; VI-GISEL-NEXT: v_rndne_f32_e32 v6, v1 978; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v6 979; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 980; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v6 981; VI-GISEL-NEXT: v_exp_f32_e32 v6, v1 982; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 983; VI-GISEL-NEXT: 
v_cndmask_b32_e32 v0, v0, v7, vcc 984; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v3 985; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc 986; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v4 987; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 988; VI-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 989; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v3 990; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc 991; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v4 992; VI-GISEL-NEXT: v_mov_b32_e32 v3, s4 993; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 994; VI-GISEL-NEXT: v_mov_b32_e32 v4, s5 995; VI-GISEL-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 996; VI-GISEL-NEXT: s_endpgm 997; 998; GFX900-SDAG-LABEL: s_exp_v3f32: 999; GFX900-SDAG: ; %bb.0: 1000; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 1001; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1002; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 1003; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f 1004; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218 1005; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1006; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s1, v0 1007; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v6 1008; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v6, v7 1009; GFX900-SDAG-NEXT: v_fma_f32 v6, s1, v0, -v6 1010; GFX900-SDAG-NEXT: v_fma_f32 v6, s1, v1, v6 1011; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s2, v0 1012; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 1013; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 1014; GFX900-SDAG-NEXT: v_fma_f32 v4, s2, v0, -v2 1015; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6 1016; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 1017; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 1018; GFX900-SDAG-NEXT: v_fma_f32 v4, s2, v1, v4 1019; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 1020; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 1021; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 1022; GFX900-SDAG-NEXT: v_ldexp_f32 v6, v6, v7 1023; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s0, v0 1024; GFX900-SDAG-NEXT: v_rndne_f32_e32 v9, v7 1025; GFX900-SDAG-NEXT: v_fma_f32 v0, 
s0, v0, -v7 1026; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9 1027; GFX900-SDAG-NEXT: v_fma_f32 v0, s0, v1, v0 1028; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 1029; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 1030; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v10, v0 1031; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 1032; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 1033; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9 1034; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 1035; GFX900-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 1036; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v5 1037; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 1038; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3 1039; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc 1040; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v5 1041; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 1042; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v7 1043; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3 1044; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1045; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v5 1046; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 1047; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 1048; GFX900-SDAG-NEXT: global_store_dwordx3 v4, v[0:2], s[6:7] 1049; GFX900-SDAG-NEXT: s_endpgm 1050; 1051; GFX900-GISEL-LABEL: s_exp_v3f32: 1052; GFX900-GISEL: ; %bb.0: 1053; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 1054; GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1055; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 1056; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 1057; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1058; GFX900-GISEL-NEXT: v_mul_f32_e32 v5, s1, v1 1059; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v1, -v5 1060; GFX900-GISEL-NEXT: v_rndne_f32_e32 v7, v5 1061; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, v6 1062; GFX900-GISEL-NEXT: v_sub_f32_e32 v5, v5, v7 1063; GFX900-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 1064; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 1065; GFX900-GISEL-NEXT: 
v_exp_f32_e32 v5, v5 1066; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v1 1067; GFX900-GISEL-NEXT: v_fma_f32 v3, s0, v1, -v0 1068; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v0 1069; GFX900-GISEL-NEXT: v_fma_f32 v3, s0, v2, v3 1070; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 1071; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 1072; GFX900-GISEL-NEXT: v_ldexp_f32 v5, v5, v6 1073; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1 1074; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 1075; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 1076; GFX900-GISEL-NEXT: v_fma_f32 v1, s2, v1, -v6 1077; GFX900-GISEL-NEXT: v_fma_f32 v1, s2, v2, v1 1078; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v6 1079; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v2 1080; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 1081; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 1082; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v3 1083; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 1084; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 1085; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 1086; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v1 1087; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 1088; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 1089; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3 1090; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1091; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 1092; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc 1093; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3 1094; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1095; GFX900-GISEL-NEXT: v_ldexp_f32 v2, v6, v2 1096; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 1097; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc 1098; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 1099; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 1100; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0 1101; GFX900-GISEL-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] 1102; GFX900-GISEL-NEXT: s_endpgm 1103; 1104; SI-SDAG-LABEL: s_exp_v3f32: 1105; SI-SDAG: ; %bb.0: 
1106; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd 1107; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 1108; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x32a5705f 1109; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 1110; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1111; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1112; SI-SDAG-NEXT: v_mul_f32_e32 v5, s0, v0 1113; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v5 1114; SI-SDAG-NEXT: v_sub_f32_e32 v7, v5, v6 1115; SI-SDAG-NEXT: v_fma_f32 v5, s0, v0, -v5 1116; SI-SDAG-NEXT: v_fma_f32 v5, s0, v2, v5 1117; SI-SDAG-NEXT: v_mul_f32_e32 v1, s1, v0 1118; SI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 1119; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 1120; SI-SDAG-NEXT: v_fma_f32 v4, s1, v0, -v1 1121; SI-SDAG-NEXT: v_exp_f32_e32 v5, v5 1122; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 1123; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 1124; SI-SDAG-NEXT: v_fma_f32 v4, s1, v2, v4 1125; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 1126; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 1127; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 1128; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, v5, v6 1129; SI-SDAG-NEXT: v_mul_f32_e32 v6, s2, v0 1130; SI-SDAG-NEXT: v_rndne_f32_e32 v8, v6 1131; SI-SDAG-NEXT: v_fma_f32 v0, s2, v0, -v6 1132; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v8 1133; SI-SDAG-NEXT: v_fma_f32 v0, s2, v2, v0 1134; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v3 1135; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 1136; SI-SDAG-NEXT: v_add_f32_e32 v0, v9, v0 1137; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v3 1138; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x42b17218 1139; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0 1140; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v8 1141; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1142; SI-SDAG-NEXT: v_mov_b32_e32 v7, 0x7f800000 1143; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v4 1144; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc 1145; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v3 1146; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc 1147; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v4 1148; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 
v7, v0, vcc 1149; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6 1150; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v3 1151; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 1152; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v4 1153; SI-SDAG-NEXT: s_mov_b32 s6, -1 1154; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc 1155; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 1156; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1157; SI-SDAG-NEXT: s_endpgm 1158; 1159; SI-GISEL-LABEL: s_exp_v3f32: 1160; SI-GISEL: ; %bb.0: 1161; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd 1162; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 1163; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 1164; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 1165; SI-GISEL-NEXT: s_mov_b32 s6, -1 1166; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1167; SI-GISEL-NEXT: v_mul_f32_e32 v5, s1, v1 1168; SI-GISEL-NEXT: v_fma_f32 v6, s1, v1, -v5 1169; SI-GISEL-NEXT: v_rndne_f32_e32 v7, v5 1170; SI-GISEL-NEXT: v_fma_f32 v6, s1, v2, v6 1171; SI-GISEL-NEXT: v_sub_f32_e32 v5, v5, v7 1172; SI-GISEL-NEXT: v_add_f32_e32 v5, v5, v6 1173; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 1174; SI-GISEL-NEXT: v_exp_f32_e32 v5, v5 1175; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v1 1176; SI-GISEL-NEXT: v_fma_f32 v3, s0, v1, -v0 1177; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v0 1178; SI-GISEL-NEXT: v_fma_f32 v3, s0, v2, v3 1179; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 1180; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 1181; SI-GISEL-NEXT: v_ldexp_f32_e32 v5, v5, v6 1182; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v1 1183; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 1184; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 1185; SI-GISEL-NEXT: v_fma_f32 v1, s2, v1, -v6 1186; SI-GISEL-NEXT: v_fma_f32 v1, s2, v2, v1 1187; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v6 1188; SI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v2 1189; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 1190; SI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 1191; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3 1192; SI-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, s0, v4 1193; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x42b17218 1194; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 1195; SI-GISEL-NEXT: v_exp_f32_e32 v6, v1 1196; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 1197; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 1198; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v3 1199; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1200; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 1201; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v5, 0, vcc 1202; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v3 1203; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1204; SI-GISEL-NEXT: v_ldexp_f32_e32 v2, v6, v2 1205; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 1206; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc 1207; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v3 1208; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 1209; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 1210; SI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1211; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 1212; SI-GISEL-NEXT: s_endpgm 1213; 1214; R600-LABEL: s_exp_v3f32: 1215; R600: ; %bb.0: 1216; R600-NEXT: ALU 99, @6, KC0[CB0:0-32], KC1[] 1217; R600-NEXT: ALU 69, @106, KC0[CB0:0-32], KC1[] 1218; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1219; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1220; R600-NEXT: CF_END 1221; R600-NEXT: PAD 1222; R600-NEXT: ALU clause starting at 6: 1223; R600-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, 1224; R600-NEXT: -4096(nan), 0(0.000000e+00) 1225; R600-NEXT: MUL_IEEE T1.W, PV.W, literal.x, 1226; R600-NEXT: ADD * T2.W, KC0[3].Y, -PV.W, 1227; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 1228; R600-NEXT: RNDNE * T3.W, PV.W, 1229; R600-NEXT: TRUNC T4.W, PV.W, 1230; R600-NEXT: MUL_IEEE * T5.W, T2.W, literal.x, 1231; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) 1232; R600-NEXT: MULADD_IEEE T2.W, T2.W, literal.x, PS, 1233; R600-NEXT: FLT_TO_INT * T4.W, PV.W, 1234; R600-NEXT: 1069064192(1.442383e+00), 
0(0.000000e+00) 1235; R600-NEXT: MAX_INT T0.Z, PS, literal.x, 1236; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.y, PV.W, 1237; R600-NEXT: ADD * T1.W, T1.W, -T3.W, 1238; R600-NEXT: -330(nan), 967029397(3.122284e-04) 1239; R600-NEXT: ADD T0.Y, PS, PV.W, 1240; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.x, 1241; R600-NEXT: ADD_INT T0.W, T4.W, literal.y, 1242; R600-NEXT: SETGT_UINT * T1.W, T4.W, literal.z, 1243; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43) 1244; R600-NEXT: -229(nan), 0(0.000000e+00) 1245; R600-NEXT: CNDE_INT T0.Z, PS, PV.Z, PV.W, 1246; R600-NEXT: SETGT_INT T0.W, T4.W, literal.x, 1247; R600-NEXT: EXP_IEEE * T0.X, PV.Y, 1248; R600-NEXT: -127(nan), 0(0.000000e+00) 1249; R600-NEXT: MUL_IEEE T1.X, PS, literal.x, 1250; R600-NEXT: CNDE_INT T0.Y, PV.W, PV.Z, T4.W, 1251; R600-NEXT: MIN_INT T0.Z, T4.W, literal.y, 1252; R600-NEXT: AND_INT T2.W, KC0[3].W, literal.z, 1253; R600-NEXT: MUL_IEEE * T3.W, PS, literal.w, 1254; R600-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43) 1255; R600-NEXT: -4096(nan), 209715200(1.972152e-31) 1256; R600-NEXT: MUL_IEEE T2.X, PS, literal.x, 1257; R600-NEXT: ADD T1.Y, KC0[3].W, -PV.W, 1258; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, 1259; R600-NEXT: ADD_INT T5.W, T4.W, literal.z, 1260; R600-NEXT: SETGT_UINT * T6.W, T4.W, literal.w, 1261; R600-NEXT: 209715200(1.972152e-31), -254(nan) 1262; R600-NEXT: -127(nan), 254(3.559298e-43) 1263; R600-NEXT: CNDE_INT T3.X, PS, PV.W, PV.Z, 1264; R600-NEXT: SETGT_INT T2.Y, T4.W, literal.x, 1265; R600-NEXT: MUL_IEEE T0.Z, PV.Y, literal.y, 1266; R600-NEXT: MUL_IEEE * T4.W, T2.W, literal.z, BS:VEC_120/SCL_212 1267; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04) 1268; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 1269; R600-NEXT: CNDE_INT * T1.W, T1.W, T2.X, T3.W, 1270; R600-NEXT: CNDE_INT T0.X, T0.W, PV.W, T0.X, BS:VEC_021/SCL_122 1271; R600-NEXT: RNDNE T3.Y, T4.W, BS:VEC_120/SCL_212 1272; R600-NEXT: MULADD_IEEE T0.Z, T1.Y, literal.x, T0.Z, 1273; R600-NEXT: CNDE_INT T0.W, T2.Y, T0.Y, T3.X, 
BS:VEC_120/SCL_212 1274; R600-NEXT: MUL_IEEE * T1.W, T1.X, literal.y, 1275; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38) 1276; R600-NEXT: CNDE_INT T1.X, T6.W, T1.X, PS, 1277; R600-NEXT: LSHL T0.Y, PV.W, literal.x, 1278; R600-NEXT: AND_INT T1.Z, KC0[3].Z, literal.y, 1279; R600-NEXT: MULADD_IEEE T0.W, T2.W, literal.z, PV.Z, BS:VEC_120/SCL_212 1280; R600-NEXT: ADD * T1.W, T4.W, -PV.Y, 1281; R600-NEXT: 23(3.222986e-44), -4096(nan) 1282; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) 1283; R600-NEXT: ADD T1.Y, PS, PV.W, 1284; R600-NEXT: MUL_IEEE T0.Z, PV.Z, literal.x, 1285; R600-NEXT: ADD_INT T0.W, PV.Y, literal.y, 1286; R600-NEXT: CNDE_INT * T1.W, T2.Y, T0.X, PV.X, 1287; R600-NEXT: 1069064192(1.442383e+00), 1065353216(1.000000e+00) 1288; R600-NEXT: MUL_IEEE T0.X, PS, PV.W, 1289; R600-NEXT: ADD T0.Y, KC0[3].Z, -T1.Z, 1290; R600-NEXT: RNDNE T2.Z, PV.Z, 1291; R600-NEXT: TRUNC T0.W, T3.Y, 1292; R600-NEXT: EXP_IEEE * T1.X, PV.Y, 1293; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Y, 1294; R600-NEXT: FLT_TO_INT T1.Y, PV.W, 1295; R600-NEXT: TRUNC T3.Z, PV.Z, 1296; R600-NEXT: MUL_IEEE T0.W, PV.Y, literal.y, 1297; R600-NEXT: MUL_IEEE * T1.W, PS, literal.z, 1298; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04) 1299; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) 1300; R600-NEXT: MUL_IEEE T3.X, PS, literal.x, 1301; R600-NEXT: MUL_IEEE T2.Y, T1.X, literal.y, 1302; R600-NEXT: MULADD_IEEE T4.Z, T0.Y, literal.z, PV.W, 1303; R600-NEXT: FLT_TO_INT T0.W, PV.Z, 1304; R600-NEXT: MIN_INT * T2.W, PV.Y, literal.w, 1305; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38) 1306; R600-NEXT: 1069064192(1.442383e+00), 381(5.338947e-43) 1307; R600-NEXT: ADD_INT T4.X, PS, literal.x, 1308; R600-NEXT: MAX_INT T0.Y, PV.W, literal.y, 1309; R600-NEXT: MULADD_IEEE T1.Z, T1.Z, literal.z, PV.Z, 1310; R600-NEXT: ADD T2.W, T0.Z, -T2.Z, BS:VEC_120/SCL_212 1311; R600-NEXT: MIN_INT * T3.W, PV.W, literal.w, 1312; R600-NEXT: -254(nan), -330(nan) 1313; R600-NEXT: 
967029397(3.122284e-04), 381(5.338947e-43) 1314; R600-NEXT: ADD_INT T5.X, PS, literal.x, 1315; R600-NEXT: ADD T3.Y, PV.W, PV.Z, 1316; R600-NEXT: ADD_INT T0.Z, PV.Y, literal.y, 1317; R600-NEXT: ADD_INT T2.W, T0.W, literal.z, 1318; R600-NEXT: SETGT_UINT * T3.W, T0.W, literal.w, 1319; R600-NEXT: -254(nan), 204(2.858649e-43) 1320; R600-NEXT: 102(1.429324e-43), -229(nan) 1321; R600-NEXT: ADD_INT * T6.X, T0.W, literal.x, 1322; R600-NEXT: -127(nan), 0(0.000000e+00) 1323; R600-NEXT: ALU clause starting at 106: 1324; R600-NEXT: SETGT_UINT T0.Y, T0.W, literal.x, 1325; R600-NEXT: CNDE_INT T0.Z, T3.W, T0.Z, T2.W, BS:VEC_102/SCL_221 1326; R600-NEXT: SETGT_INT T2.W, T0.W, literal.y, 1327; R600-NEXT: EXP_IEEE * T1.Z, T3.Y, 1328; R600-NEXT: 254(3.559298e-43), -127(nan) 1329; R600-NEXT: ADD_INT T7.X, T1.Y, literal.x, 1330; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y, 1331; R600-NEXT: CNDE_INT T0.Z, PV.W, PV.Z, T0.W, 1332; R600-NEXT: CNDE_INT T4.W, PV.Y, T6.X, T5.X, 1333; R600-NEXT: SETGT_INT * T0.W, T0.W, literal.z, 1334; R600-NEXT: -127(nan), 209715200(1.972152e-31) 1335; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) 1336; R600-NEXT: SETGT_UINT T5.X, T1.Y, literal.x, 1337; R600-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W, 1338; R600-NEXT: MAX_INT T0.Z, T1.Y, literal.y, 1339; R600-NEXT: MUL_IEEE T4.W, PV.Y, literal.z, 1340; R600-NEXT: MUL_IEEE * T5.W, T1.Z, literal.w, 1341; R600-NEXT: 254(3.559298e-43), -330(nan) 1342; R600-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38) 1343; R600-NEXT: MUL_IEEE T6.X, PS, literal.x, 1344; R600-NEXT: CNDE_INT T3.Y, T3.W, PV.W, T3.Y, BS:VEC_021/SCL_122 1345; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, 1346; R600-NEXT: ADD_INT T3.W, T1.Y, literal.z, 1347; R600-NEXT: SETGT_UINT * T4.W, T1.Y, literal.w, 1348; R600-NEXT: 2130706432(1.701412e+38), 204(2.858649e-43) 1349; R600-NEXT: 102(1.429324e-43), -229(nan) 1350; R600-NEXT: CNDE_INT T8.X, PS, PV.Z, PV.W, 1351; R600-NEXT: SETGT_INT T5.Y, T1.Y, literal.x, 1352; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.Y, T1.Z, 
1353; R600-NEXT: CNDE_INT T2.W, T0.Y, T5.W, PV.X, BS:VEC_120/SCL_212 1354; R600-NEXT: LSHL * T3.W, T4.Y, literal.y, 1355; R600-NEXT: -127(nan), 23(3.222986e-44) 1356; R600-NEXT: ADD_INT T6.X, PS, literal.x, 1357; R600-NEXT: CNDE_INT T0.Y, T0.W, PV.Z, PV.W, 1358; R600-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T1.Y, 1359; R600-NEXT: CNDE_INT T0.W, T5.X, T7.X, T4.X, 1360; R600-NEXT: SETGT_INT * T2.W, T1.Y, literal.y, 1361; R600-NEXT: 1065353216(1.000000e+00), 127(1.779649e-43) 1362; R600-NEXT: CNDE_INT T4.X, PS, PV.Z, PV.W, 1363; R600-NEXT: MUL_IEEE T0.Y, PV.Y, PV.X, 1364; R600-NEXT: SETGT T0.Z, literal.x, KC0[3].Z, 1365; R600-NEXT: MUL_IEEE T0.W, T2.Y, literal.y, 1366; R600-NEXT: CNDE_INT * T1.W, T4.W, T3.X, T1.W, 1367; R600-NEXT: -1026650416(-1.032789e+02), 2130706432(1.701412e+38) 1368; R600-NEXT: CNDE_INT T1.X, T5.Y, PS, T1.X, 1369; R600-NEXT: CNDE_INT T1.Y, T5.X, T2.Y, PV.W, 1370; R600-NEXT: CNDE T0.Z, PV.Z, PV.Y, 0.0, 1371; R600-NEXT: SETGT T0.W, KC0[3].Z, literal.x, 1372; R600-NEXT: LSHL * T1.W, PV.X, literal.y, 1373; R600-NEXT: 1118925336(8.872284e+01), 23(3.222986e-44) 1374; R600-NEXT: ADD_INT T3.X, PS, literal.x, 1375; R600-NEXT: CNDE T0.Y, PV.W, PV.Z, literal.y, 1376; R600-NEXT: CNDE_INT T0.Z, T2.W, PV.X, PV.Y, 1377; R600-NEXT: CNDE T0.W, T2.X, T0.X, 0.0, 1378; R600-NEXT: SETGT * T1.W, KC0[3].Y, literal.z, 1379; R600-NEXT: 1065353216(1.000000e+00), 2139095040(INF) 1380; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) 1381; R600-NEXT: CNDE T0.X, PS, PV.W, literal.x, 1382; R600-NEXT: MUL_IEEE T0.W, PV.Z, PV.X, 1383; R600-NEXT: SETGT * T1.W, literal.y, KC0[3].W, 1384; R600-NEXT: 2139095040(INF), -1026650416(-1.032789e+02) 1385; R600-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 1386; R600-NEXT: CNDE T0.W, PS, PV.W, 0.0, 1387; R600-NEXT: SETGT * T1.W, KC0[3].W, literal.y, 1388; R600-NEXT: 2(2.802597e-45), 1118925336(8.872284e+01) 1389; R600-NEXT: CNDE T2.X, PS, PV.W, literal.x, 1390; R600-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1391; R600-NEXT: 2139095040(INF), 
8(1.121039e-44) 1392; R600-NEXT: LSHR * T3.X, PV.W, literal.x, 1393; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1394; 1395; CM-LABEL: s_exp_v3f32: 1396; CM: ; %bb.0: 1397; CM-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[] 1398; CM-NEXT: ALU 77, @108, KC0[CB0:0-32], KC1[] 1399; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 1400; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T3.X 1401; CM-NEXT: CF_END 1402; CM-NEXT: PAD 1403; CM-NEXT: ALU clause starting at 6: 1404; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, 1405; CM-NEXT: -4096(nan), 0(0.000000e+00) 1406; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W, 1407; CM-NEXT: MUL_IEEE T0.Z, PV.W, literal.x, 1408; CM-NEXT: MUL_IEEE * T2.W, T0.W, literal.y, 1409; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 1410; CM-NEXT: RNDNE T1.Z, PV.W, 1411; CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Z, 1412; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 1413; CM-NEXT: MULADD_IEEE T0.Z, T0.W, literal.x, PV.W, 1414; CM-NEXT: ADD * T0.W, T2.W, -PV.Z, BS:VEC_120/SCL_212 1415; CM-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) 1416; CM-NEXT: TRUNC T1.Z, T1.Z, 1417; CM-NEXT: ADD * T0.W, PV.W, PV.Z, 1418; CM-NEXT: EXP_IEEE T0.X, T0.W, 1419; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, 1420; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, 1421; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, 1422; CM-NEXT: FLT_TO_INT T0.Z, T1.Z, 1423; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.x, 1424; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) 1425; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, 1426; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.y, 1427; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z, 1428; CM-NEXT: 209715200(1.972152e-31), -330(nan) 1429; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) 1430; CM-NEXT: ADD_INT T1.X, PV.W, literal.x, 1431; CM-NEXT: ADD_INT T1.Y, PV.Z, literal.y, 1432; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z, 1433; CM-NEXT: SETGT_UINT * T1.W, T0.Z, literal.w, 1434; CM-NEXT: -254(nan), 204(2.858649e-43) 1435; CM-NEXT: 102(1.429324e-43), -229(nan) 1436; CM-NEXT: 
ADD_INT T2.X, T0.Z, literal.x, 1437; CM-NEXT: SETGT_UINT T2.Y, T0.Z, literal.y, 1438; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, 1439; CM-NEXT: SETGT_INT * T2.W, T0.Z, literal.x, 1440; CM-NEXT: -127(nan), 254(3.559298e-43) 1441; CM-NEXT: MUL_IEEE T3.X, T0.X, literal.x, 1442; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, T0.Z, 1443; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.X, 1444; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.y, 1445; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43) 1446; CM-NEXT: CNDE_INT T1.Y, PV.W, PV.Y, PV.Z, 1447; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.x, 1448; CM-NEXT: CNDE_INT * T0.W, T1.W, T0.Y, T0.W, 1449; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) 1450; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X, 1451; CM-NEXT: CNDE_INT T0.Y, T2.Y, T3.X, PV.Z, 1452; CM-NEXT: LSHL T0.Z, PV.Y, literal.x, 1453; CM-NEXT: AND_INT * T0.W, KC0[3].Z, literal.y, 1454; CM-NEXT: 23(3.222986e-44), -4096(nan) 1455; CM-NEXT: ADD T1.Y, KC0[3].Z, -PV.W, 1456; CM-NEXT: ADD_INT T0.Z, PV.Z, literal.x, 1457; CM-NEXT: CNDE_INT * T1.W, T3.W, PV.X, PV.Y, 1458; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) 1459; CM-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, 1460; CM-NEXT: MUL_IEEE T0.Y, PV.Y, literal.x, 1461; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y, 1462; CM-NEXT: AND_INT * T1.W, KC0[3].W, literal.z, 1463; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 1464; CM-NEXT: -4096(nan), 0(0.000000e+00) 1465; CM-NEXT: SETGT T1.X, literal.x, KC0[3].Y, 1466; CM-NEXT: ADD T2.Y, KC0[3].W, -PV.W, 1467; CM-NEXT: RNDNE T1.Z, PV.Z, 1468; CM-NEXT: MULADD_IEEE * T2.W, T1.Y, literal.y, PV.Y, 1469; CM-NEXT: -1026650416(-1.032789e+02), 1069064192(1.442383e+00) 1470; CM-NEXT: MULADD_IEEE T2.X, T0.W, literal.x, PV.W, 1471; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z, 1472; CM-NEXT: MUL_IEEE T0.Z, PV.Y, literal.x, 1473; CM-NEXT: MUL_IEEE * T0.W, T1.W, literal.y, BS:VEC_120/SCL_212 1474; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 1475; CM-NEXT: TRUNC T3.X, T1.Z, 1476; CM-NEXT: RNDNE T1.Y, PV.W, 1477; 
CM-NEXT: MULADD_IEEE T0.Z, T2.Y, literal.x, PV.Z, 1478; CM-NEXT: ADD * T2.W, PV.Y, PV.X, 1479; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 1480; CM-NEXT: EXP_IEEE T0.X (MASKED), T2.W, 1481; CM-NEXT: EXP_IEEE T0.Y, T2.W, 1482; CM-NEXT: EXP_IEEE T0.Z (MASKED), T2.W, 1483; CM-NEXT: EXP_IEEE * T0.W (MASKED), T2.W, 1484; CM-NEXT: MULADD_IEEE T2.X, T1.W, literal.x, T0.Z, 1485; CM-NEXT: ADD T2.Y, T0.W, -T1.Y, BS:VEC_120/SCL_212 1486; CM-NEXT: FLT_TO_INT T0.Z, T3.X, 1487; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.y, 1488; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31) 1489; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x, 1490; CM-NEXT: SETGT_UINT T3.Y, PV.Z, literal.y, 1491; CM-NEXT: TRUNC T1.Z, T1.Y, 1492; CM-NEXT: ADD * T1.W, PV.Y, PV.X, 1493; CM-NEXT: 209715200(1.972152e-31), -229(nan) 1494; CM-NEXT: EXP_IEEE T1.X (MASKED), T1.W, 1495; CM-NEXT: EXP_IEEE T1.Y, T1.W, 1496; CM-NEXT: EXP_IEEE T1.Z (MASKED), T1.W, 1497; CM-NEXT: EXP_IEEE * T1.W (MASKED), T1.W, 1498; CM-NEXT: FLT_TO_INT T2.X, T1.Z, 1499; CM-NEXT: MUL_IEEE T2.Y, PV.Y, literal.x, 1500; CM-NEXT: CNDE_INT T1.Z, T3.Y, T3.X, T0.W, 1501; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y, BS:VEC_120/SCL_212 1502; CM-NEXT: 209715200(1.972152e-31), -127(nan) 1503; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.Y, 1504; CM-NEXT: MUL_IEEE * T4.Y, PV.Y, literal.x, 1505; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) 1506; CM-NEXT: ALU clause starting at 108: 1507; CM-NEXT: SETGT_UINT T1.Z, T2.X, literal.x, 1508; CM-NEXT: MAX_INT * T1.W, T0.Z, literal.y, 1509; CM-NEXT: -229(nan), -330(nan) 1510; CM-NEXT: ADD_INT T4.X, PV.W, literal.x, 1511; CM-NEXT: ADD_INT T5.Y, T0.Z, literal.y, 1512; CM-NEXT: CNDE_INT T2.Z, PV.Z, T4.Y, T2.Y, 1513; CM-NEXT: SETGT_INT * T1.W, T2.X, literal.z, 1514; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) 1515; CM-NEXT: -127(nan), 0(0.000000e+00) 1516; CM-NEXT: CNDE_INT T5.X, PV.W, PV.Z, T1.Y, 1517; CM-NEXT: MUL_IEEE T0.Y, T0.Y, literal.x, 1518; CM-NEXT: MAX_INT T2.Z, T2.X, literal.y, 1519; CM-NEXT: 
CNDE_INT * T2.W, T3.Y, PV.X, PV.Y, BS:VEC_120/SCL_212 1520; CM-NEXT: 2130706432(1.701412e+38), -330(nan) 1521; CM-NEXT: CNDE_INT T4.X, T0.W, PV.W, T0.Z, 1522; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x, 1523; CM-NEXT: ADD_INT T2.Z, T2.X, literal.y, 1524; CM-NEXT: MIN_INT * T0.W, T2.X, literal.z, 1525; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) 1526; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) 1527; CM-NEXT: ADD_INT T6.X, PV.W, literal.x, 1528; CM-NEXT: ADD_INT T3.Y, T2.X, literal.y, 1529; CM-NEXT: SETGT_UINT T3.Z, T2.X, literal.z, 1530; CM-NEXT: CNDE_INT * T0.W, T1.Z, PV.Y, PV.Z, 1531; CM-NEXT: -254(nan), -127(nan) 1532; CM-NEXT: 254(3.559298e-43), 0(0.000000e+00) 1533; CM-NEXT: MUL_IEEE T7.X, T1.Y, literal.x, 1534; CM-NEXT: CNDE_INT T1.Y, T1.W, PV.W, T2.X, 1535; CM-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, PV.X, 1536; CM-NEXT: MIN_INT * T0.W, T0.Z, literal.y, 1537; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43) 1538; CM-NEXT: SETGT_INT T2.X, T2.X, literal.x, 1539; CM-NEXT: ADD_INT T2.Y, PV.W, literal.y, 1540; CM-NEXT: ADD_INT T2.Z, T0.Z, literal.z, 1541; CM-NEXT: SETGT_UINT * T0.W, T0.Z, literal.w, 1542; CM-NEXT: 127(1.779649e-43), -254(nan) 1543; CM-NEXT: -127(nan), 254(3.559298e-43) 1544; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y, 1545; CM-NEXT: SETGT_INT T2.Y, T0.Z, literal.x, 1546; CM-NEXT: CNDE_INT T0.Z, PV.X, T1.Y, T1.Z, 1547; CM-NEXT: MUL_IEEE * T1.W, T7.X, literal.y, 1548; CM-NEXT: 127(1.779649e-43), 2130706432(1.701412e+38) 1549; CM-NEXT: CNDE_INT T7.X, T3.Z, T7.X, PV.W, 1550; CM-NEXT: LSHL T1.Y, PV.Z, literal.x, 1551; CM-NEXT: CNDE_INT T0.Z, PV.Y, T4.X, PV.X, BS:VEC_021/SCL_122 1552; CM-NEXT: MUL_IEEE * T1.W, T0.Y, literal.y, 1553; CM-NEXT: 23(3.222986e-44), 2130706432(1.701412e+38) 1554; CM-NEXT: CNDE_INT T4.X, T0.W, T0.Y, PV.W, 1555; CM-NEXT: LSHL T0.Y, PV.Z, literal.x, 1556; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, 1557; CM-NEXT: CNDE_INT * T0.W, T2.X, T5.X, PV.X, 1558; CM-NEXT: 23(3.222986e-44), 1065353216(1.000000e+00) 1559; CM-NEXT: MUL_IEEE T2.X, PV.W, 
PV.Z, 1560; CM-NEXT: SETGT T1.Y, literal.x, KC0[3].W, 1561; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, 1562; CM-NEXT: CNDE_INT * T0.W, T2.Y, T3.X, PV.X, 1563; CM-NEXT: -1026650416(-1.032789e+02), 1065353216(1.000000e+00) 1564; CM-NEXT: MUL_IEEE T3.X, PV.W, PV.Z, 1565; CM-NEXT: SETGT T0.Y, literal.x, KC0[3].Z, 1566; CM-NEXT: CNDE T0.Z, PV.Y, PV.X, 0.0, 1567; CM-NEXT: SETGT * T0.W, KC0[3].W, literal.y, 1568; CM-NEXT: -1026650416(-1.032789e+02), 1118925336(8.872284e+01) 1569; CM-NEXT: CNDE T2.X, PV.W, PV.Z, literal.x, 1570; CM-NEXT: CNDE T0.Y, PV.Y, PV.X, 0.0, 1571; CM-NEXT: SETGT T0.Z, KC0[3].Z, literal.y, 1572; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 1573; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01) 1574; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1575; CM-NEXT: LSHR T3.X, PV.W, literal.x, 1576; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, literal.y, 1577; CM-NEXT: CNDE T0.Z, T1.X, T0.X, 0.0, 1578; CM-NEXT: SETGT * T0.W, KC0[3].Y, literal.z, 1579; CM-NEXT: 2(2.802597e-45), 2139095040(INF) 1580; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) 1581; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, 1582; CM-NEXT: 2139095040(INF), 0(0.000000e+00) 1583; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1584; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1585 %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %in) 1586 store <3 x float> %result, ptr addrspace(1) %out 1587 ret void 1588} 1589 1590; FIXME: We should be able to merge these packets together on Cayman so we 1591; have a maximum of 4 instructions. 
1592define amdgpu_kernel void @s_exp_v4f32(ptr addrspace(1) %out, <4 x float> %in) { 1593; VI-SDAG-LABEL: s_exp_v4f32: 1594; VI-SDAG: ; %bb.0: 1595; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 1596; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8a000 1597; VI-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218 1598; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 1599; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1600; VI-SDAG-NEXT: s_and_b32 s6, s3, 0xfffff000 1601; VI-SDAG-NEXT: v_mov_b32_e32 v2, s6 1602; VI-SDAG-NEXT: v_sub_f32_e32 v2, s3, v2 1603; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 1604; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 1605; VI-SDAG-NEXT: v_mul_f32_e32 v1, s6, v0 1606; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 1607; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x39a3b295 1608; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 1609; VI-SDAG-NEXT: v_mul_f32_e32 v5, s6, v4 1610; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 1611; VI-SDAG-NEXT: v_add_f32_e32 v2, v5, v2 1612; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 1613; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 1614; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 1615; VI-SDAG-NEXT: s_and_b32 s6, s2, 0xfffff000 1616; VI-SDAG-NEXT: v_mov_b32_e32 v7, s6 1617; VI-SDAG-NEXT: v_sub_f32_e32 v7, s2, v7 1618; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 1619; VI-SDAG-NEXT: v_mul_f32_e32 v2, s6, v0 1620; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x39a3b295, v7 1621; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3fb8a000, v7 1622; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 1623; VI-SDAG-NEXT: v_add_f32_e32 v7, v7, v8 1624; VI-SDAG-NEXT: v_mul_f32_e32 v8, s6, v4 1625; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 1626; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 1627; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0 1628; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 1629; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5 1630; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 1631; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v3 1632; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1633; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v6 1634; VI-SDAG-NEXT: s_and_b32 
s3, s1, 0xfffff000 1635; VI-SDAG-NEXT: v_mov_b32_e32 v9, s3 1636; VI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 1637; VI-SDAG-NEXT: v_sub_f32_e32 v9, s1, v9 1638; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v1, vcc 1639; VI-SDAG-NEXT: v_ldexp_f32 v1, v2, v7 1640; VI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 1641; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9 1642; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9 1643; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v2 1644; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10 1645; VI-SDAG-NEXT: v_mul_f32_e32 v10, s3, v4 1646; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v7 1647; VI-SDAG-NEXT: v_add_f32_e32 v9, v10, v9 1648; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v9 1649; VI-SDAG-NEXT: v_exp_f32_e32 v9, v2 1650; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 1651; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5 1652; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1653; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v6 1654; VI-SDAG-NEXT: s_and_b32 s2, s0, 0xfffff000 1655; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v1, vcc 1656; VI-SDAG-NEXT: v_ldexp_f32 v1, v9, v7 1657; VI-SDAG-NEXT: v_mov_b32_e32 v9, s2 1658; VI-SDAG-NEXT: v_sub_f32_e32 v9, s0, v9 1659; VI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 1660; VI-SDAG-NEXT: v_mul_f32_e32 v10, 0x39a3b295, v9 1661; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3fb8a000, v9 1662; VI-SDAG-NEXT: v_rndne_f32_e32 v7, v0 1663; VI-SDAG-NEXT: v_add_f32_e32 v9, v9, v10 1664; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 1665; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v7 1666; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v9 1667; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v4 1668; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 1669; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v4, v7 1670; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v5 1671; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 1672; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v6 1673; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 1674; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v4 1675; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v5 1676; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, 
v0, vcc 1677; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v6 1678; VI-SDAG-NEXT: v_mov_b32_e32 v4, s4 1679; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 1680; VI-SDAG-NEXT: v_mov_b32_e32 v5, s5 1681; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1682; VI-SDAG-NEXT: s_endpgm 1683; 1684; VI-GISEL-LABEL: s_exp_v4f32: 1685; VI-GISEL: ; %bb.0: 1686; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 1687; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8a000 1688; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x39a3b295 1689; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218 1690; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 1691; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1692; VI-GISEL-NEXT: s_and_b32 s6, s0, 0xfffff000 1693; VI-GISEL-NEXT: v_mov_b32_e32 v0, s6 1694; VI-GISEL-NEXT: v_sub_f32_e32 v0, s0, v0 1695; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0 1696; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 1697; VI-GISEL-NEXT: v_mul_f32_e32 v1, s6, v2 1698; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v4 1699; VI-GISEL-NEXT: v_mul_f32_e32 v4, s6, v3 1700; VI-GISEL-NEXT: v_add_f32_e32 v0, v4, v0 1701; VI-GISEL-NEXT: v_rndne_f32_e32 v4, v1 1702; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v4 1703; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 1704; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 1705; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 1706; VI-GISEL-NEXT: s_and_b32 s6, s1, 0xfffff000 1707; VI-GISEL-NEXT: v_mul_f32_e32 v6, s6, v2 1708; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 1709; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1710; VI-GISEL-NEXT: v_mov_b32_e32 v1, s6 1711; VI-GISEL-NEXT: v_sub_f32_e32 v1, s1, v1 1712; VI-GISEL-NEXT: v_mul_f32_e32 v7, 0x39a3b295, v1 1713; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 1714; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v7 1715; VI-GISEL-NEXT: v_mul_f32_e32 v7, s6, v3 1716; VI-GISEL-NEXT: v_add_f32_e32 v1, v7, v1 1717; VI-GISEL-NEXT: v_rndne_f32_e32 v7, v6 1718; VI-GISEL-NEXT: v_sub_f32_e32 v6, v6, v7 1719; VI-GISEL-NEXT: v_add_f32_e32 v1, v6, v1 1720; VI-GISEL-NEXT: 
v_cvt_i32_f32_e32 v6, v7 1721; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 1722; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 1723; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 1724; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 1725; VI-GISEL-NEXT: s_and_b32 s0, s2, 0xfffff000 1726; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 1727; VI-GISEL-NEXT: v_mov_b32_e32 v6, s0 1728; VI-GISEL-NEXT: v_sub_f32_e32 v6, s2, v6 1729; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v6 1730; VI-GISEL-NEXT: v_mul_f32_e32 v6, 0x3fb8a000, v6 1731; VI-GISEL-NEXT: v_mul_f32_e32 v8, s0, v2 1732; VI-GISEL-NEXT: v_add_f32_e32 v6, v6, v9 1733; VI-GISEL-NEXT: v_mul_f32_e32 v9, s0, v3 1734; VI-GISEL-NEXT: v_add_f32_e32 v6, v9, v6 1735; VI-GISEL-NEXT: v_rndne_f32_e32 v9, v8 1736; VI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v9 1737; VI-GISEL-NEXT: v_add_f32_e32 v6, v8, v6 1738; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 1739; VI-GISEL-NEXT: v_exp_f32_e32 v6, v6 1740; VI-GISEL-NEXT: s_and_b32 s0, s3, 0xfffff000 1741; VI-GISEL-NEXT: v_mul_f32_e32 v2, s0, v2 1742; VI-GISEL-NEXT: v_mul_f32_e32 v3, s0, v3 1743; VI-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 1744; VI-GISEL-NEXT: v_mov_b32_e32 v8, s0 1745; VI-GISEL-NEXT: v_sub_f32_e32 v8, s3, v8 1746; VI-GISEL-NEXT: v_mul_f32_e32 v9, 0x39a3b295, v8 1747; VI-GISEL-NEXT: v_mul_f32_e32 v8, 0x3fb8a000, v8 1748; VI-GISEL-NEXT: v_add_f32_e32 v8, v8, v9 1749; VI-GISEL-NEXT: v_add_f32_e32 v3, v3, v8 1750; VI-GISEL-NEXT: v_rndne_f32_e32 v8, v2 1751; VI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v8 1752; VI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 1753; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 1754; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1755; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 1756; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v8 1757; VI-GISEL-NEXT: v_exp_f32_e32 v8, v2 1758; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 1759; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 1760; VI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1761; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 1762; 
VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc 1763; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 1764; VI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 1765; VI-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 1766; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 1767; VI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc 1768; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 1769; VI-GISEL-NEXT: v_mov_b32_e32 v4, s4 1770; VI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1771; VI-GISEL-NEXT: v_mov_b32_e32 v5, s5 1772; VI-GISEL-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1773; VI-GISEL-NEXT: s_endpgm 1774; 1775; GFX900-SDAG-LABEL: s_exp_v4f32: 1776; GFX900-SDAG: ; %bb.0: 1777; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 1778; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1779; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 1780; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f 1781; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0xc2ce8ed0 1782; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1783; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 1784; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v2 1785; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 1786; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 1787; GFX900-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 1788; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 1789; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 1790; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 1791; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, v5 1792; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x42b17218 1793; GFX900-SDAG-NEXT: v_mov_b32_e32 v9, 0x7f800000 1794; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, v3 1795; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s2, v0 1796; GFX900-SDAG-NEXT: v_rndne_f32_e32 v7, v3 1797; GFX900-SDAG-NEXT: v_sub_f32_e32 v8, v3, v7 1798; GFX900-SDAG-NEXT: v_fma_f32 v3, s2, v0, -v3 1799; GFX900-SDAG-NEXT: v_fma_f32 v3, s2, v1, v3 1800; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v8, v3 1801; GFX900-SDAG-NEXT: v_exp_f32_e32 v8, v3 1802; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 1803; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 
0, v2, vcc 1804; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v6 1805; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v2, vcc 1806; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v8, v7 1807; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s1, v0 1808; GFX900-SDAG-NEXT: v_rndne_f32_e32 v8, v7 1809; GFX900-SDAG-NEXT: v_sub_f32_e32 v10, v7, v8 1810; GFX900-SDAG-NEXT: v_fma_f32 v7, s1, v0, -v7 1811; GFX900-SDAG-NEXT: v_fma_f32 v7, s1, v1, v7 1812; GFX900-SDAG-NEXT: v_add_f32_e32 v7, v10, v7 1813; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7 1814; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v8 1815; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v5 1816; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 1817; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v6 1818; GFX900-SDAG-NEXT: v_ldexp_f32 v7, v7, v8 1819; GFX900-SDAG-NEXT: v_mul_f32_e32 v8, s0, v0 1820; GFX900-SDAG-NEXT: v_rndne_f32_e32 v10, v8 1821; GFX900-SDAG-NEXT: v_fma_f32 v0, s0, v0, -v8 1822; GFX900-SDAG-NEXT: v_sub_f32_e32 v11, v8, v10 1823; GFX900-SDAG-NEXT: v_fma_f32 v0, s0, v1, v0 1824; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v11, v0 1825; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 1826; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v8, v10 1827; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc 1828; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v5 1829; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc 1830; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v6 1831; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc 1832; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v8 1833; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v5 1834; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1835; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v6 1836; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 1837; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc 1838; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] 1839; GFX900-SDAG-NEXT: s_endpgm 1840; 1841; GFX900-GISEL-LABEL: s_exp_v4f32: 1842; GFX900-GISEL: ; %bb.0: 1843; GFX900-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 1844; 
GFX900-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1845; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b 1846; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 1847; GFX900-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218 1848; GFX900-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1849; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, s0, v2 1850; GFX900-GISEL-NEXT: v_fma_f32 v1, s0, v2, -v0 1851; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v0 1852; GFX900-GISEL-NEXT: v_fma_f32 v1, s0, v3, v1 1853; GFX900-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 1854; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 1855; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 1856; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 1857; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 1858; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 1859; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 1860; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 1861; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1 1862; GFX900-GISEL-NEXT: v_rndne_f32_e32 v7, v1 1863; GFX900-GISEL-NEXT: v_fma_f32 v6, s1, v3, v6 1864; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7 1865; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 1866; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 1867; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 1868; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 1869; GFX900-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 1870; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 1871; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v6 1872; GFX900-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2 1873; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6 1874; GFX900-GISEL-NEXT: v_rndne_f32_e32 v9, v6 1875; GFX900-GISEL-NEXT: v_fma_f32 v8, s2, v3, v8 1876; GFX900-GISEL-NEXT: v_sub_f32_e32 v6, v6, v9 1877; GFX900-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 1878; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 1879; GFX900-GISEL-NEXT: v_exp_f32_e32 v6, v6 1880; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 1881; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 1882; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 1883; 
GFX900-GISEL-NEXT: v_ldexp_f32 v6, v6, v8 1884; GFX900-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 1885; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 1886; GFX900-GISEL-NEXT: v_fma_f32 v2, s3, v3, v2 1887; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v8 1888; GFX900-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3 1889; GFX900-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 1890; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 1891; GFX900-GISEL-NEXT: v_exp_f32_e32 v8, v2 1892; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 1893; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 1894; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 1895; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc 1896; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 1897; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 1898; GFX900-GISEL-NEXT: v_ldexp_f32 v3, v8, v3 1899; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 1900; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc 1901; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 1902; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1903; GFX900-GISEL-NEXT: v_mov_b32_e32 v4, 0 1904; GFX900-GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] 1905; GFX900-GISEL-NEXT: s_endpgm 1906; 1907; SI-SDAG-LABEL: s_exp_v4f32: 1908; SI-SDAG: ; %bb.0: 1909; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd 1910; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 1911; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f 1912; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x42b17218 1913; SI-SDAG-NEXT: v_mov_b32_e32 v8, 0x7f800000 1914; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1915; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v0 1916; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 1917; SI-SDAG-NEXT: v_fma_f32 v4, s3, v0, -v2 1918; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 1919; SI-SDAG-NEXT: v_fma_f32 v4, s3, v1, v4 1920; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 1921; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 1922; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v3, v3 1923; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 1924; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s3, 
v4 1925; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 1926; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v3 1927; SI-SDAG-NEXT: v_mul_f32_e32 v3, s2, v0 1928; SI-SDAG-NEXT: v_rndne_f32_e32 v6, v3 1929; SI-SDAG-NEXT: v_sub_f32_e32 v7, v3, v6 1930; SI-SDAG-NEXT: v_fma_f32 v3, s2, v0, -v3 1931; SI-SDAG-NEXT: v_fma_f32 v3, s2, v1, v3 1932; SI-SDAG-NEXT: v_add_f32_e32 v3, v7, v3 1933; SI-SDAG-NEXT: v_exp_f32_e32 v7, v3 1934; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v6, v6 1935; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 1936; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s3, v5 1937; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, v8, v2, vcc 1938; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, v7, v6 1939; SI-SDAG-NEXT: v_mul_f32_e32 v6, s1, v0 1940; SI-SDAG-NEXT: v_rndne_f32_e32 v7, v6 1941; SI-SDAG-NEXT: v_sub_f32_e32 v9, v6, v7 1942; SI-SDAG-NEXT: v_fma_f32 v6, s1, v0, -v6 1943; SI-SDAG-NEXT: v_fma_f32 v6, s1, v1, v6 1944; SI-SDAG-NEXT: v_add_f32_e32 v6, v9, v6 1945; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6 1946; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v7 1947; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s2, v4 1948; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc 1949; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v5 1950; SI-SDAG-NEXT: v_ldexp_f32_e32 v6, v6, v7 1951; SI-SDAG-NEXT: v_mul_f32_e32 v7, s0, v0 1952; SI-SDAG-NEXT: v_rndne_f32_e32 v9, v7 1953; SI-SDAG-NEXT: v_fma_f32 v0, s0, v0, -v7 1954; SI-SDAG-NEXT: v_sub_f32_e32 v10, v7, v9 1955; SI-SDAG-NEXT: v_fma_f32 v0, s0, v1, v0 1956; SI-SDAG-NEXT: v_add_f32_e32 v0, v10, v0 1957; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 1958; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v7, v9 1959; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 1960; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s1, v4 1961; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v6, vcc 1962; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s1, v5 1963; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 1964; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v7 1965; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v4 1966; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 1967; 
SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v5 1968; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1969; SI-SDAG-NEXT: s_mov_b32 s6, -1 1970; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 1971; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1972; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1973; SI-SDAG-NEXT: s_endpgm 1974; 1975; SI-GISEL-LABEL: s_exp_v4f32: 1976; SI-GISEL: ; %bb.0: 1977; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd 1978; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3fb8aa3b 1979; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 1980; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x42b17218 1981; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 1982; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1983; SI-GISEL-NEXT: v_mul_f32_e32 v0, s0, v2 1984; SI-GISEL-NEXT: v_fma_f32 v1, s0, v2, -v0 1985; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v0 1986; SI-GISEL-NEXT: v_fma_f32 v1, s0, v3, v1 1987; SI-GISEL-NEXT: v_sub_f32_e32 v0, v0, v4 1988; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 1989; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v4 1990; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 1991; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0xc2ce8ed0 1992; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s0, v4 1993; SI-GISEL-NEXT: s_mov_b32 s6, -1 1994; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 1995; SI-GISEL-NEXT: v_mul_f32_e32 v1, s1, v2 1996; SI-GISEL-NEXT: v_fma_f32 v6, s1, v2, -v1 1997; SI-GISEL-NEXT: v_rndne_f32_e32 v7, v1 1998; SI-GISEL-NEXT: v_fma_f32 v6, s1, v3, v6 1999; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v7 2000; SI-GISEL-NEXT: v_add_f32_e32 v1, v1, v6 2001; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v6, v7 2002; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 2003; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 2004; SI-GISEL-NEXT: v_mov_b32_e32 v7, 0x7f800000 2005; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s0, v5 2006; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v6 2007; SI-GISEL-NEXT: v_mul_f32_e32 v6, s2, v2 2008; SI-GISEL-NEXT: v_fma_f32 v8, s2, v2, -v6 2009; SI-GISEL-NEXT: v_rndne_f32_e32 v9, v6 2010; SI-GISEL-NEXT: v_fma_f32 v8, s2, v3, v8 2011; SI-GISEL-NEXT: 
v_sub_f32_e32 v6, v6, v9 2012; SI-GISEL-NEXT: v_add_f32_e32 v6, v6, v8 2013; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v8, v9 2014; SI-GISEL-NEXT: v_exp_f32_e32 v6, v6 2015; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc 2016; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s1, v4 2017; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 2018; SI-GISEL-NEXT: v_ldexp_f32_e32 v6, v6, v8 2019; SI-GISEL-NEXT: v_mul_f32_e32 v8, s3, v2 2020; SI-GISEL-NEXT: v_fma_f32 v2, s3, v2, -v8 2021; SI-GISEL-NEXT: v_fma_f32 v2, s3, v3, v2 2022; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v8 2023; SI-GISEL-NEXT: v_sub_f32_e32 v8, v8, v3 2024; SI-GISEL-NEXT: v_add_f32_e32 v2, v8, v2 2025; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v3 2026; SI-GISEL-NEXT: v_exp_f32_e32 v8, v2 2027; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s1, v5 2028; SI-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 2029; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s2, v4 2030; SI-GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc 2031; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s2, v5 2032; SI-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc 2033; SI-GISEL-NEXT: v_ldexp_f32_e32 v3, v8, v3 2034; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s3, v4 2035; SI-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc 2036; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s3, v5 2037; SI-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 2038; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 2039; SI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2040; SI-GISEL-NEXT: s_endpgm 2041; 2042; R600-LABEL: s_exp_v4f32: 2043; R600: ; %bb.0: 2044; R600-NEXT: ALU 98, @6, KC0[CB0:0-32], KC1[] 2045; R600-NEXT: ALU 95, @105, KC0[CB0:0-32], KC1[] 2046; R600-NEXT: ALU 24, @201, KC0[CB0:0-32], KC1[] 2047; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 2048; R600-NEXT: CF_END 2049; R600-NEXT: PAD 2050; R600-NEXT: ALU clause starting at 6: 2051; R600-NEXT: AND_INT * T0.W, KC0[3].Z, literal.x, 2052; R600-NEXT: -4096(nan), 0(0.000000e+00) 2053; R600-NEXT: ADD * T1.W, KC0[3].Z, -PV.W, 2054; R600-NEXT: MUL_IEEE T2.W, PV.W, literal.x, 2055; 
R600-NEXT: MUL_IEEE * T3.W, T0.W, literal.y, 2056; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 2057; R600-NEXT: RNDNE T4.W, PS, 2058; R600-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.W, BS:VEC_021/SCL_122 2059; R600-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 2060; R600-NEXT: MULADD_IEEE T0.W, T0.W, literal.x, PS, 2061; R600-NEXT: ADD * T1.W, T3.W, -PV.W, 2062; R600-NEXT: 967029397(3.122284e-04), 0(0.000000e+00) 2063; R600-NEXT: ADD T0.W, PS, PV.W, 2064; R600-NEXT: TRUNC * T1.W, T4.W, 2065; R600-NEXT: FLT_TO_INT T1.W, PS, 2066; R600-NEXT: EXP_IEEE * T0.X, PV.W, 2067; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x, 2068; R600-NEXT: MAX_INT T0.W, PV.W, literal.y, 2069; R600-NEXT: MIN_INT * T2.W, PV.W, literal.z, 2070; R600-NEXT: 209715200(1.972152e-31), -330(nan) 2071; R600-NEXT: 381(5.338947e-43), 0(0.000000e+00) 2072; R600-NEXT: ADD_INT T1.X, PS, literal.x, 2073; R600-NEXT: AND_INT T0.Y, KC0[4].X, literal.y, 2074; R600-NEXT: ADD_INT T1.Z, PV.W, literal.z, 2075; R600-NEXT: ADD_INT * T0.W, T1.W, literal.w, 2076; R600-NEXT: -254(nan), -4096(nan) 2077; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43) 2078; R600-NEXT: SETGT_UINT * T2.W, T1.W, literal.x, 2079; R600-NEXT: -229(nan), 0(0.000000e+00) 2080; R600-NEXT: ADD_INT T2.X, T1.W, literal.x, 2081; R600-NEXT: SETGT_UINT T1.Y, T1.W, literal.y, 2082; R600-NEXT: CNDE_INT T1.Z, PV.W, T1.Z, T0.W, 2083; R600-NEXT: SETGT_INT T0.W, T1.W, literal.x, 2084; R600-NEXT: ADD * T3.W, KC0[4].X, -T0.Y, 2085; R600-NEXT: -127(nan), 254(3.559298e-43) 2086; R600-NEXT: MUL_IEEE T3.X, PS, literal.x, 2087; R600-NEXT: MUL_IEEE T2.Y, T0.Y, literal.y, 2088; R600-NEXT: CNDE_INT T1.Z, PV.W, PV.Z, T1.W, 2089; R600-NEXT: CNDE_INT T4.W, PV.Y, PV.X, T1.X, 2090; R600-NEXT: SETGT_INT * T1.W, T1.W, literal.z, 2091; R600-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 2092; R600-NEXT: 127(1.779649e-43), 0(0.000000e+00) 2093; R600-NEXT: CNDE_INT T1.X, PS, PV.Z, PV.W, 2094; R600-NEXT: RNDNE T3.Y, PV.Y, 2095; R600-NEXT: MULADD_IEEE 
T1.Z, T3.W, literal.x, PV.X, 2096; R600-NEXT: MUL_IEEE T3.W, T0.Z, literal.y, 2097; R600-NEXT: MUL_IEEE * T4.W, T0.X, literal.z, 2098; R600-NEXT: 1069064192(1.442383e+00), 209715200(1.972152e-31) 2099; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) 2100; R600-NEXT: MUL_IEEE T2.X, PS, literal.x, 2101; R600-NEXT: CNDE_INT T4.Y, T2.W, PV.W, T0.Z, 2102; R600-NEXT: MULADD_IEEE T0.Z, T0.Y, literal.y, PV.Z, 2103; R600-NEXT: ADD T2.W, T2.Y, -PV.Y, BS:VEC_120/SCL_212 2104; R600-NEXT: AND_INT * T3.W, KC0[3].Y, literal.z, 2105; R600-NEXT: 2130706432(1.701412e+38), 967029397(3.122284e-04) 2106; R600-NEXT: -4096(nan), 0(0.000000e+00) 2107; R600-NEXT: MUL_IEEE T3.X, PS, literal.x, 2108; R600-NEXT: ADD T0.Y, PV.W, PV.Z, 2109; R600-NEXT: CNDE_INT T0.Z, T0.W, PV.Y, T0.X, BS:VEC_021/SCL_122 2110; R600-NEXT: CNDE_INT T0.W, T1.Y, T4.W, PV.X, 2111; R600-NEXT: LSHL * T2.W, T1.X, literal.y, 2112; R600-NEXT: 1069064192(1.442383e+00), 23(3.222986e-44) 2113; R600-NEXT: AND_INT T0.X, KC0[3].W, literal.x, 2114; R600-NEXT: TRUNC T1.Y, T3.Y, 2115; R600-NEXT: ADD_INT T1.Z, PS, literal.y, 2116; R600-NEXT: CNDE_INT T0.W, T1.W, PV.Z, PV.W, 2117; R600-NEXT: EXP_IEEE * T0.Y, PV.Y, 2118; R600-NEXT: -4096(nan), 1065353216(1.000000e+00) 2119; R600-NEXT: MUL_IEEE T1.X, PV.W, PV.Z, 2120; R600-NEXT: FLT_TO_INT T1.Y, PV.Y, 2121; R600-NEXT: MUL_IEEE T0.Z, PS, literal.x, 2122; R600-NEXT: ADD T0.W, KC0[3].W, -PV.X, 2123; R600-NEXT: RNDNE * T1.W, T3.X, 2124; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) 2125; R600-NEXT: SETGT T2.X, literal.x, KC0[3].Z, 2126; R600-NEXT: TRUNC T2.Y, PS, 2127; R600-NEXT: MUL_IEEE T1.Z, PV.W, literal.y, 2128; R600-NEXT: MUL_IEEE T2.W, PV.Z, literal.z, 2129; R600-NEXT: MAX_INT * T4.W, PV.Y, literal.w, 2130; R600-NEXT: -1026650416(-1.032789e+02), 967029397(3.122284e-04) 2131; R600-NEXT: 209715200(1.972152e-31), -330(nan) 2132; R600-NEXT: ADD T4.X, KC0[3].Y, -T3.W, 2133; R600-NEXT: ADD_INT T3.Y, PS, literal.x, 2134; R600-NEXT: ADD_INT T2.Z, T1.Y, literal.y, 2135; 
R600-NEXT: SETGT_UINT T4.W, T1.Y, literal.z, 2136; R600-NEXT: MIN_INT * T5.W, T1.Y, literal.w, 2137; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43) 2138; R600-NEXT: -229(nan), 381(5.338947e-43) 2139; R600-NEXT: ADD_INT T5.X, PS, literal.x, 2140; R600-NEXT: ADD_INT T4.Y, T1.Y, literal.y, 2141; R600-NEXT: SETGT_UINT T3.Z, T1.Y, literal.z, 2142; R600-NEXT: CNDE_INT T5.W, PV.W, PV.Y, PV.Z, 2143; R600-NEXT: SETGT_INT * T6.W, T1.Y, literal.y, 2144; R600-NEXT: -254(nan), -127(nan) 2145; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) 2146; R600-NEXT: MUL_IEEE T6.X, T0.Y, literal.x, 2147; R600-NEXT: CNDE_INT T3.Y, PS, PV.W, T1.Y, 2148; R600-NEXT: CNDE_INT * T2.Z, PV.Z, PV.Y, PV.X, 2149; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) 2150; R600-NEXT: ALU clause starting at 105: 2151; R600-NEXT: SETGT_INT T5.W, T1.Y, literal.x, 2152; R600-NEXT: MUL_IEEE * T7.W, T4.X, literal.y, 2153; R600-NEXT: 127(1.779649e-43), 967029397(3.122284e-04) 2154; R600-NEXT: MUL_IEEE T5.X, T0.X, literal.x, 2155; R600-NEXT: MULADD_IEEE T1.Y, T4.X, literal.x, PS, BS:VEC_120/SCL_212 2156; R600-NEXT: CNDE_INT T2.Z, PV.W, T3.Y, T2.Z, 2157; R600-NEXT: MUL_IEEE T7.W, T6.X, literal.y, BS:VEC_201 2158; R600-NEXT: CNDE_INT * T2.W, T4.W, T2.W, T0.Z, 2159; R600-NEXT: 1069064192(1.442383e+00), 2130706432(1.701412e+38) 2160; R600-NEXT: CNDE_INT T4.X, T6.W, PS, T0.Y, 2161; R600-NEXT: CNDE_INT T0.Y, T3.Z, T6.X, PV.W, 2162; R600-NEXT: LSHL T0.Z, PV.Z, literal.x, 2163; R600-NEXT: MULADD_IEEE T2.W, T3.W, literal.y, PV.Y, BS:VEC_201 2164; R600-NEXT: ADD * T1.W, T3.X, -T1.W, 2165; R600-NEXT: 23(3.222986e-44), 967029397(3.122284e-04) 2166; R600-NEXT: ADD T3.X, PS, PV.W, 2167; R600-NEXT: ADD_INT T1.Y, PV.Z, literal.x, 2168; R600-NEXT: CNDE_INT T0.Z, T5.W, PV.X, PV.Y, 2169; R600-NEXT: RNDNE T1.W, T5.X, 2170; R600-NEXT: MULADD_IEEE * T0.W, T0.W, literal.y, T1.Z, BS:VEC_021/SCL_122 2171; R600-NEXT: 1065353216(1.000000e+00), 1069064192(1.442383e+00) 2172; R600-NEXT: MULADD_IEEE T0.X, T0.X, literal.x, PS, 2173; 
R600-NEXT: ADD T0.Y, T5.X, -PV.W, BS:VEC_120/SCL_212 2174; R600-NEXT: MUL_IEEE T0.Z, PV.Z, PV.Y, 2175; R600-NEXT: SETGT T0.W, literal.y, KC0[4].X, 2176; R600-NEXT: EXP_IEEE * T1.Y, PV.X, 2177; R600-NEXT: 967029397(3.122284e-04), -1026650416(-1.032789e+02) 2178; R600-NEXT: CNDE T3.X, PV.W, PV.Z, 0.0, 2179; R600-NEXT: ADD T0.Y, PV.Y, PV.X, 2180; R600-NEXT: FLT_TO_INT T0.Z, T2.Y, 2181; R600-NEXT: TRUNC T0.W, T1.W, 2182; R600-NEXT: MUL_IEEE * T1.W, PS, literal.x, 2183; R600-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) 2184; R600-NEXT: SETGT T0.X, KC0[4].X, literal.x, 2185; R600-NEXT: MUL_IEEE T2.Y, PS, literal.y, 2186; R600-NEXT: FLT_TO_INT T1.Z, PV.W, 2187; R600-NEXT: MAX_INT T0.W, PV.Z, literal.z, 2188; R600-NEXT: EXP_IEEE * T0.Y, PV.Y, 2189; R600-NEXT: 1118925336(8.872284e+01), 209715200(1.972152e-31) 2190; R600-NEXT: -330(nan), 0(0.000000e+00) 2191; R600-NEXT: MUL_IEEE T4.X, T1.Y, literal.x, 2192; R600-NEXT: MUL_IEEE T3.Y, PS, literal.y, 2193; R600-NEXT: ADD_INT T2.Z, PV.W, literal.z, 2194; R600-NEXT: ADD_INT * T0.W, T0.Z, literal.w, 2195; R600-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) 2196; R600-NEXT: 204(2.858649e-43), 102(1.429324e-43) 2197; R600-NEXT: MAX_INT * T2.W, T1.Z, literal.x, 2198; R600-NEXT: -330(nan), 0(0.000000e+00) 2199; R600-NEXT: SETGT_UINT T5.X, T0.Z, literal.x, 2200; R600-NEXT: ADD_INT T4.Y, PV.W, literal.y, 2201; R600-NEXT: ADD_INT T3.Z, T1.Z, literal.z, BS:VEC_120/SCL_212 2202; R600-NEXT: SETGT_UINT T2.W, T1.Z, literal.x, BS:VEC_120/SCL_212 2203; R600-NEXT: MIN_INT * T3.W, T1.Z, literal.w, 2204; R600-NEXT: -229(nan), 204(2.858649e-43) 2205; R600-NEXT: 102(1.429324e-43), 381(5.338947e-43) 2206; R600-NEXT: ADD_INT T6.X, PS, literal.x, 2207; R600-NEXT: ADD_INT T5.Y, T1.Z, literal.y, 2208; R600-NEXT: SETGT_UINT T4.Z, T1.Z, literal.z, 2209; R600-NEXT: CNDE_INT T3.W, PV.W, PV.Y, PV.Z, 2210; R600-NEXT: SETGT_INT * T4.W, T1.Z, literal.y, 2211; R600-NEXT: -254(nan), -127(nan) 2212; R600-NEXT: 254(3.559298e-43), 0(0.000000e+00) 2213; 
R600-NEXT: CNDE_INT T7.X, PS, PV.W, T1.Z, BS:VEC_021/SCL_122 2214; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.Y, PV.X, 2215; R600-NEXT: SETGT_INT T1.Z, T1.Z, literal.x, BS:VEC_120/SCL_212 2216; R600-NEXT: CNDE_INT T0.W, T5.X, T2.Z, T0.W, BS:VEC_102/SCL_221 2217; R600-NEXT: SETGT_INT * T3.W, T0.Z, literal.y, 2218; R600-NEXT: 127(1.779649e-43), -127(nan) 2219; R600-NEXT: CNDE_INT T6.X, PS, PV.W, T0.Z, 2220; R600-NEXT: CNDE_INT T4.Y, PV.Z, PV.X, PV.Y, 2221; R600-NEXT: MIN_INT T2.Z, T0.Z, literal.x, 2222; R600-NEXT: MUL_IEEE T0.W, T3.Y, literal.y, 2223; R600-NEXT: MUL_IEEE * T5.W, T0.Y, literal.z, 2224; R600-NEXT: 381(5.338947e-43), 209715200(1.972152e-31) 2225; R600-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) 2226; R600-NEXT: MUL_IEEE T7.X, PS, literal.x, 2227; R600-NEXT: CNDE_INT T3.Y, T2.W, PV.W, T3.Y, 2228; R600-NEXT: ADD_INT T2.Z, PV.Z, literal.y, 2229; R600-NEXT: ADD_INT T0.W, T0.Z, literal.z, 2230; R600-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w, 2231; R600-NEXT: 2130706432(1.701412e+38), -254(nan) 2232; R600-NEXT: -127(nan), 254(3.559298e-43) 2233; R600-NEXT: CNDE_INT T8.X, PS, PV.W, PV.Z, 2234; R600-NEXT: SETGT_INT T5.Y, T0.Z, literal.x, 2235; R600-NEXT: CNDE_INT T0.Z, T4.W, PV.Y, T0.Y, BS:VEC_021/SCL_122 2236; R600-NEXT: CNDE_INT T0.W, T4.Z, T5.W, PV.X, BS:VEC_120/SCL_212 2237; R600-NEXT: LSHL * T4.W, T4.Y, literal.y, 2238; R600-NEXT: 127(1.779649e-43), 23(3.222986e-44) 2239; R600-NEXT: ADD_INT T7.X, PS, literal.x, 2240; R600-NEXT: CNDE_INT T0.Y, T1.Z, PV.Z, PV.W, 2241; R600-NEXT: CNDE_INT T0.Z, PV.Y, T6.X, PV.X, 2242; R600-NEXT: MUL_IEEE T0.W, T4.X, literal.y, 2243; R600-NEXT: CNDE_INT * T1.W, T5.X, T2.Y, T1.W, 2244; R600-NEXT: 1065353216(1.000000e+00), 2130706432(1.701412e+38) 2245; R600-NEXT: CNDE_INT T5.X, T3.W, PS, T1.Y, 2246; R600-NEXT: CNDE_INT * T1.Y, T2.W, T4.X, PV.W, BS:VEC_120/SCL_212 2247; R600-NEXT: ALU clause starting at 201: 2248; R600-NEXT: LSHL T0.Z, T0.Z, literal.x, 2249; R600-NEXT: MUL_IEEE T0.W, T0.Y, T7.X, 2250; R600-NEXT: SETGT * T1.W, 
literal.y, KC0[3].W, 2251; R600-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02) 2252; R600-NEXT: CNDE T4.X, PS, PV.W, 0.0, 2253; R600-NEXT: SETGT T0.Y, KC0[3].W, literal.x, 2254; R600-NEXT: ADD_INT T0.Z, PV.Z, literal.y, 2255; R600-NEXT: CNDE_INT T0.W, T5.Y, T5.X, T1.Y, BS:VEC_102/SCL_221 2256; R600-NEXT: CNDE * T1.W, T0.X, T3.X, literal.z, 2257; R600-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00) 2258; R600-NEXT: 2139095040(INF), 0(0.000000e+00) 2259; R600-NEXT: MUL_IEEE T0.X, PV.W, PV.Z, 2260; R600-NEXT: SETGT T2.Y, literal.x, KC0[3].Y, 2261; R600-NEXT: CNDE T1.Z, PV.Y, PV.X, literal.y, 2262; R600-NEXT: CNDE T0.W, T2.X, T1.X, 0.0, 2263; R600-NEXT: SETGT * T2.W, KC0[3].Z, literal.z, 2264; R600-NEXT: -1026650416(-1.032789e+02), 2139095040(INF) 2265; R600-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) 2266; R600-NEXT: CNDE T1.Y, PS, PV.W, literal.x, 2267; R600-NEXT: CNDE T0.W, PV.Y, PV.X, 0.0, 2268; R600-NEXT: SETGT * T2.W, KC0[3].Y, literal.y, 2269; R600-NEXT: 2139095040(INF), 1118925336(8.872284e+01) 2270; R600-NEXT: CNDE T1.X, PS, PV.W, literal.x, 2271; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.y, 2272; R600-NEXT: 2139095040(INF), 2(2.802597e-45) 2273; 2274; CM-LABEL: s_exp_v4f32: 2275; CM: ; %bb.0: 2276; CM-NEXT: ALU 97, @6, KC0[CB0:0-32], KC1[] 2277; CM-NEXT: ALU 97, @104, KC0[CB0:0-32], KC1[] 2278; CM-NEXT: ALU 35, @202, KC0[CB0:0-32], KC1[] 2279; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 2280; CM-NEXT: CF_END 2281; CM-NEXT: PAD 2282; CM-NEXT: ALU clause starting at 6: 2283; CM-NEXT: AND_INT * T0.W, KC0[3].Y, literal.x, 2284; CM-NEXT: -4096(nan), 0(0.000000e+00) 2285; CM-NEXT: ADD * T1.W, KC0[3].Y, -PV.W, 2286; CM-NEXT: MUL_IEEE T0.Y, PV.W, literal.x, 2287; CM-NEXT: MUL_IEEE T0.Z, T0.W, literal.y, 2288; CM-NEXT: AND_INT * T2.W, KC0[3].W, literal.z, 2289; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 2290; CM-NEXT: -4096(nan), 0(0.000000e+00) 2291; CM-NEXT: ADD T1.Y, KC0[3].W, -PV.W, 2292; CM-NEXT: RNDNE T1.Z, PV.Z, 2293; 
CM-NEXT: MULADD_IEEE * T1.W, T1.W, literal.x, PV.Y, 2294; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 2295; CM-NEXT: MULADD_IEEE T0.X, T0.W, literal.x, PV.W, 2296; CM-NEXT: ADD T0.Y, T0.Z, -PV.Z, 2297; CM-NEXT: MUL_IEEE T0.Z, T2.W, literal.y, BS:VEC_120/SCL_212 2298; CM-NEXT: MUL_IEEE * T0.W, PV.Y, literal.x, 2299; CM-NEXT: 967029397(3.122284e-04), 1069064192(1.442383e+00) 2300; CM-NEXT: TRUNC T1.X, T1.Z, 2301; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.W, 2302; CM-NEXT: RNDNE T1.Z, PV.Z, 2303; CM-NEXT: ADD * T0.W, PV.Y, PV.X, 2304; CM-NEXT: 1069064192(1.442383e+00), 0(0.000000e+00) 2305; CM-NEXT: EXP_IEEE T0.X, T0.W, 2306; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, 2307; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, 2308; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W, 2309; CM-NEXT: TRUNC T2.X, T1.Z, 2310; CM-NEXT: MULADD_IEEE T0.Y, T2.W, literal.x, T1.Y, 2311; CM-NEXT: FLT_TO_INT T2.Z, T1.X, 2312; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y, 2313; CM-NEXT: 967029397(3.122284e-04), 209715200(1.972152e-31) 2314; CM-NEXT: ADD T1.X, T0.Z, -T1.Z, 2315; CM-NEXT: MUL_IEEE T1.Y, PV.W, literal.x, 2316; CM-NEXT: MAX_INT T0.Z, PV.Z, literal.y, 2317; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.z, 2318; CM-NEXT: 209715200(1.972152e-31), -330(nan) 2319; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) 2320; CM-NEXT: ADD_INT T3.X, PV.W, literal.x, 2321; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y, 2322; CM-NEXT: ADD_INT T0.Z, T2.Z, literal.z, 2323; CM-NEXT: SETGT_UINT * T1.W, T2.Z, literal.w, 2324; CM-NEXT: -254(nan), 204(2.858649e-43) 2325; CM-NEXT: 102(1.429324e-43), -229(nan) 2326; CM-NEXT: ADD_INT T4.X, T2.Z, literal.x, 2327; CM-NEXT: SETGT_UINT T3.Y, T2.Z, literal.y, 2328; CM-NEXT: CNDE_INT T0.Z, PV.W, PV.Y, PV.Z, 2329; CM-NEXT: SETGT_INT * T2.W, T2.Z, literal.x, 2330; CM-NEXT: -127(nan), 254(3.559298e-43) 2331; CM-NEXT: MUL_IEEE T5.X, T0.X, literal.x, 2332; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T2.Z, 2333; CM-NEXT: CNDE_INT T0.Z, PV.Y, PV.X, T3.X, 2334; CM-NEXT: SETGT_INT * T3.W, T2.Z, literal.y, 
2335; CM-NEXT: 2130706432(1.701412e+38), 127(1.779649e-43) 2336; CM-NEXT: AND_INT T3.X, KC0[3].Z, literal.x, 2337; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Y, PV.Z, 2338; CM-NEXT: MUL_IEEE T0.Z, PV.X, literal.y, 2339; CM-NEXT: CNDE_INT * T0.W, T1.W, T1.Y, T0.W, 2340; CM-NEXT: -4096(nan), 2130706432(1.701412e+38) 2341; CM-NEXT: CNDE_INT T0.X, T2.W, PV.W, T0.X, 2342; CM-NEXT: CNDE_INT T1.Y, T3.Y, T5.X, PV.Z, 2343; CM-NEXT: LSHL T0.Z, PV.Y, literal.x, 2344; CM-NEXT: MUL_IEEE * T0.W, PV.X, literal.y, 2345; CM-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00) 2346; CM-NEXT: RNDNE T4.X, PV.W, 2347; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.x, 2348; CM-NEXT: CNDE_INT T0.Z, T3.W, PV.X, PV.Y, 2349; CM-NEXT: ADD * T1.W, T1.X, T0.Y, 2350; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) 2351; CM-NEXT: EXP_IEEE T0.X, T1.W, 2352; CM-NEXT: EXP_IEEE T0.Y (MASKED), T1.W, 2353; CM-NEXT: EXP_IEEE T0.Z (MASKED), T1.W, 2354; CM-NEXT: EXP_IEEE * T0.W (MASKED), T1.W, 2355; CM-NEXT: MUL_IEEE T1.X, T0.Z, T2.Y, 2356; CM-NEXT: TRUNC T0.Y, T4.X, 2357; CM-NEXT: FLT_TO_INT T0.Z, T2.X, BS:VEC_120/SCL_212 2358; CM-NEXT: MUL_IEEE * T1.W, PV.X, literal.x, 2359; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) 2360; CM-NEXT: MUL_IEEE T2.X, PV.W, literal.x, 2361; CM-NEXT: MUL_IEEE T1.Y, T0.X, literal.y, 2362; CM-NEXT: MAX_INT T1.Z, PV.Z, literal.z, 2363; CM-NEXT: MIN_INT * T2.W, PV.Z, literal.w, 2364; CM-NEXT: 209715200(1.972152e-31), 2130706432(1.701412e+38) 2365; CM-NEXT: -330(nan), 381(5.338947e-43) 2366; CM-NEXT: ADD_INT T5.X, PV.W, literal.x, 2367; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y, 2368; CM-NEXT: ADD_INT T1.Z, T0.Z, literal.z, 2369; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w, 2370; CM-NEXT: -254(nan), 204(2.858649e-43) 2371; CM-NEXT: 102(1.429324e-43), -229(nan) 2372; CM-NEXT: ADD_INT T6.X, T0.Z, literal.x, 2373; CM-NEXT: SETGT_UINT T3.Y, T0.Z, literal.y, 2374; CM-NEXT: CNDE_INT T1.Z, PV.W, PV.Y, PV.Z, 2375; CM-NEXT: SETGT_INT * T3.W, T0.Z, literal.x, 2376; CM-NEXT: -127(nan), 254(3.559298e-43) 
2377; CM-NEXT: CNDE_INT T7.X, PV.W, PV.Z, T0.Z, 2378; CM-NEXT: CNDE_INT T2.Y, PV.Y, PV.X, T5.X, 2379; CM-NEXT: SETGT_INT * T0.Z, T0.Z, literal.x, 2380; CM-NEXT: 127(1.779649e-43), 0(0.000000e+00) 2381; CM-NEXT: ALU clause starting at 104: 2382; CM-NEXT: ADD * T4.W, KC0[3].Z, -T3.X, 2383; CM-NEXT: MUL_IEEE T5.X, PV.W, literal.x, 2384; CM-NEXT: CNDE_INT T2.Y, T0.Z, T7.X, T2.Y, 2385; CM-NEXT: MUL_IEEE T1.Z, T1.Y, literal.y, 2386; CM-NEXT: CNDE_INT * T1.W, T2.W, T2.X, T1.W, BS:VEC_021/SCL_122 2387; CM-NEXT: 967029397(3.122284e-04), 2130706432(1.701412e+38) 2388; CM-NEXT: CNDE_INT T0.X, T3.W, PV.W, T0.X, 2389; CM-NEXT: CNDE_INT T1.Y, T3.Y, T1.Y, PV.Z, 2390; CM-NEXT: LSHL T1.Z, PV.Y, literal.x, 2391; CM-NEXT: MULADD_IEEE * T1.W, T4.W, literal.y, PV.X, BS:VEC_120/SCL_212 2392; CM-NEXT: 23(3.222986e-44), 1069064192(1.442383e+00) 2393; CM-NEXT: MULADD_IEEE T2.X, T3.X, literal.x, PV.W, 2394; CM-NEXT: ADD T2.Y, T0.W, -T4.X, 2395; CM-NEXT: ADD_INT T1.Z, PV.Z, literal.y, 2396; CM-NEXT: CNDE_INT * T0.W, T0.Z, PV.X, PV.Y, 2397; CM-NEXT: 967029397(3.122284e-04), 1065353216(1.000000e+00) 2398; CM-NEXT: AND_INT T0.X, KC0[4].X, literal.x, 2399; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z, 2400; CM-NEXT: SETGT T0.Z, literal.y, KC0[3].W, 2401; CM-NEXT: ADD * T0.W, PV.Y, PV.X, 2402; CM-NEXT: -4096(nan), -1026650416(-1.032789e+02) 2403; CM-NEXT: EXP_IEEE T0.X (MASKED), T0.W, 2404; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W, 2405; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W, 2406; CM-NEXT: EXP_IEEE * T0.W, T0.W, 2407; CM-NEXT: CNDE T2.X, T0.Z, T1.Y, 0.0, 2408; CM-NEXT: ADD T1.Y, KC0[4].X, -T0.X, 2409; CM-NEXT: FLT_TO_INT T0.Z, T0.Y, 2410; CM-NEXT: MUL_IEEE * T1.W, PV.W, literal.x, 2411; CM-NEXT: 209715200(1.972152e-31), 0(0.000000e+00) 2412; CM-NEXT: MUL_IEEE T3.X, PV.W, literal.x, 2413; CM-NEXT: SETGT_UINT T0.Y, PV.Z, literal.y, 2414; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.z, 2415; CM-NEXT: MUL_IEEE * T2.W, T0.X, literal.w, 2416; CM-NEXT: 209715200(1.972152e-31), -229(nan) 2417; CM-NEXT: 
967029397(3.122284e-04), 1069064192(1.442383e+00) 2418; CM-NEXT: RNDNE T4.X, PV.W, 2419; CM-NEXT: MULADD_IEEE T1.Y, T1.Y, literal.x, PV.Z, 2420; CM-NEXT: CNDE_INT T1.Z, PV.Y, PV.X, T1.W, 2421; CM-NEXT: SETGT_INT * T1.W, T0.Z, literal.y, 2422; CM-NEXT: 1069064192(1.442383e+00), -127(nan) 2423; CM-NEXT: CNDE_INT T3.X, PV.W, PV.Z, T0.W, 2424; CM-NEXT: MULADD_IEEE T1.Y, T0.X, literal.x, PV.Y, 2425; CM-NEXT: ADD T1.Z, T2.W, -PV.X, 2426; CM-NEXT: MAX_INT * T2.W, T0.Z, literal.y, 2427; CM-NEXT: 967029397(3.122284e-04), -330(nan) 2428; CM-NEXT: ADD_INT T0.X, PV.W, literal.x, 2429; CM-NEXT: ADD_INT T2.Y, T0.Z, literal.y, 2430; CM-NEXT: TRUNC T2.Z, T4.X, 2431; CM-NEXT: ADD * T2.W, PV.Z, PV.Y, 2432; CM-NEXT: 204(2.858649e-43), 102(1.429324e-43) 2433; CM-NEXT: EXP_IEEE T1.X (MASKED), T2.W, 2434; CM-NEXT: EXP_IEEE T1.Y, T2.W, 2435; CM-NEXT: EXP_IEEE T1.Z (MASKED), T2.W, 2436; CM-NEXT: EXP_IEEE * T1.W (MASKED), T2.W, 2437; CM-NEXT: MUL_IEEE T4.X, T0.W, literal.x, 2438; CM-NEXT: FLT_TO_INT T3.Y, T2.Z, 2439; CM-NEXT: MUL_IEEE T1.Z, PV.Y, literal.y, 2440; CM-NEXT: CNDE_INT * T0.W, T0.Y, T0.X, T2.Y, 2441; CM-NEXT: 2130706432(1.701412e+38), 209715200(1.972152e-31) 2442; CM-NEXT: CNDE_INT T0.X, T1.W, PV.W, T0.Z, 2443; CM-NEXT: MUL_IEEE T0.Y, PV.Z, literal.x, 2444; CM-NEXT: MAX_INT T2.Z, PV.Y, literal.y, 2445; CM-NEXT: MIN_INT * T0.W, PV.Y, literal.z, 2446; CM-NEXT: 209715200(1.972152e-31), -330(nan) 2447; CM-NEXT: 381(5.338947e-43), 0(0.000000e+00) 2448; CM-NEXT: ADD_INT T5.X, PV.W, literal.x, 2449; CM-NEXT: ADD_INT T2.Y, PV.Z, literal.y, 2450; CM-NEXT: ADD_INT T2.Z, T3.Y, literal.z, 2451; CM-NEXT: SETGT_UINT * T0.W, T3.Y, literal.w, 2452; CM-NEXT: -254(nan), 204(2.858649e-43) 2453; CM-NEXT: 102(1.429324e-43), -229(nan) 2454; CM-NEXT: ADD_INT T6.X, T3.Y, literal.x, 2455; CM-NEXT: SETGT_UINT T4.Y, T3.Y, literal.y, 2456; CM-NEXT: CNDE_INT T2.Z, PV.W, PV.Y, PV.Z, 2457; CM-NEXT: SETGT_INT * T1.W, T3.Y, literal.x, 2458; CM-NEXT: -127(nan), 254(3.559298e-43) 2459; CM-NEXT: MUL_IEEE T7.X, 
T1.Y, literal.x, 2460; CM-NEXT: CNDE_INT T2.Y, PV.W, PV.Z, T3.Y, 2461; CM-NEXT: CNDE_INT T2.Z, PV.Y, PV.X, T5.X, 2462; CM-NEXT: MIN_INT * T2.W, T0.Z, literal.y, 2463; CM-NEXT: 2130706432(1.701412e+38), 381(5.338947e-43) 2464; CM-NEXT: SETGT_INT T5.X, T3.Y, literal.x, 2465; CM-NEXT: ADD_INT T3.Y, PV.W, literal.y, 2466; CM-NEXT: ADD_INT T3.Z, T0.Z, literal.z, 2467; CM-NEXT: SETGT_UINT * T2.W, T0.Z, literal.w, 2468; CM-NEXT: 127(1.779649e-43), -254(nan) 2469; CM-NEXT: -127(nan), 254(3.559298e-43) 2470; CM-NEXT: CNDE_INT T6.X, PV.W, PV.Z, PV.Y, 2471; CM-NEXT: CNDE_INT T2.Y, PV.X, T2.Y, T2.Z, 2472; CM-NEXT: MUL_IEEE T2.Z, T7.X, literal.x, 2473; CM-NEXT: CNDE_INT * T0.W, T0.W, T0.Y, T1.Z, BS:VEC_021/SCL_122 2474; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) 2475; CM-NEXT: SETGT_INT T8.X, T0.Z, literal.x, 2476; CM-NEXT: CNDE_INT T0.Y, T1.W, PV.W, T1.Y, 2477; CM-NEXT: CNDE_INT T0.Z, T4.Y, T7.X, PV.Z, 2478; CM-NEXT: LSHL * T0.W, PV.Y, literal.y, 2479; CM-NEXT: 127(1.779649e-43), 23(3.222986e-44) 2480; CM-NEXT: ALU clause starting at 202: 2481; CM-NEXT: ADD_INT T7.X, T0.W, literal.x, 2482; CM-NEXT: CNDE_INT * T0.Y, T5.X, T0.Y, T0.Z, 2483; CM-NEXT: 1065353216(1.000000e+00), 0(0.000000e+00) 2484; CM-NEXT: CNDE_INT * T0.Z, T8.X, T0.X, T6.X, 2485; CM-NEXT: MUL_IEEE * T0.W, T4.X, literal.x, 2486; CM-NEXT: 2130706432(1.701412e+38), 0(0.000000e+00) 2487; CM-NEXT: CNDE_INT T0.X, T2.W, T4.X, PV.W, 2488; CM-NEXT: LSHL T1.Y, T0.Z, literal.x, 2489; CM-NEXT: MUL_IEEE T0.Z, T0.Y, T7.X, BS:VEC_021/SCL_122 2490; CM-NEXT: SETGT * T0.W, literal.y, KC0[4].X, 2491; CM-NEXT: 23(3.222986e-44), -1026650416(-1.032789e+02) 2492; CM-NEXT: CNDE T4.X, PV.W, PV.Z, 0.0, 2493; CM-NEXT: SETGT T0.Y, KC0[4].X, literal.x, 2494; CM-NEXT: ADD_INT T0.Z, PV.Y, literal.y, 2495; CM-NEXT: CNDE_INT * T0.W, T8.X, T3.X, PV.X, 2496; CM-NEXT: 1118925336(8.872284e+01), 1065353216(1.000000e+00) 2497; CM-NEXT: SETGT T0.X, KC0[3].W, literal.x, 2498; CM-NEXT: MUL_IEEE T1.Y, PV.W, PV.Z, 2499; CM-NEXT: SETGT T0.Z, 
literal.y, KC0[3].Z, 2500; CM-NEXT: CNDE * T0.W, PV.Y, PV.X, literal.z, 2501; CM-NEXT: 1118925336(8.872284e+01), -1026650416(-1.032789e+02) 2502; CM-NEXT: 2139095040(INF), 0(0.000000e+00) 2503; CM-NEXT: SETGT T3.X, literal.x, KC0[3].Y, 2504; CM-NEXT: CNDE T0.Y, PV.Z, PV.Y, 0.0, 2505; CM-NEXT: CNDE T0.Z, PV.X, T2.X, literal.y, 2506; CM-NEXT: SETGT * T1.W, KC0[3].Z, literal.z, 2507; CM-NEXT: -1026650416(-1.032789e+02), 2139095040(INF) 2508; CM-NEXT: 1118925336(8.872284e+01), 0(0.000000e+00) 2509; CM-NEXT: CNDE T0.Y, PV.W, PV.Y, literal.x, 2510; CM-NEXT: CNDE T1.Z, PV.X, T1.X, 0.0, 2511; CM-NEXT: SETGT * T1.W, KC0[3].Y, literal.y, 2512; CM-NEXT: 2139095040(INF), 1118925336(8.872284e+01) 2513; CM-NEXT: CNDE * T0.X, PV.W, PV.Z, literal.x, 2514; CM-NEXT: 2139095040(INF), 0(0.000000e+00) 2515; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 2516; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2517 %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %in) 2518 store <4 x float> %result, ptr addrspace(1) %out 2519 ret void 2520} 2521 2522define float @v_exp_f32(float %in) { 2523; VI-SDAG-LABEL: v_exp_f32: 2524; VI-SDAG: ; %bb.0: 2525; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2526; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 2527; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 2528; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 2529; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 2530; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 2531; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 2532; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 2533; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 2534; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 2535; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 2536; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 2537; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 2538; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 2539; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 2540; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 2541; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 2542; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 2543; 
VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2544; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2545; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 2546; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2547; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2548; 2549; VI-GISEL-LABEL: v_exp_f32: 2550; VI-GISEL: ; %bb.0: 2551; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2552; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 2553; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 2554; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 2555; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 2556; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 2557; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 2558; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 2559; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 2560; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 2561; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 2562; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 2563; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 2564; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 2565; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2566; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 2567; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2568; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2569; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2570; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 2571; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 2572; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2573; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2574; 2575; GFX900-SDAG-LABEL: v_exp_f32: 2576; GFX900-SDAG: ; %bb.0: 2577; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2578; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 2579; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 2580; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 2581; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 2582; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 2583; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 2584; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 2585; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 2586; GFX900-SDAG-NEXT: 
v_exp_f32_e32 v1, v1 2587; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 2588; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 2589; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 2590; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 2591; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 2592; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2593; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2594; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 2595; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2596; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2597; 2598; GFX900-GISEL-LABEL: v_exp_f32: 2599; GFX900-GISEL: ; %bb.0: 2600; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2601; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 2602; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 2603; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 2604; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 2605; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 2606; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 2607; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 2608; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 2609; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 2610; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 2611; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2612; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 2613; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2614; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2615; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2616; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 2617; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 2618; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2619; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2620; 2621; SI-SDAG-LABEL: v_exp_f32: 2622; SI-SDAG: ; %bb.0: 2623; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2624; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 2625; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 2626; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 2627; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 2628; SI-SDAG-NEXT: 
v_fma_f32 v1, v0, s4, -v1 2629; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 2630; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 2631; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 2632; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 2633; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 2634; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 2635; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 2636; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 2637; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 2638; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2639; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2640; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 2641; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2642; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2643; 2644; SI-GISEL-LABEL: v_exp_f32: 2645; SI-GISEL: ; %bb.0: 2646; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2647; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 2648; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 2649; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 2650; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 2651; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 2652; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 2653; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 2654; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 2655; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 2656; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 2657; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2658; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 2659; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2660; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2661; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2662; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 2663; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 2664; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2665; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2666; 2667; R600-LABEL: v_exp_f32: 2668; R600: ; %bb.0: 2669; R600-NEXT: CF_END 2670; R600-NEXT: PAD 2671; 2672; CM-LABEL: v_exp_f32: 2673; CM: ; %bb.0: 2674; CM-NEXT: CF_END 2675; CM-NEXT: PAD 2676 %result = call float @llvm.exp.f32(float %in) 2677 ret float %result 2678} 
; v_exp_fabs_f32 - non-kernel function that returns llvm.exp.f32 applied to
; llvm.fabs.f32 of its float argument %in.
; The per-target check lines for this function are autogenerated (see the note
; on utils/update_llc_test_checks.py at the top of this file); regenerate them
; with that script instead of editing them by hand.
; In the amdgcn check lines the fabs shows up as |v0| source operands; the
; r600 and cayman check lines for this function contain only CF_END and PAD.
2679 2680define float @v_exp_fabs_f32(float %in) { 2681; VI-SDAG-LABEL: v_exp_fabs_f32: 2682; VI-SDAG: ; %bb.0: 2683; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2684; VI-SDAG-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 2685; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 2686; VI-SDAG-NEXT: v_sub_f32_e64 v4, |v0|, v1 2687; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 2688; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 2689; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 2690; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 2691; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 2692; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 2693; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 2694; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 2695; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 2696; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 2697; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 2698; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 2699; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 2700; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 2701; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 2702; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2703; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2704; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 2705; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2706; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2707; 2708; VI-GISEL-LABEL: v_exp_fabs_f32: 2709; VI-GISEL: ; %bb.0: 2710; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2711; VI-GISEL-NEXT: v_and_b32_e32 v1, 0x7fffffff, v0 2712; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 2713; VI-GISEL-NEXT: v_sub_f32_e64 v2, |v0|, v1 2714; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 2715; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 2716; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 2717; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 2718; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 2719; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 2720; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 2721; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 2722; VI-GISEL-NEXT: v_add_f32_e32 
v1, v3, v1 2723; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 2724; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 2725; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2726; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 2727; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2728; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 2729; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2730; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 2731; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 2732; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2733; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2734; 2735; GFX900-SDAG-LABEL: v_exp_fabs_f32: 2736; GFX900-SDAG: ; %bb.0: 2737; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2738; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 2739; GFX900-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 2740; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 2741; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 2742; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 2743; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 2744; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 2745; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 2746; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 2747; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 2748; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 2749; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 2750; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 2751; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 2752; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2753; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2754; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 2755; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2756; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2757; 2758; GFX900-GISEL-LABEL: v_exp_fabs_f32: 2759; GFX900-GISEL: ; %bb.0: 2760; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2761; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 2762; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, |v0|, v1 2763; GFX900-GISEL-NEXT: v_fma_f32 v1, |v0|, v1, -v2 2764; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 
0x32a5705f 2765; GFX900-GISEL-NEXT: v_fma_f32 v1, |v0|, v3, v1 2766; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 2767; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 2768; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 2769; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 2770; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 2771; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2772; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 2773; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2774; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 2775; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2776; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 2777; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 2778; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2779; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2780; 2781; SI-SDAG-LABEL: v_exp_fabs_f32: 2782; SI-SDAG: ; %bb.0: 2783; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2784; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 2785; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 2786; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 2787; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 2788; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 2789; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 2790; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 2791; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 2792; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 2793; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 2794; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 2795; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 2796; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 2797; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 2798; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2799; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2800; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 2801; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2802; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2803; 2804; SI-GISEL-LABEL: v_exp_fabs_f32: 2805; SI-GISEL: ; %bb.0: 2806; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2807; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 2808; 
SI-GISEL-NEXT: v_mul_f32_e64 v2, |v0|, v1 2809; SI-GISEL-NEXT: v_fma_f32 v1, |v0|, v1, -v2 2810; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 2811; SI-GISEL-NEXT: v_fma_f32 v1, |v0|, v3, v1 2812; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 2813; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 2814; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 2815; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 2816; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 2817; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2818; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 2819; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2820; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 2821; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2822; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 2823; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 2824; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2825; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2826; 2827; R600-LABEL: v_exp_fabs_f32: 2828; R600: ; %bb.0: 2829; R600-NEXT: CF_END 2830; R600-NEXT: PAD 2831; 2832; CM-LABEL: v_exp_fabs_f32: 2833; CM: ; %bb.0: 2834; CM-NEXT: CF_END 2835; CM-NEXT: PAD 2836 %fabs = call float @llvm.fabs.f32(float %in) 2837 %result = call float @llvm.exp.f32(float %fabs) 2838 ret float %result 2839} 2840 2841define float @v_exp_fneg_fabs_f32(float %in) { 2842; VI-SDAG-LABEL: v_exp_fneg_fabs_f32: 2843; VI-SDAG: ; %bb.0: 2844; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2845; VI-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v0 2846; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 2847; VI-SDAG-NEXT: v_sub_f32_e64 v4, -|v0|, v1 2848; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 2849; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 2850; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 2851; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 2852; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 2853; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 2854; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 2855; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 2856; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 2857; VI-SDAG-NEXT: 
v_exp_f32_e32 v1, v1 2858; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 2859; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 2860; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 2861; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 2862; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 2863; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2864; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2865; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 2866; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2867; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 2868; 2869; VI-GISEL-LABEL: v_exp_fneg_fabs_f32: 2870; VI-GISEL: ; %bb.0: 2871; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2872; VI-GISEL-NEXT: v_or_b32_e32 v1, 0x80000000, v0 2873; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 2874; VI-GISEL-NEXT: v_sub_f32_e64 v2, -|v0|, v1 2875; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 2876; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 2877; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 2878; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 2879; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 2880; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 2881; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 2882; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 2883; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 2884; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 2885; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 2886; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2887; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 2888; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2889; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 2890; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2891; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 2892; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 2893; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2894; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 2895; 2896; GFX900-SDAG-LABEL: v_exp_fneg_fabs_f32: 2897; GFX900-SDAG: ; %bb.0: 2898; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2899; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b 2900; GFX900-SDAG-NEXT: 
v_mul_f32_e64 v1, |v0|, s4 2901; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 2902; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 2903; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 2904; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f 2905; GFX900-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 2906; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 2907; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 2908; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 2909; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 2910; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 2911; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 2912; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 2913; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2914; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2915; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 2916; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2917; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 2918; 2919; GFX900-GISEL-LABEL: v_exp_fneg_fabs_f32: 2920; GFX900-GISEL: ; %bb.0: 2921; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2922; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 2923; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, -|v0|, v1 2924; GFX900-GISEL-NEXT: v_fma_f32 v1, -|v0|, v1, -v2 2925; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 2926; GFX900-GISEL-NEXT: v_fma_f32 v1, -|v0|, v3, v1 2927; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 2928; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 2929; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 2930; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 2931; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 2932; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2933; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 2934; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2935; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 2936; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2937; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 2938; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -|v0|, v2 2939; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2940; 
GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 2941; 2942; SI-SDAG-LABEL: v_exp_fneg_fabs_f32: 2943; SI-SDAG: ; %bb.0: 2944; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2945; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b 2946; SI-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 2947; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 2948; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 2949; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, -v1 2950; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f 2951; SI-SDAG-NEXT: v_fma_f32 v1, |v0|, s4, v1 2952; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 2953; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 2954; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 2955; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 2956; SI-SDAG-NEXT: v_cmp_ngt_f32_e64 vcc, |v0|, s4 2957; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 2958; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 2959; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 2960; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 2961; SI-SDAG-NEXT: v_cmp_nlt_f32_e64 vcc, |v0|, s4 2962; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2963; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 2964; 2965; SI-GISEL-LABEL: v_exp_fneg_fabs_f32: 2966; SI-GISEL: ; %bb.0: 2967; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2968; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 2969; SI-GISEL-NEXT: v_mul_f32_e64 v2, -|v0|, v1 2970; SI-GISEL-NEXT: v_fma_f32 v1, -|v0|, v1, -v2 2971; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 2972; SI-GISEL-NEXT: v_fma_f32 v1, -|v0|, v3, v1 2973; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 2974; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 2975; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 2976; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 2977; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 2978; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 2979; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 2980; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 2981; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -|v0|, v2 2982; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 2983; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 2984; SI-GISEL-NEXT: 
v_cmp_gt_f32_e64 vcc, -|v0|, v2 2985; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 2986; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 2987; 2988; R600-LABEL: v_exp_fneg_fabs_f32: 2989; R600: ; %bb.0: 2990; R600-NEXT: CF_END 2991; R600-NEXT: PAD 2992; 2993; CM-LABEL: v_exp_fneg_fabs_f32: 2994; CM: ; %bb.0: 2995; CM-NEXT: CF_END 2996; CM-NEXT: PAD 2997 %fabs = call float @llvm.fabs.f32(float %in) 2998 %fneg.fabs = fneg float %fabs 2999 %result = call float @llvm.exp.f32(float %fneg.fabs) 3000 ret float %result 3001} 3002 3003define float @v_exp_fneg_f32(float %in) { 3004; VI-SDAG-LABEL: v_exp_fneg_f32: 3005; VI-SDAG: ; %bb.0: 3006; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3007; VI-SDAG-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 3008; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v1 3009; VI-SDAG-NEXT: v_sub_f32_e64 v4, -v0, v1 3010; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 3011; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 3012; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 3013; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 3014; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 3015; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3016; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 3017; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 3018; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 3019; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3020; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 3021; VI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 3022; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 3023; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 3024; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3025; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3026; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 3027; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3028; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3029; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3030; 3031; VI-GISEL-LABEL: v_exp_fneg_f32: 3032; VI-GISEL: ; %bb.0: 3033; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3034; VI-GISEL-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 3035; VI-GISEL-NEXT: 
v_and_b32_e32 v1, 0xfffff000, v1 3036; VI-GISEL-NEXT: v_sub_f32_e64 v2, -v0, v1 3037; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 3038; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 3039; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 3040; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 3041; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3042; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 3043; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 3044; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 3045; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 3046; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 3047; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 3048; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3049; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3050; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3051; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 3052; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3053; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 3054; VI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 3055; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3056; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3057; 3058; GFX900-SDAG-LABEL: v_exp_fneg_f32: 3059; GFX900-SDAG: ; %bb.0: 3060; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3061; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0 3062; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b 3063; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 3064; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3065; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3066; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f 3067; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3068; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3069; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 3070; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3071; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 3072; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 3073; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2b17218 3074; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3075; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3076; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 
0x7f800000 3077; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3078; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3079; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3080; 3081; GFX900-GISEL-LABEL: v_exp_fneg_f32: 3082; GFX900-GISEL: ; %bb.0: 3083; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3084; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 3085; GFX900-GISEL-NEXT: v_mul_f32_e64 v2, -v0, v1 3086; GFX900-GISEL-NEXT: v_fma_f32 v1, -v0, v1, -v2 3087; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 3088; GFX900-GISEL-NEXT: v_fma_f32 v1, -v0, v3, v1 3089; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 3090; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 3091; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 3092; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 3093; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 3094; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3095; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3096; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3097; GFX900-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 3098; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3099; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 3100; GFX900-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 3101; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3102; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3103; 3104; SI-SDAG-LABEL: v_exp_fneg_f32: 3105; SI-SDAG: ; %bb.0: 3106; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3107; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0xbfb8aa3b, v0 3108; SI-SDAG-NEXT: s_mov_b32 s4, 0xbfb8aa3b 3109; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 3110; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3111; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3112; SI-SDAG-NEXT: s_mov_b32 s4, 0xb2a5705f 3113; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3114; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3115; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3116; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3117; SI-SDAG-NEXT: s_mov_b32 s4, 0x42ce8ed0 3118; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 3119; SI-SDAG-NEXT: 
s_mov_b32 s4, 0xc2b17218 3120; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 3121; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3122; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 3123; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3124; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3125; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3126; 3127; SI-GISEL-LABEL: v_exp_fneg_f32: 3128; SI-GISEL: ; %bb.0: 3129; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3130; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 3131; SI-GISEL-NEXT: v_mul_f32_e64 v2, -v0, v1 3132; SI-GISEL-NEXT: v_fma_f32 v1, -v0, v1, -v2 3133; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 3134; SI-GISEL-NEXT: v_fma_f32 v1, -v0, v3, v1 3135; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 3136; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 3137; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 3138; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 3139; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 3140; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3141; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 3142; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3143; SI-GISEL-NEXT: v_cmp_lt_f32_e64 s[4:5], -v0, v2 3144; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3145; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, s[4:5] 3146; SI-GISEL-NEXT: v_cmp_gt_f32_e64 vcc, -v0, v2 3147; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3148; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3149; 3150; R600-LABEL: v_exp_fneg_f32: 3151; R600: ; %bb.0: 3152; R600-NEXT: CF_END 3153; R600-NEXT: PAD 3154; 3155; CM-LABEL: v_exp_fneg_f32: 3156; CM: ; %bb.0: 3157; CM-NEXT: CF_END 3158; CM-NEXT: PAD 3159 %fneg = fneg float %in 3160 %result = call float @llvm.exp.f32(float %fneg) 3161 ret float %result 3162} 3163 3164define float @v_exp_f32_fast(float %in) { 3165; GCN-SDAG-LABEL: v_exp_f32_fast: 3166; GCN-SDAG: ; %bb.0: 3167; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3168; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3169; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3170; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 
vcc, s4, v0 3171; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3172; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3173; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 3174; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3175; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3176; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 3177; 3178; GCN-GISEL-LABEL: v_exp_f32_fast: 3179; GCN-GISEL: ; %bb.0: 3180; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3181; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3182; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3183; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3184; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3185; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3186; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 3187; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3188; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3189; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 3190; 3191; SI-SDAG-LABEL: v_exp_f32_fast: 3192; SI-SDAG: ; %bb.0: 3193; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3194; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3195; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3196; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3197; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3198; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3199; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 3200; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3201; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3202; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3203; 3204; SI-GISEL-LABEL: v_exp_f32_fast: 3205; SI-GISEL: ; %bb.0: 3206; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3207; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3208; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3209; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3210; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3211; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3212; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 3213; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3214; SI-GISEL-NEXT: 
v_cndmask_b32_e32 v0, v0, v1, vcc 3215; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3216; 3217; R600-LABEL: v_exp_f32_fast: 3218; R600: ; %bb.0: 3219; R600-NEXT: CF_END 3220; R600-NEXT: PAD 3221; 3222; CM-LABEL: v_exp_f32_fast: 3223; CM: ; %bb.0: 3224; CM-NEXT: CF_END 3225; CM-NEXT: PAD 3226 %result = call fast float @llvm.exp.f32(float %in) 3227 ret float %result 3228} 3229 3230define float @v_exp_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { 3231; GCN-SDAG-LABEL: v_exp_f32_unsafe_math_attr: 3232; GCN-SDAG: ; %bb.0: 3233; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3234; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3235; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3236; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3237; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3238; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3239; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 3240; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3241; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3242; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 3243; 3244; GCN-GISEL-LABEL: v_exp_f32_unsafe_math_attr: 3245; GCN-GISEL: ; %bb.0: 3246; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3247; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3248; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3249; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3250; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3251; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3252; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 3253; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3254; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3255; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 3256; 3257; SI-SDAG-LABEL: v_exp_f32_unsafe_math_attr: 3258; SI-SDAG: ; %bb.0: 3259; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3260; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3261; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3262; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3263; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3264; 
SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3265; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 3266; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3267; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3268; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3269; 3270; SI-GISEL-LABEL: v_exp_f32_unsafe_math_attr: 3271; SI-GISEL: ; %bb.0: 3272; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3273; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3274; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3275; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3276; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3277; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3278; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 3279; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3280; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3281; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3282; 3283; R600-LABEL: v_exp_f32_unsafe_math_attr: 3284; R600: ; %bb.0: 3285; R600-NEXT: CF_END 3286; R600-NEXT: PAD 3287; 3288; CM-LABEL: v_exp_f32_unsafe_math_attr: 3289; CM: ; %bb.0: 3290; CM-NEXT: CF_END 3291; CM-NEXT: PAD 3292 %result = call float @llvm.exp.f32(float %in) 3293 ret float %result 3294} 3295 3296define float @v_exp_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { 3297; GCN-SDAG-LABEL: v_exp_f32_approx_fn_attr: 3298; GCN-SDAG: ; %bb.0: 3299; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3300; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3301; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3302; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3303; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3304; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3305; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 3306; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3307; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3308; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 3309; 3310; GCN-GISEL-LABEL: v_exp_f32_approx_fn_attr: 3311; GCN-GISEL: ; %bb.0: 3312; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3313; GCN-GISEL-NEXT: v_mov_b32_e32 
v1, 0xc2aeac50 3314; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3315; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3316; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3317; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3318; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 3319; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3320; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3321; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 3322; 3323; SI-SDAG-LABEL: v_exp_f32_approx_fn_attr: 3324; SI-SDAG: ; %bb.0: 3325; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3326; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3327; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3328; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3329; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3330; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3331; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 3332; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3333; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3334; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3335; 3336; SI-GISEL-LABEL: v_exp_f32_approx_fn_attr: 3337; SI-GISEL: ; %bb.0: 3338; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3339; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3340; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3341; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3342; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3343; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3344; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 3345; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3346; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3347; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3348; 3349; R600-LABEL: v_exp_f32_approx_fn_attr: 3350; R600: ; %bb.0: 3351; R600-NEXT: CF_END 3352; R600-NEXT: PAD 3353; 3354; CM-LABEL: v_exp_f32_approx_fn_attr: 3355; CM: ; %bb.0: 3356; CM-NEXT: CF_END 3357; CM-NEXT: PAD 3358 %result = call float @llvm.exp.f32(float %in) 3359 ret float %result 3360} 3361 3362define float @v_exp_f32_ninf(float %in) { 3363; VI-SDAG-LABEL: v_exp_f32_ninf: 
3364; VI-SDAG: ; %bb.0: 3365; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3366; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 3367; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 3368; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 3369; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 3370; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 3371; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 3372; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 3373; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3374; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 3375; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 3376; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 3377; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3378; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 3379; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3380; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3381; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3382; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 3383; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3384; 3385; VI-GISEL-LABEL: v_exp_f32_ninf: 3386; VI-GISEL: ; %bb.0: 3387; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3388; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 3389; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 3390; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 3391; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 3392; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 3393; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 3394; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3395; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 3396; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 3397; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 3398; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 3399; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 3400; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 3401; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3402; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3403; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3404; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 3405; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3406; 3407; GFX900-SDAG-LABEL: v_exp_f32_ninf: 3408; 
GFX900-SDAG: ; %bb.0: 3409; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3410; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 3411; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 3412; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 3413; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3414; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3415; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 3416; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3417; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3418; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 3419; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3420; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3421; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3422; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3423; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 3424; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3425; 3426; GFX900-GISEL-LABEL: v_exp_f32_ninf: 3427; GFX900-GISEL: ; %bb.0: 3428; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3429; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 3430; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 3431; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 3432; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 3433; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 3434; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 3435; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 3436; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 3437; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 3438; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 3439; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3440; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3441; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3442; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 3443; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3444; 3445; SI-SDAG-LABEL: v_exp_f32_ninf: 3446; SI-SDAG: ; %bb.0: 3447; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3448; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 3449; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 3450; SI-SDAG-NEXT: 
v_rndne_f32_e32 v2, v1 3451; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3452; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3453; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 3454; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3455; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3456; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3457; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3458; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3459; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3460; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 3461; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 3462; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3463; 3464; SI-GISEL-LABEL: v_exp_f32_ninf: 3465; SI-GISEL: ; %bb.0: 3466; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3467; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 3468; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 3469; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 3470; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 3471; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 3472; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 3473; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 3474; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 3475; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 3476; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 3477; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 3478; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3479; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3480; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 3481; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3482; 3483; R600-LABEL: v_exp_f32_ninf: 3484; R600: ; %bb.0: 3485; R600-NEXT: CF_END 3486; R600-NEXT: PAD 3487; 3488; CM-LABEL: v_exp_f32_ninf: 3489; CM: ; %bb.0: 3490; CM-NEXT: CF_END 3491; CM-NEXT: PAD 3492 %result = call ninf float @llvm.exp.f32(float %in) 3493 ret float %result 3494} 3495 3496define float @v_exp_f32_afn(float %in) { 3497; GCN-SDAG-LABEL: v_exp_f32_afn: 3498; GCN-SDAG: ; %bb.0: 3499; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3500; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3501; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3502; GCN-SDAG-NEXT: 
v_cmp_gt_f32_e32 vcc, s4, v0 3503; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3504; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3505; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 3506; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3507; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3508; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 3509; 3510; GCN-GISEL-LABEL: v_exp_f32_afn: 3511; GCN-GISEL: ; %bb.0: 3512; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3513; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3514; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3515; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3516; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3517; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3518; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 3519; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3520; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3521; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 3522; 3523; SI-SDAG-LABEL: v_exp_f32_afn: 3524; SI-SDAG: ; %bb.0: 3525; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3526; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3527; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3528; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3529; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3530; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3531; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 3532; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3533; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3534; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3535; 3536; SI-GISEL-LABEL: v_exp_f32_afn: 3537; SI-GISEL: ; %bb.0: 3538; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3539; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3540; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3541; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3542; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3543; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3544; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 3545; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3546; 
SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3547; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3548; 3549; R600-LABEL: v_exp_f32_afn: 3550; R600: ; %bb.0: 3551; R600-NEXT: CF_END 3552; R600-NEXT: PAD 3553; 3554; CM-LABEL: v_exp_f32_afn: 3555; CM: ; %bb.0: 3556; CM-NEXT: CF_END 3557; CM-NEXT: PAD 3558 %result = call afn float @llvm.exp.f32(float %in) 3559 ret float %result 3560} 3561 3562define float @v_exp_f32_afn_daz(float %in) #0 { 3563; GCN-LABEL: v_exp_f32_afn_daz: 3564; GCN: ; %bb.0: 3565; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3566; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3567; GCN-NEXT: v_exp_f32_e32 v0, v0 3568; GCN-NEXT: s_setpc_b64 s[30:31] 3569; 3570; SI-LABEL: v_exp_f32_afn_daz: 3571; SI: ; %bb.0: 3572; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3573; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3574; SI-NEXT: v_exp_f32_e32 v0, v0 3575; SI-NEXT: s_setpc_b64 s[30:31] 3576; 3577; R600-LABEL: v_exp_f32_afn_daz: 3578; R600: ; %bb.0: 3579; R600-NEXT: CF_END 3580; R600-NEXT: PAD 3581; 3582; CM-LABEL: v_exp_f32_afn_daz: 3583; CM: ; %bb.0: 3584; CM-NEXT: CF_END 3585; CM-NEXT: PAD 3586 %result = call afn float @llvm.exp.f32(float %in) 3587 ret float %result 3588} 3589 3590define float @v_exp_f32_afn_dynamic(float %in) #1 { 3591; GCN-SDAG-LABEL: v_exp_f32_afn_dynamic: 3592; GCN-SDAG: ; %bb.0: 3593; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3594; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3595; GCN-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3596; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3597; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3598; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3599; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 3600; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3601; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3602; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 3603; 3604; GCN-GISEL-LABEL: v_exp_f32_afn_dynamic: 3605; GCN-GISEL: ; %bb.0: 3606; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3607; 
GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3608; GCN-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3609; GCN-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3610; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3611; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3612; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 3613; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3614; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3615; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 3616; 3617; SI-SDAG-LABEL: v_exp_f32_afn_dynamic: 3618; SI-SDAG: ; %bb.0: 3619; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3620; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3621; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 3622; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3623; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3624; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3625; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 3626; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3627; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3628; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3629; 3630; SI-GISEL-LABEL: v_exp_f32_afn_dynamic: 3631; SI-GISEL: ; %bb.0: 3632; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3633; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3634; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x42800000, v0 3635; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3636; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3637; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3638; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 3639; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3640; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3641; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3642; 3643; R600-LABEL: v_exp_f32_afn_dynamic: 3644; R600: ; %bb.0: 3645; R600-NEXT: CF_END 3646; R600-NEXT: PAD 3647; 3648; CM-LABEL: v_exp_f32_afn_dynamic: 3649; CM: ; %bb.0: 3650; CM-NEXT: CF_END 3651; CM-NEXT: PAD 3652 %result = call afn float @llvm.exp.f32(float %in) 3653 ret float %result 3654} 3655 3656define float @v_fabs_exp_f32_afn(float %in) { 3657; 
GCN-SDAG-LABEL: v_fabs_exp_f32_afn: 3658; GCN-SDAG: ; %bb.0: 3659; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3660; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3661; GCN-SDAG-NEXT: s_mov_b32 s5, 0x42800000 3662; GCN-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5 3663; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3664; GCN-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc 3665; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3666; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 3667; GCN-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3668; GCN-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3669; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 3670; 3671; GCN-GISEL-LABEL: v_fabs_exp_f32_afn: 3672; GCN-GISEL: ; %bb.0: 3673; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3674; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3675; GCN-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 3676; GCN-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2 3677; GCN-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 3678; GCN-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc 3679; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3680; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 3681; GCN-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3682; GCN-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3683; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 3684; 3685; SI-SDAG-LABEL: v_fabs_exp_f32_afn: 3686; SI-SDAG: ; %bb.0: 3687; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3688; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 3689; SI-SDAG-NEXT: s_mov_b32 s5, 0x42800000 3690; SI-SDAG-NEXT: v_add_f32_e64 v1, |v0|, s5 3691; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3692; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc 3693; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3694; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 3695; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3696; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3697; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3698; 3699; SI-GISEL-LABEL: v_fabs_exp_f32_afn: 3700; SI-GISEL: ; %bb.0: 3701; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 3702; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2aeac50 3703; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 3704; SI-GISEL-NEXT: v_add_f32_e64 v2, |v0|, v2 3705; SI-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 3706; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc 3707; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 3708; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 3709; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 3710; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3711; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3712; 3713; R600-LABEL: v_fabs_exp_f32_afn: 3714; R600: ; %bb.0: 3715; R600-NEXT: CF_END 3716; R600-NEXT: PAD 3717; 3718; CM-LABEL: v_fabs_exp_f32_afn: 3719; CM: ; %bb.0: 3720; CM-NEXT: CF_END 3721; CM-NEXT: PAD 3722 %fabs = call float @llvm.fabs.f32(float %in) 3723 %result = call afn float @llvm.exp.f32(float %fabs) 3724 ret float %result 3725} 3726 3727define float @v_exp_f32_daz(float %in) #0 { 3728; VI-SDAG-LABEL: v_exp_f32_daz: 3729; VI-SDAG: ; %bb.0: 3730; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3731; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 3732; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 3733; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 3734; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 3735; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 3736; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 3737; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 3738; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3739; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 3740; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 3741; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 3742; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3743; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 3744; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3745; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3746; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 3747; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3748; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3749; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 3750; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 
3751; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3752; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3753; 3754; VI-GISEL-LABEL: v_exp_f32_daz: 3755; VI-GISEL: ; %bb.0: 3756; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3757; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 3758; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 3759; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 3760; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 3761; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 3762; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 3763; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3764; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 3765; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 3766; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 3767; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 3768; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 3769; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 3770; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3771; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3772; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3773; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3774; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3775; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 3776; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 3777; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3778; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3779; 3780; GFX900-SDAG-LABEL: v_exp_f32_daz: 3781; GFX900-SDAG: ; %bb.0: 3782; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3783; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 3784; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 3785; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 3786; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3787; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3788; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 3789; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3790; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3791; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 3792; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3793; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3794; 
GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3795; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 3796; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3797; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3798; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 3799; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 3800; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3801; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3802; 3803; GFX900-GISEL-LABEL: v_exp_f32_daz: 3804; GFX900-GISEL: ; %bb.0: 3805; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3806; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 3807; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 3808; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 3809; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 3810; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 3811; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 3812; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 3813; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 3814; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 3815; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 3816; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3817; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3818; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3819; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3820; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3821; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 3822; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 3823; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3824; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3825; 3826; SI-SDAG-LABEL: v_exp_f32_daz: 3827; SI-SDAG: ; %bb.0: 3828; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3829; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 3830; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 3831; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 3832; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3833; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3834; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 3835; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3836; 
SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3837; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3838; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3839; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3840; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3841; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 3842; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 3843; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3844; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 3845; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 3846; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3847; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 3848; 3849; SI-GISEL-LABEL: v_exp_f32_daz: 3850; SI-GISEL: ; %bb.0: 3851; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3852; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 3853; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 3854; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 3855; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 3856; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 3857; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 3858; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 3859; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 3860; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 3861; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 3862; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3863; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 3864; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3865; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3866; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3867; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 3868; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 3869; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3870; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 3871; 3872; R600-LABEL: v_exp_f32_daz: 3873; R600: ; %bb.0: 3874; R600-NEXT: CF_END 3875; R600-NEXT: PAD 3876; 3877; CM-LABEL: v_exp_f32_daz: 3878; CM: ; %bb.0: 3879; CM-NEXT: CF_END 3880; CM-NEXT: PAD 3881 %result = call float @llvm.exp.f32(float %in) 3882 ret float %result 3883} 3884 3885define float @v_exp_f32_nnan(float %in) { 3886; VI-SDAG-LABEL: v_exp_f32_nnan: 3887; VI-SDAG: ; 
%bb.0: 3888; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3889; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 3890; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 3891; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 3892; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 3893; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 3894; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 3895; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 3896; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3897; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 3898; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 3899; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 3900; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3901; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 3902; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3903; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3904; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 3905; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3906; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3907; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 3908; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 3909; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3910; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 3911; 3912; VI-GISEL-LABEL: v_exp_f32_nnan: 3913; VI-GISEL: ; %bb.0: 3914; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3915; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 3916; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 3917; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 3918; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 3919; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 3920; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 3921; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 3922; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 3923; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 3924; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 3925; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 3926; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 3927; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 3928; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3929; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3930; VI-GISEL-NEXT: 
v_mov_b32_e32 v2, 0xc2ce8ed0 3931; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3932; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3933; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 3934; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 3935; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3936; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 3937; 3938; GFX900-SDAG-LABEL: v_exp_f32_nnan: 3939; GFX900-SDAG: ; %bb.0: 3940; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3941; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 3942; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 3943; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 3944; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3945; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3946; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 3947; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3948; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3949; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 3950; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3951; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3952; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3953; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 3954; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 3955; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 3956; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 3957; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 3958; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3959; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 3960; 3961; GFX900-GISEL-LABEL: v_exp_f32_nnan: 3962; GFX900-GISEL: ; %bb.0: 3963; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3964; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 3965; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 3966; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 3967; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 3968; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 3969; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 3970; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 3971; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 3972; 
GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 3973; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 3974; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 3975; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 3976; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 3977; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 3978; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 3979; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 3980; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 3981; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3982; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 3983; 3984; SI-SDAG-LABEL: v_exp_f32_nnan: 3985; SI-SDAG: ; %bb.0: 3986; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3987; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 3988; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 3989; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 3990; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 3991; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 3992; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 3993; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 3994; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 3995; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 3996; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 3997; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 3998; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 3999; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 4000; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4001; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 4002; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 4003; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 4004; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4005; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4006; 4007; SI-GISEL-LABEL: v_exp_f32_nnan: 4008; SI-GISEL: ; %bb.0: 4009; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4010; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4011; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4012; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4013; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4014; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4015; SI-GISEL-NEXT: v_rndne_f32_e32 
v3, v2 4016; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4017; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4018; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4019; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4020; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 4021; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 4022; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4023; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4024; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 4025; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 4026; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 4027; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 4028; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4029; 4030; R600-LABEL: v_exp_f32_nnan: 4031; R600: ; %bb.0: 4032; R600-NEXT: CF_END 4033; R600-NEXT: PAD 4034; 4035; CM-LABEL: v_exp_f32_nnan: 4036; CM: ; %bb.0: 4037; CM-NEXT: CF_END 4038; CM-NEXT: PAD 4039 %result = call nnan float @llvm.exp.f32(float %in) 4040 ret float %result 4041} 4042 4043define float @v_exp_f32_nnan_daz(float %in) #0 { 4044; VI-SDAG-LABEL: v_exp_f32_nnan_daz: 4045; VI-SDAG: ; %bb.0: 4046; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4047; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4048; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 4049; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 4050; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 4051; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 4052; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 4053; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 4054; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4055; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 4056; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 4057; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 4058; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4059; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 4060; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4061; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4062; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 4063; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4064; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 4065; VI-SDAG-NEXT: v_mov_b32_e32 
v2, 0x7f800000 4066; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 4067; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4068; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4069; 4070; VI-GISEL-LABEL: v_exp_f32_nnan_daz: 4071; VI-GISEL: ; %bb.0: 4072; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4073; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4074; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 4075; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 4076; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 4077; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 4078; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 4079; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4080; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 4081; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 4082; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 4083; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 4084; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 4085; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4086; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 4087; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4088; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4089; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4090; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 4091; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 4092; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 4093; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 4094; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4095; 4096; GFX900-SDAG-LABEL: v_exp_f32_nnan_daz: 4097; GFX900-SDAG: ; %bb.0: 4098; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4099; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4100; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4101; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4102; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4103; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4104; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4105; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4106; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4107; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 4108; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 
v2, v2 4109; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4110; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4111; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 4112; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4113; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 4114; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 4115; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 4116; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4117; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4118; 4119; GFX900-GISEL-LABEL: v_exp_f32_nnan_daz: 4120; GFX900-GISEL: ; %bb.0: 4121; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4122; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4123; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4124; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4125; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4126; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4127; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4128; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4129; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4130; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4131; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 4132; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 4133; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4134; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4135; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4136; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 4137; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 4138; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 4139; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 4140; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4141; 4142; SI-SDAG-LABEL: v_exp_f32_nnan_daz: 4143; SI-SDAG: ; %bb.0: 4144; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4145; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4146; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4147; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4148; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4149; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4150; SI-SDAG-NEXT: 
s_mov_b32 s4, 0x32a5705f 4151; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4152; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4153; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4154; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4155; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4156; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4157; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 4158; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4159; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 4160; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 4161; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 4162; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4163; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4164; 4165; SI-GISEL-LABEL: v_exp_f32_nnan_daz: 4166; SI-GISEL: ; %bb.0: 4167; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4168; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4169; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4170; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4171; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4172; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4173; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4174; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4175; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4176; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4177; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4178; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 4179; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 4180; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4181; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4182; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 4183; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 4184; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 4185; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 4186; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4187; 4188; R600-LABEL: v_exp_f32_nnan_daz: 4189; R600: ; %bb.0: 4190; R600-NEXT: CF_END 4191; R600-NEXT: PAD 4192; 4193; CM-LABEL: v_exp_f32_nnan_daz: 4194; CM: ; %bb.0: 4195; CM-NEXT: CF_END 4196; CM-NEXT: PAD 4197 %result = call nnan float @llvm.exp.f32(float %in) 4198 ret float %result 4199} 4200 
; v_exp_f32_nnan_dynamic: returns llvm.exp.f32 of %in, with the call carrying
; the nnan fast-math flag. The function uses attribute group #1, which is
; defined outside this part of the file (presumably the dynamic denormal mode,
; going by the function name -- TODO confirm against the attribute list).
; The expected-output lines that follow are autogenerated (see the note at the
; top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
4201define float @v_exp_f32_nnan_dynamic(float %in) #1 { 4202; VI-SDAG-LABEL: v_exp_f32_nnan_dynamic: 4203; VI-SDAG: ; %bb.0: 4204; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4205; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4206; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 4207; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 4208; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 4209; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 4210; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 4211; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 4212; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4213; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 4214; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 4215; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 4216; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4217; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 4218; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4219; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4220; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 4221; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4222; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 4223; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 4224; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 4225; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4226; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4227; 4228; VI-GISEL-LABEL: v_exp_f32_nnan_dynamic: 4229; VI-GISEL: ; %bb.0: 4230; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4231; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4232; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 4233; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 4234; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 4235; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 4236; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 4237; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4238; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 4239; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 4240; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 4241; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 4242; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 4243; VI-GISEL-NEXT: v_exp_f32_e32 
v1, v1 4244; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 4245; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4246; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4247; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4248; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 4249; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 4250; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 4251; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 4252; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4253; 4254; GFX900-SDAG-LABEL: v_exp_f32_nnan_dynamic: 4255; GFX900-SDAG: ; %bb.0: 4256; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4257; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4258; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4259; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4260; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4261; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4262; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4263; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4264; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4265; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 4266; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4267; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4268; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4269; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 4270; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4271; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 4272; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 4273; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 4274; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4275; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4276; 4277; GFX900-GISEL-LABEL: v_exp_f32_nnan_dynamic: 4278; GFX900-GISEL: ; %bb.0: 4279; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4280; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4281; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4282; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4283; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4284; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4285; GFX900-GISEL-NEXT: 
v_rndne_f32_e32 v3, v2 4286; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4287; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4288; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4289; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 4290; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 4291; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4292; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4293; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4294; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 4295; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 4296; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 4297; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 4298; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4299; 4300; SI-SDAG-LABEL: v_exp_f32_nnan_dynamic: 4301; SI-SDAG: ; %bb.0: 4302; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4303; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4304; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4305; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4306; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4307; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4308; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4309; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4310; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4311; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4312; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4313; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4314; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4315; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 4316; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4317; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 4318; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 4319; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 4320; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4321; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4322; 4323; SI-GISEL-LABEL: v_exp_f32_nnan_dynamic: 4324; SI-GISEL: ; %bb.0: 4325; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4326; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4327; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4328; SI-GISEL-NEXT: v_fma_f32 v1, 
v0, v1, -v2 4329; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4330; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4331; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4332; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4333; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4334; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4335; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4336; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 4337; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 4338; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4339; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4340; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 4341; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 4342; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 4343; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 4344; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4345; 4346; R600-LABEL: v_exp_f32_nnan_dynamic: 4347; R600: ; %bb.0: 4348; R600-NEXT: CF_END 4349; R600-NEXT: PAD 4350; 4351; CM-LABEL: v_exp_f32_nnan_dynamic: 4352; CM: ; %bb.0: 4353; CM-NEXT: CF_END 4354; CM-NEXT: PAD 4355 %result = call nnan float @llvm.exp.f32(float %in) 4356 ret float %result 4357} 4358 4359define float @v_exp_f32_ninf_daz(float %in) #0 { 4360; VI-SDAG-LABEL: v_exp_f32_ninf_daz: 4361; VI-SDAG: ; %bb.0: 4362; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4363; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4364; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 4365; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 4366; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 4367; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 4368; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 4369; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 4370; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4371; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 4372; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 4373; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 4374; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4375; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 4376; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4377; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4378; 
VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4379; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4380; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4381; 4382; VI-GISEL-LABEL: v_exp_f32_ninf_daz: 4383; VI-GISEL: ; %bb.0: 4384; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4385; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4386; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 4387; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 4388; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 4389; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 4390; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 4391; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4392; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 4393; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 4394; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 4395; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 4396; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 4397; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4398; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4399; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4400; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4401; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4402; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4403; 4404; GFX900-SDAG-LABEL: v_exp_f32_ninf_daz: 4405; GFX900-SDAG: ; %bb.0: 4406; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4407; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4408; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4409; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4410; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4411; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4412; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4413; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4414; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4415; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 4416; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4417; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4418; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4419; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4420; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4421; 
GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4422; 4423; GFX900-GISEL-LABEL: v_exp_f32_ninf_daz: 4424; GFX900-GISEL: ; %bb.0: 4425; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4426; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4427; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4428; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4429; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4430; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4431; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4432; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4433; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4434; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4435; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 4436; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4437; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4438; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4439; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4440; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4441; 4442; SI-SDAG-LABEL: v_exp_f32_ninf_daz: 4443; SI-SDAG: ; %bb.0: 4444; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4445; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4446; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4447; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4448; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4449; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4450; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4451; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4452; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4453; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4454; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4455; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4456; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4457; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4458; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4459; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4460; 4461; SI-GISEL-LABEL: v_exp_f32_ninf_daz: 4462; SI-GISEL: ; %bb.0: 4463; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4464; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4465; SI-GISEL-NEXT: v_mul_f32_e32 v2, 
0x3fb8aa3b, v0 4466; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4467; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4468; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4469; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4470; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4471; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4472; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4473; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4474; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 4475; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4476; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4477; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4478; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4479; 4480; R600-LABEL: v_exp_f32_ninf_daz: 4481; R600: ; %bb.0: 4482; R600-NEXT: CF_END 4483; R600-NEXT: PAD 4484; 4485; CM-LABEL: v_exp_f32_ninf_daz: 4486; CM: ; %bb.0: 4487; CM-NEXT: CF_END 4488; CM-NEXT: PAD 4489 %result = call ninf float @llvm.exp.f32(float %in) 4490 ret float %result 4491} 4492 4493define float @v_exp_f32_ninf_dynamic(float %in) #1 { 4494; VI-SDAG-LABEL: v_exp_f32_ninf_dynamic: 4495; VI-SDAG: ; %bb.0: 4496; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4497; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4498; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 4499; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 4500; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 4501; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 4502; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 4503; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 4504; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4505; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 4506; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 4507; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 4508; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4509; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 4510; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4511; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4512; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4513; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4514; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4515; 4516; VI-GISEL-LABEL: 
v_exp_f32_ninf_dynamic: 4517; VI-GISEL: ; %bb.0: 4518; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4519; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4520; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 4521; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 4522; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 4523; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 4524; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 4525; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4526; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 4527; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 4528; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 4529; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 4530; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 4531; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4532; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4533; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4534; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4535; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4536; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4537; 4538; GFX900-SDAG-LABEL: v_exp_f32_ninf_dynamic: 4539; GFX900-SDAG: ; %bb.0: 4540; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4541; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4542; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4543; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4544; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4545; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4546; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4547; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4548; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4549; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 4550; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4551; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4552; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4553; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4554; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4555; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4556; 4557; GFX900-GISEL-LABEL: v_exp_f32_ninf_dynamic: 4558; GFX900-GISEL: ; %bb.0: 4559; GFX900-GISEL-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 4560; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4561; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4562; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4563; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4564; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4565; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4566; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4567; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4568; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4569; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 4570; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4571; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4572; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4573; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4574; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4575; 4576; SI-SDAG-LABEL: v_exp_f32_ninf_dynamic: 4577; SI-SDAG: ; %bb.0: 4578; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4579; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4580; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4581; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4582; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4583; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4584; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4585; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4586; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4587; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4588; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4589; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4590; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4591; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4592; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4593; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4594; 4595; SI-GISEL-LABEL: v_exp_f32_ninf_dynamic: 4596; SI-GISEL: ; %bb.0: 4597; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4598; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4599; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4600; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4601; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4602; SI-GISEL-NEXT: v_fma_f32 v1, 
v0, v3, v1 4603; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4604; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4605; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4606; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4607; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4608; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 4609; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4610; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4611; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4612; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4613; 4614; R600-LABEL: v_exp_f32_ninf_dynamic: 4615; R600: ; %bb.0: 4616; R600-NEXT: CF_END 4617; R600-NEXT: PAD 4618; 4619; CM-LABEL: v_exp_f32_ninf_dynamic: 4620; CM: ; %bb.0: 4621; CM-NEXT: CF_END 4622; CM-NEXT: PAD 4623 %result = call ninf float @llvm.exp.f32(float %in) 4624 ret float %result 4625} 4626 4627define float @v_exp_f32_nnan_ninf(float %in) { 4628; VI-SDAG-LABEL: v_exp_f32_nnan_ninf: 4629; VI-SDAG: ; %bb.0: 4630; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4631; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4632; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 4633; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 4634; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 4635; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 4636; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 4637; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 4638; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4639; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 4640; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 4641; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 4642; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4643; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 4644; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4645; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4646; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4647; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4648; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4649; 4650; VI-GISEL-LABEL: v_exp_f32_nnan_ninf: 4651; VI-GISEL: ; %bb.0: 4652; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4653; VI-GISEL-NEXT: v_and_b32_e32 v1, 
0xfffff000, v0 4654; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 4655; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 4656; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 4657; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 4658; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 4659; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4660; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 4661; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 4662; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 4663; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 4664; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 4665; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4666; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4667; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4668; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4669; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4670; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4671; 4672; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf: 4673; GFX900-SDAG: ; %bb.0: 4674; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4675; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4676; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4677; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4678; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4679; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4680; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4681; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4682; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4683; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 4684; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4685; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4686; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4687; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4688; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4689; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4690; 4691; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf: 4692; GFX900-GISEL: ; %bb.0: 4693; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4694; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4695; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4696; 
GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4697; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4698; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4699; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4700; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4701; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4702; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4703; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 4704; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4705; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4706; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4707; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4708; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4709; 4710; SI-SDAG-LABEL: v_exp_f32_nnan_ninf: 4711; SI-SDAG: ; %bb.0: 4712; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4713; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4714; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4715; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4716; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4717; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4718; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4719; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4720; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4721; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4722; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4723; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4724; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4725; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4726; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4727; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4728; 4729; SI-GISEL-LABEL: v_exp_f32_nnan_ninf: 4730; SI-GISEL: ; %bb.0: 4731; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4732; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4733; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4734; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4735; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4736; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4737; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4738; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4739; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4740; 
SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4741; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4742; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 4743; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4744; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4745; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4746; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4747; 4748; R600-LABEL: v_exp_f32_nnan_ninf: 4749; R600: ; %bb.0: 4750; R600-NEXT: CF_END 4751; R600-NEXT: PAD 4752; 4753; CM-LABEL: v_exp_f32_nnan_ninf: 4754; CM: ; %bb.0: 4755; CM-NEXT: CF_END 4756; CM-NEXT: PAD 4757 %result = call nnan ninf float @llvm.exp.f32(float %in) 4758 ret float %result 4759} 4760 4761define float @v_exp_f32_nnan_ninf_daz(float %in) #0 { 4762; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: 4763; VI-SDAG: ; %bb.0: 4764; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4765; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4766; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 4767; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 4768; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 4769; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 4770; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 4771; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 4772; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4773; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 4774; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 4775; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 4776; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4777; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 4778; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4779; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4780; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4781; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4782; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4783; 4784; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz: 4785; VI-GISEL: ; %bb.0: 4786; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4787; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4788; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 4789; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 4790; VI-GISEL-NEXT: 
v_mul_f32_e32 v2, 0x3fb8a000, v2 4791; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 4792; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 4793; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4794; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 4795; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 4796; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 4797; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 4798; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 4799; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4800; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4801; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4802; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4803; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4804; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4805; 4806; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: 4807; GFX900-SDAG: ; %bb.0: 4808; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4809; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4810; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4811; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4812; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4813; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4814; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4815; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4816; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4817; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 4818; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4819; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4820; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4821; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4822; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4823; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4824; 4825; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_daz: 4826; GFX900-GISEL: ; %bb.0: 4827; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4828; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4829; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4830; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4831; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4832; GFX900-GISEL-NEXT: v_fma_f32 v1, 
v0, v3, v1 4833; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4834; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4835; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4836; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4837; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 4838; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4839; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4840; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4841; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4842; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4843; 4844; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_daz: 4845; SI-SDAG: ; %bb.0: 4846; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4847; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4848; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4849; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4850; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4851; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4852; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4853; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4854; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4855; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4856; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4857; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4858; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4859; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4860; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4861; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4862; 4863; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_daz: 4864; SI-GISEL: ; %bb.0: 4865; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4866; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4867; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4868; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4869; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4870; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4871; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4872; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4873; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4874; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4875; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4876; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 
4877; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4878; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4879; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4880; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 4881; 4882; R600-LABEL: v_exp_f32_nnan_ninf_daz: 4883; R600: ; %bb.0: 4884; R600-NEXT: CF_END 4885; R600-NEXT: PAD 4886; 4887; CM-LABEL: v_exp_f32_nnan_ninf_daz: 4888; CM: ; %bb.0: 4889; CM-NEXT: CF_END 4890; CM-NEXT: PAD 4891 %result = call nnan ninf float @llvm.exp.f32(float %in) 4892 ret float %result 4893} 4894 4895define float @v_exp_f32_nnan_ninf_dynamic(float %in) #1 { 4896; VI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: 4897; VI-SDAG: ; %bb.0: 4898; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4899; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4900; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 4901; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 4902; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 4903; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 4904; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 4905; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 4906; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4907; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 4908; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 4909; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 4910; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4911; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 4912; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4913; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4914; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4915; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4916; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 4917; 4918; VI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic: 4919; VI-GISEL: ; %bb.0: 4920; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4921; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 4922; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 4923; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 4924; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 4925; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 4926; VI-GISEL-NEXT: v_add_f32_e32 
v2, v2, v4 4927; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 4928; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 4929; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 4930; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 4931; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 4932; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 4933; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 4934; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4935; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4936; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4937; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4938; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 4939; 4940; GFX900-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: 4941; GFX900-SDAG: ; %bb.0: 4942; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4943; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4944; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4945; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4946; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4947; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4948; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4949; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4950; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4951; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 4952; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4953; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4954; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4955; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 4956; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4957; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 4958; 4959; GFX900-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic: 4960; GFX900-GISEL: ; %bb.0: 4961; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4962; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 4963; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 4964; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 4965; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 4966; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 4967; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 4968; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 4969; 
GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 4970; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 4971; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 4972; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 4973; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 4974; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 4975; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 4976; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 4977; 4978; SI-SDAG-LABEL: v_exp_f32_nnan_ninf_dynamic: 4979; SI-SDAG: ; %bb.0: 4980; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4981; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 4982; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 4983; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 4984; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 4985; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 4986; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 4987; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 4988; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 4989; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 4990; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 4991; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 4992; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 4993; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 4994; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 4995; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 4996; 4997; SI-GISEL-LABEL: v_exp_f32_nnan_ninf_dynamic: 4998; SI-GISEL: ; %bb.0: 4999; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5000; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 5001; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 5002; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 5003; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 5004; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 5005; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 5006; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 5007; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 5008; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 5009; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5010; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 5011; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5012; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5013; 
SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 5014; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 5015; 5016; R600-LABEL: v_exp_f32_nnan_ninf_dynamic: 5017; R600: ; %bb.0: 5018; R600-NEXT: CF_END 5019; R600-NEXT: PAD 5020; 5021; CM-LABEL: v_exp_f32_nnan_ninf_dynamic: 5022; CM: ; %bb.0: 5023; CM-NEXT: CF_END 5024; CM-NEXT: PAD 5025 %result = call nnan ninf float @llvm.exp.f32(float %in) 5026 ret float %result 5027} 5028 5029define float @v_exp_f32_fast_daz(float %in) #0 { 5030; GCN-LABEL: v_exp_f32_fast_daz: 5031; GCN: ; %bb.0: 5032; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5033; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 5034; GCN-NEXT: v_exp_f32_e32 v0, v0 5035; GCN-NEXT: s_setpc_b64 s[30:31] 5036; 5037; SI-LABEL: v_exp_f32_fast_daz: 5038; SI: ; %bb.0: 5039; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5040; SI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 5041; SI-NEXT: v_exp_f32_e32 v0, v0 5042; SI-NEXT: s_setpc_b64 s[30:31] 5043; 5044; R600-LABEL: v_exp_f32_fast_daz: 5045; R600: ; %bb.0: 5046; R600-NEXT: CF_END 5047; R600-NEXT: PAD 5048; 5049; CM-LABEL: v_exp_f32_fast_daz: 5050; CM: ; %bb.0: 5051; CM-NEXT: CF_END 5052; CM-NEXT: PAD 5053 %result = call fast float @llvm.exp.f32(float %in) 5054 ret float %result 5055} 5056 5057define float @v_exp_f32_dynamic_mode(float %in) #1 { 5058; VI-SDAG-LABEL: v_exp_f32_dynamic_mode: 5059; VI-SDAG: ; %bb.0: 5060; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5061; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5062; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 5063; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 5064; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 5065; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 5066; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 5067; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 5068; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 5069; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 5070; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 5071; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 5072; VI-SDAG-NEXT: v_exp_f32_e32 v1, 
v1 5073; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 5074; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5075; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5076; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5077; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5078; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5079; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5080; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5081; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5082; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 5083; 5084; VI-GISEL-LABEL: v_exp_f32_dynamic_mode: 5085; VI-GISEL: ; %bb.0: 5086; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5087; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5088; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 5089; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 5090; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 5091; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 5092; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 5093; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 5094; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 5095; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 5096; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 5097; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 5098; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 5099; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5100; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5101; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5102; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5103; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5104; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5105; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5106; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5107; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5108; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 5109; 5110; GFX900-SDAG-LABEL: v_exp_f32_dynamic_mode: 5111; GFX900-SDAG: ; %bb.0: 5112; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5113; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5114; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5115; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 5116; 
GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 5117; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 5118; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 5119; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 5120; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 5121; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 5122; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 5123; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5124; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5125; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5126; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5127; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5128; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5129; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5130; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5131; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 5132; 5133; GFX900-GISEL-LABEL: v_exp_f32_dynamic_mode: 5134; GFX900-GISEL: ; %bb.0: 5135; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5136; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 5137; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 5138; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 5139; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 5140; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 5141; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 5142; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 5143; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 5144; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 5145; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 5146; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5147; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5148; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5149; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5150; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5151; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5152; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5153; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5154; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 5155; 5156; SI-SDAG-LABEL: v_exp_f32_dynamic_mode: 5157; 
SI-SDAG: ; %bb.0: 5158; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5159; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5160; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5161; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 5162; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 5163; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 5164; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 5165; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 5166; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 5167; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5168; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 5169; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5170; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5171; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5172; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 5173; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5174; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5175; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5176; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5177; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 5178; 5179; SI-GISEL-LABEL: v_exp_f32_dynamic_mode: 5180; SI-GISEL: ; %bb.0: 5181; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5182; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 5183; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 5184; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 5185; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 5186; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 5187; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 5188; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 5189; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 5190; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 5191; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5192; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5193; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 5194; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5195; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5196; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5197; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5198; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5199; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5200; 
SI-GISEL-NEXT: s_setpc_b64 s[30:31] 5201; 5202; R600-LABEL: v_exp_f32_dynamic_mode: 5203; R600: ; %bb.0: 5204; R600-NEXT: CF_END 5205; R600-NEXT: PAD 5206; 5207; CM-LABEL: v_exp_f32_dynamic_mode: 5208; CM: ; %bb.0: 5209; CM-NEXT: CF_END 5210; CM-NEXT: PAD 5211 %result = call float @llvm.exp.f32(float %in) 5212 ret float %result 5213} 5214 5215define float @v_exp_f32_undef() { 5216; VI-SDAG-LABEL: v_exp_f32_undef: 5217; VI-SDAG: ; %bb.0: 5218; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5219; VI-SDAG-NEXT: v_rndne_f32_e32 v0, 0 5220; VI-SDAG-NEXT: s_mov_b32 s4, 0x7fc00000 5221; VI-SDAG-NEXT: v_add_f32_e64 v1, -v0, s4 5222; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5223; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 5224; VI-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 5225; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 5226; 5227; VI-GISEL-LABEL: v_exp_f32_undef: 5228; VI-GISEL: ; %bb.0: 5229; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5230; VI-GISEL-NEXT: v_sub_f32_e64 v0, s4, 0 5231; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8a000 5232; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x39a3b295 5233; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v0 5234; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 5235; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0, v1 5236; VI-GISEL-NEXT: v_add_f32_e32 v0, v0, v3 5237; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0, v2 5238; VI-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 5239; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v1 5240; VI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 5241; VI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 5242; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 5243; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 5244; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 5245; VI-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 5246; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 5247; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 5248; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 5249; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 5250; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 5251; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 
5252; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 5253; 5254; GFX900-SDAG-LABEL: v_exp_f32_undef: 5255; GFX900-SDAG: ; %bb.0: 5256; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5257; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 5258; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000 5259; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1 5260; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f 5261; GFX900-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 5262; GFX900-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000 5263; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1 5264; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 5265; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 5266; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 5267; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 5268; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 5269; 5270; GFX900-GISEL-LABEL: v_exp_f32_undef: 5271; GFX900-GISEL: ; %bb.0: 5272; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5273; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 5274; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, s4, v0 5275; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v0, -v1 5276; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5277; GFX900-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0 5278; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v1 5279; GFX900-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 5280; GFX900-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 5281; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 5282; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 5283; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 5284; GFX900-GISEL-NEXT: v_ldexp_f32 v0, v0, v1 5285; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 5286; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 5287; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 5288; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 5289; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 5290; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5291; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 5292; 5293; SI-SDAG-LABEL: v_exp_f32_undef: 5294; SI-SDAG: ; %bb.0: 5295; 
SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5296; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 5297; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xffc00000 5298; SI-SDAG-NEXT: v_fma_f32 v0, s4, v0, v1 5299; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x32a5705f 5300; SI-SDAG-NEXT: v_fma_f32 v0, s4, v1, v0 5301; SI-SDAG-NEXT: v_rndne_f32_e32 v1, 0x7fc00000 5302; SI-SDAG-NEXT: v_sub_f32_e32 v2, 0x7fc00000, v1 5303; SI-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 5304; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 5305; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 5306; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 5307; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 5308; 5309; SI-GISEL-LABEL: v_exp_f32_undef: 5310; SI-GISEL: ; %bb.0: 5311; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5312; SI-GISEL-NEXT: v_mov_b32_e32 v0, 0x3fb8aa3b 5313; SI-GISEL-NEXT: v_mul_f32_e32 v1, s4, v0 5314; SI-GISEL-NEXT: v_fma_f32 v0, s4, v0, -v1 5315; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5316; SI-GISEL-NEXT: v_fma_f32 v0, s4, v2, v0 5317; SI-GISEL-NEXT: v_rndne_f32_e32 v2, v1 5318; SI-GISEL-NEXT: v_sub_f32_e32 v1, v1, v2 5319; SI-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 5320; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v2 5321; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 5322; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 5323; SI-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v1 5324; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0xc2ce8ed0 5325; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 5326; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x42b17218 5327; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 5328; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 5329; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5330; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 5331; 5332; R600-LABEL: v_exp_f32_undef: 5333; R600: ; %bb.0: 5334; R600-NEXT: CF_END 5335; R600-NEXT: PAD 5336; 5337; CM-LABEL: v_exp_f32_undef: 5338; CM: ; %bb.0: 5339; CM-NEXT: CF_END 5340; CM-NEXT: PAD 5341 %result = call float @llvm.exp.f32(float undef) 5342 ret float %result 5343} 5344 5345define float @v_exp_f32_0() { 5346; 
GCN-LABEL: v_exp_f32_0: 5347; GCN: ; %bb.0: 5348; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5349; GCN-NEXT: v_mov_b32_e32 v0, 1.0 5350; GCN-NEXT: s_setpc_b64 s[30:31] 5351; 5352; SI-LABEL: v_exp_f32_0: 5353; SI: ; %bb.0: 5354; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5355; SI-NEXT: v_mov_b32_e32 v0, 1.0 5356; SI-NEXT: s_setpc_b64 s[30:31] 5357; 5358; R600-LABEL: v_exp_f32_0: 5359; R600: ; %bb.0: 5360; R600-NEXT: CF_END 5361; R600-NEXT: PAD 5362; 5363; CM-LABEL: v_exp_f32_0: 5364; CM: ; %bb.0: 5365; CM-NEXT: CF_END 5366; CM-NEXT: PAD 5367 %result = call float @llvm.exp.f32(float 0.0) 5368 ret float %result 5369} 5370 5371define float @v_exp_f32_from_fpext_f16(i16 %src.i) { 5372; VI-SDAG-LABEL: v_exp_f32_from_fpext_f16: 5373; VI-SDAG: ; %bb.0: 5374; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5375; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5376; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5377; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5378; VI-SDAG-NEXT: v_sub_f32_e32 v3, v0, v1 5379; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 5380; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v3 5381; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v3 5382; VI-SDAG-NEXT: v_rndne_f32_e32 v4, v2 5383; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 5384; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 5385; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 5386; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 5387; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 5388; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v4 5389; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5390; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5391; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5392; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5393; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5394; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5395; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5396; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5397; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 5398; 5399; VI-GISEL-LABEL: v_exp_f32_from_fpext_f16: 5400; VI-GISEL: ; 
%bb.0: 5401; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5402; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5403; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5404; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 5405; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 5406; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 5407; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 5408; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 5409; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 5410; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 5411; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 5412; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 5413; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 5414; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 5415; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5416; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5417; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5418; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5419; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5420; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5421; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5422; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5423; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5424; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 5425; 5426; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_f16: 5427; GFX900-SDAG: ; %bb.0: 5428; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5429; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5430; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5431; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f 5432; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5433; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 5434; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 5435; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 5436; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3 5437; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 5438; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 5439; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 5440; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5441; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5442; 
GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5443; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5444; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5445; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5446; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5447; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5448; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 5449; 5450; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_f16: 5451; GFX900-GISEL: ; %bb.0: 5452; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5453; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5454; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 5455; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5456; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0 5457; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 5458; GFX900-GISEL-NEXT: v_rndne_f32_e32 v4, v3 5459; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 5460; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 5461; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 5462; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 5463; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 5464; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 5465; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 5466; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5467; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5468; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5469; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5470; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5471; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5472; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 5473; 5474; SI-SDAG-LABEL: v_exp_f32_from_fpext_f16: 5475; SI-SDAG: ; %bb.0: 5476; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5477; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5478; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5479; SI-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f 5480; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5481; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 5482; SI-SDAG-NEXT: v_fma_f32 v3, v0, s4, -v1 5483; SI-SDAG-NEXT: v_sub_f32_e32 
v1, v1, v2 5484; SI-SDAG-NEXT: v_fma_f32 v3, v0, s5, v3 5485; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v3 5486; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 5487; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5488; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5489; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5490; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5491; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 5492; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5493; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5494; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5495; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5496; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 5497; 5498; SI-GISEL-LABEL: v_exp_f32_from_fpext_f16: 5499; SI-GISEL: ; %bb.0: 5500; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5501; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5502; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 5503; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5504; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0 5505; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 5506; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 5507; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 5508; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 5509; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 5510; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v4 5511; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5512; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0xc2ce8ed0 5513; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 5514; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5515; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 5516; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5517; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5518; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5519; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5520; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 5521; 5522; R600-LABEL: v_exp_f32_from_fpext_f16: 5523; R600: ; %bb.0: 5524; R600-NEXT: CF_END 5525; R600-NEXT: PAD 5526; 5527; CM-LABEL: v_exp_f32_from_fpext_f16: 5528; CM: ; %bb.0: 5529; CM-NEXT: CF_END 5530; CM-NEXT: PAD 5531 %src = bitcast i16 %src.i to half 5532 
%fpext = fpext half %src to float 5533 %result = call float @llvm.exp.f32(float %fpext) 5534 ret float %result 5535} 5536 5537define float @v_exp_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { 5538; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16: 5539; VI-SDAG: ; %bb.0: 5540; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5541; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 5542; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5543; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5544; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5545; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 5546; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 5547; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 5548; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1 5549; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 5550; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 5551; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 5552; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 5553; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 5554; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 5555; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5556; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 5557; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5558; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5559; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5560; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5561; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5562; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5563; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5564; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 5565; 5566; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16: 5567; VI-GISEL: ; %bb.0: 5568; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5569; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 5570; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5571; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5572; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 5573; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 5574; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 5575; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 5576; VI-GISEL-NEXT: 
v_add_f32_e32 v2, v2, v4 5577; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 5578; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 5579; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 5580; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 5581; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 5582; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 5583; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5584; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5585; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5586; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5587; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5588; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5589; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5590; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5591; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5592; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 5593; 5594; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16: 5595; GFX900-SDAG: ; %bb.0: 5596; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5597; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 5598; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5599; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5600; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f 5601; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5602; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 5603; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1 5604; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 5605; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 5606; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 5607; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 5608; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 5609; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5610; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5611; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5612; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5613; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5614; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5615; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5616; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5617; GFX900-SDAG-NEXT: s_setpc_b64 
s[30:31] 5618; 5619; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16: 5620; GFX900-GISEL: ; %bb.0: 5621; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5622; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 5623; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5624; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 5625; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5626; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0 5627; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 5628; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 5629; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v3 5630; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 5631; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 5632; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 5633; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 5634; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5635; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5636; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5637; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5638; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5639; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5640; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5641; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5642; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 5643; 5644; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16: 5645; SI-SDAG: ; %bb.0: 5646; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5647; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5648; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 5649; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5650; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 5651; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5652; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 5653; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 5654; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 5655; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 5656; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 5657; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 5658; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5659; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 5660; 
SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5661; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5662; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5663; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 5664; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5665; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5666; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5667; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5668; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 5669; 5670; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16: 5671; SI-GISEL: ; %bb.0: 5672; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5673; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5674; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 5675; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5676; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 5677; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 5678; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 5679; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 5680; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5681; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0 5682; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 5683; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 5684; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 5685; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 5686; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 5687; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 5688; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5689; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5690; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 5691; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5692; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 5693; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5694; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 5695; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc 5696; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 5697; 5698; R600-LABEL: v_exp_f32_from_fpext_math_f16: 5699; R600: ; %bb.0: 5700; R600-NEXT: CF_END 5701; R600-NEXT: PAD 5702; 5703; CM-LABEL: v_exp_f32_from_fpext_math_f16: 5704; CM: ; %bb.0: 5705; CM-NEXT: CF_END 5706; CM-NEXT: PAD 5707 %src0 = bitcast i16 %src0.i to half 
5708 %src1 = bitcast i16 %src1.i to half 5709 %fadd = fadd half %src0, %src1 5710 %fpext = fpext half %fadd to float 5711 %result = call float @llvm.exp.f32(float %fpext) 5712 ret float %result 5713} 5714 5715define float @v_exp_f32_from_fpext_bf16(bfloat %src) { 5716; VI-LABEL: v_exp_f32_from_fpext_bf16: 5717; VI: ; %bb.0: 5718; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5719; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 5720; VI-NEXT: v_sub_f32_e32 v3, v0, v0 5721; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v0 5722; VI-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v3 5723; VI-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v3 5724; VI-NEXT: v_rndne_f32_e32 v2, v1 5725; VI-NEXT: v_add_f32_e32 v3, v3, v4 5726; VI-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v0 5727; VI-NEXT: v_sub_f32_e32 v1, v1, v2 5728; VI-NEXT: v_add_f32_e32 v3, v4, v3 5729; VI-NEXT: v_add_f32_e32 v1, v1, v3 5730; VI-NEXT: v_exp_f32_e32 v1, v1 5731; VI-NEXT: v_cvt_i32_f32_e32 v2, v2 5732; VI-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5733; VI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5734; VI-NEXT: s_mov_b32 s4, 0x42b17218 5735; VI-NEXT: v_ldexp_f32 v1, v1, v2 5736; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5737; VI-NEXT: v_mov_b32_e32 v2, 0x7f800000 5738; VI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5739; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5740; VI-NEXT: s_setpc_b64 s[30:31] 5741; 5742; GFX900-LABEL: v_exp_f32_from_fpext_bf16: 5743; GFX900: ; %bb.0: 5744; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5745; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 5746; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5747; GFX900-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5748; GFX900-NEXT: v_rndne_f32_e32 v2, v1 5749; GFX900-NEXT: v_sub_f32_e32 v3, v1, v2 5750; GFX900-NEXT: v_fma_f32 v1, v0, s4, -v1 5751; GFX900-NEXT: s_mov_b32 s4, 0x32a5705f 5752; GFX900-NEXT: v_fma_f32 v1, v0, s4, v1 5753; GFX900-NEXT: v_add_f32_e32 v1, v3, v1 5754; GFX900-NEXT: v_exp_f32_e32 v1, v1 5755; GFX900-NEXT: v_cvt_i32_f32_e32 v2, v2 5756; GFX900-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5757; 
GFX900-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5758; GFX900-NEXT: s_mov_b32 s4, 0x42b17218 5759; GFX900-NEXT: v_ldexp_f32 v1, v1, v2 5760; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5761; GFX900-NEXT: v_mov_b32_e32 v2, 0x7f800000 5762; GFX900-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5763; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5764; GFX900-NEXT: s_setpc_b64 s[30:31] 5765; 5766; SI-LABEL: v_exp_f32_from_fpext_bf16: 5767; SI: ; %bb.0: 5768; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5769; SI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5770; SI-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5771; SI-NEXT: v_rndne_f32_e32 v2, v1 5772; SI-NEXT: v_sub_f32_e32 v3, v1, v2 5773; SI-NEXT: v_fma_f32 v1, v0, s4, -v1 5774; SI-NEXT: s_mov_b32 s4, 0x32a5705f 5775; SI-NEXT: v_fma_f32 v1, v0, s4, v1 5776; SI-NEXT: v_add_f32_e32 v1, v3, v1 5777; SI-NEXT: v_exp_f32_e32 v1, v1 5778; SI-NEXT: v_cvt_i32_f32_e32 v2, v2 5779; SI-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5780; SI-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5781; SI-NEXT: s_mov_b32 s4, 0x42b17218 5782; SI-NEXT: v_ldexp_f32_e32 v1, v1, v2 5783; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5784; SI-NEXT: v_mov_b32_e32 v2, 0x7f800000 5785; SI-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5786; SI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5787; SI-NEXT: s_setpc_b64 s[30:31] 5788; 5789; R600-LABEL: v_exp_f32_from_fpext_bf16: 5790; R600: ; %bb.0: 5791; R600-NEXT: CF_END 5792; R600-NEXT: PAD 5793; 5794; CM-LABEL: v_exp_f32_from_fpext_bf16: 5795; CM: ; %bb.0: 5796; CM-NEXT: CF_END 5797; CM-NEXT: PAD 5798 %fpext = fpext bfloat %src to float 5799 %result = call float @llvm.exp.f32(float %fpext) 5800 ret float %result 5801} 5802 5803define float @v_exp_f32_from_fpext_math_f16_fast(i16 %src0.i, i16 %src1.i) { 5804; GCN-LABEL: v_exp_f32_from_fpext_math_f16_fast: 5805; GCN: ; %bb.0: 5806; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5807; GCN-NEXT: v_add_f16_e32 v0, v0, v1 5808; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 5809; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 5810; GCN-NEXT: 
v_exp_f32_e32 v0, v0 5811; GCN-NEXT: s_setpc_b64 s[30:31] 5812; 5813; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_fast: 5814; SI-SDAG: ; %bb.0: 5815; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5816; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5817; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 5818; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2aeac50 5819; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 5820; SI-SDAG-NEXT: v_add_f32_e32 v1, 0x42800000, v0 5821; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 5822; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 5823; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 5824; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 5825; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x114b4ea4, v0 5826; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 5827; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 5828; 5829; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_fast: 5830; SI-GISEL: ; %bb.0: 5831; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5832; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5833; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 5834; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 5835; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 5836; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5837; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 5838; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 5839; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 5840; 5841; R600-LABEL: v_exp_f32_from_fpext_math_f16_fast: 5842; R600: ; %bb.0: 5843; R600-NEXT: CF_END 5844; R600-NEXT: PAD 5845; 5846; CM-LABEL: v_exp_f32_from_fpext_math_f16_fast: 5847; CM: ; %bb.0: 5848; CM-NEXT: CF_END 5849; CM-NEXT: PAD 5850 %src0 = bitcast i16 %src0.i to half 5851 %src1 = bitcast i16 %src1.i to half 5852 %fadd = fadd half %src0, %src1 5853 %fpext = fpext half %fadd to float 5854 %result = call fast float @llvm.exp.f32(float %fpext) 5855 ret float %result 5856} 5857 5858define float @v_exp_f32_from_fpext_math_f16_daz(i16 %src0.i, i16 %src1.i) #0 { 5859; VI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz: 5860; VI-SDAG: ; %bb.0: 5861; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5862; VI-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 5863; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5864; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5865; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5866; VI-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 5867; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 5868; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 5869; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x39a3b295, v1 5870; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 5871; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 5872; VI-SDAG-NEXT: v_add_f32_e32 v2, v3, v2 5873; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 5874; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 5875; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 5876; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5877; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 5878; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5879; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5880; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5881; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5882; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5883; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5884; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5885; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 5886; 5887; VI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz: 5888; VI-GISEL: ; %bb.0: 5889; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5890; VI-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 5891; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5892; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 5893; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 5894; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 5895; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 5896; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 5897; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 5898; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 5899; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 5900; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 5901; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 5902; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 5903; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 5904; 
VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 5905; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5906; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5907; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5908; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5909; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5910; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5911; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5912; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5913; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 5914; 5915; GFX900-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz: 5916; GFX900-SDAG: ; %bb.0: 5917; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5918; GFX900-SDAG-NEXT: v_add_f16_e32 v0, v0, v1 5919; GFX900-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5920; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5921; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x32a5705f 5922; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5923; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 5924; GFX900-SDAG-NEXT: v_rndne_f32_e32 v3, v1 5925; GFX900-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 5926; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 5927; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 5928; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 5929; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 5930; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5931; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5932; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5933; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 5934; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5935; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5936; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 5937; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5938; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 5939; 5940; GFX900-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz: 5941; GFX900-GISEL: ; %bb.0: 5942; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5943; GFX900-GISEL-NEXT: v_add_f16_e32 v0, v0, v1 5944; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5945; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 
0x3fb8aa3b 5946; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5947; GFX900-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0 5948; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 5949; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 5950; GFX900-GISEL-NEXT: v_rndne_f32_e32 v2, v3 5951; GFX900-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 5952; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 5953; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 5954; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 5955; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 5956; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 5957; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 5958; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 5959; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 5960; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 5961; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 5962; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 5963; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 5964; 5965; SI-SDAG-LABEL: v_exp_f32_from_fpext_math_f16_daz: 5966; SI-SDAG: ; %bb.0: 5967; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5968; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 5969; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 5970; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 5971; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 5972; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 5973; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 5974; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 5975; SI-SDAG-NEXT: v_rndne_f32_e32 v3, v1 5976; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 5977; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 5978; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 5979; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 5980; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 5981; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 5982; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 5983; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 5984; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 5985; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 5986; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 5987; SI-SDAG-NEXT: 
v_cmp_nlt_f32_e32 vcc, s4, v0 5988; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 5989; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 5990; 5991; SI-GISEL-LABEL: v_exp_f32_from_fpext_math_f16_daz: 5992; SI-GISEL: ; %bb.0: 5993; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5994; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 5995; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 5996; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x32a5705f 5997; SI-GISEL-NEXT: v_mov_b32_e32 v5, 0x7f800000 5998; SI-GISEL-NEXT: v_add_f32_e32 v0, v0, v1 5999; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6000; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 6001; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6002; SI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8aa3b, v0 6003; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v3 6004; SI-GISEL-NEXT: v_rndne_f32_e32 v4, v3 6005; SI-GISEL-NEXT: v_fma_f32 v1, v0, v2, v1 6006; SI-GISEL-NEXT: v_sub_f32_e32 v2, v3, v4 6007; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 6008; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v3, v4 6009; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6010; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 6011; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x42b17218 6012; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 6013; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3 6014; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 6015; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v4 6016; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc 6017; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6018; 6019; R600-LABEL: v_exp_f32_from_fpext_math_f16_daz: 6020; R600: ; %bb.0: 6021; R600-NEXT: CF_END 6022; R600-NEXT: PAD 6023; 6024; CM-LABEL: v_exp_f32_from_fpext_math_f16_daz: 6025; CM: ; %bb.0: 6026; CM-NEXT: CF_END 6027; CM-NEXT: PAD 6028 %src0 = bitcast i16 %src0.i to half 6029 %src1 = bitcast i16 %src1.i to half 6030 %fadd = fadd half %src0, %src1 6031 %fpext = fpext half %fadd to float 6032 %result = call float @llvm.exp.f32(float %fpext) 6033 ret float %result 6034} 6035 6036; FIXME: Fold out fp16_to_fp (FP_TO_FP16) on no-f16 targets 6037define half 
@v_exp_f16(half %in) { 6038; GCN-LABEL: v_exp_f16: 6039; GCN: ; %bb.0: 6040; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6041; GCN-NEXT: v_cvt_f32_f16_e32 v0, v0 6042; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6043; GCN-NEXT: v_exp_f32_e32 v0, v0 6044; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 6045; GCN-NEXT: s_setpc_b64 s[30:31] 6046; 6047; SI-SDAG-LABEL: v_exp_f16: 6048; SI-SDAG: ; %bb.0: 6049; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6050; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6051; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6052; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6053; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6054; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6055; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6056; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6057; 6058; SI-GISEL-LABEL: v_exp_f16: 6059; SI-GISEL: ; %bb.0: 6060; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6061; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6062; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6063; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6064; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6065; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6066; 6067; R600-LABEL: v_exp_f16: 6068; R600: ; %bb.0: 6069; R600-NEXT: CF_END 6070; R600-NEXT: PAD 6071; 6072; CM-LABEL: v_exp_f16: 6073; CM: ; %bb.0: 6074; CM-NEXT: CF_END 6075; CM-NEXT: PAD 6076 %result = call half @llvm.exp.f16(half %in) 6077 ret half %result 6078} 6079 6080define half @v_exp_fabs_f16(half %in) { 6081; GCN-LABEL: v_exp_fabs_f16: 6082; GCN: ; %bb.0: 6083; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6084; GCN-NEXT: v_cvt_f32_f16_e64 v0, |v0| 6085; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6086; GCN-NEXT: v_exp_f32_e32 v0, v0 6087; GCN-NEXT: v_cvt_f16_f32_e32 v0, v0 6088; GCN-NEXT: s_setpc_b64 s[30:31] 6089; 6090; SI-SDAG-LABEL: v_exp_fabs_f16: 6091; SI-SDAG: ; %bb.0: 6092; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6093; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6094; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| 6095; SI-SDAG-NEXT: 
v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6096; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6097; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6098; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6099; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6100; 6101; SI-GISEL-LABEL: v_exp_fabs_f16: 6102; SI-GISEL: ; %bb.0: 6103; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6104; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, |v0| 6105; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6106; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6107; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6108; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6109; 6110; R600-LABEL: v_exp_fabs_f16: 6111; R600: ; %bb.0: 6112; R600-NEXT: CF_END 6113; R600-NEXT: PAD 6114; 6115; CM-LABEL: v_exp_fabs_f16: 6116; CM: ; %bb.0: 6117; CM-NEXT: CF_END 6118; CM-NEXT: PAD 6119 %fabs = call half @llvm.fabs.f16(half %in) 6120 %result = call half @llvm.exp.f16(half %fabs) 6121 ret half %result 6122} 6123 6124define half @v_exp_fneg_fabs_f16(half %in) { 6125; GCN-SDAG-LABEL: v_exp_fneg_fabs_f16: 6126; GCN-SDAG: ; %bb.0: 6127; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6128; GCN-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| 6129; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 6130; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 6131; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6132; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 6133; 6134; GCN-GISEL-LABEL: v_exp_fneg_fabs_f16: 6135; GCN-GISEL: ; %bb.0: 6136; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6137; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| 6138; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6139; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 6140; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6141; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 6142; 6143; SI-SDAG-LABEL: v_exp_fneg_fabs_f16: 6144; SI-SDAG: ; %bb.0: 6145; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6146; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6147; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| 6148; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 6149; SI-SDAG-NEXT: v_exp_f32_e32 
v0, v0 6150; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6151; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6152; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6153; 6154; SI-GISEL-LABEL: v_exp_fneg_fabs_f16: 6155; SI-GISEL: ; %bb.0: 6156; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6157; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -|v0| 6158; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6159; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6160; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6161; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6162; 6163; R600-LABEL: v_exp_fneg_fabs_f16: 6164; R600: ; %bb.0: 6165; R600-NEXT: CF_END 6166; R600-NEXT: PAD 6167; 6168; CM-LABEL: v_exp_fneg_fabs_f16: 6169; CM: ; %bb.0: 6170; CM-NEXT: CF_END 6171; CM-NEXT: PAD 6172 %fabs = call half @llvm.fabs.f16(half %in) 6173 %fneg.fabs = fneg half %fabs 6174 %result = call half @llvm.exp.f16(half %fneg.fabs) 6175 ret half %result 6176} 6177 6178define half @v_exp_fneg_f16(half %in) { 6179; GCN-SDAG-LABEL: v_exp_fneg_f16: 6180; GCN-SDAG: ; %bb.0: 6181; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6182; GCN-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6183; GCN-SDAG-NEXT: v_mul_f32_e32 v0, 0xbfb8aa3b, v0 6184; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 6185; GCN-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6186; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] 6187; 6188; GCN-GISEL-LABEL: v_exp_fneg_f16: 6189; GCN-GISEL: ; %bb.0: 6190; GCN-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6191; GCN-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 6192; GCN-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6193; GCN-GISEL-NEXT: v_exp_f32_e32 v0, v0 6194; GCN-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6195; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] 6196; 6197; SI-SDAG-LABEL: v_exp_fneg_f16: 6198; SI-SDAG: ; %bb.0: 6199; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6200; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 6201; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6202; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6203; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6204; SI-SDAG-NEXT: 
v_cvt_f16_f32_e32 v0, v0 6205; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6206; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6207; 6208; SI-GISEL-LABEL: v_exp_fneg_f16: 6209; SI-GISEL: ; %bb.0: 6210; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6211; SI-GISEL-NEXT: v_cvt_f32_f16_e64 v0, -v0 6212; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6213; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6214; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6215; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6216; 6217; R600-LABEL: v_exp_fneg_f16: 6218; R600: ; %bb.0: 6219; R600-NEXT: CF_END 6220; R600-NEXT: PAD 6221; 6222; CM-LABEL: v_exp_fneg_f16: 6223; CM: ; %bb.0: 6224; CM-NEXT: CF_END 6225; CM-NEXT: PAD 6226 %fneg = fneg half %in 6227 %result = call half @llvm.exp.f16(half %fneg) 6228 ret half %result 6229} 6230 6231define half @v_exp_f16_fast(half %in) { 6232; GCN-LABEL: v_exp_f16_fast: 6233; GCN: ; %bb.0: 6234; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6235; GCN-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0 6236; GCN-NEXT: v_exp_f16_e32 v0, v0 6237; GCN-NEXT: s_setpc_b64 s[30:31] 6238; 6239; SI-SDAG-LABEL: v_exp_f16_fast: 6240; SI-SDAG: ; %bb.0: 6241; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6242; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6243; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6244; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 6245; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6246; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6247; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6248; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6249; 6250; SI-GISEL-LABEL: v_exp_f16_fast: 6251; SI-GISEL: ; %bb.0: 6252; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6253; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6254; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, 0x3dc5 6255; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 6256; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6257; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6258; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6259; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6260; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 
6261; 6262; R600-LABEL: v_exp_f16_fast: 6263; R600: ; %bb.0: 6264; R600-NEXT: CF_END 6265; R600-NEXT: PAD 6266; 6267; CM-LABEL: v_exp_f16_fast: 6268; CM: ; %bb.0: 6269; CM-NEXT: CF_END 6270; CM-NEXT: PAD 6271 %result = call fast half @llvm.exp.f16(half %in) 6272 ret half %result 6273} 6274 6275define <2 x half> @v_exp_v2f16(<2 x half> %in) { 6276; VI-SDAG-LABEL: v_exp_v2f16: 6277; VI-SDAG: ; %bb.0: 6278; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6279; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6280; VI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6281; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6282; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6283; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6284; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6285; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6286; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6287; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 6288; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 6289; 6290; VI-GISEL-LABEL: v_exp_v2f16: 6291; VI-GISEL: ; %bb.0: 6292; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6293; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6294; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6295; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6296; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6297; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6298; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6299; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6300; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6301; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 6302; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 6303; 6304; GFX900-LABEL: v_exp_v2f16: 6305; GFX900: ; %bb.0: 6306; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6307; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v0 6308; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6309; GFX900-NEXT: 
v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6310; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6311; GFX900-NEXT: v_exp_f32_e32 v1, v1 6312; GFX900-NEXT: v_exp_f32_e32 v0, v0 6313; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 6314; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 6315; GFX900-NEXT: v_pack_b32_f16 v0, v1, v0 6316; GFX900-NEXT: s_setpc_b64 s[30:31] 6317; 6318; SI-SDAG-LABEL: v_exp_v2f16: 6319; SI-SDAG: ; %bb.0: 6320; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6321; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6322; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6323; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6324; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6325; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6326; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6327; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6328; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6329; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6330; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6331; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6332; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6333; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6334; 6335; SI-GISEL-LABEL: v_exp_v2f16: 6336; SI-GISEL: ; %bb.0: 6337; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6338; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6339; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 6340; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6341; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6342; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6343; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6344; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6345; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6346; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6347; 6348; R600-LABEL: v_exp_v2f16: 6349; R600: ; %bb.0: 6350; R600-NEXT: CF_END 6351; R600-NEXT: PAD 6352; 6353; CM-LABEL: v_exp_v2f16: 6354; CM: ; %bb.0: 6355; CM-NEXT: CF_END 6356; CM-NEXT: PAD 6357 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %in) 6358 ret <2 x half> %result 6359} 6360 6361define <2 x half> @v_exp_fabs_v2f16(<2 x half> %in) { 6362; VI-SDAG-LABEL: v_exp_fabs_v2f16: 6363; 
VI-SDAG: ; %bb.0: 6364; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6365; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6366; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| 6367; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6368; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6369; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6370; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6371; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6372; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6373; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 6374; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 6375; 6376; VI-GISEL-LABEL: v_exp_fabs_v2f16: 6377; VI-GISEL: ; %bb.0: 6378; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6379; VI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 6380; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6381; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6382; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6383; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6384; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6385; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6386; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6387; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6388; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 6389; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 6390; 6391; GFX900-SDAG-LABEL: v_exp_fabs_v2f16: 6392; GFX900-SDAG: ; %bb.0: 6393; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6394; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v0| 6395; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6396; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6397; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6398; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 6399; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 6400; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6401; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6402; 
GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 6403; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6404; 6405; GFX900-GISEL-LABEL: v_exp_fabs_v2f16: 6406; GFX900-GISEL: ; %bb.0: 6407; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6408; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 6409; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6410; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6411; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6412; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6413; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 6414; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 6415; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6416; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6417; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 6418; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6419; 6420; SI-SDAG-LABEL: v_exp_fabs_v2f16: 6421; SI-SDAG: ; %bb.0: 6422; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6423; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6424; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6425; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, |v0| 6426; SI-SDAG-NEXT: v_cvt_f32_f16_e64 v1, |v1| 6427; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6428; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6429; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6430; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6431; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6432; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6433; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6434; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6435; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6436; 6437; SI-GISEL-LABEL: v_exp_fabs_v2f16: 6438; SI-GISEL: ; %bb.0: 6439; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6440; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6441; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 6442; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 6443; SI-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 6444; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6445; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 
6446; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6447; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6448; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6449; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6450; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 6451; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 6452; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 6453; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6454; 6455; R600-LABEL: v_exp_fabs_v2f16: 6456; R600: ; %bb.0: 6457; R600-NEXT: CF_END 6458; R600-NEXT: PAD 6459; 6460; CM-LABEL: v_exp_fabs_v2f16: 6461; CM: ; %bb.0: 6462; CM-NEXT: CF_END 6463; CM-NEXT: PAD 6464 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) 6465 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fabs) 6466 ret <2 x half> %result 6467} 6468 6469define <2 x half> @v_exp_fneg_fabs_v2f16(<2 x half> %in) { 6470; VI-SDAG-LABEL: v_exp_fneg_fabs_v2f16: 6471; VI-SDAG: ; %bb.0: 6472; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6473; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6474; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -|v0| 6475; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6476; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6477; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6478; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6479; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6480; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6481; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 6482; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 6483; 6484; VI-GISEL-LABEL: v_exp_fneg_fabs_v2f16: 6485; VI-GISEL: ; %bb.0: 6486; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6487; VI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 6488; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6489; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6490; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6491; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6492; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6493; 
VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6494; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6495; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6496; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 6497; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 6498; 6499; GFX900-SDAG-LABEL: v_exp_fneg_fabs_v2f16: 6500; GFX900-SDAG: ; %bb.0: 6501; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6502; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -|v0| 6503; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -|v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6504; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6505; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6506; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 6507; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 6508; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6509; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6510; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 6511; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6512; 6513; GFX900-GISEL-LABEL: v_exp_fneg_fabs_v2f16: 6514; GFX900-GISEL: ; %bb.0: 6515; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6516; GFX900-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 6517; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6518; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6519; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6520; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6521; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 6522; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 6523; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6524; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6525; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 6526; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6527; 6528; SI-SDAG-LABEL: v_exp_fneg_fabs_v2f16: 6529; SI-SDAG: ; %bb.0: 6530; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6531; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6532; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6533; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 
16, v1 6534; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 6535; SI-SDAG-NEXT: v_or_b32_e32 v0, 0x80008000, v0 6536; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 6537; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 6538; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6539; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6540; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6541; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6542; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6543; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6544; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 6545; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 6546; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 6547; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6548; 6549; SI-GISEL-LABEL: v_exp_fneg_fabs_v2f16: 6550; SI-GISEL: ; %bb.0: 6551; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6552; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6553; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 6554; SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 6555; SI-GISEL-NEXT: v_or_b32_e32 v0, 0x80008000, v0 6556; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6557; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 6558; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6559; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6560; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6561; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6562; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 6563; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 6564; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 6565; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6566; 6567; R600-LABEL: v_exp_fneg_fabs_v2f16: 6568; R600: ; %bb.0: 6569; R600-NEXT: CF_END 6570; R600-NEXT: PAD 6571; 6572; CM-LABEL: v_exp_fneg_fabs_v2f16: 6573; CM: ; %bb.0: 6574; CM-NEXT: CF_END 6575; CM-NEXT: PAD 6576 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) 6577 %fneg.fabs = fneg <2 x half> %fabs 6578 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fneg.fabs) 6579 ret <2 x half> %result 6580} 6581 6582define <2 x half> @v_exp_fneg_v2f16(<2 x half> %in) { 6583; VI-SDAG-LABEL: v_exp_fneg_v2f16: 6584; VI-SDAG: ; 
%bb.0: 6585; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6586; VI-SDAG-NEXT: v_cvt_f32_f16_sdwa v1, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6587; VI-SDAG-NEXT: v_cvt_f32_f16_e64 v0, -v0 6588; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6589; VI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6590; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6591; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6592; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6593; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6594; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 6595; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 6596; 6597; VI-GISEL-LABEL: v_exp_fneg_v2f16: 6598; VI-GISEL: ; %bb.0: 6599; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6600; VI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 6601; VI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6602; VI-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6603; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6604; VI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6605; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6606; VI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6607; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6608; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6609; VI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 6610; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 6611; 6612; GFX900-SDAG-LABEL: v_exp_fneg_v2f16: 6613; GFX900-SDAG: ; %bb.0: 6614; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6615; GFX900-SDAG-NEXT: v_cvt_f32_f16_e64 v1, -v0 6616; GFX900-SDAG-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6617; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6618; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6619; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 6620; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 6621; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6622; GFX900-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6623; 
GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 6624; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6625; 6626; GFX900-GISEL-LABEL: v_exp_fneg_v2f16: 6627; GFX900-GISEL: ; %bb.0: 6628; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6629; GFX900-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 6630; GFX900-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6631; GFX900-GISEL-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6632; GFX900-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6633; GFX900-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6634; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 6635; GFX900-GISEL-NEXT: v_exp_f32_e32 v0, v0 6636; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6637; GFX900-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6638; GFX900-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 6639; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6640; 6641; SI-SDAG-LABEL: v_exp_fneg_v2f16: 6642; SI-SDAG: ; %bb.0: 6643; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6644; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6645; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6646; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6647; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 6648; SI-SDAG-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 6649; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v0 6650; SI-SDAG-NEXT: v_lshrrev_b32_e32 v0, 16, v0 6651; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6652; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6653; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6654; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6655; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6656; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6657; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v0 6658; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v1 6659; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v2 6660; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6661; 6662; SI-GISEL-LABEL: v_exp_fneg_v2f16: 6663; SI-GISEL: ; %bb.0: 6664; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6665; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 6666; SI-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 6667; 
SI-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 6668; SI-GISEL-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 6669; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v0 6670; SI-GISEL-NEXT: v_lshrrev_b32_e32 v0, 16, v0 6671; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6672; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6673; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6674; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6675; SI-GISEL-NEXT: v_exp_f32_e32 v2, v0 6676; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v1 6677; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2 6678; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6679; 6680; R600-LABEL: v_exp_fneg_v2f16: 6681; R600: ; %bb.0: 6682; R600-NEXT: CF_END 6683; R600-NEXT: PAD 6684; 6685; CM-LABEL: v_exp_fneg_v2f16: 6686; CM: ; %bb.0: 6687; CM-NEXT: CF_END 6688; CM-NEXT: PAD 6689 %fneg = fneg <2 x half> %in 6690 %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %fneg) 6691 ret <2 x half> %result 6692} 6693 6694define <2 x half> @v_exp_v2f16_fast(<2 x half> %in) { 6695; VI-SDAG-LABEL: v_exp_v2f16_fast: 6696; VI-SDAG: ; %bb.0: 6697; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6698; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x3dc5 6699; VI-SDAG-NEXT: v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 6700; VI-SDAG-NEXT: v_mul_f16_e32 v0, 0x3dc5, v0 6701; VI-SDAG-NEXT: v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6702; VI-SDAG-NEXT: v_exp_f16_e32 v0, v0 6703; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 6704; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 6705; 6706; VI-GISEL-LABEL: v_exp_v2f16_fast: 6707; VI-GISEL: ; %bb.0: 6708; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6709; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5 6710; VI-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 6711; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 6712; VI-GISEL-NEXT: v_exp_f16_e32 v2, v2 6713; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6714; 
VI-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 6715; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 6716; 6717; GFX900-SDAG-LABEL: v_exp_v2f16_fast: 6718; GFX900-SDAG: ; %bb.0: 6719; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6720; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 6721; GFX900-SDAG-NEXT: v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0] 6722; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v0 6723; GFX900-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6724; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v1, v0 6725; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6726; 6727; GFX900-GISEL-LABEL: v_exp_v2f16_fast: 6728; GFX900-GISEL: ; %bb.0: 6729; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6730; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3dc5 6731; GFX900-GISEL-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 6732; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 6733; GFX900-GISEL-NEXT: v_exp_f16_e32 v2, v2 6734; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 6735; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2 6736; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6737; 6738; SI-SDAG-LABEL: v_exp_v2f16_fast: 6739; SI-SDAG: ; %bb.0: 6740; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6741; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6742; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6743; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6744; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6745; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 6746; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 6747; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6748; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6749; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6750; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6751; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6752; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6753; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6754; 6755; SI-GISEL-LABEL: v_exp_v2f16_fast: 6756; SI-GISEL: ; %bb.0: 6757; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6758; SI-GISEL-NEXT: 
v_cvt_f32_f16_e32 v0, v0 6759; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 0x3dc5 6760; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 6761; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v2 6762; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v2 6763; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6764; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6765; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6766; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 6767; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6768; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6769; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6770; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6771; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6772; 6773; R600-LABEL: v_exp_v2f16_fast: 6774; R600: ; %bb.0: 6775; R600-NEXT: CF_END 6776; R600-NEXT: PAD 6777; 6778; CM-LABEL: v_exp_v2f16_fast: 6779; CM: ; %bb.0: 6780; CM-NEXT: CF_END 6781; CM-NEXT: PAD 6782 %result = call fast <2 x half> @llvm.exp.v2f16(<2 x half> %in) 6783 ret <2 x half> %result 6784} 6785 6786define <3 x half> @v_exp_v3f16(<3 x half> %in) { 6787; VI-LABEL: v_exp_v3f16: 6788; VI: ; %bb.0: 6789; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6790; VI-NEXT: v_cvt_f32_f16_e32 v2, v0 6791; VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 6792; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 6793; VI-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 6794; VI-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6795; VI-NEXT: v_exp_f32_e32 v2, v2 6796; VI-NEXT: v_exp_f32_e32 v0, v0 6797; VI-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6798; VI-NEXT: v_exp_f32_e32 v1, v1 6799; VI-NEXT: v_cvt_f16_f32_e32 v2, v2 6800; VI-NEXT: v_cvt_f16_f32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6801; VI-NEXT: v_cvt_f16_f32_e32 v1, v1 6802; VI-NEXT: v_or_b32_e32 v0, v2, v0 6803; VI-NEXT: s_setpc_b64 s[30:31] 6804; 6805; GFX900-LABEL: v_exp_v3f16: 6806; GFX900: ; %bb.0: 6807; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6808; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v0 6809; GFX900-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1 6810; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1 6811; GFX900-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 6812; GFX900-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6813; GFX900-NEXT: v_exp_f32_e32 v2, v2 6814; GFX900-NEXT: v_exp_f32_e32 v0, v0 6815; GFX900-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6816; GFX900-NEXT: v_exp_f32_e32 v1, v1 6817; GFX900-NEXT: v_cvt_f16_f32_e32 v2, v2 6818; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0 6819; GFX900-NEXT: v_cvt_f16_f32_e32 v1, v1 6820; GFX900-NEXT: v_pack_b32_f16 v0, v2, v0 6821; GFX900-NEXT: s_setpc_b64 s[30:31] 6822; 6823; SI-SDAG-LABEL: v_exp_v3f16: 6824; SI-SDAG: ; %bb.0: 6825; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6826; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6827; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6828; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 6829; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6830; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6831; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 6832; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6833; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6834; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 6835; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6836; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6837; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 6838; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6839; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6840; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 6841; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6842; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6843; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 6844; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6845; 6846; SI-GISEL-LABEL: v_exp_v3f16: 6847; SI-GISEL: ; %bb.0: 6848; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6849; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6850; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 6851; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 6852; SI-GISEL-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v0 6853; SI-GISEL-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 6854; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v2 6855; 
SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6856; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6857; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 6858; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6859; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6860; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 6861; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6862; 6863; R600-LABEL: v_exp_v3f16: 6864; R600: ; %bb.0: 6865; R600-NEXT: CF_END 6866; R600-NEXT: PAD 6867; 6868; CM-LABEL: v_exp_v3f16: 6869; CM: ; %bb.0: 6870; CM-NEXT: CF_END 6871; CM-NEXT: PAD 6872 %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %in) 6873 ret <3 x half> %result 6874} 6875 6876define <3 x half> @v_exp_v3f16_afn(<3 x half> %in) { 6877; VI-SDAG-LABEL: v_exp_v3f16_afn: 6878; VI-SDAG: ; %bb.0: 6879; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6880; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x3dc5 6881; VI-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 6882; VI-SDAG-NEXT: v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 6883; VI-SDAG-NEXT: v_exp_f16_e32 v2, v2 6884; VI-SDAG-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6885; VI-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 6886; VI-SDAG-NEXT: v_exp_f16_e32 v1, v1 6887; VI-SDAG-NEXT: v_or_b32_e32 v0, v2, v0 6888; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 6889; 6890; VI-GISEL-LABEL: v_exp_v3f16_afn: 6891; VI-GISEL: ; %bb.0: 6892; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6893; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 6894; VI-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0 6895; VI-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 6896; VI-GISEL-NEXT: v_exp_f16_e32 v3, v3 6897; VI-GISEL-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 6898; VI-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 6899; VI-GISEL-NEXT: v_exp_f16_e32 v1, v1 6900; VI-GISEL-NEXT: v_or_b32_e32 v0, v3, v0 6901; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 6902; 6903; GFX900-SDAG-LABEL: 
v_exp_v3f16_afn: 6904; GFX900-SDAG: ; %bb.0: 6905; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6906; GFX900-SDAG-NEXT: s_movk_i32 s4, 0x3dc5 6907; GFX900-SDAG-NEXT: v_mul_f16_e32 v2, 0x3dc5, v0 6908; GFX900-SDAG-NEXT: v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 6909; GFX900-SDAG-NEXT: v_exp_f16_e32 v2, v2 6910; GFX900-SDAG-NEXT: v_exp_f16_e32 v0, v0 6911; GFX900-SDAG-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 6912; GFX900-SDAG-NEXT: v_exp_f16_e32 v1, v1 6913; GFX900-SDAG-NEXT: v_pack_b32_f16 v0, v2, v0 6914; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 6915; 6916; GFX900-GISEL-LABEL: v_exp_v3f16_afn: 6917; GFX900-GISEL: ; %bb.0: 6918; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6919; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x3dc5 6920; GFX900-GISEL-NEXT: v_mul_f16_e32 v3, 0x3dc5, v0 6921; GFX900-GISEL-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 6922; GFX900-GISEL-NEXT: v_exp_f16_e32 v3, v3 6923; GFX900-GISEL-NEXT: v_exp_f16_e32 v0, v0 6924; GFX900-GISEL-NEXT: v_mul_f16_e32 v1, 0x3dc5, v1 6925; GFX900-GISEL-NEXT: v_exp_f16_e32 v1, v1 6926; GFX900-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v3 6927; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 6928; 6929; SI-SDAG-LABEL: v_exp_v3f16_afn: 6930; SI-SDAG: ; %bb.0: 6931; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6932; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6933; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6934; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 6935; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6936; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6937; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 6938; SI-SDAG-NEXT: v_mul_f32_e32 v0, 0x3fb8a000, v0 6939; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8a000, v1 6940; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 6941; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 6942; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 6943; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 6944; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 6945; 
SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 6946; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 6947; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 6948; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 6949; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 6950; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 6951; 6952; SI-GISEL-LABEL: v_exp_v3f16_afn: 6953; SI-GISEL: ; %bb.0: 6954; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6955; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6956; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 0x3dc5 6957; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 6958; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 6959; SI-GISEL-NEXT: v_mul_f32_e32 v0, v0, v3 6960; SI-GISEL-NEXT: v_mul_f32_e32 v1, v1, v3 6961; SI-GISEL-NEXT: v_mul_f32_e32 v2, v2, v3 6962; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6963; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6964; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 6965; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 6966; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 6967; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 6968; SI-GISEL-NEXT: v_exp_f32_e32 v0, v0 6969; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 6970; SI-GISEL-NEXT: v_exp_f32_e32 v2, v2 6971; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 6972; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 6973; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 6974; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 6975; 6976; R600-LABEL: v_exp_v3f16_afn: 6977; R600: ; %bb.0: 6978; R600-NEXT: CF_END 6979; R600-NEXT: PAD 6980; 6981; CM-LABEL: v_exp_v3f16_afn: 6982; CM: ; %bb.0: 6983; CM-NEXT: CF_END 6984; CM-NEXT: PAD 6985 %result = call afn <3 x half> @llvm.exp.v3f16(<3 x half> %in) 6986 ret <3 x half> %result 6987} 6988 6989define float @v_exp_f32_contract(float %in) { 6990; VI-SDAG-LABEL: v_exp_f32_contract: 6991; VI-SDAG: ; %bb.0: 6992; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6993; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 6994; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 6995; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 6996; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 6997; 
VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 6998; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 6999; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 7000; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 7001; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 7002; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 7003; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 7004; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 7005; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 7006; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7007; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7008; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 7009; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 7010; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 7011; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 7012; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 7013; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 7014; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 7015; 7016; VI-GISEL-LABEL: v_exp_f32_contract: 7017; VI-GISEL: ; %bb.0: 7018; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7019; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 7020; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 7021; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 7022; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 7023; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 7024; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 7025; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 7026; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 7027; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 7028; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 7029; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 7030; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 7031; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 7032; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 7033; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 7034; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7035; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7036; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 7037; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 7038; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 7039; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, 
v3, vcc 7040; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 7041; 7042; GFX900-SDAG-LABEL: v_exp_f32_contract: 7043; GFX900-SDAG: ; %bb.0: 7044; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7045; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 7046; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 7047; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 7048; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 7049; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 7050; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 7051; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 7052; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 7053; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 7054; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 7055; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7056; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7057; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 7058; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 7059; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 7060; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 7061; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 7062; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 7063; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 7064; 7065; GFX900-GISEL-LABEL: v_exp_f32_contract: 7066; GFX900-GISEL: ; %bb.0: 7067; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7068; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 7069; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 7070; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 7071; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 7072; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 7073; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 7074; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 7075; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 7076; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 7077; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 7078; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 7079; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 7080; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7081; GFX900-GISEL-NEXT: 
v_cmp_lt_f32_e32 vcc, v0, v2 7082; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 7083; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 7084; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 7085; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 7086; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 7087; 7088; SI-SDAG-LABEL: v_exp_f32_contract: 7089; SI-SDAG: ; %bb.0: 7090; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7091; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 7092; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 7093; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 7094; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 7095; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 7096; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 7097; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 7098; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 7099; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 7100; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 7101; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7102; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7103; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 7104; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 7105; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 7106; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 7107; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 7108; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 7109; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 7110; 7111; SI-GISEL-LABEL: v_exp_f32_contract: 7112; SI-GISEL: ; %bb.0: 7113; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7114; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 7115; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 7116; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 7117; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 7118; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 7119; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 7120; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 7121; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 7122; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 7123; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 7124; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 7125; SI-GISEL-NEXT: 
v_ldexp_f32_e32 v1, v1, v2 7126; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7127; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7128; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 7129; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 7130; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 7131; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 7132; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 7133; 7134; R600-LABEL: v_exp_f32_contract: 7135; R600: ; %bb.0: 7136; R600-NEXT: CF_END 7137; R600-NEXT: PAD 7138; 7139; CM-LABEL: v_exp_f32_contract: 7140; CM: ; %bb.0: 7141; CM-NEXT: CF_END 7142; CM-NEXT: PAD 7143 %result = call contract float @llvm.exp.f32(float %in) 7144 ret float %result 7145} 7146 7147define float @v_exp_f32_contract_daz(float %in) #0 { 7148; VI-SDAG-LABEL: v_exp_f32_contract_daz: 7149; VI-SDAG: ; %bb.0: 7150; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7151; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 7152; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 7153; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 7154; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 7155; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 7156; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 7157; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 7158; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 7159; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 7160; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 7161; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 7162; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 7163; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 7164; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7165; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7166; VI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 7167; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 7168; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 7169; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 7170; VI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 7171; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 7172; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 7173; 7174; VI-GISEL-LABEL: v_exp_f32_contract_daz: 7175; VI-GISEL: ; %bb.0: 
7176; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7177; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 7178; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 7179; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 7180; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 7181; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 7182; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 7183; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 7184; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 7185; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 7186; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 7187; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 7188; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 7189; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 7190; VI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 7191; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 7192; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7193; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7194; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 7195; VI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 7196; VI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 7197; VI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 7198; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 7199; 7200; GFX900-SDAG-LABEL: v_exp_f32_contract_daz: 7201; GFX900-SDAG: ; %bb.0: 7202; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7203; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 7204; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 7205; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 7206; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 7207; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 7208; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 7209; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 7210; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 7211; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 7212; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 7213; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7214; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7215; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 7216; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 7217; 
GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 7218; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 7219; GFX900-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 7220; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 7221; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 7222; 7223; GFX900-GISEL-LABEL: v_exp_f32_contract_daz: 7224; GFX900-GISEL: ; %bb.0: 7225; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7226; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 7227; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 7228; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 7229; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 7230; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 7231; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 7232; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 7233; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 7234; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 7235; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 7236; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 7237; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 7238; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7239; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7240; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 7241; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 7242; GFX900-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 7243; GFX900-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 7244; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 7245; 7246; SI-SDAG-LABEL: v_exp_f32_contract_daz: 7247; SI-SDAG: ; %bb.0: 7248; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7249; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 7250; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 7251; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 7252; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 7253; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 7254; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 7255; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 7256; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 7257; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 7258; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 7259; 
SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7260; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7261; SI-SDAG-NEXT: s_mov_b32 s4, 0x42b17218 7262; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 7263; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc 7264; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x7f800000 7265; SI-SDAG-NEXT: v_cmp_nlt_f32_e32 vcc, s4, v0 7266; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 7267; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 7268; 7269; SI-GISEL-LABEL: v_exp_f32_contract_daz: 7270; SI-GISEL: ; %bb.0: 7271; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7272; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 7273; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 7274; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 7275; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 7276; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 7277; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 7278; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 7279; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 7280; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 7281; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 7282; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 7283; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 7284; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7285; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7286; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0x42b17218 7287; SI-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 7288; SI-GISEL-NEXT: v_cmp_gt_f32_e32 vcc, v0, v2 7289; SI-GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 7290; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 7291; 7292; R600-LABEL: v_exp_f32_contract_daz: 7293; R600: ; %bb.0: 7294; R600-NEXT: CF_END 7295; R600-NEXT: PAD 7296; 7297; CM-LABEL: v_exp_f32_contract_daz: 7298; CM: ; %bb.0: 7299; CM-NEXT: CF_END 7300; CM-NEXT: PAD 7301 %result = call contract float @llvm.exp.f32(float %in) 7302 ret float %result 7303} 7304 7305define float @v_exp_f32_contract_nnan_ninf(float %in) { 7306; VI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf: 7307; VI-SDAG: ; %bb.0: 7308; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
7309; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 7310; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v1 7311; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v1 7312; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x39a3b295, v4 7313; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3fb8a000, v4 7314; VI-SDAG-NEXT: v_rndne_f32_e32 v3, v2 7315; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 7316; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 7317; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 7318; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v4 7319; VI-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 7320; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 7321; VI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v3 7322; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7323; VI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7324; VI-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 7325; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 7326; VI-SDAG-NEXT: s_setpc_b64 s[30:31] 7327; 7328; VI-GISEL-LABEL: v_exp_f32_contract_nnan_ninf: 7329; VI-GISEL: ; %bb.0: 7330; VI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7331; VI-GISEL-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 7332; VI-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 7333; VI-GISEL-NEXT: v_mul_f32_e32 v4, 0x39a3b295, v2 7334; VI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8a000, v2 7335; VI-GISEL-NEXT: v_mul_f32_e32 v3, 0x3fb8a000, v1 7336; VI-GISEL-NEXT: v_add_f32_e32 v2, v2, v4 7337; VI-GISEL-NEXT: v_mul_f32_e32 v1, 0x39a3b295, v1 7338; VI-GISEL-NEXT: v_add_f32_e32 v1, v1, v2 7339; VI-GISEL-NEXT: v_rndne_f32_e32 v2, v3 7340; VI-GISEL-NEXT: v_sub_f32_e32 v3, v3, v2 7341; VI-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 7342; VI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v2 7343; VI-GISEL-NEXT: v_exp_f32_e32 v1, v1 7344; VI-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 7345; VI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7346; VI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7347; VI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 7348; VI-GISEL-NEXT: s_setpc_b64 s[30:31] 7349; 7350; GFX900-SDAG-LABEL: v_exp_f32_contract_nnan_ninf: 7351; GFX900-SDAG: ; %bb.0: 7352; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 7353; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 7354; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 7355; GFX900-SDAG-NEXT: v_rndne_f32_e32 v2, v1 7356; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 7357; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 7358; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 7359; GFX900-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 7360; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 7361; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 7362; GFX900-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 7363; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7364; GFX900-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7365; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v1, v2 7366; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 7367; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] 7368; 7369; GFX900-GISEL-LABEL: v_exp_f32_contract_nnan_ninf: 7370; GFX900-GISEL: ; %bb.0: 7371; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7372; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 7373; GFX900-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 7374; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 7375; GFX900-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 7376; GFX900-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 7377; GFX900-GISEL-NEXT: v_rndne_f32_e32 v3, v2 7378; GFX900-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 7379; GFX900-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 7380; GFX900-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 7381; GFX900-GISEL-NEXT: v_exp_f32_e32 v1, v1 7382; GFX900-GISEL-NEXT: v_ldexp_f32 v1, v1, v2 7383; GFX900-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7384; GFX900-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7385; GFX900-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 7386; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31] 7387; 7388; SI-SDAG-LABEL: v_exp_f32_contract_nnan_ninf: 7389; SI-SDAG: ; %bb.0: 7390; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7391; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v0 7392; SI-SDAG-NEXT: s_mov_b32 s4, 0x3fb8aa3b 7393; SI-SDAG-NEXT: v_rndne_f32_e32 v2, v1 7394; SI-SDAG-NEXT: 
v_sub_f32_e32 v3, v1, v2 7395; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, -v1 7396; SI-SDAG-NEXT: s_mov_b32 s4, 0x32a5705f 7397; SI-SDAG-NEXT: v_fma_f32 v1, v0, s4, v1 7398; SI-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 7399; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 7400; SI-SDAG-NEXT: v_cvt_i32_f32_e32 v2, v2 7401; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2ce8ed0 7402; SI-SDAG-NEXT: v_cmp_ngt_f32_e32 vcc, s4, v0 7403; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v2 7404; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc 7405; SI-SDAG-NEXT: s_setpc_b64 s[30:31] 7406; 7407; SI-GISEL-LABEL: v_exp_f32_contract_nnan_ninf: 7408; SI-GISEL: ; %bb.0: 7409; SI-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7410; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0x3fb8aa3b 7411; SI-GISEL-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v0 7412; SI-GISEL-NEXT: v_fma_f32 v1, v0, v1, -v2 7413; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x32a5705f 7414; SI-GISEL-NEXT: v_fma_f32 v1, v0, v3, v1 7415; SI-GISEL-NEXT: v_rndne_f32_e32 v3, v2 7416; SI-GISEL-NEXT: v_sub_f32_e32 v2, v2, v3 7417; SI-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 7418; SI-GISEL-NEXT: v_cvt_i32_f32_e32 v2, v3 7419; SI-GISEL-NEXT: v_exp_f32_e32 v1, v1 7420; SI-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v2 7421; SI-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2ce8ed0 7422; SI-GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 7423; SI-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 7424; SI-GISEL-NEXT: s_setpc_b64 s[30:31] 7425; 7426; R600-LABEL: v_exp_f32_contract_nnan_ninf: 7427; R600: ; %bb.0: 7428; R600-NEXT: CF_END 7429; R600-NEXT: PAD 7430; 7431; CM-LABEL: v_exp_f32_contract_nnan_ninf: 7432; CM: ; %bb.0: 7433; CM-NEXT: CF_END 7434; CM-NEXT: PAD 7435 %result = call contract nnan ninf float @llvm.exp.f32(float %in) 7436 ret float %result 7437} 7438 7439declare float @llvm.fabs.f32(float) #2 7440declare float @llvm.exp.f32(float) #2 7441declare <2 x float> @llvm.exp.v2f32(<2 x float>) #2 7442declare <3 x float> @llvm.exp.v3f32(<3 x float>) #2 7443declare <4 x float> @llvm.exp.v4f32(<4 x float>) #2 7444declare half 
@llvm.fabs.f16(half) #2 7445declare half @llvm.exp.f16(half) #2 7446declare <2 x half> @llvm.exp.v2f16(<2 x half>) #2 7447declare <3 x half> @llvm.exp.v3f16(<3 x half>) #2 7448declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 7449 7450attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" } 7451attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" } 7452attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } 7453