1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5 2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-SDAG %s 3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-GISEL %s 4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-SDAG %s 5; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10-GISEL %s 6; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-SDAG %s 7; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-GISEL %s 8 9define i32 @intrinsic_lround_i32_f32(float %arg) { 10; GFX9-SDAG-LABEL: intrinsic_lround_i32_f32: 11; GFX9-SDAG: ; %bb.0: ; %entry 12; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13; GFX9-SDAG-NEXT: v_trunc_f32_e32 v1, v0 14; GFX9-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 15; GFX9-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5 16; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5] 17; GFX9-SDAG-NEXT: s_brev_b32 s4, -2 18; GFX9-SDAG-NEXT: v_bfi_b32 v0, s4, v2, v0 19; GFX9-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 20; GFX9-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 21; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 22; 23; GFX9-GISEL-LABEL: intrinsic_lround_i32_f32: 24; GFX9-GISEL: ; %bb.0: ; %entry 25; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v0 27; GFX9-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 28; GFX9-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5 29; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5] 30; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v3, 1 31; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v3, v2 32; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 33; GFX9-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 34; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 35; 36; GFX10-SDAG-LABEL: intrinsic_lround_i32_f32: 37; GFX10-SDAG: ; %bb.0: 
; %entry 38; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 39; GFX10-SDAG-NEXT: v_trunc_f32_e32 v1, v0 40; GFX10-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 41; GFX10-SDAG-NEXT: v_cmp_ge_f32_e64 s4, |v2|, 0.5 42; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s4 43; GFX10-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0 44; GFX10-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 45; GFX10-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 46; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 47; 48; GFX10-GISEL-LABEL: intrinsic_lround_i32_f32: 49; GFX10-GISEL: ; %bb.0: ; %entry 50; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 51; GFX10-GISEL-NEXT: v_trunc_f32_e32 v1, v0 52; GFX10-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 53; GFX10-GISEL-NEXT: v_cmp_ge_f32_e64 s4, |v2|, 0.5 54; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s4 55; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v2 56; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 57; GFX10-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 58; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 59; 60; GFX11-SDAG-LABEL: intrinsic_lround_i32_f32: 61; GFX11-SDAG: ; %bb.0: ; %entry 62; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 63; GFX11-SDAG-NEXT: v_trunc_f32_e32 v1, v0 64; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 65; GFX11-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 66; GFX11-SDAG-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5 67; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 68; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0 69; GFX11-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0 70; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 71; GFX11-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 72; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 73; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 74; 75; GFX11-GISEL-LABEL: intrinsic_lround_i32_f32: 76; GFX11-GISEL: ; %bb.0: ; %entry 77; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 78; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, 
v0 79; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 80; GFX11-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 81; GFX11-GISEL-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5 82; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 83; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0 84; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v2 85; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 86; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 87; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 88; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 89entry: 90 %res = tail call i32 @llvm.lround.i32.f32(float %arg) 91 ret i32 %res 92} 93 94define i32 @intrinsic_lround_i32_f64(double %arg) { 95; GFX9-SDAG-LABEL: intrinsic_lround_i32_f64: 96; GFX9-SDAG: ; %bb.0: ; %entry 97; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 98; GFX9-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 99; GFX9-SDAG-NEXT: s_brev_b32 s4, -2 100; GFX9-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 101; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x3ff00000 102; GFX9-SDAG-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 103; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 104; GFX9-SDAG-NEXT: v_bfi_b32 v1, s4, v0, v1 105; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 106; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 107; GFX9-SDAG-NEXT: v_cvt_i32_f64_e32 v0, v[0:1] 108; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 109; 110; GFX9-GISEL-LABEL: intrinsic_lround_i32_f64: 111; GFX9-GISEL: ; %bb.0: ; %entry 112; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 113; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 114; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000 115; GFX9-GISEL-NEXT: s_brev_b32 s4, 1 116; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 117; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 118; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 119; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc 120; GFX9-GISEL-NEXT: v_and_or_b32 
v1, v1, s4, v4 121; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 122; GFX9-GISEL-NEXT: v_cvt_i32_f64_e32 v0, v[0:1] 123; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 124; 125; GFX10-SDAG-LABEL: intrinsic_lround_i32_f64: 126; GFX10-SDAG: ; %bb.0: ; %entry 127; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 128; GFX10-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 129; GFX10-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 130; GFX10-SDAG-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5 131; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x3ff00000, s4 132; GFX10-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v0, v1 133; GFX10-SDAG-NEXT: v_mov_b32_e32 v0, 0 134; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 135; GFX10-SDAG-NEXT: v_cvt_i32_f64_e32 v0, v[0:1] 136; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 137; 138; GFX10-GISEL-LABEL: intrinsic_lround_i32_f64: 139; GFX10-GISEL: ; %bb.0: ; %entry 140; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 141; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 142; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 143; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 144; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5 145; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4 146; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4 147; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 148; GFX10-GISEL-NEXT: v_cvt_i32_f64_e32 v0, v[0:1] 149; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 150; 151; GFX11-SDAG-LABEL: intrinsic_lround_i32_f64: 152; GFX11-SDAG: ; %bb.0: ; %entry 153; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 154; GFX11-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 155; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 156; GFX11-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 157; GFX11-SDAG-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5 158; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 159; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x3ff00000, s0 
160; GFX11-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v0, v1 161; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 162; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 163; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 164; GFX11-SDAG-NEXT: v_cvt_i32_f64_e32 v0, v[0:1] 165; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 166; 167; GFX11-GISEL-LABEL: intrinsic_lround_i32_f64: 168; GFX11-GISEL: ; %bb.0: ; %entry 169; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 170; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 171; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 172; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 173; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 174; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5 175; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 176; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0 177; GFX11-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4 178; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 179; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 180; GFX11-GISEL-NEXT: v_cvt_i32_f64_e32 v0, v[0:1] 181; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 182entry: 183 %res = tail call i32 @llvm.lround.i32.f64(double %arg) 184 ret i32 %res 185} 186 187define i64 @intrinsic_lround_i64_f32(float %arg) { 188; GFX9-SDAG-LABEL: intrinsic_lround_i64_f32: 189; GFX9-SDAG: ; %bb.0: ; %entry 190; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 191; GFX9-SDAG-NEXT: v_trunc_f32_e32 v1, v0 192; GFX9-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 193; GFX9-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5 194; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5] 195; GFX9-SDAG-NEXT: s_brev_b32 s4, -2 196; GFX9-SDAG-NEXT: v_bfi_b32 v0, s4, v2, v0 197; GFX9-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 198; GFX9-SDAG-NEXT: v_trunc_f32_e32 v0, v0 199; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x2f800000 200; 
GFX9-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 201; GFX9-SDAG-NEXT: v_floor_f32_e32 v1, v1 202; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xcf800000 203; GFX9-SDAG-NEXT: v_fma_f32 v2, v1, s4, |v0| 204; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2 205; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 206; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0 207; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, v2, v3 208; GFX9-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 209; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3 210; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc 211; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 212; 213; GFX9-GISEL-LABEL: intrinsic_lround_i64_f32: 214; GFX9-GISEL: ; %bb.0: ; %entry 215; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 216; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v0 217; GFX9-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 218; GFX9-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5 219; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5] 220; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v3, 1 221; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v3, v2 222; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 223; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v0 224; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x2f800000 225; GFX9-GISEL-NEXT: v_mul_f32_e64 v2, |v1|, v2 226; GFX9-GISEL-NEXT: v_floor_f32_e32 v2, v2 227; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xcf800000 228; GFX9-GISEL-NEXT: v_fma_f32 v1, v2, v3, |v1| 229; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 230; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 231; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 232; GFX9-GISEL-NEXT: v_xor_b32_e32 v0, v1, v3 233; GFX9-GISEL-NEXT: v_xor_b32_e32 v1, v2, v3 234; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3 235; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc 236; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 237; 238; GFX10-SDAG-LABEL: intrinsic_lround_i64_f32: 239; GFX10-SDAG: ; %bb.0: ; %entry 240; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 241; GFX10-SDAG-NEXT: v_trunc_f32_e32 v1, v0 242; GFX10-SDAG-NEXT: v_sub_f32_e32 v2, v0, 
v1 243; GFX10-SDAG-NEXT: v_cmp_ge_f32_e64 s4, |v2|, 0.5 244; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s4 245; GFX10-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0 246; GFX10-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 247; GFX10-SDAG-NEXT: v_trunc_f32_e32 v0, v0 248; GFX10-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0| 249; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0 250; GFX10-SDAG-NEXT: v_floor_f32_e32 v1, v1 251; GFX10-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0| 252; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 253; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2 254; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 255; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3 256; GFX10-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 257; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 258; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 259; 260; GFX10-GISEL-LABEL: intrinsic_lround_i64_f32: 261; GFX10-GISEL: ; %bb.0: ; %entry 262; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 263; GFX10-GISEL-NEXT: v_trunc_f32_e32 v1, v0 264; GFX10-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 265; GFX10-GISEL-NEXT: v_cmp_ge_f32_e64 s4, |v2|, 0.5 266; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s4 267; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v2 268; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 269; GFX10-GISEL-NEXT: v_trunc_f32_e32 v1, v0 270; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 271; GFX10-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1| 272; GFX10-GISEL-NEXT: v_floor_f32_e32 v2, v2 273; GFX10-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1| 274; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1 275; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2 276; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3 277; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 278; GFX10-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 279; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 280; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 281; 282; GFX11-SDAG-LABEL: intrinsic_lround_i64_f32: 283; GFX11-SDAG: ; %bb.0: ; %entry 
284; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 285; GFX11-SDAG-NEXT: v_trunc_f32_e32 v1, v0 286; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 287; GFX11-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 288; GFX11-SDAG-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5 289; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 290; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0 291; GFX11-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0 292; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 293; GFX11-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 294; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, v0 295; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 296; GFX11-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0| 297; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0 298; GFX11-SDAG-NEXT: v_floor_f32_e32 v1, v1 299; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 300; GFX11-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0| 301; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 302; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2 303; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 304; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 305; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3 306; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 307; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 308; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 309; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 310; 311; GFX11-GISEL-LABEL: intrinsic_lround_i64_f32: 312; GFX11-GISEL: ; %bb.0: ; %entry 313; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 314; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, v0 315; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 316; GFX11-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 317; GFX11-GISEL-NEXT: 
v_cmp_ge_f32_e64 s0, |v2|, 0.5 318; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 319; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0 320; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v2 321; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 322; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 323; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, v0 324; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 325; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 326; GFX11-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1| 327; GFX11-GISEL-NEXT: v_floor_f32_e32 v2, v2 328; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 329; GFX11-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1| 330; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1 331; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2 332; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 333; GFX11-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3 334; GFX11-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 335; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 336; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 337; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 338; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 339entry: 340 %res = tail call i64 @llvm.lround.i64.f32(float %arg) 341 ret i64 %res 342} 343 344define i64 @intrinsic_lround_i64_f64(double %arg) { 345; GFX9-SDAG-LABEL: intrinsic_lround_i64_f64: 346; GFX9-SDAG: ; %bb.0: ; %entry 347; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 348; GFX9-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 349; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3ff00000 350; GFX9-SDAG-NEXT: s_brev_b32 s4, -2 351; GFX9-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 352; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 353; GFX9-SDAG-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 354; GFX9-SDAG-NEXT: 
v_cndmask_b32_e32 v4, 0, v6, vcc 355; GFX9-SDAG-NEXT: v_bfi_b32 v1, s4, v4, v1 356; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 357; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffe0 358; GFX9-SDAG-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 359; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4 360; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 361; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc1f00000 362; GFX9-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 363; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1] 364; GFX9-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 365; GFX9-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 366; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 367; 368; GFX9-GISEL-LABEL: intrinsic_lround_i64_f64: 369; GFX9-GISEL: ; %bb.0: ; %entry 370; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 371; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 372; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000 373; GFX9-GISEL-NEXT: s_brev_b32 s4, 1 374; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 375; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 376; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 377; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc 378; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4 379; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 380; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 381; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000 382; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 383; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc1f00000 384; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 385; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] 386; GFX9-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 387; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1] 388; GFX9-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 389; GFX9-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 390; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 391; 392; GFX10-SDAG-LABEL: intrinsic_lround_i64_f64: 393; GFX10-SDAG: ; %bb.0: ; %entry 394; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 395; GFX10-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 396; 
GFX10-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 397; GFX10-SDAG-NEXT: v_mov_b32_e32 v0, 0 398; GFX10-SDAG-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5 399; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4 400; GFX10-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v4, v1 401; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 402; GFX10-SDAG-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 403; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0 404; GFX10-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 405; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 406; GFX10-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 407; GFX10-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 408; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 409; 410; GFX10-GISEL-LABEL: intrinsic_lround_i64_f64: 411; GFX10-GISEL: ; %bb.0: ; %entry 412; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 413; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 414; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 415; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 416; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5 417; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4 418; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4 419; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 420; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 421; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1] 422; GFX10-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 423; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 424; GFX10-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 425; GFX10-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 426; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 427; 428; GFX11-SDAG-LABEL: intrinsic_lround_i64_f64: 429; GFX11-SDAG: ; %bb.0: ; %entry 430; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 431; GFX11-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 432; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 433; GFX11-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 434; 
GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 435; GFX11-SDAG-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5 436; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 437; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0 438; GFX11-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v4, v1 439; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 440; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 441; GFX11-SDAG-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 442; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 443; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0 444; GFX11-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 445; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 446; GFX11-SDAG-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 447; GFX11-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 448; GFX11-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 449; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 450; 451; GFX11-GISEL-LABEL: intrinsic_lround_i64_f64: 452; GFX11-GISEL: ; %bb.0: ; %entry 453; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 454; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 455; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 456; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 457; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 458; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5 459; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 460; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0 461; GFX11-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4 462; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 463; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 464; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 465; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
466; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1] 467; GFX11-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 468; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 469; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 470; GFX11-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 471; GFX11-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 472; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 473entry: 474 %res = tail call i64 @llvm.lround.i64.f64(double %arg) 475 ret i64 %res 476} 477 478define i64 @intrinsic_llround_i64_f32(float %arg) { 479; GFX9-SDAG-LABEL: intrinsic_llround_i64_f32: 480; GFX9-SDAG: ; %bb.0: ; %entry 481; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 482; GFX9-SDAG-NEXT: v_trunc_f32_e32 v1, v0 483; GFX9-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 484; GFX9-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5 485; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5] 486; GFX9-SDAG-NEXT: s_brev_b32 s4, -2 487; GFX9-SDAG-NEXT: v_bfi_b32 v0, s4, v2, v0 488; GFX9-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 489; GFX9-SDAG-NEXT: v_trunc_f32_e32 v0, v0 490; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x2f800000 491; GFX9-SDAG-NEXT: v_mul_f32_e64 v1, |v0|, s4 492; GFX9-SDAG-NEXT: v_floor_f32_e32 v1, v1 493; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xcf800000 494; GFX9-SDAG-NEXT: v_fma_f32 v2, v1, s4, |v0| 495; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2 496; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 497; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0 498; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, v2, v3 499; GFX9-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 500; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3 501; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc 502; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 503; 504; GFX9-GISEL-LABEL: intrinsic_llround_i64_f32: 505; GFX9-GISEL: ; %bb.0: ; %entry 506; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 507; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v0 508; GFX9-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 509; GFX9-GISEL-NEXT: 
v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5 510; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5] 511; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v3, 1 512; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v3, v2 513; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 514; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v0 515; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x2f800000 516; GFX9-GISEL-NEXT: v_mul_f32_e64 v2, |v1|, v2 517; GFX9-GISEL-NEXT: v_floor_f32_e32 v2, v2 518; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0xcf800000 519; GFX9-GISEL-NEXT: v_fma_f32 v1, v2, v3, |v1| 520; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v1 521; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 522; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 523; GFX9-GISEL-NEXT: v_xor_b32_e32 v0, v1, v3 524; GFX9-GISEL-NEXT: v_xor_b32_e32 v1, v2, v3 525; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v3 526; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc 527; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 528; 529; GFX10-SDAG-LABEL: intrinsic_llround_i64_f32: 530; GFX10-SDAG: ; %bb.0: ; %entry 531; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 532; GFX10-SDAG-NEXT: v_trunc_f32_e32 v1, v0 533; GFX10-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 534; GFX10-SDAG-NEXT: v_cmp_ge_f32_e64 s4, |v2|, 0.5 535; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s4 536; GFX10-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0 537; GFX10-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 538; GFX10-SDAG-NEXT: v_trunc_f32_e32 v0, v0 539; GFX10-SDAG-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0| 540; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0 541; GFX10-SDAG-NEXT: v_floor_f32_e32 v1, v1 542; GFX10-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0| 543; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 544; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2 545; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 546; GFX10-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3 547; GFX10-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 548; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 549; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 550; 551; 
GFX10-GISEL-LABEL: intrinsic_llround_i64_f32: 552; GFX10-GISEL: ; %bb.0: ; %entry 553; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; GFX10-GISEL-NEXT: v_trunc_f32_e32 v1, v0 555; GFX10-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 556; GFX10-GISEL-NEXT: v_cmp_ge_f32_e64 s4, |v2|, 0.5 557; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s4 558; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v2 559; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 560; GFX10-GISEL-NEXT: v_trunc_f32_e32 v1, v0 561; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 562; GFX10-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1| 563; GFX10-GISEL-NEXT: v_floor_f32_e32 v2, v2 564; GFX10-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, |v1| 565; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1 566; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2 567; GFX10-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3 568; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 569; GFX10-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 570; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 571; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 572; 573; GFX11-SDAG-LABEL: intrinsic_llround_i64_f32: 574; GFX11-SDAG: ; %bb.0: ; %entry 575; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 576; GFX11-SDAG-NEXT: v_trunc_f32_e32 v1, v0 577; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 578; GFX11-SDAG-NEXT: v_sub_f32_e32 v2, v0, v1 579; GFX11-SDAG-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5 580; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 581; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0 582; GFX11-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0 583; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 584; GFX11-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 585; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, v0 586; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 587; GFX11-SDAG-NEXT: v_mul_f32_e64 v1, 
0x2f800000, |v0| 588; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v0 589; GFX11-SDAG-NEXT: v_floor_f32_e32 v1, v1 590; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 591; GFX11-SDAG-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0| 592; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v1 593; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v2 594; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 595; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 596; GFX11-SDAG-NEXT: v_xor_b32_e32 v0, v0, v3 597; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 598; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 599; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 600; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 601; 602; GFX11-GISEL-LABEL: intrinsic_llround_i64_f32: 603; GFX11-GISEL: ; %bb.0: ; %entry 604; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 605; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, v0 606; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 607; GFX11-GISEL-NEXT: v_sub_f32_e32 v2, v0, v1 608; GFX11-GISEL-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5 609; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 610; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0 611; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v2 612; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 613; GFX11-GISEL-NEXT: v_add_f32_e32 v0, v1, v0 614; GFX11-GISEL-NEXT: v_trunc_f32_e32 v1, v0 615; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v0 616; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 617; GFX11-GISEL-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v1| 618; GFX11-GISEL-NEXT: v_floor_f32_e32 v2, v2 619; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 620; GFX11-GISEL-NEXT: v_fma_f32 v1, 0xcf800000, v2, 
|v1| 621; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v1 622; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2 623; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 624; GFX11-GISEL-NEXT: v_xor_b32_e32 v0, v0, v3 625; GFX11-GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 626; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 627; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v3 628; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo 629; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 630entry: 631 %res = tail call i64 @llvm.llround.i64.f32(float %arg) 632 ret i64 %res 633} 634 ; Test case intrinsic_llround_i64_f64 - calls @llvm.llround.i64.f64 on a double argument and returns the i64 result. The per-target expectations that follow are autogenerated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand. 635define i64 @intrinsic_llround_i64_f64(double %arg) { 636; GFX9-SDAG-LABEL: intrinsic_llround_i64_f64: 637; GFX9-SDAG: ; %bb.0: ; %entry 638; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 639; GFX9-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 640; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x3ff00000 641; GFX9-SDAG-NEXT: s_brev_b32 s4, -2 642; GFX9-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 643; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 644; GFX9-SDAG-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 645; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc 646; GFX9-SDAG-NEXT: v_bfi_b32 v1, s4, v4, v1 647; GFX9-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 648; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xffe0 649; GFX9-SDAG-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 650; GFX9-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], s4 651; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 652; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc1f00000 653; GFX9-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 654; GFX9-SDAG-NEXT: v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1] 655; GFX9-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 656; GFX9-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 657; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 658; 659; GFX9-GISEL-LABEL: intrinsic_llround_i64_f64: 660; GFX9-GISEL: ; %bb.0: ; %entry 661; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 662; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 663; 
GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x3ff00000 664; GFX9-GISEL-NEXT: s_brev_b32 s4, 1 665; GFX9-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 666; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 667; GFX9-GISEL-NEXT: v_cmp_ge_f64_e64 vcc, |v[4:5]|, 0.5 668; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v6, vcc 669; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, s4, v4 670; GFX9-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 671; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0 672; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3df00000 673; GFX9-GISEL-NEXT: v_mov_b32_e32 v4, 0 674; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xc1f00000 675; GFX9-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 676; GFX9-GISEL-NEXT: v_mul_f64 v[2:3], v[0:1], v[2:3] 677; GFX9-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 678; GFX9-GISEL-NEXT: v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1] 679; GFX9-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 680; GFX9-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 681; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 682; 683; GFX10-SDAG-LABEL: intrinsic_llround_i64_f64: 684; GFX10-SDAG: ; %bb.0: ; %entry 685; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 686; GFX10-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 687; GFX10-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 688; GFX10-SDAG-NEXT: v_mov_b32_e32 v0, 0 689; GFX10-SDAG-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5 690; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4 691; GFX10-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v4, v1 692; GFX10-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 693; GFX10-SDAG-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 694; GFX10-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0 695; GFX10-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 696; GFX10-SDAG-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 697; GFX10-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 698; GFX10-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 699; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 700; 701; GFX10-GISEL-LABEL: intrinsic_llround_i64_f64: 702; GFX10-GISEL: ; %bb.0: ; %entry 703; GFX10-GISEL-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 704; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 705; GFX10-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 706; GFX10-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 707; GFX10-GISEL-NEXT: v_cmp_ge_f64_e64 s4, |v[4:5]|, 0.5 708; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s4 709; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4 710; GFX10-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 711; GFX10-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 712; GFX10-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1] 713; GFX10-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 714; GFX10-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 715; GFX10-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 716; GFX10-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 717; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 718; 719; GFX11-SDAG-LABEL: intrinsic_llround_i64_f64: 720; GFX11-SDAG: ; %bb.0: ; %entry 721; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 722; GFX11-SDAG-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 723; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 724; GFX11-SDAG-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 725; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0 726; GFX11-SDAG-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5 727; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 728; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0 729; GFX11-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v4, v1 730; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 731; GFX11-SDAG-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 732; GFX11-SDAG-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 733; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 734; GFX11-SDAG-NEXT: v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0 735; GFX11-SDAG-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 736; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 737; GFX11-SDAG-NEXT: 
v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 738; GFX11-SDAG-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 739; GFX11-SDAG-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 740; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 741; 742; GFX11-GISEL-LABEL: intrinsic_llround_i64_f64: 743; GFX11-GISEL: ; %bb.0: ; %entry 744; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 745; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[2:3], v[0:1] 746; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 747; GFX11-GISEL-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 748; GFX11-GISEL-NEXT: v_and_or_b32 v0, v0, 0, 0 749; GFX11-GISEL-NEXT: v_cmp_ge_f64_e64 s0, |v[4:5]|, 0.5 750; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 751; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 0x3ff00000, s0 752; GFX11-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v4 753; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 754; GFX11-GISEL-NEXT: v_add_f64 v[0:1], v[2:3], v[0:1] 755; GFX11-GISEL-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] 756; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 757; GFX11-GISEL-NEXT: v_mul_f64 v[2:3], 0x3df00000, v[0:1] 758; GFX11-GISEL-NEXT: v_floor_f64_e32 v[2:3], v[2:3] 759; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 760; GFX11-GISEL-NEXT: v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1] 761; GFX11-GISEL-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] 762; GFX11-GISEL-NEXT: v_cvt_i32_f64_e32 v1, v[2:3] 763; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 764entry: 765 %res = tail call i64 @llvm.llround.i64.f64(double %arg) 766 ret i64 %res 767} 768 ; Test case intrinsic_fround_half - calls @llvm.round.f16 on a half argument and returns the half result (no integer conversion, unlike the lround/llround cases in this file). The per-target expectations that follow are autogenerated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand. 769define half @intrinsic_fround_half(half %arg) { 770; GFX9-SDAG-LABEL: intrinsic_fround_half: 771; GFX9-SDAG: ; %bb.0: ; %entry 772; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 773; GFX9-SDAG-NEXT: v_trunc_f16_e32 v1, v0 774; GFX9-SDAG-NEXT: v_sub_f16_e32 v2, v0, v1 775; GFX9-SDAG-NEXT: 
v_mov_b32_e32 v3, 0x3c00 776; GFX9-SDAG-NEXT: v_cmp_ge_f16_e64 vcc, |v2|, 0.5 777; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc 778; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff 779; GFX9-SDAG-NEXT: v_bfi_b32 v0, s4, v2, v0 780; GFX9-SDAG-NEXT: v_add_f16_e32 v0, v1, v0 781; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 782; 783; GFX9-GISEL-LABEL: intrinsic_fround_half: 784; GFX9-GISEL: ; %bb.0: ; %entry 785; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 786; GFX9-GISEL-NEXT: v_trunc_f16_e32 v1, v0 787; GFX9-GISEL-NEXT: v_sub_f16_e32 v2, v0, v1 788; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 789; GFX9-GISEL-NEXT: v_cmp_ge_f16_e64 vcc, |v2|, 0.5 790; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc 791; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff8000, v0 792; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 793; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v1, v0 794; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 795; 796; GFX10-SDAG-LABEL: intrinsic_fround_half: 797; GFX10-SDAG: ; %bb.0: ; %entry 798; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 799; GFX10-SDAG-NEXT: v_trunc_f16_e32 v1, v0 800; GFX10-SDAG-NEXT: v_sub_f16_e32 v2, v0, v1 801; GFX10-SDAG-NEXT: v_cmp_ge_f16_e64 s4, |v2|, 0.5 802; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s4 803; GFX10-SDAG-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0 804; GFX10-SDAG-NEXT: v_add_f16_e32 v0, v1, v0 805; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 806; 807; GFX10-GISEL-LABEL: intrinsic_fround_half: 808; GFX10-GISEL: ; %bb.0: ; %entry 809; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 810; GFX10-GISEL-NEXT: v_trunc_f16_e32 v1, v0 811; GFX10-GISEL-NEXT: v_sub_f16_e32 v2, v0, v1 812; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff8000, v0 813; GFX10-GISEL-NEXT: v_cmp_ge_f16_e64 s4, |v2|, 0.5 814; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s4 815; GFX10-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 816; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v1, v0 817; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 818; 819; GFX11-SDAG-LABEL: intrinsic_fround_half: 820; 
GFX11-SDAG: ; %bb.0: ; %entry 821; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 822; GFX11-SDAG-NEXT: v_trunc_f16_e32 v1, v0 823; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 824; GFX11-SDAG-NEXT: v_sub_f16_e32 v2, v0, v1 825; GFX11-SDAG-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5 826; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 827; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0 828; GFX11-SDAG-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0 829; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 830; GFX11-SDAG-NEXT: v_add_f16_e32 v0, v1, v0 831; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 832; 833; GFX11-GISEL-LABEL: intrinsic_fround_half: 834; GFX11-GISEL: ; %bb.0: ; %entry 835; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 836; GFX11-GISEL-NEXT: v_trunc_f16_e32 v1, v0 837; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 838; GFX11-GISEL-NEXT: v_sub_f16_e32 v2, v0, v1 839; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff8000, v0 840; GFX11-GISEL-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5 841; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 842; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0 843; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 844; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 845; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v1, v0 846; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 847entry: 848 %res = tail call half @llvm.round.f16(half %arg) 849 ret half %res 850} 851 ; Test case intrinsic_lround_i32_f16 - calls @llvm.lround.i32.f16 on a half argument and returns the i32 result. In every expectation block below the rounded f16 value goes through v_cvt_f32_f16 before v_cvt_i32_f32. The expectations are autogenerated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand. 852define i32 @intrinsic_lround_i32_f16(half %arg) { 853; GFX9-SDAG-LABEL: intrinsic_lround_i32_f16: 854; GFX9-SDAG: ; %bb.0: ; %entry 855; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 856; GFX9-SDAG-NEXT: v_trunc_f16_e32 v1, v0 857; GFX9-SDAG-NEXT: v_sub_f16_e32 v2, v0, v1 858; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00 859; GFX9-SDAG-NEXT: v_cmp_ge_f16_e64 vcc, |v2|, 0.5 860; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v3, 
vcc 861; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7fff 862; GFX9-SDAG-NEXT: v_bfi_b32 v0, s4, v2, v0 863; GFX9-SDAG-NEXT: v_add_f16_e32 v0, v1, v0 864; GFX9-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 865; GFX9-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 866; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 867; 868; GFX9-GISEL-LABEL: intrinsic_lround_i32_f16: 869; GFX9-GISEL: ; %bb.0: ; %entry 870; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 871; GFX9-GISEL-NEXT: v_trunc_f16_e32 v1, v0 872; GFX9-GISEL-NEXT: v_sub_f16_e32 v2, v0, v1 873; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x3c00 874; GFX9-GISEL-NEXT: v_cmp_ge_f16_e64 vcc, |v2|, 0.5 875; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc 876; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff8000, v0 877; GFX9-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 878; GFX9-GISEL-NEXT: v_add_f16_e32 v0, v1, v0 879; GFX9-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 880; GFX9-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 881; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 882; 883; GFX10-SDAG-LABEL: intrinsic_lround_i32_f16: 884; GFX10-SDAG: ; %bb.0: ; %entry 885; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 886; GFX10-SDAG-NEXT: v_trunc_f16_e32 v1, v0 887; GFX10-SDAG-NEXT: v_sub_f16_e32 v2, v0, v1 888; GFX10-SDAG-NEXT: v_cmp_ge_f16_e64 s4, |v2|, 0.5 889; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s4 890; GFX10-SDAG-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0 891; GFX10-SDAG-NEXT: v_add_f16_e32 v0, v1, v0 892; GFX10-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 893; GFX10-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 894; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 895; 896; GFX10-GISEL-LABEL: intrinsic_lround_i32_f16: 897; GFX10-GISEL: ; %bb.0: ; %entry 898; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 899; GFX10-GISEL-NEXT: v_trunc_f16_e32 v1, v0 900; GFX10-GISEL-NEXT: v_sub_f16_e32 v2, v0, v1 901; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff8000, v0 902; GFX10-GISEL-NEXT: v_cmp_ge_f16_e64 s4, |v2|, 0.5 903; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s4 904; GFX10-GISEL-NEXT: v_or_b32_e32 
v0, v2, v0 905; GFX10-GISEL-NEXT: v_add_f16_e32 v0, v1, v0 906; GFX10-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 907; GFX10-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 908; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 909; 910; GFX11-SDAG-LABEL: intrinsic_lround_i32_f16: 911; GFX11-SDAG: ; %bb.0: ; %entry 912; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; GFX11-SDAG-NEXT: v_trunc_f16_e32 v1, v0 914; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 915; GFX11-SDAG-NEXT: v_sub_f16_e32 v2, v0, v1 916; GFX11-SDAG-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5 917; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 918; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0 919; GFX11-SDAG-NEXT: v_bfi_b32 v0, 0x7fff, v2, v0 920; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 921; GFX11-SDAG-NEXT: v_add_f16_e32 v0, v1, v0 922; GFX11-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 923; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 924; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 925; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 926; 927; GFX11-GISEL-LABEL: intrinsic_lround_i32_f16: 928; GFX11-GISEL: ; %bb.0: ; %entry 929; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 930; GFX11-GISEL-NEXT: v_trunc_f16_e32 v1, v0 931; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 932; GFX11-GISEL-NEXT: v_sub_f16_e32 v2, v0, v1 933; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff8000, v0 934; GFX11-GISEL-NEXT: v_cmp_ge_f16_e64 s0, |v2|, 0.5 935; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 936; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 0x3c00, s0 937; GFX11-GISEL-NEXT: v_or_b32_e32 v0, v2, v0 938; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 939; GFX11-GISEL-NEXT: v_add_f16_e32 v0, v1, v0 940; GFX11-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 941; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 942; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 943; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 944entry: 945 %res = tail call i32 @llvm.lround.i32.f16(half %arg) 946 ret i32 %res 947} 948 ; Test case intrinsic_lround_v2i32_v2f32 - calls @llvm.lround.v2i32.v2f32 on a <2 x float> argument and returns the <2 x i32> result. The expected code repeats the scalar rounding sequence once per vector element. The expectations are autogenerated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand. 949define <2 x i32> @intrinsic_lround_v2i32_v2f32(<2 x float> %arg) { 950; GFX9-SDAG-LABEL: intrinsic_lround_v2i32_v2f32: 951; GFX9-SDAG: ; %bb.0: ; %entry 952; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 953; GFX9-SDAG-NEXT: v_trunc_f32_e32 v2, v0 954; GFX9-SDAG-NEXT: v_sub_f32_e32 v3, v0, v2 955; GFX9-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5 956; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, s[4:5] 957; GFX9-SDAG-NEXT: s_brev_b32 s6, -2 958; GFX9-SDAG-NEXT: v_bfi_b32 v0, s6, v3, v0 959; GFX9-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 960; GFX9-SDAG-NEXT: v_trunc_f32_e32 v2, v1 961; GFX9-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 962; GFX9-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5 963; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, s[4:5] 964; GFX9-SDAG-NEXT: v_bfi_b32 v1, s6, v3, v1 965; GFX9-SDAG-NEXT: v_add_f32_e32 v1, v2, v1 966; GFX9-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 967; GFX9-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 968; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 969; 970; GFX9-GISEL-LABEL: intrinsic_lround_v2i32_v2f32: 971; GFX9-GISEL: ; %bb.0: ; %entry 972; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 973; GFX9-GISEL-NEXT: v_trunc_f32_e32 v2, v0 974; GFX9-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 975; GFX9-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5 976; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, s[4:5] 977; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v4, 1 978; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v4, v3 979; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 980; GFX9-GISEL-NEXT: v_trunc_f32_e32 v2, v1 981; GFX9-GISEL-NEXT: v_sub_f32_e32 v3, v1, v2 982; GFX9-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5 983; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, s[4:5] 984; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v4, v3 985; GFX9-GISEL-NEXT: v_add_f32_e32 v1, v2, v1 
986; GFX9-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 987; GFX9-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 988; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 989; 990; GFX10-SDAG-LABEL: intrinsic_lround_v2i32_v2f32: 991; GFX10-SDAG: ; %bb.0: ; %entry 992; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 993; GFX10-SDAG-NEXT: v_trunc_f32_e32 v2, v0 994; GFX10-SDAG-NEXT: v_trunc_f32_e32 v3, v1 995; GFX10-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2 996; GFX10-SDAG-NEXT: v_sub_f32_e32 v5, v1, v3 997; GFX10-SDAG-NEXT: v_cmp_ge_f32_e64 s4, |v4|, 0.5 998; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s4 999; GFX10-SDAG-NEXT: v_cmp_ge_f32_e64 s4, |v5|, 0.5 1000; GFX10-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v4, v0 1001; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s4 1002; GFX10-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 1003; GFX10-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v5, v1 1004; GFX10-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 1005; GFX10-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 1006; GFX10-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 1007; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1008; 1009; GFX10-GISEL-LABEL: intrinsic_lround_v2i32_v2f32: 1010; GFX10-GISEL: ; %bb.0: ; %entry 1011; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1012; GFX10-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1013; GFX10-GISEL-NEXT: v_trunc_f32_e32 v3, v1 1014; GFX10-GISEL-NEXT: v_sub_f32_e32 v4, v0, v2 1015; GFX10-GISEL-NEXT: v_sub_f32_e32 v5, v1, v3 1016; GFX10-GISEL-NEXT: v_cmp_ge_f32_e64 s4, |v4|, 0.5 1017; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s4 1018; GFX10-GISEL-NEXT: v_cmp_ge_f32_e64 s4, |v5|, 0.5 1019; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v4 1020; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s4 1021; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 1022; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v5 1023; GFX10-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 1024; GFX10-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 1025; GFX10-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 1026; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1027; 1028; 
GFX11-SDAG-LABEL: intrinsic_lround_v2i32_v2f32: 1029; GFX11-SDAG: ; %bb.0: ; %entry 1030; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1031; GFX11-SDAG-NEXT: v_trunc_f32_e32 v2, v0 1032; GFX11-SDAG-NEXT: v_trunc_f32_e32 v3, v1 1033; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1034; GFX11-SDAG-NEXT: v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3 1035; GFX11-SDAG-NEXT: v_cmp_ge_f32_e64 s0, |v4|, 0.5 1036; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 1037; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s0 1038; GFX11-SDAG-NEXT: v_cmp_ge_f32_e64 s0, |v5|, 0.5 1039; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1040; GFX11-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v4, v0 1041; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s0 1042; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1043; GFX11-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v5, v1 1044; GFX11-SDAG-NEXT: v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1 1045; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1046; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v0, v0 1047; GFX11-SDAG-NEXT: v_cvt_i32_f32_e32 v1, v1 1048; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1049; 1050; GFX11-GISEL-LABEL: intrinsic_lround_v2i32_v2f32: 1051; GFX11-GISEL: ; %bb.0: ; %entry 1052; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1053; GFX11-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1054; GFX11-GISEL-NEXT: v_trunc_f32_e32 v3, v1 1055; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1056; GFX11-GISEL-NEXT: v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3 1057; GFX11-GISEL-NEXT: v_cmp_ge_f32_e64 s0, |v4|, 0.5 1058; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 1059; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s0 1060; GFX11-GISEL-NEXT: 
v_cmp_ge_f32_e64 s0, |v5|, 0.5 1061; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1062; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v4 1063; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s0 1064; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1065; GFX11-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v5 1066; GFX11-GISEL-NEXT: v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1 1067; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1068; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v0, v0 1069; GFX11-GISEL-NEXT: v_cvt_i32_f32_e32 v1, v1 1070; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1071entry: 1072 %res = tail call <2 x i32> @llvm.lround.v2i32.v2f32(<2 x float> %arg) 1073 ret <2 x i32> %res 1074} 1075 ; Test case intrinsic_lround_v2i64_v2f32 - calls @llvm.lround.v2i64.v2f32 on a <2 x float> argument and returns the <2 x i64> result. Each 64-bit element is returned as a 32-bit register pair (v0/v1 and v2/v3 in the expectations). The expectations are autogenerated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand. 1076define <2 x i64> @intrinsic_lround_v2i64_v2f32(<2 x float> %arg) { 1077; GFX9-SDAG-LABEL: intrinsic_lround_v2i64_v2f32: 1078; GFX9-SDAG: ; %bb.0: ; %entry 1079; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1080; GFX9-SDAG-NEXT: v_trunc_f32_e32 v2, v0 1081; GFX9-SDAG-NEXT: v_sub_f32_e32 v3, v0, v2 1082; GFX9-SDAG-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5 1083; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, s[4:5] 1084; GFX9-SDAG-NEXT: s_brev_b32 s6, -2 1085; GFX9-SDAG-NEXT: v_bfi_b32 v0, s6, v3, v0 1086; GFX9-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 1087; GFX9-SDAG-NEXT: v_trunc_f32_e32 v0, v0 1088; GFX9-SDAG-NEXT: s_mov_b32 s7, 0x2f800000 1089; GFX9-SDAG-NEXT: v_mul_f32_e64 v2, |v0|, s7 1090; GFX9-SDAG-NEXT: v_floor_f32_e32 v2, v2 1091; GFX9-SDAG-NEXT: s_mov_b32 s8, 0xcf800000 1092; GFX9-SDAG-NEXT: v_fma_f32 v3, v2, s8, |v0| 1093; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3 1094; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v4, 31, v0 1095; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2 1096; GFX9-SDAG-NEXT: v_xor_b32_e32 v0, v3, v4 1097; GFX9-SDAG-NEXT: v_trunc_f32_e32 v3, v1 1098; GFX9-SDAG-NEXT: v_sub_f32_e32 v5, v1, v3 1099; GFX9-SDAG-NEXT: 
v_cmp_ge_f32_e64 s[4:5], |v5|, 0.5 1100; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s[4:5] 1101; GFX9-SDAG-NEXT: v_bfi_b32 v1, s6, v5, v1 1102; GFX9-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 1103; GFX9-SDAG-NEXT: v_trunc_f32_e32 v3, v1 1104; GFX9-SDAG-NEXT: v_mul_f32_e64 v1, |v3|, s7 1105; GFX9-SDAG-NEXT: v_floor_f32_e32 v1, v1 1106; GFX9-SDAG-NEXT: v_fma_f32 v5, v1, s8, |v3| 1107; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v5, v5 1108; GFX9-SDAG-NEXT: v_cvt_u32_f32_e32 v6, v1 1109; GFX9-SDAG-NEXT: v_xor_b32_e32 v2, v2, v4 1110; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4 1111; GFX9-SDAG-NEXT: v_ashrrev_i32_e32 v3, 31, v3 1112; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v4, vcc 1113; GFX9-SDAG-NEXT: v_xor_b32_e32 v2, v5, v3 1114; GFX9-SDAG-NEXT: v_xor_b32_e32 v4, v6, v3 1115; GFX9-SDAG-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v3 1116; GFX9-SDAG-NEXT: v_subb_co_u32_e32 v3, vcc, v4, v3, vcc 1117; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1118; 1119; GFX9-GISEL-LABEL: intrinsic_lround_v2i64_v2f32: 1120; GFX9-GISEL: ; %bb.0: ; %entry 1121; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1122; GFX9-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1123; GFX9-GISEL-NEXT: v_sub_f32_e32 v3, v0, v2 1124; GFX9-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], |v3|, 0.5 1125; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1.0, s[4:5] 1126; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v4, 1 1127; GFX9-GISEL-NEXT: v_and_or_b32 v0, v0, v4, v3 1128; GFX9-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 1129; GFX9-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1130; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x2f800000 1131; GFX9-GISEL-NEXT: v_mul_f32_e64 v5, |v2|, v3 1132; GFX9-GISEL-NEXT: v_floor_f32_e32 v5, v5 1133; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xcf800000 1134; GFX9-GISEL-NEXT: v_fma_f32 v2, v5, v6, |v2| 1135; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 1136; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 1137; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v0 1138; GFX9-GISEL-NEXT: v_xor_b32_e32 v0, v2, v7 1139; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, v5, v7 1140; 
GFX9-GISEL-NEXT: v_trunc_f32_e32 v5, v1 1141; GFX9-GISEL-NEXT: v_sub_f32_e32 v8, v1, v5 1142; GFX9-GISEL-NEXT: v_cmp_ge_f32_e64 s[4:5], |v8|, 0.5 1143; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1.0, s[4:5] 1144; GFX9-GISEL-NEXT: v_and_or_b32 v1, v1, v4, v8 1145; GFX9-GISEL-NEXT: v_add_f32_e32 v4, v5, v1 1146; GFX9-GISEL-NEXT: v_trunc_f32_e32 v1, v4 1147; GFX9-GISEL-NEXT: v_mul_f32_e64 v3, |v1|, v3 1148; GFX9-GISEL-NEXT: v_floor_f32_e32 v3, v3 1149; GFX9-GISEL-NEXT: v_fma_f32 v1, v3, v6, |v1| 1150; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v5, v1 1151; GFX9-GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 1152; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v7 1153; GFX9-GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v4 1154; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v7, vcc 1155; GFX9-GISEL-NEXT: v_xor_b32_e32 v2, v5, v4 1156; GFX9-GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 1157; GFX9-GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, v2, v4 1158; GFX9-GISEL-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v4, vcc 1159; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1160; 1161; GFX10-SDAG-LABEL: intrinsic_lround_v2i64_v2f32: 1162; GFX10-SDAG: ; %bb.0: ; %entry 1163; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1164; GFX10-SDAG-NEXT: v_trunc_f32_e32 v2, v0 1165; GFX10-SDAG-NEXT: v_trunc_f32_e32 v3, v1 1166; GFX10-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2 1167; GFX10-SDAG-NEXT: v_sub_f32_e32 v5, v1, v3 1168; GFX10-SDAG-NEXT: v_cmp_ge_f32_e64 s4, |v4|, 0.5 1169; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s4 1170; GFX10-SDAG-NEXT: v_cmp_ge_f32_e64 s4, |v5|, 0.5 1171; GFX10-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v4, v0 1172; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s4 1173; GFX10-SDAG-NEXT: v_add_f32_e32 v0, v2, v0 1174; GFX10-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v5, v1 1175; GFX10-SDAG-NEXT: v_trunc_f32_e32 v0, v0 1176; GFX10-SDAG-NEXT: v_add_f32_e32 v1, v3, v1 1177; GFX10-SDAG-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0| 1178; GFX10-SDAG-NEXT: v_trunc_f32_e32 v1, v1 1179; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v5, 31, v0 
1180; GFX10-SDAG-NEXT: v_floor_f32_e32 v2, v2 1181; GFX10-SDAG-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1| 1182; GFX10-SDAG-NEXT: v_ashrrev_i32_e32 v6, 31, v1 1183; GFX10-SDAG-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0| 1184; GFX10-SDAG-NEXT: v_floor_f32_e32 v3, v3 1185; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2 1186; GFX10-SDAG-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1| 1187; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v4 1188; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3 1189; GFX10-SDAG-NEXT: v_xor_b32_e32 v2, v2, v5 1190; GFX10-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 1191; GFX10-SDAG-NEXT: v_xor_b32_e32 v1, v1, v5 1192; GFX10-SDAG-NEXT: v_xor_b32_e32 v3, v3, v6 1193; GFX10-SDAG-NEXT: v_xor_b32_e32 v4, v0, v6 1194; GFX10-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5 1195; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo 1196; GFX10-SDAG-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6 1197; GFX10-SDAG-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo 1198; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1199; 1200; GFX10-GISEL-LABEL: intrinsic_lround_v2i64_v2f32: 1201; GFX10-GISEL: ; %bb.0: ; %entry 1202; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1203; GFX10-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1204; GFX10-GISEL-NEXT: v_trunc_f32_e32 v3, v1 1205; GFX10-GISEL-NEXT: v_sub_f32_e32 v4, v0, v2 1206; GFX10-GISEL-NEXT: v_sub_f32_e32 v5, v1, v3 1207; GFX10-GISEL-NEXT: v_cmp_ge_f32_e64 s4, |v4|, 0.5 1208; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s4 1209; GFX10-GISEL-NEXT: v_cmp_ge_f32_e64 s4, |v5|, 0.5 1210; GFX10-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v4 1211; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s4 1212; GFX10-GISEL-NEXT: v_add_f32_e32 v0, v2, v0 1213; GFX10-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v5 1214; GFX10-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1215; GFX10-GISEL-NEXT: v_add_f32_e32 v1, v3, v1 1216; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v0 1217; GFX10-GISEL-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2| 1218; GFX10-GISEL-NEXT: v_trunc_f32_e32 v3, v1 
1219; GFX10-GISEL-NEXT: v_floor_f32_e32 v4, v4 1220; GFX10-GISEL-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3| 1221; GFX10-GISEL-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2| 1222; GFX10-GISEL-NEXT: v_floor_f32_e32 v5, v5 1223; GFX10-GISEL-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3| 1224; GFX10-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 1225; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2 1226; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v4 1227; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v4, v5 1228; GFX10-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 1229; GFX10-GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 1230; GFX10-GISEL-NEXT: v_xor_b32_e32 v2, v2, v6 1231; GFX10-GISEL-NEXT: v_xor_b32_e32 v4, v4, v3 1232; GFX10-GISEL-NEXT: v_xor_b32_e32 v5, v0, v3 1233; GFX10-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6 1234; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo 1235; GFX10-GISEL-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3 1236; GFX10-GISEL-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo 1237; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1238; 1239; GFX11-SDAG-LABEL: intrinsic_lround_v2i64_v2f32: 1240; GFX11-SDAG: ; %bb.0: ; %entry 1241; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1242; GFX11-SDAG-NEXT: v_trunc_f32_e32 v2, v0 1243; GFX11-SDAG-NEXT: v_trunc_f32_e32 v3, v1 1244; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1245; GFX11-SDAG-NEXT: v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3 1246; GFX11-SDAG-NEXT: v_cmp_ge_f32_e64 s0, |v4|, 0.5 1247; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 1248; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s0 1249; GFX11-SDAG-NEXT: v_cmp_ge_f32_e64 s0, |v5|, 0.5 1250; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1251; GFX11-SDAG-NEXT: v_bfi_b32 v0, 0x7fffffff, v4, v0 1252; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s0 1253; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
1254; GFX11-SDAG-NEXT: v_bfi_b32 v1, 0x7fffffff, v5, v1 1255; GFX11-SDAG-NEXT: v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1 1256; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1257; GFX11-SDAG-NEXT: v_trunc_f32_e32 v0, v0 1258; GFX11-SDAG-NEXT: v_trunc_f32_e32 v1, v1 1259; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 1260; GFX11-SDAG-NEXT: v_mul_f32_e64 v2, 0x2f800000, |v0| 1261; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v5, 31, v0 1262; GFX11-SDAG-NEXT: v_mul_f32_e64 v3, 0x2f800000, |v1| 1263; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 1264; GFX11-SDAG-NEXT: v_floor_f32_e32 v2, v2 1265; GFX11-SDAG-NEXT: v_ashrrev_i32_e32 v6, 31, v1 1266; GFX11-SDAG-NEXT: v_floor_f32_e32 v3, v3 1267; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 1268; GFX11-SDAG-NEXT: v_fma_f32 v4, 0xcf800000, v2, |v0| 1269; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v2, v2 1270; GFX11-SDAG-NEXT: v_fma_f32 v0, 0xcf800000, v3, |v1| 1271; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 1272; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v1, v4 1273; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v3, v3 1274; GFX11-SDAG-NEXT: v_xor_b32_e32 v2, v2, v5 1275; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1276; GFX11-SDAG-NEXT: v_cvt_u32_f32_e32 v0, v0 1277; GFX11-SDAG-NEXT: v_xor_b32_e32 v1, v1, v5 1278; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 1279; GFX11-SDAG-NEXT: v_xor_b32_e32 v3, v3, v6 1280; GFX11-SDAG-NEXT: v_xor_b32_e32 v4, v0, v6 1281; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 1282; GFX11-SDAG-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v5 1283; GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo 1284; GFX11-SDAG-NEXT: v_sub_co_u32 v2, vcc_lo, v4, v6 1285; 
GFX11-SDAG-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo 1286; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1287; 1288; GFX11-GISEL-LABEL: intrinsic_lround_v2i64_v2f32: 1289; GFX11-GISEL: ; %bb.0: ; %entry 1290; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1291; GFX11-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1292; GFX11-GISEL-NEXT: v_trunc_f32_e32 v3, v1 1293; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1294; GFX11-GISEL-NEXT: v_dual_sub_f32 v4, v0, v2 :: v_dual_sub_f32 v5, v1, v3 1295; GFX11-GISEL-NEXT: v_cmp_ge_f32_e64 s0, |v4|, 0.5 1296; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 1297; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1.0, s0 1298; GFX11-GISEL-NEXT: v_cmp_ge_f32_e64 s0, |v5|, 0.5 1299; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1300; GFX11-GISEL-NEXT: v_and_or_b32 v0, 0x80000000, v0, v4 1301; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1.0, s0 1302; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1303; GFX11-GISEL-NEXT: v_and_or_b32 v1, 0x80000000, v1, v5 1304; GFX11-GISEL-NEXT: v_dual_add_f32 v0, v2, v0 :: v_dual_add_f32 v1, v3, v1 1305; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 1306; GFX11-GISEL-NEXT: v_trunc_f32_e32 v2, v0 1307; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v0 1308; GFX11-GISEL-NEXT: v_trunc_f32_e32 v3, v1 1309; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 1310; GFX11-GISEL-NEXT: v_mul_f32_e64 v4, 0x2f800000, |v2| 1311; GFX11-GISEL-NEXT: v_mul_f32_e64 v5, 0x2f800000, |v3| 1312; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1313; GFX11-GISEL-NEXT: v_floor_f32_e32 v4, v4 1314; GFX11-GISEL-NEXT: v_floor_f32_e32 v5, v5 1315; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 1316; 
GFX11-GISEL-NEXT: v_fma_f32 v2, 0xcf800000, v4, |v2| 1317; GFX11-GISEL-NEXT: v_fma_f32 v0, 0xcf800000, v5, |v3| 1318; GFX11-GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 1319; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4) 1320; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v1, v2 1321; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v2, v4 1322; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v4, v5 1323; GFX11-GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 1324; GFX11-GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 1325; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1326; GFX11-GISEL-NEXT: v_xor_b32_e32 v2, v2, v6 1327; GFX11-GISEL-NEXT: v_xor_b32_e32 v4, v4, v3 1328; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1329; GFX11-GISEL-NEXT: v_xor_b32_e32 v5, v0, v3 1330; GFX11-GISEL-NEXT: v_sub_co_u32 v0, vcc_lo, v1, v6 1331; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 1332; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo 1333; GFX11-GISEL-NEXT: v_sub_co_u32 v2, vcc_lo, v5, v3 1334; GFX11-GISEL-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo 1335; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1336entry: 1337 %res = tail call <2 x i64> @llvm.lround.v2i64.v2f32(<2 x float> %arg) 1338 ret <2 x i64> %res 1339} 1340