; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5

; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s

; Code generation checks for the rint, lrint and llrint intrinsics on gfx900,
; gfx1010 and gfx1100, each compiled with -global-isel=0 and -global-isel=1.
; The expected sequences below all start with v_rndne; the integer-returning
; variants then convert the rounded value to an integer.
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Every intrinsic called below is declared here. llvm.rint is overloaded on a
; single type, so its suffix names that type once; llvm.lrint and llvm.llrint
; are overloaded on both the result type and the argument type.
declare float @llvm.rint.f32(float)
declare half @llvm.rint.f16(half)
declare <2 x float> @llvm.rint.v2f32(<2 x float>)
declare i32 @llvm.lrint.i32.f16(half)
declare i32 @llvm.lrint.i32.f32(float)
declare i32 @llvm.lrint.i32.f64(double)
declare i64 @llvm.lrint.i64.f32(float)
declare i64 @llvm.lrint.i64.f64(double)
declare <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float>)
declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>)
declare i64 @llvm.llrint.i64.f32(float)
declare i64 @llvm.llrint.i64.f64(double)

; rint on f32: all configurations expect a single v_rndne_f32.
define float @intrinsic_frint(float %arg) {
; GCN-LABEL: intrinsic_frint:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_rndne_f32_e32 v0, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call float @llvm.rint.f32(float %arg)
  ret float %res
}

; lrint f32 -> i32: v_rndne_f32 followed by v_cvt_i32_f32.
define i32 @intrinsic_lrint_i32_f32(float %arg) {
; GFX9-LABEL: intrinsic_lrint_i32_f32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: intrinsic_lrint_i32_f32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: intrinsic_lrint_i32_f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call i32 @llvm.lrint.i32.f32(float %arg)
  ret i32 %res
}

; lrint f64 -> i32: v_rndne_f64 followed by v_cvt_i32_f64.
define i32 @intrinsic_lrint_i32_f64(double %arg) {
; GFX9-LABEL: intrinsic_lrint_i32_f64:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: intrinsic_lrint_i32_f64:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: intrinsic_lrint_i32_f64:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_i32_f64_e32 v0, v[0:1]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call i32 @llvm.lrint.i32.f64(double %arg)
  ret i32 %res
}

; lrint f32 -> i64: v_rndne_f32, then a multi-instruction expansion producing
; the two 32-bit halves of the result in v0 and v1.
define i64 @intrinsic_lrint_i64_f32(float %arg) {
; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f32:
; GFX9-SDAG:       ; %bb.0: ; %entry
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v1
; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s4, |v0|
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v3
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v1, v3
; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f32:
; GFX9-GISEL:       ; %bb.0: ; %entry
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f32:
; GFX10-SDAG:       ; %bb.0: ; %entry
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f32:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f32:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f32:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call i64 @llvm.lrint.i64.f32(float %arg)
  ret i64 %res
}

; lrint f64 -> i64: v_rndne_f64, then a floor/fma split whose two pieces are
; converted separately into v0 and v1.
define i64 @intrinsic_lrint_i64_f64(double %arg) {
; GFX9-SDAG-LABEL: intrinsic_lrint_i64_f64:
; GFX9-SDAG:       ; %bb.0: ; %entry
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: intrinsic_lrint_i64_f64:
; GFX9-GISEL:       ; %bb.0: ; %entry
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: intrinsic_lrint_i64_f64:
; GFX10-SDAG:       ; %bb.0: ; %entry
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: intrinsic_lrint_i64_f64:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: intrinsic_lrint_i64_f64:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: intrinsic_lrint_i64_f64:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call i64 @llvm.lrint.i64.f64(double %arg)
  ret i64 %res
}

; llrint f32 -> i64: the expected sequences match those of
; intrinsic_lrint_i64_f32.
define i64 @intrinsic_llrint_i64_f32(float %arg) {
; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f32:
; GFX9-SDAG:       ; %bb.0: ; %entry
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v0|, s4
; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0xcf800000
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v1
; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s4, |v0|
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v3
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v1, v3
; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v3, vcc
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f32:
; GFX9-GISEL:       ; %bb.0: ; %entry
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0x2f800000
; GFX9-GISEL-NEXT:    v_mul_f32_e64 v2, |v1|, v2
; GFX9-GISEL-NEXT:    v_floor_f32_e32 v2, v2
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0xcf800000
; GFX9-GISEL-NEXT:    v_fma_f32 v1, v2, v3, |v1|
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v1, v3
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v3
; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f32:
; GFX10-SDAG:       ; %bb.0: ; %entry
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX10-SDAG-NEXT:    v_floor_f32_e32 v1, v1
; GFX10-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f32:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX10-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
; GFX10-GISEL-NEXT:    v_floor_f32_e32 v2, v2
; GFX10-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f32:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_mul_f32_e64 v1, 0x2f800000, |v0|
; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX11-SDAG-NEXT:    v_floor_f32_e32 v1, v1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_fma_f32 v2, 0xcf800000, v1, |v0|
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f32:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v1, v0
; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v0
; GFX11-GISEL-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v1|
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_floor_f32_e32 v2, v2
; GFX11-GISEL-NEXT:    v_fma_f32 v1, 0xcf800000, v2, |v1|
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v1
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v3
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v3
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call i64 @llvm.llrint.i64.f32(float %arg)
  ret i64 %res
}

; llrint f64 -> i64: the expected sequences match those of
; intrinsic_lrint_i64_f64.
define i64 @intrinsic_llrint_i64_f64(double %arg) {
; GFX9-SDAG-LABEL: intrinsic_llrint_i64_f64:
; GFX9-SDAG:       ; %bb.0: ; %entry
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0xffe0
; GFX9-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], s4
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xc1f00000
; GFX9-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX9-SDAG-NEXT:    v_fma_f64 v[0:1], v[2:3], s[4:5], v[0:1]
; GFX9-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX9-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: intrinsic_llrint_i64_f64:
; GFX9-GISEL:       ; %bb.0: ; %entry
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3df00000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xc1f00000
; GFX9-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
; GFX9-GISEL-NEXT:    v_mul_f64 v[2:3], v[0:1], v[2:3]
; GFX9-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX9-GISEL-NEXT:    v_fma_f64 v[0:1], v[2:3], v[4:5], v[0:1]
; GFX9-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX9-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: intrinsic_llrint_i64_f64:
; GFX10-SDAG:       ; %bb.0: ; %entry
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
; GFX10-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX10-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX10-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX10-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: intrinsic_llrint_i64_f64:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
; GFX10-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
; GFX10-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX10-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX10-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX10-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: intrinsic_llrint_i64_f64:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_ldexp_f64 v[2:3], v[0:1], 0xffffffe0
; GFX11-SDAG-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX11-SDAG-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX11-SDAG-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: intrinsic_llrint_i64_f64:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_trunc_f64_e32 v[0:1], v[0:1]
; GFX11-GISEL-NEXT:    v_mul_f64 v[2:3], 0x3df00000, v[0:1]
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_floor_f64_e32 v[2:3], v[2:3]
; GFX11-GISEL-NEXT:    v_fma_f64 v[0:1], 0xc1f00000, v[2:3], v[0:1]
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_cvt_u32_f64_e32 v0, v[0:1]
; GFX11-GISEL-NEXT:    v_cvt_i32_f64_e32 v1, v[2:3]
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call i64 @llvm.llrint.i64.f64(double %arg)
  ret i64 %res
}

; rint on f16: all configurations expect a single v_rndne_f16.
define half @intrinsic_frint_half(half %arg) {
; GCN-LABEL: intrinsic_frint_half:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_rndne_f16_e32 v0, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call half @llvm.rint.f16(half %arg)
  ret half %res
}

; lrint f16 -> i32: v_rndne_f16, then conversion through f32 to i32.
define i32 @intrinsic_lrint_i32_f16(half %arg) {
; GFX9-LABEL: intrinsic_lrint_i32_f16:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: intrinsic_lrint_i32_f16:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: intrinsic_lrint_i32_f16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rndne_f16_e32 v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call i32 @llvm.lrint.i32.f16(half %arg)
  ret i32 %res
}

; rint on <2 x float>: one v_rndne_f32 per element.
define <2 x float> @intrinsic_frint_v2f32_v2f32(<2 x float> %arg) {
; GCN-LABEL: intrinsic_frint_v2f32_v2f32:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_rndne_f32_e32 v0, v0
; GCN-NEXT:    v_rndne_f32_e32 v1, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %arg)
  ret <2 x float> %res
}

; lrint <2 x float> -> <2 x i32>: per-element v_rndne_f32 and v_cvt_i32_f32.
define <2 x i32> @intrinsic_lrint_v2i32_v2f32(<2 x float> %arg) {
; GFX9-LABEL: intrinsic_lrint_v2i32_v2f32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_i32_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: intrinsic_lrint_v2i32_v2f32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_i32_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: intrinsic_lrint_v2i32_v2f32:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-NEXT:    v_rndne_f32_e32 v1, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_cvt_i32_f32_e32 v0, v0
; GFX11-NEXT:    v_cvt_i32_f32_e32 v1, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call <2 x i32> @llvm.lrint.v2i32.v2f32(<2 x float> %arg)
  ret <2 x i32> %res
}

; lrint <2 x float> -> <2 x i64>: the scalar f32 -> i64 expansion applied to
; both elements, with the results returned in v0..v3.
define <2 x i64> @intrinsic_lrint_v2i64_v2f32(<2 x float> %arg) {
; GFX9-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
; GFX9-SDAG:       ; %bb.0: ; %entry
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x2f800000
; GFX9-SDAG-NEXT:    v_mul_f32_e64 v2, |v0|, s4
; GFX9-SDAG-NEXT:    v_floor_f32_e32 v2, v2
; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0xcf800000
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v2
; GFX9-SDAG-NEXT:    v_fma_f32 v2, v2, s5, |v0|
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v4
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, v2, v4
; GFX9-SDAG-NEXT:    v_rndne_f32_e32 v2, v1
; GFX9-SDAG-NEXT:    v_mul_f32_e64 v1, |v2|, s4
; GFX9-SDAG-NEXT:    v_floor_f32_e32 v1, v1
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v5, v1
; GFX9-SDAG-NEXT:    v_fma_f32 v1, v1, s5, |v2|
; GFX9-SDAG-NEXT:    v_cvt_u32_f32_e32 v6, v1
; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v1, vcc, v3, v4, vcc
; GFX9-SDAG-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v2, v6, v3
; GFX9-SDAG-NEXT:    v_xor_b32_e32 v4, v5, v3
; GFX9-SDAG-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v3
; GFX9-SDAG-NEXT:    v_subb_co_u32_e32 v3, vcc, v4, v3, vcc
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
; GFX9-GISEL:       ; %bb.0: ; %entry
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v3, 0x2f800000
; GFX9-GISEL-NEXT:    v_mul_f32_e64 v4, |v2|, v3
; GFX9-GISEL-NEXT:    v_floor_f32_e32 v4, v4
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v5, 0xcf800000
; GFX9-GISEL-NEXT:    v_fma_f32 v2, v4, v5, |v2|
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v6
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v4, v6
; GFX9-GISEL-NEXT:    v_rndne_f32_e32 v4, v1
; GFX9-GISEL-NEXT:    v_trunc_f32_e32 v1, v4
; GFX9-GISEL-NEXT:    v_mul_f32_e64 v3, |v1|, v3
; GFX9-GISEL-NEXT:    v_floor_f32_e32 v3, v3
; GFX9-GISEL-NEXT:    v_fma_f32 v1, v3, v5, |v1|
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v1
; GFX9-GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v6
; GFX9-GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v4
; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v1, vcc, v2, v6, vcc
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v2, v5, v4
; GFX9-GISEL-NEXT:    v_xor_b32_e32 v3, v3, v4
; GFX9-GISEL-NEXT:    v_sub_co_u32_e32 v2, vcc, v2, v4
; GFX9-GISEL-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v4, vcc
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
; GFX10-SDAG:       ; %bb.0: ; %entry
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-SDAG-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
; GFX10-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
; GFX10-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; GFX10-SDAG-NEXT:    v_floor_f32_e32 v2, v2
; GFX10-SDAG-NEXT:    v_floor_f32_e32 v3, v3
; GFX10-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
; GFX10-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
; GFX10-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
; GFX10-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
; GFX10-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
; GFX10-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
; GFX10-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
; GFX10-GISEL:       ; %bb.0: ; %entry
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-GISEL-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
; GFX10-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
; GFX10-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
; GFX10-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
; GFX10-GISEL-NEXT:    v_floor_f32_e32 v4, v4
; GFX10-GISEL-NEXT:    v_floor_f32_e32 v5, v5
; GFX10-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
; GFX10-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
; GFX10-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX10-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
; GFX10-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
; GFX10-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
; GFX10-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: intrinsic_lrint_v2i64_v2f32:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-SDAG-NEXT:    v_rndne_f32_e32 v1, v1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_mul_f32_e64 v2, 0x2f800000, |v0|
; GFX11-SDAG-NEXT:    v_mul_f32_e64 v3, 0x2f800000, |v1|
; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v5, 31, v0
; GFX11-SDAG-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT:    v_floor_f32_e32 v2, v2
; GFX11-SDAG-NEXT:    v_floor_f32_e32 v3, v3
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_fma_f32 v4, 0xcf800000, v2, |v0|
; GFX11-SDAG-NEXT:    v_fma_f32 v0, 0xcf800000, v3, |v1|
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v3, v3
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v1, v4
; GFX11-SDAG-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v2, v2, v5
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v3, v3, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, v1, v5
; GFX11-SDAG-NEXT:    v_xor_b32_e32 v4, v0, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-SDAG-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v5
; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v5, vcc_lo
; GFX11-SDAG-NEXT:    v_sub_co_u32 v2, vcc_lo, v4, v6
; GFX11-SDAG-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v6, vcc_lo
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: intrinsic_lrint_v2i64_v2f32:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v0, v0
; GFX11-GISEL-NEXT:    v_rndne_f32_e32 v1, v1
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v2, v0
; GFX11-GISEL-NEXT:    v_trunc_f32_e32 v3, v1
; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT:    v_mul_f32_e64 v4, 0x2f800000, |v2|
; GFX11-GISEL-NEXT:    v_mul_f32_e64 v5, 0x2f800000, |v3|
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_floor_f32_e32 v4, v4
; GFX11-GISEL-NEXT:    v_floor_f32_e32 v5, v5
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_fma_f32 v2, 0xcf800000, v4, |v2|
; GFX11-GISEL-NEXT:    v_fma_f32 v0, 0xcf800000, v5, |v3|
; GFX11-GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v1, v2
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v2, v4
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
; GFX11-GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v5
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v5, v0, v3
; GFX11-GISEL-NEXT:    v_xor_b32_e32 v4, v4, v3
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_sub_co_u32 v0, vcc_lo, v1, v6
; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v2, v6, vcc_lo
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_sub_co_u32 v2, vcc_lo, v5, v3
; GFX11-GISEL-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v4, v3, vcc_lo
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
entry:
  %res = tail call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> %arg)
  ret <2 x i64> %res
}