; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s ; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s declare void @llvm.set.rounding(i32) declare i32 @llvm.get.rounding() define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) { ; GFX678-LABEL: s_set_rounding: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_add_i32 s34, s4, -4 ; GFX678-NEXT: s_min_u32 s34, s4, s34 ; GFX678-NEXT: s_lshl_b32 s36, s34, 2 ; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_add_i32 s34, s4, -4 ; GFX9-NEXT: s_min_u32 s34, s4, s34 ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_add_i32 s34, s4, -4 ; GFX10-NEXT: s_min_u32 s36, s4, s34 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 
s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_add_i32 s0, s4, -4 ; GFX11-NEXT: s_min_u32 s2, s4, s0 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 %rounding) ret void } define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) { ; GFX6-LABEL: s_set_rounding_kernel: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s2, s[4:5], 0x9 ; GFX6-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX6-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX6-NEXT: ;;#ASMSTART ; GFX6-NEXT: ;;#ASMEND ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s3, s2, -4 ; GFX6-NEXT: s_min_u32 s2, s2, s3 ; GFX6-NEXT: s_lshl_b32 s2, s2, 2 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: s_set_rounding_kernel: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[4:5], 0x9 ; GFX7-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX7-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: s_add_i32 s3, s2, -4 ; GFX7-NEXT: s_min_u32 s2, s2, s3 ; GFX7-NEXT: s_lshl_b32 s2, s2, 2 ; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: s_set_rounding_kernel: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x24 ; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_add_i32 s3, s2, -4 ; GFX8-NEXT: s_min_u32 s2, s2, s3 ; GFX8-NEXT: s_lshl_b32 s2, s2, 2 ; GFX8-NEXT: 
s_lshr_b64 s[0:1], s[0:1], s2 ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: s_set_rounding_kernel: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s2, s[4:5], 0x24 ; GFX9-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX9-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_add_i32 s3, s2, -4 ; GFX9-NEXT: s_min_u32 s2, s2, s3 ; GFX9-NEXT: s_lshl_b32 s2, s2, 2 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: s_set_rounding_kernel: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24 ; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_add_i32 s3, s2, -4 ; GFX10-NEXT: s_min_u32 s2, s2, s3 ; GFX10-NEXT: s_lshl_b32 s2, s2, 2 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: s_set_rounding_kernel: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_add_i32 s3, s2, -4 ; GFX11-NEXT: s_min_u32 s2, s2, s3 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_endpgm call void @llvm.set.rounding(i32 %rounding) call void asm sideeffect "",""() ret void } define void @v_set_rounding(i32 %rounding) { ; GFX6-LABEL: v_set_rounding: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0 ; GFX6-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX6-NEXT: s_mov_b32 s5, 
0xb73e62d9 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 ; GFX6-NEXT: v_readfirstlane_b32 s4, v0 ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: v_set_rounding: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0 ; GFX7-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9 ; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 ; GFX7-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_set_rounding: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0 ; GFX8-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_set_rounding: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_add_u32_e32 v1, -4, v0 ; GFX9-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_set_rounding: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0 ; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 ; GFX10-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] ; GFX10-NEXT: v_readfirstlane_b32 s4, v0 ; 
GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_set_rounding: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX11-NEXT: v_min_u32_e32 v0, v0, v1 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1] ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 %rounding) ret void } define void @set_rounding_get_rounding() { ; GFX678-LABEL: set_rounding_get_rounding: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) ; GFX678-NEXT: s_lshl_b32 s6, s4, 2 ; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71 ; GFX678-NEXT: s_mov_b32 s5, 0xc96f385 ; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX678-NEXT: s_and_b32 s4, s4, 15 ; GFX678-NEXT: s_add_i32 s5, s4, 4 ; GFX678-NEXT: s_cmp_lt_u32 s4, 4 ; GFX678-NEXT: s_cselect_b32 s4, s4, s5 ; GFX678-NEXT: s_add_i32 s5, s4, -4 ; GFX678-NEXT: s_min_u32 s4, s4, s5 ; GFX678-NEXT: s_lshl_b32 s6, s4, 2 ; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX678-NEXT: s_mov_b32 s5, 0xb73e62d9 ; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: set_rounding_get_rounding: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) ; GFX9-NEXT: s_lshl_b32 s6, s4, 2 ; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71 ; GFX9-NEXT: s_mov_b32 s5, 0xc96f385 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX9-NEXT: s_and_b32 s4, s4, 15 ; GFX9-NEXT: s_add_i32 s5, s4, 4 ; GFX9-NEXT: s_cmp_lt_u32 s4, 4 ; GFX9-NEXT: s_cselect_b32 s4, s4, s5 ; GFX9-NEXT: s_add_i32 s5, s4, -4 ; GFX9-NEXT: s_min_u32 s4, s4, 
s5 ; GFX9-NEXT: s_lshl_b32 s6, s4, 2 ; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: set_rounding_get_rounding: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) ; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71 ; GFX10-NEXT: s_mov_b32 s5, 0xc96f385 ; GFX10-NEXT: s_lshl_b32 s6, s6, 2 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX10-NEXT: s_and_b32 s4, s4, 15 ; GFX10-NEXT: s_add_i32 s5, s4, 4 ; GFX10-NEXT: s_cmp_lt_u32 s4, 4 ; GFX10-NEXT: s_cselect_b32 s4, s4, s5 ; GFX10-NEXT: s_add_i32 s5, s4, -4 ; GFX10-NEXT: s_min_u32 s6, s4, s5 ; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 ; GFX10-NEXT: s_lshl_b32 s6, s6, 2 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: set_rounding_get_rounding: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) ; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_and_b32 s0, s0, 15 ; GFX11-NEXT: s_add_i32 s1, s0, 4 ; GFX11-NEXT: s_cmp_lt_u32 s0, 4 ; GFX11-NEXT: s_cselect_b32 s0, s0, s1 ; GFX11-NEXT: s_add_i32 s1, s0, -4 ; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %rounding = call i32 @llvm.get.rounding() call void @llvm.set.rounding(i32 %rounding) ret void } define void @s_set_rounding_0() { ; GFX678-LABEL: s_set_rounding_0: ; GFX678: ; 
%bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_0: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xf ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 0) ret void } define void @s_set_rounding_1() { ; GFX678-LABEL: s_set_rounding_1: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_1: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x0 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 1) ret void } define void @s_set_rounding_2() { ; GFX678-LABEL: s_set_rounding_2: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_2: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x5 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 2) ret void } define void @s_set_rounding_3() { ; GFX678-LABEL: s_set_rounding_3: ; 
GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_3: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xa ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 3) ret void } ; Unsupported mode. define void @s_set_rounding_4() { ; GFX678-LABEL: s_set_rounding_4: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_4: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xf ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 4) ret void } ; undefined define void @s_set_rounding_5() { ; GFX678-LABEL: s_set_rounding_5: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_5: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x0 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 5) ret void } ; undefined define void 
@s_set_rounding_6() { ; GFX678-LABEL: s_set_rounding_6: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_6: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x5 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 6) ret void } ; "Dynamic" define void @s_set_rounding_7() { ; GFX678-LABEL: s_set_rounding_7: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_7: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xa ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 7) ret void } ; Invalid define void @s_set_rounding_neg1() { ; GFX678-LABEL: s_set_rounding_neg1: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_neg1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_neg1: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xb ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void 
@llvm.set.rounding(i32 -1) ret void } ; -------------------------------------------------------------------- ; Test extended values ; -------------------------------------------------------------------- ; NearestTiesToEvenF32_TowardPositiveF64 = 8 define void @s_set_rounding_8() { ; GFX678-LABEL: s_set_rounding_8: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_8: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x4 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 8) ret void } ; NearestTiesToEvenF32_TowardNegativeF64 = 9 define void @s_set_rounding_9() { ; GFX678-LABEL: s_set_rounding_9: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_9: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_9: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x8 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 9) ret void } ; NearestTiesToEvenF32_TowardZeroF64 = 10 define void @s_set_rounding_10() { ; GFX678-LABEL: s_set_rounding_10: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_10: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_10: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xc ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 10) ret void } ; TowardPositiveF32_NearestTiesToEvenF64 = 11 define void @s_set_rounding_11() { ; GFX678-LABEL: s_set_rounding_11: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_11: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_11: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x1 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 11) ret void } ; TowardPositiveF32_TowardNegativeF64 = 12 define void @s_set_rounding_12() { ; GFX678-LABEL: s_set_rounding_12: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_12: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_12: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x9 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 12) ret void } ; TowardPositiveF32_TowardZeroF64 = 13 define void @s_set_rounding_13() { ; GFX678-LABEL: s_set_rounding_13: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 
hwreg(HW_REG_MODE, 0, 4), 13 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_13: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_13: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xd ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 13) ret void } ; TowardNegativeF32_NearestTiesToEvenF64 = 14 define void @s_set_rounding_14() { ; GFX678-LABEL: s_set_rounding_14: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_14: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_14: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x2 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 14) ret void } ; TowardNegativeF32_TowardPositiveF64 = 15 define void @s_set_rounding_15() { ; GFX678-LABEL: s_set_rounding_15: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_15: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x6 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 15) ret void } ; TowardNegativeF32_TowardZeroF64 = 16 define void 
@s_set_rounding_16() { ; GFX678-LABEL: s_set_rounding_16: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_16: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xe ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 16) ret void } ; TowardZeroF32_NearestTiesToEvenF64 = 17 define void @s_set_rounding_17() { ; GFX678-LABEL: s_set_rounding_17: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_17: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_17: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0x3 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 17) ret void } ; TowardZeroF32_TowardPositiveF64 = 18 define void @s_set_rounding_18() { ; GFX678-LABEL: s_set_rounding_18: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_18: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_18: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: 
s_round_mode 0x7 ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 18) ret void } ; TowardZeroF32_TowardNegativeF64 = 19, define void @s_set_rounding_19() { ; GFX678-LABEL: s_set_rounding_19: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_19: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_19: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xb ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 19) ret void } ; Invalid, out of bounds define void @s_set_rounding_20() { ; GFX678-LABEL: s_set_rounding_20: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_20: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: s_set_rounding_20: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xb ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 20) ret void } define void @s_set_rounding_0xffff() { ; GFX678-LABEL: s_set_rounding_0xffff: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_0xffff: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1011-LABEL: 
s_set_rounding_0xffff: ; GFX1011: ; %bb.0: ; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1011-NEXT: s_round_mode 0xb ; GFX1011-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 65535) ret void } ; -------------------------------------------------------------------- ; Test optimization knowing the value can only be in the standard ; range ; -------------------------------------------------------------------- define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) { ; GFX6-LABEL: s_set_rounding_i2_zeroext: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_lshl_b32 s34, s4, 2 ; GFX6-NEXT: s_lshr_b32 s34, 0xa50f, s34 ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: s_set_rounding_i2_zeroext: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_lshl_b32 s34, s4, 2 ; GFX7-NEXT: s_lshr_b32 s34, 0xa50f, s34 ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: s_set_rounding_i2_zeroext: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_and_b32 s34, 0xffff, s4 ; GFX8-NEXT: s_lshl_b32 s34, s34, 2 ; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34 ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_i2_zeroext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_and_b32 s34, 0xffff, s4 ; GFX9-NEXT: s_lshl_b32 s34, s34, 2 ; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_i2_zeroext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 ; GFX10-NEXT: s_lshl_b32 s34, s34, 2 ; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), 
s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding_i2_zeroext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %zext.rounding = zext i2 %rounding to i32 call void @llvm.set.rounding(i32 %zext.rounding) ret void } define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) { ; GFX6-LABEL: s_set_rounding_i2_signext: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s34, s4, -4 ; GFX6-NEXT: s_min_u32 s34, s4, s34 ; GFX6-NEXT: s_lshl_b32 s36, s34, 2 ; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: s_set_rounding_i2_signext: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_add_i32 s34, s4, -4 ; GFX7-NEXT: s_min_u32 s34, s4, s34 ; GFX7-NEXT: s_lshl_b32 s36, s34, 2 ; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: s_set_rounding_i2_signext: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_sext_i32_i16 s34, s4 ; GFX8-NEXT: s_add_i32 s35, s34, -4 ; GFX8-NEXT: s_min_u32 s34, s34, s35 ; GFX8-NEXT: s_lshl_b32 s36, s34, 2 ; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_i2_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; GFX9-NEXT: s_sext_i32_i16 s34, s4 ; GFX9-NEXT: s_add_i32 s35, s34, -4 ; GFX9-NEXT: s_min_u32 s34, s34, s35 ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_i2_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_sext_i32_i16 s34, s4 ; GFX10-NEXT: s_add_i32 s35, s34, -4 ; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding_i2_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_sext_i32_i16 s0, s4 ; GFX11-NEXT: s_add_i32 s1, s0, -4 ; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %sext.rounding = sext i2 %rounding to i32 call void @llvm.set.rounding(i32 %sext.rounding) ret void } define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) { ; GFX6-LABEL: s_set_rounding_i3_signext: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_add_i32 s34, s4, -4 ; GFX6-NEXT: s_min_u32 s34, s4, s34 ; GFX6-NEXT: s_lshl_b32 s36, s34, 2 ; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: s_set_rounding_i3_signext: ; GFX7: ; %bb.0: ; 
GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_add_i32 s34, s4, -4 ; GFX7-NEXT: s_min_u32 s34, s4, s34 ; GFX7-NEXT: s_lshl_b32 s36, s34, 2 ; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: s_set_rounding_i3_signext: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_sext_i32_i16 s34, s4 ; GFX8-NEXT: s_add_i32 s35, s34, -4 ; GFX8-NEXT: s_min_u32 s34, s34, s35 ; GFX8-NEXT: s_lshl_b32 s36, s34, 2 ; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_i3_signext: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_sext_i32_i16 s34, s4 ; GFX9-NEXT: s_add_i32 s35, s34, -4 ; GFX9-NEXT: s_min_u32 s34, s34, s35 ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_i3_signext: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_sext_i32_i16 s34, s4 ; GFX10-NEXT: s_add_i32 s35, s34, -4 ; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding_i3_signext: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_sext_i32_i16 s0, s4 ; GFX11-NEXT: s_add_i32 s1, s0, -4 ; 
GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %sext.rounding = sext i3 %rounding to i32
  call void @llvm.set.rounding(i32 %sext.rounding)
  ret void
}

; An i3 zeroext inreg argument is widened with zext and passed to
; llvm.set.rounding. The GFX6/GFX7 checks expect the add -4 / s_min_u32 /
; 64-bit table-shift sequence applied directly to s4; the GFX8 and later
; checks expect the same sequence preceded by an s_and_b32 with 0xffff.
define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
; GFX6-LABEL: s_set_rounding_i3_zeroext:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_add_i32 s34, s4, -4
; GFX6-NEXT: s_min_u32 s34, s4, s34
; GFX6-NEXT: s_lshl_b32 s36, s34, 2
; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_i3_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_add_i32 s34, s4, -4
; GFX7-NEXT: s_min_u32 s34, s4, s34
; GFX7-NEXT: s_lshl_b32 s36, s34, 2
; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: s_set_rounding_i3_zeroext:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_and_b32 s34, 0xffff, s4
; GFX8-NEXT: s_add_i32 s35, s34, -4
; GFX8-NEXT: s_min_u32 s34, s34, s35
; GFX8-NEXT: s_lshl_b32 s36, s34, 2
; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_i3_zeroext:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_and_b32 s34, 0xffff, s4
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_i3_zeroext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_i3_zeroext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Zero-extended, so the value is named zext.* like in
  ; s_set_rounding_i2_zeroext.
  %zext.rounding = zext i3 %rounding to i32
  call void @llvm.set.rounding(i32 %zext.rounding)
  ret void
}

define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) {
; GFX6-LABEL: s_set_rounding_select_0_1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_cmp_lg_u32 s4, 0
; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0
; GFX6-NEXT: v_readfirstlane_b32 s34, v0
; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: s_set_rounding_select_0_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:
s_cmp_lg_u32 s4, 0 ; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0 ; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 ; GFX7-NEXT: v_readfirstlane_b32 s34, v0 ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: s_set_rounding_select_0_1: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_cmp_lg_u32 s4, 0 ; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX8-NEXT: s_mov_b32 s34, 0xa50f ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34 ; GFX8-NEXT: v_readfirstlane_b32 s34, v0 ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_select_0_1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_cmp_lg_u32 s4, 0 ; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_mov_b32 s34, 0xa50f ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34 ; GFX9-NEXT: v_readfirstlane_b32 s34, v0 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_select_0_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_cmp_lg_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s34, -1, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f ; GFX10-NEXT: v_readfirstlane_b32 s34, v0 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding_select_0_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_cmp_lg_u32 s4, 0 ; GFX11-NEXT: s_cselect_b32 s0, -1, 0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; 
GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 0, i32 1
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}

; Both select arms (1 and 3) are constants and the condition is an inreg
; scalar. On every target the checks expect one s_cselect_b32 between the
; immediates 0xa50 and 10 (numerically 0xa50f >> 4 and 0xa50f >> 12), fed
; directly to s_setreg_b32 with no shift instructions.
define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_1_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_1_3:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_1_3:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 1, i32 3
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}

define void @v_set_rounding_select_1_3(i32 %cond) {
; GFX678-LABEL: v_set_rounding_select_1_3:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50
; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc
; GFX678-NEXT:
v_readfirstlane_b32 s4, v0 ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_set_rounding_select_1_3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_set_rounding_select_1_3: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo ; GFX10-NEXT: v_readfirstlane_b32 s4, v0 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_set_rounding_select_1_3: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 1, i32 3 call void @llvm.set.rounding(i32 %rounding) ret void } define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) { ; GFX6-LABEL: s_set_rounding_select_2_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_cmp_eq_u32 s4, 0 ; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0 ; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 ; GFX6-NEXT: v_readfirstlane_b32 s34, v0 ; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: s_set_rounding_select_2_0: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_cmp_eq_u32 s4, 0 ; GFX7-NEXT: s_cselect_b64 
s[34:35], -1, 0 ; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 ; GFX7-NEXT: v_readfirstlane_b32 s34, v0 ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: s_set_rounding_select_2_0: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_cmp_eq_u32 s4, 0 ; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX8-NEXT: s_mov_b32 s34, 0xa50f ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34 ; GFX8-NEXT: v_readfirstlane_b32 s34, v0 ; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_select_2_0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX9-NEXT: s_mov_b32 s34, 0xa50f ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34 ; GFX9-NEXT: v_readfirstlane_b32 s34, v0 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_select_2_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s34, -1, 0 ; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f ; GFX10-NEXT: v_readfirstlane_b32 s34, v0 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding_select_2_0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 ; GFX11-NEXT: s_cselect_b32 s0, -1, 0 ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0 ; 
GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 2, i32 0
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}

; Both select arms (2 and 1) are constants. The checks expect 0xa5 to be
; materialised with s_movk_i32 and then selected against the immediate 0xa50
; by one s_cselect_b32 (numerically 0xa50f >> 8 and 0xa50f >> 4); the result
; goes straight to s_setreg_b32 with no shift instructions.
define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_2_1:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_movk_i32 s34, 0xa5
; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_2_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_movk_i32 s34, 0xa5
; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_2_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_movk_i32 s34, 0xa5
; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_2_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_movk_i32 s0, 0xa5
; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 2, i32 1
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}

define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_1_2:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
;
GFX678-NEXT: s_movk_i32 s34, 0xa50 ; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5 ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_select_1_2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: s_movk_i32 s34, 0xa50 ; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_select_1_2: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: s_movk_i32 s34, 0xa50 ; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding_select_1_2: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 ; GFX11-NEXT: s_movk_i32 s0, 0xa50 ; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 1, i32 2 call void @llvm.set.rounding(i32 %rounding) ret void } define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) { ; GFX678-LABEL: s_set_rounding_select_3_0: ; GFX678: ; %bb.0: ; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX678-NEXT: s_cmp_eq_u32 s4, 0 ; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f ; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX678-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_set_rounding_select_3_0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_select_3_0: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_3_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 3, i32 0
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}

; Select between the constants 4 and 0. Unlike the selects whose arms are
; both in 0..3, the checks here do not expect a select of pre-shifted
; immediates: the condition becomes a 0/1 value (v_cndmask_b32 +
; v_readfirstlane_b32) shifted left by 2, and that value then goes through
; the same add -4 / s_min_u32 / 64-bit table-shift sequence that
; s_set_rounding uses for a fully variable operand.
; NOTE(review): presumably this is because 4 lies outside the range covered
; by the 16-bit 0xa50f table - confirm against the lowering code.
define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_4_0:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX678-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX678-NEXT: v_readfirstlane_b32 s34, v0
; GFX678-NEXT: s_lshl_b32 s34, s34, 2
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_4_0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35]
; GFX9-NEXT: v_readfirstlane_b32 s34, v0
; GFX9-NEXT: s_lshl_b32 s34, s34, 2
; GFX9-NEXT: s_add_i32 s35, s34, -4
; GFX9-NEXT: s_min_u32 s34, s34, s35
; GFX9-NEXT: s_lshl_b32 s36, s34, 2
; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: s_set_rounding_select_4_0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, -1, 0
; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
; GFX10-NEXT: s_add_i32 s35, s34, -4
; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: s_set_rounding_select_4_0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_add_i32 s1, s0, -4
; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %cond, 0
  %rounding = select i1 %cmp, i32 4, i32 0
  call void @llvm.set.rounding(i32 %rounding)
  ret void
}

define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
; GFX678-LABEL: s_set_rounding_select_3_5:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_cmp_eq_u32 s4, 0
; GFX678-NEXT: s_cselect_b32 s34, 3, 5
; GFX678-NEXT: s_add_i32 s35, s34, -4
; GFX678-NEXT: s_min_u32 s34, s34, s35
; GFX678-NEXT: s_lshl_b32 s36, s34, 2
; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9
; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: s_set_rounding_select_3_5:
;
GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_cmp_eq_u32 s4, 0 ; GFX9-NEXT: s_cselect_b32 s34, 3, 5 ; GFX9-NEXT: s_add_i32 s35, s34, -4 ; GFX9-NEXT: s_min_u32 s34, s34, s35 ; GFX9-NEXT: s_lshl_b32 s36, s34, 2 ; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_set_rounding_select_3_5: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_cmp_eq_u32 s4, 0 ; GFX10-NEXT: s_cselect_b32 s34, 3, 5 ; GFX10-NEXT: s_add_i32 s35, s34, -4 ; GFX10-NEXT: s_min_u32 s36, s34, s35 ; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f ; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 ; GFX10-NEXT: s_lshl_b32 s36, s36, 2 ; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_set_rounding_select_3_5: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_cmp_eq_u32 s4, 0 ; GFX11-NEXT: s_cselect_b32 s0, 3, 5 ; GFX11-NEXT: s_add_i32 s1, s0, -4 ; GFX11-NEXT: s_min_u32 s2, s0, s1 ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 3, i32 5 call void @llvm.set.rounding(i32 %rounding) ret void } define amdgpu_kernel void @get_rounding_after_set_rounding_1() { ; GFX6-LABEL: get_rounding_after_set_rounding_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_nop 0 ; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) ; GFX6-NEXT: s_lshl_b32 s2, s0, 2 ; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX6-NEXT: 
s_mov_b32 s1, 0xc96f385 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX6-NEXT: s_and_b32 s0, s0, 15 ; GFX6-NEXT: s_add_i32 s1, s0, 4 ; GFX6-NEXT: s_cmp_lt_u32 s0, 4 ; GFX6-NEXT: s_cselect_b32 s4, s0, s1 ; GFX6-NEXT: s_mov_b32 s0, 0 ; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_mov_b32 s1, s0 ; GFX6-NEXT: v_mov_b32_e32 v0, s4 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_waitcnt vmcnt(0) ; GFX6-NEXT: s_endpgm ; ; GFX7-LABEL: get_rounding_after_set_rounding_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: s_nop 0 ; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) ; GFX7-NEXT: s_lshl_b32 s2, s0, 2 ; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX7-NEXT: s_mov_b32 s1, 0xc96f385 ; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX7-NEXT: s_and_b32 s0, s0, 15 ; GFX7-NEXT: s_add_i32 s1, s0, 4 ; GFX7-NEXT: s_cmp_lt_u32 s0, 4 ; GFX7-NEXT: s_cselect_b32 s4, s0, s1 ; GFX7-NEXT: s_mov_b32 s0, 0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s1, s0 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_endpgm ; ; GFX8-LABEL: get_rounding_after_set_rounding_1: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) ; GFX8-NEXT: s_lshl_b32 s2, s0, 2 ; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX8-NEXT: s_mov_b32 s1, 0xc96f385 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX8-NEXT: s_and_b32 s0, s0, 15 ; GFX8-NEXT: s_add_i32 s1, s0, 4 ; GFX8-NEXT: s_cmp_lt_u32 s0, 4 ; GFX8-NEXT: s_cselect_b32 s0, s0, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: flat_store_dword v[0:1], v2 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: get_rounding_after_set_rounding_1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 ; GFX9-NEXT: 
v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) ; GFX9-NEXT: s_lshl_b32 s2, s0, 2 ; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX9-NEXT: s_mov_b32 s1, 0xc96f385 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_add_i32 s1, s0, 4 ; GFX9-NEXT: s_cmp_lt_u32 s0, 4 ; GFX9-NEXT: s_cselect_b32 s0, s0, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: get_rounding_after_set_rounding_1: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_round_mode 0x0 ; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) ; GFX10-NEXT: s_mov_b32 s1, 0xc96f385 ; GFX10-NEXT: s_lshl_b32 s2, s2, 2 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_and_b32 s0, s0, 15 ; GFX10-NEXT: s_add_i32 s1, s0, 4 ; GFX10-NEXT: s_cmp_lt_u32 s0, 4 ; GFX10-NEXT: s_cselect_b32 s0, s0, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: get_rounding_after_set_rounding_1: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_round_mode 0x0 ; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 ; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) ; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 ; GFX11-NEXT: s_lshl_b32 s2, s2, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 ; GFX11-NEXT: s_and_b32 s0, s0, 15 ; GFX11-NEXT: s_add_i32 s1, s0, 4 ; GFX11-NEXT: s_cmp_lt_u32 s0, 4 ; GFX11-NEXT: s_cselect_b32 s0, s0, s1 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_endpgm tail call void @llvm.set.rounding(i32 1) %set.mode = tail call i32 @llvm.get.rounding() store volatile i32 %set.mode, ptr addrspace(1) 
null ret void } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; GCN: {{.*}}