xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll (revision 89cb0eefcbb6303ba6813238d5ad37b103495d11)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
3; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
4; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
8
; Declaration of the intrinsic under test. It takes no arguments and yields an
; i32; @func_rounding below is its only caller in this file.
9declare i32 @llvm.get.rounding()
10
; func_rounding calls the llvm.get.rounding intrinsic and returns its i32
; result unchanged.
;
; Per the autogenerated assertions below, every checked target emits the same
; sequence (only register numbers and instruction order differ):
;   1. s_getreg_b32 reads a 4-bit field at offset 0 of HW_REG_MODE.
;   2. That field is shifted left by 2 (scaled by 4) and used as the shift
;      amount for a 64-bit right shift of the constant 0x0c96f385eb24da71
;      (high half 0xc96f385, low half 0xeb24da71).
;   3. The low 4 bits of the shifted value are kept (and with 15), so the
;      constant acts as a packed table of 4-bit entries indexed by the field.
;   4. An entry below 4 is returned as is; any other entry has 4 added to it
;      (s_add_i32 / s_cmp_lt_u32 / s_cselect_b32).
; NOTE(review): which rounding mode each table entry denotes is not visible
; in this file - confirm against the LangRef before relying on the values.
; The assertion lines are regenerated by utils/update_llc_test_checks.py and
; should not be edited by hand.
11define i32 @func_rounding() {
12; GFX678-LABEL: func_rounding:
13; GFX678:       ; %bb.0:
14; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
16; GFX678-NEXT:    s_lshl_b32 s6, s4, 2
17; GFX678-NEXT:    s_mov_b32 s4, 0xeb24da71
18; GFX678-NEXT:    s_mov_b32 s5, 0xc96f385
19; GFX678-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
20; GFX678-NEXT:    s_and_b32 s4, s4, 15
21; GFX678-NEXT:    s_add_i32 s5, s4, 4
22; GFX678-NEXT:    s_cmp_lt_u32 s4, 4
23; GFX678-NEXT:    s_cselect_b32 s4, s4, s5
24; GFX678-NEXT:    v_mov_b32_e32 v0, s4
25; GFX678-NEXT:    s_setpc_b64 s[30:31]
26;
27; GFX9-LABEL: func_rounding:
28; GFX9:       ; %bb.0:
29; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
31; GFX9-NEXT:    s_lshl_b32 s6, s4, 2
32; GFX9-NEXT:    s_mov_b32 s4, 0xeb24da71
33; GFX9-NEXT:    s_mov_b32 s5, 0xc96f385
34; GFX9-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
35; GFX9-NEXT:    s_and_b32 s4, s4, 15
36; GFX9-NEXT:    s_add_i32 s5, s4, 4
37; GFX9-NEXT:    s_cmp_lt_u32 s4, 4
38; GFX9-NEXT:    s_cselect_b32 s4, s4, s5
39; GFX9-NEXT:    v_mov_b32_e32 v0, s4
40; GFX9-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX10-LABEL: func_rounding:
43; GFX10:       ; %bb.0:
44; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX10-NEXT:    s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
46; GFX10-NEXT:    s_mov_b32 s4, 0xeb24da71
47; GFX10-NEXT:    s_mov_b32 s5, 0xc96f385
48; GFX10-NEXT:    s_lshl_b32 s6, s6, 2
49; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
50; GFX10-NEXT:    s_and_b32 s4, s4, 15
51; GFX10-NEXT:    s_add_i32 s5, s4, 4
52; GFX10-NEXT:    s_cmp_lt_u32 s4, 4
53; GFX10-NEXT:    s_cselect_b32 s4, s4, s5
54; GFX10-NEXT:    v_mov_b32_e32 v0, s4
55; GFX10-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX11-LABEL: func_rounding:
58; GFX11:       ; %bb.0:
59; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX11-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
61; GFX11-NEXT:    s_mov_b32 s0, 0xeb24da71
62; GFX11-NEXT:    s_mov_b32 s1, 0xc96f385
63; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
64; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
65; GFX11-NEXT:    s_and_b32 s0, s0, 15
66; GFX11-NEXT:    s_add_i32 s1, s0, 4
67; GFX11-NEXT:    s_cmp_lt_u32 s0, 4
68; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
69; GFX11-NEXT:    v_mov_b32_e32 v0, s0
70; GFX11-NEXT:    s_setpc_b64 s[30:31]
71  %rounding = call i32 @llvm.get.rounding()
72  ret i32 %rounding
73}
74;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
75; GCN: {{.*}}
76; GFX1011: {{.*}}
77; GFX6: {{.*}}
78; GFX7: {{.*}}
79; GFX8: {{.*}}
80