xref: /llvm-project/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
4
5; For gfx1010, overestimate the branch size in case we need to insert
6; a nop for the buggy offset.
7
8; GCN-LABEL: long_forward_scc_branch_3f_offset_bug:
9; GFX1030: s_cmp_lg_u32
10; GFX1030: s_cbranch_scc1  [[ENDBB:.LBB[0-9]+_[0-9]+]]
11
12; GFX1010: s_cmp_lg_u32
13; GFX1010-NEXT: s_cbranch_scc0  [[RELAX_BB:.LBB[0-9]+_[0-9]+]]
14; GFX1010: s_getpc_b64
15; GFX1010-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
16; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
17; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
18; GFX1010: [[RELAX_BB]]:
19
20; GCN: v_nop
21; GCN: s_sleep
22; GCN: s_cbranch_scc1
23
24; GCN: [[ENDBB]]:
25; GCN: global_store_{{dword|b32}}
; Kernel used by the long_forward_scc_branch_3f_offset_bug checks above.
; Control flow: bb0 enters the bb2 loop only when %cnd0 == 0, bb2 branches back
; to itself while the inline asm result is 0, and bb3 stores %cnd0 to %arg.
; The trailing size comments in bb2 keep a running total of the block's
; estimated size: 240 + 4 + 4 + 4 = 252, i.e. 63 (0x3f) units of 4 --
; presumably the "3f" offset in the function name; confirm against the
; branch relaxation pass before changing the amount of padding.
; NOTE(review): attribute group #0 is not defined in the visible part of this file.
26define amdgpu_kernel void @long_forward_scc_branch_3f_offset_bug(ptr addrspace(1) %arg, i32 %cnd0) #0 {
27bb0:
  ; A non-zero %cnd0 skips the padded loop and goes straight to the store in bb3.
28  %cmp0 = icmp eq i32 %cnd0, 0
29  br i1 %cmp0, label %bb2, label %bb3
30
31bb2:
  ; Padding: one s_mov_b32 that writes the "=s" output operand, followed by
  ; eleven v_nop_e64. Do not add or remove statements without redoing the size
  ; accounting in the trailing comments.
32  %val = call i32 asm sideeffect
33   "s_mov_b32 $0, 0
34    v_nop_e64
35    v_nop_e64
36    v_nop_e64
37    v_nop_e64
38    v_nop_e64
39    v_nop_e64
40    v_nop_e64
41    v_nop_e64
42    v_nop_e64
43    v_nop_e64
44    v_nop_e64", "=s"()   ; 20 * 12 = 240 (12 asm statements; 20 is presumably the per-statement size estimate in bytes)
45  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
46  %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
  ; Loop back-edge; the final branch is counted as 4, or 8 on gfx1010 where the
  ; workaround mentioned at the top of the file may apply.
47  br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)
48
49bb3:
  ; Common exit block: volatile store of %cnd0 through %arg, then return.
50  store volatile i32 %cnd0, ptr addrspace(1) %arg
51  ret void
52}
53
54; GCN-LABEL: {{^}}long_forward_exec_branch_3f_offset_bug:
55; GFX1030: s_mov_b32
56; GFX1030: v_cmpx_eq_u32
57; GFX1030: s_cbranch_execnz [[RELAX_BB:.LBB[0-9]+_[0-9]+]]
58
59; GFX1010: v_cmp_eq_u32
60; GFX1010: s_and_saveexec_b32
61; GFX1010-NEXT: s_cbranch_execnz  [[RELAX_BB:.LBB[0-9]+_[0-9]+]]
62
63; GCN: s_getpc_b64
64; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
65; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
66; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
67; GCN: [[RELAX_BB]]:
68
69; GCN: v_nop
70; GCN: s_sleep
71; GCN: s_cbranch_execz
72
73; GCN: [[ENDBB]]:
74; GCN: global_store_{{dword|b32}}
; Non-kernel function used by the long_forward_exec_branch_3f_offset_bug checks
; above. Same shape as the SCC test in this file, but the inline asm writes its
; result with v_mov_b32 through a "=v" output operand instead of s_mov_b32/"=s".
; Control flow: bb0 enters the bb2 loop only when %cnd0 == 0, bb2 branches back
; to itself while the inline asm result is 0, and bb3 stores %cnd0 to %arg.
; The trailing size comments in bb2 keep a running total of the block's
; estimated size: 240 + 4 + 4 + 4 = 252, i.e. 63 (0x3f) units of 4 --
; presumably the "3f" offset in the function name; confirm against the
; branch relaxation pass before changing the amount of padding.
; NOTE(review): attribute group #0 is not defined in the visible part of this file.
75define void @long_forward_exec_branch_3f_offset_bug(ptr addrspace(1) %arg, i32 %cnd0) #0 {
76bb0:
  ; A non-zero %cnd0 skips the padded loop and goes straight to the store in bb3.
77  %cmp0 = icmp eq i32 %cnd0, 0
78  br i1 %cmp0, label %bb2, label %bb3
79
80bb2:
  ; Padding: one v_mov_b32 that writes the "=v" output operand, followed by
  ; eleven v_nop_e64. Do not add or remove statements without redoing the size
  ; accounting in the trailing comments.
81  %val = call i32 asm sideeffect
82   "v_mov_b32 $0, 0
83    v_nop_e64
84    v_nop_e64
85    v_nop_e64
86    v_nop_e64
87    v_nop_e64
88    v_nop_e64
89    v_nop_e64
90    v_nop_e64
91    v_nop_e64
92    v_nop_e64
93    v_nop_e64", "=v"()   ; 20 * 12 = 240 (12 asm statements; 20 is presumably the per-statement size estimate in bytes)
94  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
95  %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
  ; Loop back-edge; the final branch is counted as 4, or 8 on gfx1010 where the
  ; workaround mentioned at the top of the file may apply.
96  br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)
97
98bb3:
  ; Common exit block: volatile store of %cnd0 through %arg, then return.
99  store volatile i32 %cnd0, ptr addrspace(1) %arg
100  ret void
101}
102
; Declaration of the sleep intrinsic called with a constant 0 from bb2 of both
; test functions in this file; its single i32 operand is marked immarg.
103declare void @llvm.amdgcn.s.sleep(i32 immarg)
104