; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s

; Branch relaxation is forced by shrinking the branch immediate to 7
; bits (-amdgpu-s-branch-bits=7), so the ~248-byte bb2 block pushes the
; forward branch out of range and the long-branch (s_getpc_b64 +
; s_add_u32/s_addc_u32) expansion is emitted.
;
; For gfx1010, overestimate the branch size in case we need to insert
; a nop for the buggy offset.

; GCN-LABEL: long_forward_scc_branch_3f_offset_bug:
; GFX1030: s_cmp_lg_u32
; GFX1030: s_cbranch_scc1 [[ENDBB:.LBB[0-9]+_[0-9]+]]

; GFX1010: s_cmp_lg_u32
; GFX1010-NEXT: s_cbranch_scc0 [[RELAX_BB:.LBB[0-9]+_[0-9]+]]
; GFX1010: s_getpc_b64
; GFX1010-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; Reference (not redefine) ENDBB here so the low/high halves of the
; long-branch target are checked against the same label.
; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB]]-[[POST_GETPC]])>>32
; GFX1010: [[RELAX_BB]]:

; GCN: v_nop
; GCN: s_sleep
; GCN: s_cbranch_scc1

; GCN: [[ENDBB]]:
; GCN: global_store_{{dword|b32}}
define amdgpu_kernel void @long_forward_scc_branch_3f_offset_bug(ptr addrspace(1) %arg, i32 %cnd0) #0 {
bb0:
  %cmp0 = icmp eq i32 %cnd0, 0
  br i1 %cmp0, label %bb2, label %bb3

bb2:
  %val = call i32 asm sideeffect
   "s_mov_b32 $0, 0
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "=s"() ; 20 * 12 = 240
  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
  %cmp1 = icmp eq i32 %val, 0 ; +4 = 248
  br i1 %cmp1, label %bb2, label %bb3 ; +4 (gfx1030), +8 with workaround (gfx1010)

bb3:
  store volatile i32 %cnd0, ptr addrspace(1) %arg
  ret void
}

; Same layout as above, but the relaxed branch is an exec-mask branch
; (s_cbranch_execnz/execz) rather than an scc branch.

; GCN-LABEL: {{^}}long_forward_exec_branch_3f_offset_bug:
; GFX1030: s_mov_b32
; GFX1030: v_cmpx_eq_u32
; GFX1030: s_cbranch_execnz [[RELAX_BB:.LBB[0-9]+_[0-9]+]]

; GFX1010: v_cmp_eq_u32
; GFX1010: s_and_saveexec_b32
; GFX1010-NEXT: s_cbranch_execnz [[RELAX_BB:.LBB[0-9]+_[0-9]+]]

; GCN: s_getpc_b64
; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
; Reference (not redefine) ENDBB here so the low/high halves of the
; long-branch target are checked against the same label.
; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB]]-[[POST_GETPC]])>>32
; GCN: [[RELAX_BB]]:

; GCN: v_nop
; GCN: s_sleep
; GCN: s_cbranch_execz

; GCN: [[ENDBB]]:
; GCN: global_store_{{dword|b32}}
define void @long_forward_exec_branch_3f_offset_bug(ptr addrspace(1) %arg, i32 %cnd0) #0 {
bb0:
  %cmp0 = icmp eq i32 %cnd0, 0
  br i1 %cmp0, label %bb2, label %bb3

bb2:
  %val = call i32 asm sideeffect
   "v_mov_b32 $0, 0
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64
    v_nop_e64", "=v"() ; 20 * 12 = 240
  call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
  %cmp1 = icmp eq i32 %val, 0 ; +4 = 248
  br i1 %cmp1, label %bb2, label %bb3 ; +4 (gfx1030), +8 with workaround (gfx1010)

bb3:
  store volatile i32 %cnd0, ptr addrspace(1) %arg
  ret void
}

declare void @llvm.amdgcn.s.sleep(i32 immarg)

; Both functions reference attribute group #0; it must be defined or
; the IR fails to parse.
attributes #0 = { nounwind }