1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=4 -simplifycfg-require-and-preserve-domtree=1 -amdgpu-long-branch-factor=0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s 3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -amdgpu-s-branch-bits=5 -simplifycfg-require-and-preserve-domtree=1 -amdgpu-long-branch-factor=0 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -amdgpu-s-branch-bits=5 -simplifycfg-require-and-preserve-domtree=1 -amdgpu-long-branch-factor=0 < %s | FileCheck -enable-var-scope -check-prefix=GFX12 %s 5 6 7; FIXME: We should use llvm-mc for this, but we can't even parse our own output. 8; See PR33579. 9; RUN: llc -mtriple=amdgcn -verify-machineinstrs -amdgpu-s-branch-bits=4 -amdgpu-long-branch-factor=0 -o %t.o -filetype=obj -simplifycfg-require-and-preserve-domtree=1 %s 10; RUN: llvm-readobj -r %t.o | FileCheck --check-prefix=OBJ %s 11 12; OBJ: Relocations [ 13; OBJ-NEXT: ] 14 15; Restrict maximum branch to between +7 and -8 dwords 16 17; Used to emit an always 4 byte instruction. Inline asm always assumes 18; each instruction is the maximum size. 19declare void @llvm.amdgcn.s.sleep(i32) #0 20 21declare i32 @llvm.amdgcn.workitem.id.x() #1 22 23 24define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 { 25; GCN-LABEL: uniform_conditional_max_short_forward_branch: 26; GCN: ; %bb.0: ; %bb 27; GCN-NEXT: s_load_dword s0, s[4:5], 0xb 28; GCN-NEXT: s_waitcnt lgkmcnt(0) 29; GCN-NEXT: s_cmp_eq_u32 s0, 0 30; GCN-NEXT: s_cbranch_scc1 .LBB0_2 31; GCN-NEXT: ; %bb.1: ; %bb2 32; GCN-NEXT: ;;#ASMSTART 33; GCN-NEXT: v_nop_e64 34; GCN-NEXT: v_nop_e64 35; GCN-NEXT: v_nop_e64 36; GCN-NEXT: ;;#ASMEND 37; GCN-NEXT: s_sleep 0 38; GCN-NEXT: .LBB0_2: ; %bb3 39; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 40; GCN-NEXT: s_mov_b32 s7, 0xf000 41; GCN-NEXT: s_mov_b32 s6, -1 42; GCN-NEXT: v_mov_b32_e32 v0, s0 43; GCN-NEXT: s_waitcnt lgkmcnt(0) 44; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 45; GCN-NEXT: s_waitcnt vmcnt(0) 46; GCN-NEXT: s_endpgm 47; 48; GFX11-LABEL: uniform_conditional_max_short_forward_branch: 49; GFX11: ; %bb.0: ; %bb 50; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c 51; GFX11-NEXT: s_waitcnt lgkmcnt(0) 52; GFX11-NEXT: s_cmp_eq_u32 s0, 0 53; GFX11-NEXT: s_cbranch_scc0 .LBB0_1 54; GFX11-NEXT: ; %bb.3: ; %bb 55; GFX11-NEXT: s_getpc_b64 s[2:3] 56; GFX11-NEXT: .Lpost_getpc0: 57; GFX11-NEXT: s_waitcnt_depctr 0xfffe 58; GFX11-NEXT: s_add_u32 s2, s2, (.LBB0_2-.Lpost_getpc0)&4294967295 59; GFX11-NEXT: s_addc_u32 s3, s3, (.LBB0_2-.Lpost_getpc0)>>32 60; GFX11-NEXT: s_waitcnt_depctr 0xfffe 61; GFX11-NEXT: s_setpc_b64 s[2:3] 62; GFX11-NEXT: .LBB0_1: ; %bb2 63; GFX11-NEXT: ;;#ASMSTART 64; GFX11-NEXT: v_nop_e64 65; GFX11-NEXT: v_nop_e64 66; GFX11-NEXT: v_nop_e64 67; GFX11-NEXT: ;;#ASMEND 68; GFX11-NEXT: s_sleep 0 69; GFX11-NEXT: .LBB0_2: ; %bb3 70; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 71; GFX11-NEXT: v_mov_b32_e32 v0, 0 72; GFX11-NEXT: v_mov_b32_e32 v1, s0 73; GFX11-NEXT: s_waitcnt lgkmcnt(0) 74; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] dlc 75; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 76; GFX11-NEXT: s_endpgm 77; 78; GFX12-LABEL: uniform_conditional_max_short_forward_branch: 79; GFX12: ; %bb.0: ; %bb 80; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x2c 81; GFX12-NEXT: s_wait_kmcnt 0x0 82; GFX12-NEXT: s_cmp_eq_u32 s0, 0 83; GFX12-NEXT: s_cbranch_scc0 .LBB0_1 84; GFX12-NEXT: ; %bb.3: ; %bb 85; GFX12-NEXT: s_getpc_b64 s[2:3] 86; GFX12-NEXT: .Lpost_getpc0: 87; GFX12-NEXT: s_wait_alu 0xfffe 88; GFX12-NEXT: s_add_co_u32 s2, s2, (.LBB0_2-.Lpost_getpc0)&4294967295 89; GFX12-NEXT: s_add_co_ci_u32 s3, s3, (.LBB0_2-.Lpost_getpc0)>>32 90; GFX12-NEXT: s_wait_alu 0xfffe 91; GFX12-NEXT: s_setpc_b64 s[2:3] 92; GFX12-NEXT: .LBB0_1: ; %bb2 93; GFX12-NEXT: ;;#ASMSTART 94; GFX12-NEXT: v_nop_e64 95; GFX12-NEXT: v_nop_e64 96; GFX12-NEXT: v_nop_e64 97; GFX12-NEXT: ;;#ASMEND 98; GFX12-NEXT: s_sleep 0 99; GFX12-NEXT: .LBB0_2: ; %bb3 100; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 101; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 102; GFX12-NEXT: s_wait_kmcnt 0x0 103; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS 104; GFX12-NEXT: s_wait_storecnt 0x0 105; GFX12-NEXT: s_endpgm 106bb: 107 %cmp = icmp eq i32 %cnd, 0 108 br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch 109 110bb2: 111; 24 bytes 112 call void asm sideeffect 113 "v_nop_e64 114 v_nop_e64 115 v_nop_e64", ""() #0 116 call void @llvm.amdgcn.s.sleep(i32 0) 117 br label %bb3 118 119bb3: 120 store volatile i32 %cnd, ptr addrspace(1) %arg 121 ret void 122} 123 124define amdgpu_kernel void @uniform_conditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 { 125; GCN-LABEL: uniform_conditional_min_long_forward_branch: 126; GCN: ; %bb.0: ; %bb0 127; GCN-NEXT: s_load_dword s0, s[4:5], 0xb 128; GCN-NEXT: s_waitcnt lgkmcnt(0) 129; GCN-NEXT: s_cmp_eq_u32 s0, 0 130; GCN-NEXT: s_cbranch_scc0 .LBB1_1 131; GCN-NEXT: ; %bb.3: ; %bb0 132; GCN-NEXT: s_getpc_b64 s[2:3] 133; GCN-NEXT: .Lpost_getpc0: 134; GCN-NEXT: s_add_u32 s2, s2, (.LBB1_2-.Lpost_getpc0)&4294967295 135; GCN-NEXT: s_addc_u32 s3, s3, (.LBB1_2-.Lpost_getpc0)>>32 136; GCN-NEXT: s_setpc_b64 s[2:3] 137; GCN-NEXT: .LBB1_1: ; %bb2 138; GCN-NEXT: ;;#ASMSTART 139; GCN-NEXT: v_nop_e64 140; GCN-NEXT: v_nop_e64 141; GCN-NEXT: v_nop_e64 142; GCN-NEXT: v_nop_e64 143; GCN-NEXT: ;;#ASMEND 144; GCN-NEXT: .LBB1_2: ; %bb3 145; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 146; GCN-NEXT: s_mov_b32 s7, 0xf000 147; GCN-NEXT: s_mov_b32 s6, -1 148; GCN-NEXT: v_mov_b32_e32 v0, s0 149; GCN-NEXT: s_waitcnt lgkmcnt(0) 150; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 151; GCN-NEXT: s_waitcnt vmcnt(0) 152; GCN-NEXT: s_endpgm 153; 154; GFX11-LABEL: uniform_conditional_min_long_forward_branch: 155; GFX11: ; %bb.0: ; %bb0 156; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c 157; GFX11-NEXT: s_waitcnt lgkmcnt(0) 158; GFX11-NEXT: s_cmp_eq_u32 s0, 0 159; GFX11-NEXT: s_cbranch_scc0 .LBB1_1 160; GFX11-NEXT: ; %bb.3: ; %bb0 161; GFX11-NEXT: s_getpc_b64 s[2:3] 162; GFX11-NEXT: .Lpost_getpc1: 163; GFX11-NEXT: s_waitcnt_depctr 0xfffe 164; GFX11-NEXT: s_add_u32 s2, s2, (.LBB1_2-.Lpost_getpc1)&4294967295 165; GFX11-NEXT: s_addc_u32 s3, s3, (.LBB1_2-.Lpost_getpc1)>>32 166; GFX11-NEXT: s_waitcnt_depctr 0xfffe 167; GFX11-NEXT: s_setpc_b64 s[2:3] 168; GFX11-NEXT: .LBB1_1: ; %bb2 169; GFX11-NEXT: ;;#ASMSTART 170; GFX11-NEXT: v_nop_e64 171; GFX11-NEXT: v_nop_e64 172; GFX11-NEXT: v_nop_e64 173; GFX11-NEXT: v_nop_e64 174; GFX11-NEXT: ;;#ASMEND 175; GFX11-NEXT: .LBB1_2: ; %bb3 176; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 177; GFX11-NEXT: v_mov_b32_e32 v0, 0 178; GFX11-NEXT: v_mov_b32_e32 v1, s0 179; GFX11-NEXT: s_waitcnt lgkmcnt(0) 180; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] dlc 181; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 182; GFX11-NEXT: s_endpgm 183; 184; GFX12-LABEL: uniform_conditional_min_long_forward_branch: 185; GFX12: ; %bb.0: ; %bb0 186; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x2c 187; GFX12-NEXT: s_wait_kmcnt 0x0 188; GFX12-NEXT: s_cmp_eq_u32 s0, 0 189; GFX12-NEXT: s_cbranch_scc0 .LBB1_1 190; GFX12-NEXT: ; %bb.3: ; %bb0 191; GFX12-NEXT: s_getpc_b64 s[2:3] 192; GFX12-NEXT: .Lpost_getpc1: 193; GFX12-NEXT: s_wait_alu 0xfffe 194; GFX12-NEXT: s_add_co_u32 s2, s2, (.LBB1_2-.Lpost_getpc1)&4294967295 195; GFX12-NEXT: s_add_co_ci_u32 s3, s3, (.LBB1_2-.Lpost_getpc1)>>32 196; GFX12-NEXT: s_wait_alu 0xfffe 197; GFX12-NEXT: s_setpc_b64 s[2:3] 198; GFX12-NEXT: .LBB1_1: ; %bb2 199; GFX12-NEXT: ;;#ASMSTART 200; GFX12-NEXT: v_nop_e64 201; GFX12-NEXT: v_nop_e64 202; GFX12-NEXT: v_nop_e64 203; GFX12-NEXT: v_nop_e64 204; GFX12-NEXT: ;;#ASMEND 205; GFX12-NEXT: .LBB1_2: ; %bb3 206; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 207; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 208; GFX12-NEXT: s_wait_kmcnt 0x0 209; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS 210; GFX12-NEXT: s_wait_storecnt 0x0 211; GFX12-NEXT: s_endpgm 212bb0: 213 %cmp = icmp eq i32 %cnd, 0 214 br i1 %cmp, label %bb3, label %bb2 ; +9 dword branch 215 216bb2: 217; 32 bytes 218 call void asm sideeffect 219 "v_nop_e64 220 v_nop_e64 221 v_nop_e64 222 v_nop_e64", ""() #0 223 br label %bb3 224 225bb3: 226 store volatile i32 %cnd, ptr addrspace(1) %arg 227 ret void 228} 229 230define amdgpu_kernel void @uniform_conditional_min_long_forward_vcnd_branch(ptr addrspace(1) %arg, float %cnd) #0 { 231; GCN-LABEL: uniform_conditional_min_long_forward_vcnd_branch: 232; GCN: ; %bb.0: ; %bb0 233; GCN-NEXT: s_load_dword s0, s[4:5], 0xb 234; GCN-NEXT: s_waitcnt lgkmcnt(0) 235; GCN-NEXT: v_cmp_eq_f32_e64 s[2:3], s0, 0 236; GCN-NEXT: s_and_b64 vcc, exec, s[2:3] 237; GCN-NEXT: s_cbranch_vccz .LBB2_1 238; GCN-NEXT: ; %bb.3: ; %bb0 239; GCN-NEXT: s_getpc_b64 s[2:3] 240; GCN-NEXT: .Lpost_getpc1: 241; GCN-NEXT: s_add_u32 s2, s2, (.LBB2_2-.Lpost_getpc1)&4294967295 242; GCN-NEXT: s_addc_u32 s3, s3, (.LBB2_2-.Lpost_getpc1)>>32 243; GCN-NEXT: s_setpc_b64 s[2:3] 244; GCN-NEXT: .LBB2_1: ; %bb2 245; GCN-NEXT: ;;#ASMSTART 246; GCN-NEXT: ; 32 bytes 247; GCN-NEXT: v_nop_e64 248; GCN-NEXT: v_nop_e64 249; GCN-NEXT: v_nop_e64 250; GCN-NEXT: v_nop_e64 251; GCN-NEXT: ;;#ASMEND 252; GCN-NEXT: .LBB2_2: ; %bb3 253; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 254; GCN-NEXT: s_mov_b32 s7, 0xf000 255; GCN-NEXT: s_mov_b32 s6, -1 256; GCN-NEXT: v_mov_b32_e32 v0, s0 257; GCN-NEXT: s_waitcnt lgkmcnt(0) 258; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 259; GCN-NEXT: s_waitcnt vmcnt(0) 260; GCN-NEXT: s_endpgm 261; 262; GFX11-LABEL: uniform_conditional_min_long_forward_vcnd_branch: 263; GFX11: ; %bb.0: ; %bb0 264; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c 265; GFX11-NEXT: s_waitcnt lgkmcnt(0) 266; GFX11-NEXT: v_cmp_eq_f32_e64 s[2:3], s0, 0 267; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 268; GFX11-NEXT: s_and_b64 vcc, exec, s[2:3] 269; GFX11-NEXT: s_cbranch_vccz .LBB2_1 270; GFX11-NEXT: ; %bb.3: ; %bb0 271; GFX11-NEXT: s_getpc_b64 s[2:3] 272; GFX11-NEXT: .Lpost_getpc2: 273; GFX11-NEXT: s_waitcnt_depctr 0xfffe 274; GFX11-NEXT: s_add_u32 s2, s2, (.LBB2_2-.Lpost_getpc2)&4294967295 275; GFX11-NEXT: s_addc_u32 s3, s3, (.LBB2_2-.Lpost_getpc2)>>32 276; GFX11-NEXT: s_waitcnt_depctr 0xfffe 277; GFX11-NEXT: s_setpc_b64 s[2:3] 278; GFX11-NEXT: .LBB2_1: ; %bb2 279; GFX11-NEXT: ;;#ASMSTART 280; GFX11-NEXT: ; 32 bytes 281; GFX11-NEXT: v_nop_e64 282; GFX11-NEXT: v_nop_e64 283; GFX11-NEXT: v_nop_e64 284; GFX11-NEXT: v_nop_e64 285; GFX11-NEXT: ;;#ASMEND 286; GFX11-NEXT: .LBB2_2: ; %bb3 287; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 288; GFX11-NEXT: v_mov_b32_e32 v0, 0 289; GFX11-NEXT: v_mov_b32_e32 v1, s0 290; GFX11-NEXT: s_waitcnt lgkmcnt(0) 291; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] dlc 292; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 293; GFX11-NEXT: s_endpgm 294; 295; GFX12-LABEL: uniform_conditional_min_long_forward_vcnd_branch: 296; GFX12: ; %bb.0: ; %bb0 297; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x2c 298; GFX12-NEXT: s_wait_kmcnt 0x0 299; GFX12-NEXT: s_cmp_eq_f32 s0, 0 300; GFX12-NEXT: s_cbranch_scc0 .LBB2_1 301; GFX12-NEXT: ; %bb.3: ; %bb0 302; GFX12-NEXT: s_getpc_b64 s[2:3] 303; GFX12-NEXT: .Lpost_getpc2: 304; GFX12-NEXT: s_wait_alu 0xfffe 305; GFX12-NEXT: s_add_co_u32 s2, s2, (.LBB2_2-.Lpost_getpc2)&4294967295 306; GFX12-NEXT: s_add_co_ci_u32 s3, s3, (.LBB2_2-.Lpost_getpc2)>>32 307; GFX12-NEXT: s_wait_alu 0xfffe 308; GFX12-NEXT: s_setpc_b64 s[2:3] 309; GFX12-NEXT: .LBB2_1: ; %bb2 310; GFX12-NEXT: ;;#ASMSTART 311; GFX12-NEXT: ; 32 bytes 312; GFX12-NEXT: v_nop_e64 313; GFX12-NEXT: v_nop_e64 314; GFX12-NEXT: v_nop_e64 315; GFX12-NEXT: v_nop_e64 316; GFX12-NEXT: ;;#ASMEND 317; GFX12-NEXT: .LBB2_2: ; %bb3 318; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 319; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 320; GFX12-NEXT: s_wait_kmcnt 0x0 321; GFX12-NEXT: global_store_b32 v0, v1, s[2:3] scope:SCOPE_SYS 322; GFX12-NEXT: s_wait_storecnt 0x0 323; GFX12-NEXT: s_endpgm 324bb0: 325 %cmp = fcmp oeq float %cnd, 0.0 326 br i1 %cmp, label %bb3, label %bb2 ; + 8 dword branch 327 328bb2: 329 call void asm sideeffect " ; 32 bytes 330 v_nop_e64 331 v_nop_e64 332 v_nop_e64 333 v_nop_e64", ""() #0 334 br label %bb3 335 336bb3: 337 store volatile float %cnd, ptr addrspace(1) %arg 338 ret void 339} 340 341define amdgpu_kernel void @min_long_forward_vbranch(ptr addrspace(1) %arg) #0 { 342; GCN-LABEL: min_long_forward_vbranch: 343; GCN: ; %bb.0: ; %bb 344; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 345; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 346; GCN-NEXT: v_mov_b32_e32 v1, 0 347; GCN-NEXT: s_mov_b32 s3, 0xf000 348; GCN-NEXT: s_mov_b32 s2, 0 349; GCN-NEXT: s_waitcnt lgkmcnt(0) 350; GCN-NEXT: buffer_load_dword v2, v[0:1], s[0:3], 0 addr64 glc 351; GCN-NEXT: s_waitcnt vmcnt(0) 352; GCN-NEXT: v_mov_b32_e32 v1, s1 353; GCN-NEXT: v_add_i32_e32 v0, vcc, s0, v0 354; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 355; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 356; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc 357; GCN-NEXT: s_cbranch_execnz .LBB3_1 358; GCN-NEXT: ; %bb.3: ; %bb 359; GCN-NEXT: s_getpc_b64 s[4:5] 360; GCN-NEXT: .Lpost_getpc2: 361; GCN-NEXT: s_add_u32 s4, s4, (.LBB3_2-.Lpost_getpc2)&4294967295 362; GCN-NEXT: s_addc_u32 s5, s5, (.LBB3_2-.Lpost_getpc2)>>32 363; GCN-NEXT: s_setpc_b64 s[4:5] 364; GCN-NEXT: .LBB3_1: ; %bb2 365; GCN-NEXT: ;;#ASMSTART 366; GCN-NEXT: ; 32 bytes 367; GCN-NEXT: v_nop_e64 368; GCN-NEXT: v_nop_e64 369; GCN-NEXT: v_nop_e64 370; GCN-NEXT: v_nop_e64 371; GCN-NEXT: ;;#ASMEND 372; GCN-NEXT: .LBB3_2: ; %bb3 373; GCN-NEXT: s_or_b64 exec, exec, s[0:1] 374; GCN-NEXT: s_mov_b32 s0, s2 375; GCN-NEXT: s_mov_b32 s1, s2 376; GCN-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 377; GCN-NEXT: s_waitcnt vmcnt(0) 378; GCN-NEXT: s_endpgm 379; 380; GFX11-LABEL: min_long_forward_vbranch: 381; GFX11: ; %bb.0: ; %bb 382; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 383; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 384; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 385; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 386; GFX11-NEXT: s_waitcnt lgkmcnt(0) 387; GFX11-NEXT: global_load_b32 v2, v0, s[0:1] glc dlc 388; GFX11-NEXT: s_waitcnt vmcnt(0) 389; GFX11-NEXT: v_add_co_u32 v0, s[2:3], s0, v0 390; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s[2:3] 391; GFX11-NEXT: s_mov_b64 s[0:1], exec 392; GFX11-NEXT: v_cmpx_ne_u32_e32 0, v2 393; GFX11-NEXT: s_cbranch_execnz .LBB3_1 394; GFX11-NEXT: ; %bb.3: ; %bb 395; GFX11-NEXT: s_getpc_b64 s[2:3] 396; GFX11-NEXT: .Lpost_getpc3: 397; GFX11-NEXT: s_waitcnt_depctr 0xfffe 398; GFX11-NEXT: s_add_u32 s2, s2, (.LBB3_2-.Lpost_getpc3)&4294967295 399; GFX11-NEXT: s_addc_u32 s3, s3, (.LBB3_2-.Lpost_getpc3)>>32 400; GFX11-NEXT: s_waitcnt_depctr 0xfffe 401; GFX11-NEXT: s_setpc_b64 s[2:3] 402; GFX11-NEXT: .LBB3_1: ; %bb2 403; GFX11-NEXT: ;;#ASMSTART 404; GFX11-NEXT: ; 32 bytes 405; GFX11-NEXT: v_nop_e64 406; GFX11-NEXT: v_nop_e64 407; GFX11-NEXT: v_nop_e64 408; GFX11-NEXT: v_nop_e64 409; GFX11-NEXT: ;;#ASMEND 410; GFX11-NEXT: .LBB3_2: ; %bb3 411; GFX11-NEXT: s_or_b64 exec, exec, s[0:1] 412; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc 413; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 414; GFX11-NEXT: s_endpgm 415; 416; GFX12-LABEL: min_long_forward_vbranch: 417; GFX12: ; %bb.0: ; %bb 418; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 419; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 420; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 421; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 422; GFX12-NEXT: s_wait_kmcnt 0x0 423; GFX12-NEXT: global_load_b32 v2, v0, s[0:1] scope:SCOPE_SYS 424; GFX12-NEXT: s_wait_loadcnt 0x0 425; GFX12-NEXT: v_add_co_u32 v0, s0, s0, v0 426; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0 427; GFX12-NEXT: s_mov_b32 s0, exec_lo 428; GFX12-NEXT: v_cmpx_ne_u32_e32 0, v2 429; GFX12-NEXT: s_cbranch_execnz .LBB3_1 430; GFX12-NEXT: ; %bb.3: ; %bb 431; GFX12-NEXT: s_getpc_b64 s[2:3] 432; GFX12-NEXT: .Lpost_getpc3: 433; GFX12-NEXT: s_wait_alu 0xfffe 434; GFX12-NEXT: s_add_co_u32 s2, s2, (.LBB3_2-.Lpost_getpc3)&4294967295 435; GFX12-NEXT: s_add_co_ci_u32 s3, s3, (.LBB3_2-.Lpost_getpc3)>>32 436; GFX12-NEXT: s_wait_alu 0xfffe 437; GFX12-NEXT: s_setpc_b64 s[2:3] 438; GFX12-NEXT: .LBB3_1: ; %bb2 439; GFX12-NEXT: ;;#ASMSTART 440; GFX12-NEXT: ; 32 bytes 441; GFX12-NEXT: v_nop_e64 442; GFX12-NEXT: v_nop_e64 443; GFX12-NEXT: v_nop_e64 444; GFX12-NEXT: v_nop_e64 445; GFX12-NEXT: ;;#ASMEND 446; GFX12-NEXT: .LBB3_2: ; %bb3 447; GFX12-NEXT: s_wait_alu 0xfffe 448; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s0 449; GFX12-NEXT: global_store_b32 v[0:1], v2, off scope:SCOPE_SYS 450; GFX12-NEXT: s_wait_storecnt 0x0 451; GFX12-NEXT: s_endpgm 452bb: 453 %tid = call i32 @llvm.amdgcn.workitem.id.x() 454 %tid.ext = zext i32 %tid to i64 455 %gep = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tid.ext 456 %load = load volatile i32, ptr addrspace(1) %gep 457 %cmp = icmp eq i32 %load, 0 458 br i1 %cmp, label %bb3, label %bb2 ; + 8 dword branch 459 460bb2: 461 call void asm sideeffect " ; 32 bytes 462 v_nop_e64 463 v_nop_e64 464 v_nop_e64 465 v_nop_e64", ""() #0 466 br label %bb3 467 468bb3: 469 store volatile i32 %load, ptr addrspace(1) %gep 470 ret void 471} 472 473define amdgpu_kernel void @long_backward_sbranch(ptr addrspace(1) %arg) #0 { 474; GCN-LABEL: long_backward_sbranch: 475; GCN: ; %bb.0: ; %bb 476; GCN-NEXT: s_mov_b32 s0, 0 477; GCN-NEXT: .LBB4_1: ; %bb2 478; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 479; GCN-NEXT: s_add_i32 s0, s0, 1 480; GCN-NEXT: s_cmp_lt_i32 s0, 10 481; GCN-NEXT: ;;#ASMSTART 482; GCN-NEXT: v_nop_e64 483; GCN-NEXT: v_nop_e64 484; GCN-NEXT: v_nop_e64 485; GCN-NEXT: ;;#ASMEND 486; GCN-NEXT: s_cbranch_scc0 .LBB4_2 487; GCN-NEXT: ; %bb.3: ; %bb2 488; GCN-NEXT: ; in Loop: Header=BB4_1 Depth=1 489; GCN-NEXT: s_getpc_b64 s[2:3] 490; GCN-NEXT: .Lpost_getpc3: 491; GCN-NEXT: s_add_u32 s2, s2, (.LBB4_1-.Lpost_getpc3)&4294967295 492; GCN-NEXT: s_addc_u32 s3, s3, (.LBB4_1-.Lpost_getpc3)>>32 493; GCN-NEXT: s_setpc_b64 s[2:3] 494; GCN-NEXT: .LBB4_2: ; %bb3 495; GCN-NEXT: s_endpgm 496; 497; GFX11-LABEL: long_backward_sbranch: 498; GFX11: ; %bb.0: ; %bb 499; GFX11-NEXT: s_mov_b32 s0, 0 500; GFX11-NEXT: .p2align 6 501; GFX11-NEXT: .LBB4_1: ; %bb2 502; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 503; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 504; GFX11-NEXT: s_add_i32 s0, s0, 1 505; GFX11-NEXT: ;;#ASMSTART 506; GFX11-NEXT: v_nop_e64 507; GFX11-NEXT: v_nop_e64 508; GFX11-NEXT: v_nop_e64 509; GFX11-NEXT: ;;#ASMEND 510; GFX11-NEXT: s_cmp_lt_i32 s0, 10 511; GFX11-NEXT: s_cbranch_scc0 .LBB4_2 512; GFX11-NEXT: ; %bb.3: ; %bb2 513; GFX11-NEXT: ; in Loop: Header=BB4_1 Depth=1 514; GFX11-NEXT: s_getpc_b64 s[2:3] 515; GFX11-NEXT: .Lpost_getpc4: 516; GFX11-NEXT: s_waitcnt_depctr 0xfffe 517; GFX11-NEXT: s_add_u32 s2, s2, (.LBB4_1-.Lpost_getpc4)&4294967295 518; GFX11-NEXT: s_addc_u32 s3, s3, (.LBB4_1-.Lpost_getpc4)>>32 519; GFX11-NEXT: s_waitcnt_depctr 0xfffe 520; GFX11-NEXT: s_setpc_b64 s[2:3] 521; GFX11-NEXT: .LBB4_2: ; %bb3 522; GFX11-NEXT: s_endpgm 523; 524; GFX12-LABEL: long_backward_sbranch: 525; GFX12: ; %bb.0: ; %bb 526; GFX12-NEXT: s_mov_b32 s0, 0 527; GFX12-NEXT: .LBB4_1: ; %bb2 528; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 529; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 530; GFX12-NEXT: s_add_co_i32 s0, s0, 1 531; GFX12-NEXT: ;;#ASMSTART 532; GFX12-NEXT: v_nop_e64 533; GFX12-NEXT: v_nop_e64 534; GFX12-NEXT: v_nop_e64 535; GFX12-NEXT: ;;#ASMEND 536; GFX12-NEXT: s_cmp_lt_i32 s0, 10 537; GFX12-NEXT: s_cbranch_scc0 .LBB4_2 538; GFX12-NEXT: ; %bb.3: ; %bb2 539; GFX12-NEXT: ; in Loop: Header=BB4_1 Depth=1 540; GFX12-NEXT: s_getpc_b64 s[2:3] 541; GFX12-NEXT: .Lpost_getpc4: 542; GFX12-NEXT: s_wait_alu 0xfffe 543; GFX12-NEXT: s_add_co_u32 s2, s2, (.LBB4_1-.Lpost_getpc4)&4294967295 544; GFX12-NEXT: s_add_co_ci_u32 s3, s3, (.LBB4_1-.Lpost_getpc4)>>32 545; GFX12-NEXT: s_wait_alu 0xfffe 546; GFX12-NEXT: s_setpc_b64 s[2:3] 547; GFX12-NEXT: .LBB4_2: ; %bb3 548; GFX12-NEXT: s_endpgm 549bb: 550 br label %bb2 551 552bb2: 553 %loop.idx = phi i32 [ 0, %bb ], [ %inc, %bb2 ] 554 ; 24 bytes 555 call void asm sideeffect 556 "v_nop_e64 557 v_nop_e64 558 v_nop_e64", ""() #0 559 %inc = add nsw i32 %loop.idx, 1 ; add cost 4 560 %cmp = icmp slt i32 %inc, 10 ; condition cost = 8 561 br i1 %cmp, label %bb2, label %bb3 ; - 562 563bb3: 564 ret void 565} 566 567; Requires expansion of unconditional branch from %bb2 to %bb4 (and 568; expansion of conditional branch from %bb to %bb3. 569 570define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(ptr addrspace(1) %arg, i32 %arg1) { 571; GCN-LABEL: uniform_unconditional_min_long_forward_branch: 572; GCN: ; %bb.0: ; %bb0 573; GCN-NEXT: s_load_dword s0, s[4:5], 0xb 574; GCN-NEXT: s_waitcnt lgkmcnt(0) 575; GCN-NEXT: s_cmp_eq_u32 s0, 0 576; GCN-NEXT: s_mov_b64 s[0:1], -1 577; GCN-NEXT: s_cbranch_scc0 .LBB5_1 578; GCN-NEXT: ; %bb.7: ; %bb0 579; GCN-NEXT: s_getpc_b64 s[0:1] 580; GCN-NEXT: .Lpost_getpc5: 581; GCN-NEXT: s_add_u32 s0, s0, (.LBB5_4-.Lpost_getpc5)&4294967295 582; GCN-NEXT: s_addc_u32 s1, s1, (.LBB5_4-.Lpost_getpc5)>>32 583; GCN-NEXT: s_setpc_b64 s[0:1] 584; GCN-NEXT: .LBB5_1: ; %Flow 585; GCN-NEXT: s_andn2_b64 vcc, exec, s[0:1] 586; GCN-NEXT: s_cbranch_vccnz .LBB5_3 587; GCN-NEXT: .LBB5_2: ; %bb2 588; GCN-NEXT: s_mov_b32 s3, 0xf000 589; GCN-NEXT: s_mov_b32 s2, -1 590; GCN-NEXT: v_mov_b32_e32 v0, 17 591; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 592; GCN-NEXT: s_waitcnt vmcnt(0) 593; GCN-NEXT: .LBB5_3: ; %bb4 594; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 595; GCN-NEXT: s_mov_b32 s3, 0xf000 596; GCN-NEXT: s_mov_b32 s2, -1 597; GCN-NEXT: s_waitcnt expcnt(0) 598; GCN-NEXT: v_mov_b32_e32 v0, 63 599; GCN-NEXT: s_waitcnt lgkmcnt(0) 600; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 601; GCN-NEXT: s_waitcnt vmcnt(0) 602; GCN-NEXT: s_endpgm 603; GCN-NEXT: .LBB5_4: ; %bb3 604; GCN-NEXT: ;;#ASMSTART 605; GCN-NEXT: v_nop_e64 606; GCN-NEXT: v_nop_e64 607; GCN-NEXT: v_nop_e64 608; GCN-NEXT: v_nop_e64 609; GCN-NEXT: ;;#ASMEND 610; GCN-NEXT: s_cbranch_execnz .LBB5_5 611; GCN-NEXT: ; %bb.9: ; %bb3 612; GCN-NEXT: s_getpc_b64 s[0:1] 613; GCN-NEXT: .Lpost_getpc6: 614; GCN-NEXT: s_add_u32 s0, s0, (.LBB5_2-.Lpost_getpc6)&4294967295 615; GCN-NEXT: s_addc_u32 s1, s1, (.LBB5_2-.Lpost_getpc6)>>32 616; GCN-NEXT: s_setpc_b64 s[0:1] 617; GCN-NEXT: .LBB5_5: ; %bb3 618; GCN-NEXT: s_getpc_b64 s[0:1] 619; GCN-NEXT: .Lpost_getpc4: 620; GCN-NEXT: s_add_u32 s0, s0, (.LBB5_3-.Lpost_getpc4)&4294967295 621; GCN-NEXT: s_addc_u32 s1, s1, (.LBB5_3-.Lpost_getpc4)>>32 622; GCN-NEXT: s_setpc_b64 s[0:1] 623; 624; GFX11-LABEL: uniform_unconditional_min_long_forward_branch: 625; GFX11: ; %bb.0: ; %bb0 626; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c 627; GFX11-NEXT: s_waitcnt lgkmcnt(0) 628; GFX11-NEXT: s_cmp_eq_u32 s0, 0 629; GFX11-NEXT: s_mov_b64 s[0:1], -1 630; GFX11-NEXT: s_cbranch_scc1 .LBB5_4 631; GFX11-NEXT: ; %bb.1: ; %Flow 632; GFX11-NEXT: s_and_not1_b64 vcc, exec, s[0:1] 633; GFX11-NEXT: s_cbranch_vccnz .LBB5_3 634; GFX11-NEXT: .LBB5_2: ; %bb2 635; GFX11-NEXT: v_mov_b32_e32 v0, 17 636; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 637; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 638; GFX11-NEXT: .LBB5_3: ; %bb4 639; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 640; GFX11-NEXT: v_mov_b32_e32 v0, 0 641; GFX11-NEXT: v_mov_b32_e32 v1, 63 642; GFX11-NEXT: s_waitcnt lgkmcnt(0) 643; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc 644; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 645; GFX11-NEXT: s_endpgm 646; GFX11-NEXT: .LBB5_4: ; %bb3 647; GFX11-NEXT: ;;#ASMSTART 648; GFX11-NEXT: v_nop_e64 649; GFX11-NEXT: v_nop_e64 650; GFX11-NEXT: v_nop_e64 651; GFX11-NEXT: v_nop_e64 652; GFX11-NEXT: ;;#ASMEND 653; GFX11-NEXT: s_cbranch_execnz .LBB5_5 654; GFX11-NEXT: ; %bb.7: ; %bb3 655; GFX11-NEXT: s_getpc_b64 s[0:1] 656; GFX11-NEXT: .Lpost_getpc6: 657; GFX11-NEXT: s_waitcnt_depctr 0xfffe 658; GFX11-NEXT: s_add_u32 s0, s0, (.LBB5_2-.Lpost_getpc6)&4294967295 659; GFX11-NEXT: s_addc_u32 s1, s1, (.LBB5_2-.Lpost_getpc6)>>32 660; GFX11-NEXT: s_waitcnt_depctr 0xfffe 661; GFX11-NEXT: s_setpc_b64 s[0:1] 662; GFX11-NEXT: .LBB5_5: ; %bb3 663; GFX11-NEXT: s_getpc_b64 s[0:1] 664; GFX11-NEXT: .Lpost_getpc5: 665; GFX11-NEXT: s_waitcnt_depctr 0xfffe 666; GFX11-NEXT: s_add_u32 s0, s0, (.LBB5_3-.Lpost_getpc5)&4294967295 667; GFX11-NEXT: s_addc_u32 s1, s1, (.LBB5_3-.Lpost_getpc5)>>32 668; GFX11-NEXT: s_waitcnt_depctr 0xfffe 669; GFX11-NEXT: s_setpc_b64 s[0:1] 670; 671; GFX12-LABEL: uniform_unconditional_min_long_forward_branch: 672; GFX12: ; %bb.0: ; %bb0 673; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x2c 674; GFX12-NEXT: s_wait_kmcnt 0x0 675; GFX12-NEXT: s_cmp_eq_u32 s0, 0 676; GFX12-NEXT: s_mov_b32 s0, -1 677; GFX12-NEXT: s_cbranch_scc0 .LBB5_1 678; GFX12-NEXT: ; %bb.7: ; %bb0 679; GFX12-NEXT: s_getpc_b64 s[0:1] 680; GFX12-NEXT: .Lpost_getpc6: 681; GFX12-NEXT: s_wait_alu 0xfffe 682; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB5_4-.Lpost_getpc6)&4294967295 683; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB5_4-.Lpost_getpc6)>>32 684; GFX12-NEXT: s_wait_alu 0xfffe 685; GFX12-NEXT: s_setpc_b64 s[0:1] 686; GFX12-NEXT: .LBB5_1: ; %Flow 687; GFX12-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 688; GFX12-NEXT: s_cbranch_vccnz .LBB5_3 689; GFX12-NEXT: .LBB5_2: ; %bb2 690; GFX12-NEXT: v_mov_b32_e32 v0, 17 691; GFX12-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS 692; GFX12-NEXT: s_wait_storecnt 0x0 693; GFX12-NEXT: .LBB5_3: ; %bb4 694; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 695; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 63 696; GFX12-NEXT: s_wait_kmcnt 0x0 697; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS 698; GFX12-NEXT: s_wait_storecnt 0x0 699; GFX12-NEXT: s_endpgm 700; GFX12-NEXT: .LBB5_4: ; %bb3 701; GFX12-NEXT: ;;#ASMSTART 702; GFX12-NEXT: v_nop_e64 703; GFX12-NEXT: v_nop_e64 704; GFX12-NEXT: v_nop_e64 705; GFX12-NEXT: v_nop_e64 706; GFX12-NEXT: ;;#ASMEND 707; GFX12-NEXT: s_cbranch_execnz .LBB5_5 708; GFX12-NEXT: ; %bb.9: ; %bb3 709; GFX12-NEXT: s_getpc_b64 s[0:1] 710; GFX12-NEXT: .Lpost_getpc7: 711; GFX12-NEXT: s_wait_alu 0xfffe 712; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB5_2-.Lpost_getpc7)&4294967295 713; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB5_2-.Lpost_getpc7)>>32 714; GFX12-NEXT: s_wait_alu 0xfffe 715; GFX12-NEXT: s_setpc_b64 s[0:1] 716; GFX12-NEXT: .LBB5_5: ; %bb3 717; GFX12-NEXT: s_getpc_b64 s[0:1] 718; GFX12-NEXT: .Lpost_getpc5: 719; GFX12-NEXT: s_wait_alu 0xfffe 720; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB5_3-.Lpost_getpc5)&4294967295 721; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB5_3-.Lpost_getpc5)>>32 722; GFX12-NEXT: s_wait_alu 0xfffe 723; GFX12-NEXT: s_setpc_b64 s[0:1] 724bb0: 725 %tmp = icmp ne i32 %arg1, 0 726 br i1 %tmp, label %bb2, label %bb3 727 728bb2: 729 store volatile i32 17, ptr addrspace(1) undef 730 br label %bb4 731 732bb3: 733 ; 32 byte asm 734 call void asm sideeffect 735 "v_nop_e64 736 v_nop_e64 737 v_nop_e64 738 v_nop_e64", ""() #0 739 br label %bb4 740 741bb4: 742 store volatile i32 63, ptr addrspace(1) %arg 743 ret void 744} 745 746define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(ptr addrspace(1) %arg, i32 %arg1) { 747; GCN-LABEL: uniform_unconditional_min_long_backward_branch: 748; GCN: ; %bb.0: ; %entry 749; GCN-NEXT: s_and_b64 vcc, exec, -1 750; GCN-NEXT: .LBB6_1: ; %loop 751; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 752; GCN-NEXT: ;;#ASMSTART 753; GCN-NEXT: v_nop_e64 754; GCN-NEXT: v_nop_e64 755; GCN-NEXT: v_nop_e64 756; GCN-NEXT: v_nop_e64 757; GCN-NEXT: ;;#ASMEND 758; GCN-NEXT: s_mov_b64 vcc, vcc 759; GCN-NEXT: s_cbranch_vccz .LBB6_2 760; GCN-NEXT: ; %bb.3: ; %loop 761; GCN-NEXT: ; in Loop: Header=BB6_1 Depth=1 762; GCN-NEXT: s_getpc_b64 s[0:1] 763; GCN-NEXT: .Lpost_getpc7: 764; GCN-NEXT: s_add_u32 s0, s0, (.LBB6_1-.Lpost_getpc7)&4294967295 765; GCN-NEXT: s_addc_u32 s1, s1, (.LBB6_1-.Lpost_getpc7)>>32 766; GCN-NEXT: s_setpc_b64 s[0:1] 767; GCN-NEXT: .LBB6_2: ; %DummyReturnBlock 768; GCN-NEXT: s_endpgm 769; 770; GFX11-LABEL: uniform_unconditional_min_long_backward_branch: 771; GFX11: ; %bb.0: ; %entry 772; GFX11-NEXT: s_and_b64 vcc, exec, -1 773; GFX11-NEXT: .p2align 6 774; GFX11-NEXT: .LBB6_1: ; %loop 775; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 776; GFX11-NEXT: ;;#ASMSTART 777; GFX11-NEXT: v_nop_e64 778; GFX11-NEXT: v_nop_e64 779; GFX11-NEXT: v_nop_e64 780; GFX11-NEXT: v_nop_e64 781; GFX11-NEXT: ;;#ASMEND 782; GFX11-NEXT: s_cbranch_vccz .LBB6_2 783; GFX11-NEXT: ; %bb.3: ; %loop 784; GFX11-NEXT: ; in Loop: Header=BB6_1 Depth=1 785; GFX11-NEXT: s_getpc_b64 s[0:1] 786; GFX11-NEXT: .Lpost_getpc7: 787; GFX11-NEXT: s_waitcnt_depctr 0xfffe 788; GFX11-NEXT: s_add_u32 s0, s0, (.LBB6_1-.Lpost_getpc7)&4294967295 789; GFX11-NEXT: s_addc_u32 s1, s1, (.LBB6_1-.Lpost_getpc7)>>32 790; GFX11-NEXT: s_waitcnt_depctr 0xfffe 791; GFX11-NEXT: s_setpc_b64 s[0:1] 792; GFX11-NEXT: .LBB6_2: ; %DummyReturnBlock 793; GFX11-NEXT: s_endpgm 794; 795; GFX12-LABEL: uniform_unconditional_min_long_backward_branch: 796; GFX12: ; %bb.0: ; %entry 797; GFX12-NEXT: s_mov_b32 vcc_lo, exec_lo 798; GFX12-NEXT: .LBB6_1: ; %loop 799; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 800; GFX12-NEXT: ;;#ASMSTART 801; GFX12-NEXT: v_nop_e64 802; GFX12-NEXT: v_nop_e64 803; GFX12-NEXT: v_nop_e64 804; GFX12-NEXT: v_nop_e64 805; GFX12-NEXT: ;;#ASMEND 806; GFX12-NEXT: s_cbranch_vccz .LBB6_2 807; GFX12-NEXT: ; %bb.3: ; %loop 808; GFX12-NEXT: ; in Loop: Header=BB6_1 Depth=1 809; GFX12-NEXT: s_getpc_b64 s[0:1] 810; GFX12-NEXT: .Lpost_getpc8: 811; GFX12-NEXT: s_wait_alu 0xfffe 812; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB6_1-.Lpost_getpc8)&4294967295 813; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB6_1-.Lpost_getpc8)>>32 814; GFX12-NEXT: s_wait_alu 0xfffe 815; GFX12-NEXT: s_setpc_b64 s[0:1] 816; GFX12-NEXT: .LBB6_2: ; %DummyReturnBlock 817; GFX12-NEXT: s_endpgm 818entry: 819 br label %loop 820 821loop: 822 ; 32 byte asm 823 call void asm sideeffect 824 "v_nop_e64 825 v_nop_e64 826 v_nop_e64 827 v_nop_e64", ""() #0 828 br label %loop 829} 830 831; Expansion of branch from %bb1 to %bb3 introduces need to expand 832; branch from %bb0 to %bb2 833 834define amdgpu_kernel void @expand_requires_expand(i32 %cond0) #0 { 835; GCN-LABEL: expand_requires_expand: 836; GCN: ; %bb.0: ; %bb0 837; GCN-NEXT: s_load_dword s0, s[4:5], 0x9 838; GCN-NEXT: s_waitcnt lgkmcnt(0) 839; GCN-NEXT: s_cmp_lt_i32 s0, 0 840; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 841; GCN-NEXT: s_and_b64 vcc, exec, s[0:1] 842; GCN-NEXT: s_cbranch_vccnz .LBB7_2 843; GCN-NEXT: ; %bb.1: ; %bb1 844; GCN-NEXT: s_load_dword s0, s[0:1], 0x0 845; GCN-NEXT: s_waitcnt lgkmcnt(0) 846; GCN-NEXT: s_cmp_lg_u32 s0, 3 847; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 848; GCN-NEXT: .LBB7_2: ; %Flow 849; GCN-NEXT: s_andn2_b64 vcc, exec, s[0:1] 850; GCN-NEXT: s_cbranch_vccz .LBB7_3 851; GCN-NEXT: ; %bb.5: ; %Flow 852; GCN-NEXT: s_getpc_b64 s[0:1] 853; GCN-NEXT: .Lpost_getpc8: 854; GCN-NEXT: s_add_u32 s0, s0, (.LBB7_4-.Lpost_getpc8)&4294967295 855; GCN-NEXT: s_addc_u32 s1, s1, (.LBB7_4-.Lpost_getpc8)>>32 856; GCN-NEXT: s_setpc_b64 s[0:1] 857; GCN-NEXT: .LBB7_3: ; %bb2 858; GCN-NEXT: ;;#ASMSTART 859; GCN-NEXT: v_nop_e64 860; GCN-NEXT: v_nop_e64 861; GCN-NEXT: v_nop_e64 862; GCN-NEXT: v_nop_e64 863; GCN-NEXT: ;;#ASMEND 864; GCN-NEXT: .LBB7_4: ; %bb3 865; GCN-NEXT: ;;#ASMSTART 866; GCN-NEXT: v_nop_e64 867; GCN-NEXT: ;;#ASMEND 868; GCN-NEXT: ;;#ASMSTART 869; GCN-NEXT: v_nop_e64 870; GCN-NEXT: ;;#ASMEND 871; GCN-NEXT: s_endpgm 872; 873; GFX11-LABEL: expand_requires_expand: 874; GFX11: ; %bb.0: ; %bb0 875; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24 876; GFX11-NEXT: s_waitcnt lgkmcnt(0) 877; GFX11-NEXT: s_cmp_lt_i32 s0, 0 878; GFX11-NEXT: s_cselect_b64 s[0:1], -1, 0 879; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 880; GFX11-NEXT: s_and_b64 vcc, exec, s[0:1] 881; GFX11-NEXT: s_cbranch_vccnz .LBB7_2 882; GFX11-NEXT: ; %bb.1: ; %bb1 883; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 884; GFX11-NEXT: s_waitcnt lgkmcnt(0) 885; GFX11-NEXT: s_cmp_lg_u32 s0, 3 886; GFX11-NEXT: s_cselect_b64 s[0:1], -1, 0 887; GFX11-NEXT: .LBB7_2: ; %Flow 888; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 889; GFX11-NEXT: s_and_not1_b64 vcc, exec, s[0:1] 890; GFX11-NEXT: s_cbranch_vccz .LBB7_3 891; GFX11-NEXT: ; %bb.5: ; %Flow 892; GFX11-NEXT: s_getpc_b64 s[0:1] 893; GFX11-NEXT: .Lpost_getpc8: 894; GFX11-NEXT: s_waitcnt_depctr 0xfffe 895; GFX11-NEXT: s_add_u32 s0, s0, (.LBB7_4-.Lpost_getpc8)&4294967295 896; GFX11-NEXT: s_addc_u32 s1, s1, (.LBB7_4-.Lpost_getpc8)>>32 897; GFX11-NEXT: s_waitcnt_depctr 0xfffe 898; GFX11-NEXT: s_setpc_b64 s[0:1] 899; GFX11-NEXT: .LBB7_3: ; %bb2 900; GFX11-NEXT: ;;#ASMSTART 901; GFX11-NEXT: v_nop_e64 902; GFX11-NEXT: v_nop_e64 903; GFX11-NEXT: v_nop_e64 904; GFX11-NEXT: v_nop_e64 905; GFX11-NEXT: ;;#ASMEND 906; GFX11-NEXT: .LBB7_4: ; %bb3 907; GFX11-NEXT: ;;#ASMSTART 908; GFX11-NEXT: v_nop_e64 909; GFX11-NEXT: ;;#ASMEND 910; GFX11-NEXT: ;;#ASMSTART 911; GFX11-NEXT: v_nop_e64 912; GFX11-NEXT: ;;#ASMEND 913; GFX11-NEXT: s_endpgm 914; 915; GFX12-LABEL: expand_requires_expand: 916; GFX12: ; %bb.0: ; %bb0 917; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24 918; GFX12-NEXT: s_wait_kmcnt 0x0 919; GFX12-NEXT: s_cmp_lt_i32 s0, 0 920; GFX12-NEXT: s_cselect_b32 s0, -1, 0 921; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 922; GFX12-NEXT: s_and_b32 vcc_lo, exec_lo, s0 923; GFX12-NEXT: s_cbranch_vccnz .LBB7_2 924; GFX12-NEXT: ; %bb.1: ; %bb1 925; GFX12-NEXT: s_load_b32 s0, s[0:1], 0x0 926; GFX12-NEXT: s_wait_kmcnt 0x0 927; GFX12-NEXT: s_cmp_lg_u32 s0, 3 928; GFX12-NEXT: s_cselect_b32 s0, -1, 0 929; GFX12-NEXT: .LBB7_2: ; %Flow 930; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 931; GFX12-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 932; GFX12-NEXT: s_cbranch_vccz .LBB7_3 933; GFX12-NEXT: ; %bb.5: ; %Flow 934; GFX12-NEXT: s_getpc_b64 s[0:1] 935; GFX12-NEXT: .Lpost_getpc9: 936; GFX12-NEXT: s_wait_alu 0xfffe 937; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB7_4-.Lpost_getpc9)&4294967295 938; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB7_4-.Lpost_getpc9)>>32 939; GFX12-NEXT: s_wait_alu 0xfffe 940; GFX12-NEXT: s_setpc_b64 s[0:1] 941; GFX12-NEXT: .LBB7_3: ; %bb2 942; GFX12-NEXT: ;;#ASMSTART 943; GFX12-NEXT: v_nop_e64 944; GFX12-NEXT: v_nop_e64 945; GFX12-NEXT: v_nop_e64 946; GFX12-NEXT: v_nop_e64 947; GFX12-NEXT: ;;#ASMEND 948; GFX12-NEXT: .LBB7_4: ; %bb3 949; GFX12-NEXT: ;;#ASMSTART 950; GFX12-NEXT: v_nop_e64 951; GFX12-NEXT: ;;#ASMEND 952; GFX12-NEXT: ;;#ASMSTART 953; GFX12-NEXT: v_nop_e64 954; GFX12-NEXT: ;;#ASMEND 955; GFX12-NEXT: s_endpgm 956bb0: 957 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() #0 958 %cmp0 = icmp slt i32 %cond0, 0 959 br i1 %cmp0, label %bb2, label %bb1 960 961bb1: 962 %val = load volatile i32, ptr addrspace(4) undef 963 %cmp1 = icmp eq i32 %val, 3 964 br i1 %cmp1, label %bb3, label %bb2 965 966bb2: 967 call void asm sideeffect 968 "v_nop_e64 969 v_nop_e64 970 v_nop_e64 971 v_nop_e64", ""() #0 972 br label %bb3 973 974bb3: 975; These NOPs prevent tail-duplication-based outlining 976; from firing, which defeats the need to expand the branches and this test. 977 call void asm sideeffect 978 "v_nop_e64", ""() #0 979 call void asm sideeffect 980 "v_nop_e64", ""() #0 981 ret void 982} 983 984; Requires expanding of required skip branch. 985 986define amdgpu_kernel void @uniform_inside_divergent(ptr addrspace(1) %out, i32 %cond) #0 { 987; GCN-LABEL: uniform_inside_divergent: 988; GCN: ; %bb.0: ; %entry 989; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 16, v0 990; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc 991; GCN-NEXT: s_cbranch_execnz .LBB8_1 992; GCN-NEXT: ; %bb.4: ; %entry 993; GCN-NEXT: s_getpc_b64 s[0:1] 994; GCN-NEXT: .Lpost_getpc9: 995; GCN-NEXT: s_add_u32 s0, s0, (.LBB8_3-.Lpost_getpc9)&4294967295 996; GCN-NEXT: s_addc_u32 s1, s1, (.LBB8_3-.Lpost_getpc9)>>32 997; GCN-NEXT: s_setpc_b64 s[0:1] 998; GCN-NEXT: .LBB8_1: ; %if 999; GCN-NEXT: s_load_dword s8, s[4:5], 0xb 1000; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1001; GCN-NEXT: s_mov_b32 s3, 0xf000 1002; GCN-NEXT: s_mov_b32 s2, -1 1003; GCN-NEXT: v_mov_b32_e32 v0, 0 1004; GCN-NEXT: s_waitcnt lgkmcnt(0) 1005; GCN-NEXT: s_cmp_lg_u32 s8, 0 1006; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 1007; GCN-NEXT: s_cbranch_scc1 .LBB8_3 1008; GCN-NEXT: ; %bb.2: ; %if_uniform 1009; GCN-NEXT: s_waitcnt expcnt(0) 1010; GCN-NEXT: v_mov_b32_e32 v0, 1 1011; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 1012; GCN-NEXT: .LBB8_3: ; %endif 1013; GCN-NEXT: s_or_b64 exec, exec, s[6:7] 1014; GCN-NEXT: s_sleep 5 1015; GCN-NEXT: s_endpgm 1016; 1017; GFX11-LABEL: uniform_inside_divergent: 1018; GFX11: ; %bb.0: ; %entry 1019; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1020; GFX11-NEXT: s_mov_b64 s[0:1], exec 1021; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1022; GFX11-NEXT: v_cmpx_gt_u32_e32 16, v0 1023; GFX11-NEXT: s_cbranch_execz .LBB8_3 1024; GFX11-NEXT: ; %bb.1: ; %if 1025; GFX11-NEXT: s_clause 0x1 1026; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c 1027; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 1028; GFX11-NEXT: v_mov_b32_e32 v0, 0 1029; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1030; GFX11-NEXT: s_cmp_lg_u32 s6, 0 1031; GFX11-NEXT: global_store_b32 v0, v0, s[2:3] 1032; GFX11-NEXT: s_cbranch_scc1 .LBB8_3 1033; GFX11-NEXT: ; %bb.2: ; %if_uniform 1034; GFX11-NEXT: v_mov_b32_e32 v1, 1 1035; GFX11-NEXT: global_store_b32 v0, v1, s[2:3] 1036; GFX11-NEXT: .LBB8_3: ; %endif 1037; GFX11-NEXT: s_or_b64 exec, exec, s[0:1] 1038; GFX11-NEXT: s_sleep 5 1039; GFX11-NEXT: s_endpgm 1040; 1041; GFX12-LABEL: uniform_inside_divergent: 1042; GFX12: ; %bb.0: ; %entry 1043; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1044; GFX12-NEXT: s_mov_b32 s3, exec_lo 1045; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1046; GFX12-NEXT: v_cmpx_gt_u32_e32 16, v0 1047; GFX12-NEXT: s_cbranch_execz .LBB8_3 1048; GFX12-NEXT: ; %bb.1: ; %if 1049; GFX12-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 1050; GFX12-NEXT: v_mov_b32_e32 v0, 0 1051; GFX12-NEXT: s_wait_kmcnt 0x0 1052; GFX12-NEXT: s_cmp_lg_u32 s2, 0 1053; GFX12-NEXT: global_store_b32 v0, v0, s[0:1] 1054; GFX12-NEXT: s_cbranch_scc1 .LBB8_3 1055; GFX12-NEXT: ; %bb.2: ; %if_uniform 1056; GFX12-NEXT: v_mov_b32_e32 v1, 1 1057; GFX12-NEXT: global_store_b32 v0, v1, s[0:1] 1058; GFX12-NEXT: .LBB8_3: ; %endif 1059; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s3 1060; GFX12-NEXT: s_sleep 5 1061; GFX12-NEXT: s_endpgm 1062entry: 1063 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1064 %d_cmp = icmp ult i32 %tid, 16 1065 br i1 %d_cmp, label %if, label %endif 1066 1067if: 1068 store i32 0, ptr addrspace(1) %out 1069 %u_cmp = icmp eq i32 %cond, 0 1070 br i1 %u_cmp, label %if_uniform, label %endif 1071 1072if_uniform: 1073 store i32 1, ptr addrspace(1) %out 1074 br label %endif 1075 1076endif: 1077 ; layout can remove the split branch if it can copy the return block. 1078 ; This call makes the return block long enough that it doesn't get copied. 1079 call void @llvm.amdgcn.s.sleep(i32 5); 1080 ret void 1081} 1082 1083; si_mask_branch 1084 1085define amdgpu_kernel void @analyze_mask_branch() #0 { 1086; GCN-LABEL: analyze_mask_branch: 1087; GCN: ; %bb.0: ; %entry 1088; GCN-NEXT: ;;#ASMSTART 1089; GCN-NEXT: v_mov_b32_e64 v0, 0 1090; GCN-NEXT: ;;#ASMEND 1091; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 1092; GCN-NEXT: s_and_saveexec_b64 s[0:1], vcc 1093; GCN-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 1094; GCN-NEXT: s_cbranch_execz .LBB9_2 1095; GCN-NEXT: ; %bb.1: ; %ret 1096; GCN-NEXT: s_mov_b32 s3, 0xf000 1097; GCN-NEXT: s_mov_b32 s2, -1 1098; GCN-NEXT: v_mov_b32_e32 v0, 7 1099; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 1100; GCN-NEXT: s_waitcnt vmcnt(0) 1101; GCN-NEXT: .LBB9_2: ; %Flow1 1102; GCN-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] 1103; GCN-NEXT: s_cbranch_execnz .LBB9_3 1104; GCN-NEXT: ; %bb.6: ; %Flow1 1105; GCN-NEXT: s_getpc_b64 s[0:1] 1106; GCN-NEXT: .Lpost_getpc10: 1107; GCN-NEXT: s_add_u32 s0, s0, (.LBB9_5-.Lpost_getpc10)&4294967295 1108; GCN-NEXT: s_addc_u32 s1, s1, (.LBB9_5-.Lpost_getpc10)>>32 1109; GCN-NEXT: s_setpc_b64 s[0:1] 1110; GCN-NEXT: .LBB9_3: ; %loop.preheader 1111; GCN-NEXT: s_and_b64 vcc, exec, 0 1112; GCN-NEXT: .LBB9_4: ; %loop 1113; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 1114; GCN-NEXT: ;;#ASMSTART 1115; GCN-NEXT: v_nop_e64 1116; GCN-NEXT: v_nop_e64 1117; GCN-NEXT: ;;#ASMEND 1118; GCN-NEXT: ;;#ASMSTART 1119; GCN-NEXT: v_nop_e64 1120; GCN-NEXT: v_nop_e64 1121; GCN-NEXT: v_nop_e64 1122; GCN-NEXT: v_nop_e64 1123; GCN-NEXT: ;;#ASMEND 1124; GCN-NEXT: s_mov_b64 vcc, vcc 1125; GCN-NEXT: s_cbranch_vccnz .LBB9_5 1126; GCN-NEXT: ; %bb.8: ; %loop 1127; GCN-NEXT: ; in Loop: Header=BB9_4 Depth=1 1128; GCN-NEXT: s_getpc_b64 s[0:1] 1129; GCN-NEXT: .Lpost_getpc11: 1130; GCN-NEXT: s_add_u32 s0, s0, (.LBB9_4-.Lpost_getpc11)&4294967295 1131; GCN-NEXT: s_addc_u32 s1, s1, (.LBB9_4-.Lpost_getpc11)>>32 1132; GCN-NEXT: s_setpc_b64 s[0:1] 1133; GCN-NEXT: .LBB9_5: ; %UnifiedReturnBlock 1134; GCN-NEXT: s_endpgm 1135; 1136; GFX11-LABEL: analyze_mask_branch: 1137; GFX11: ; %bb.0: ; %entry 1138; GFX11-NEXT: s_mov_b64 s[0:1], exec 1139; GFX11-NEXT: ;;#ASMSTART 1140; GFX11-NEXT: v_mov_b32_e64 v0, 0 1141; GFX11-NEXT: ;;#ASMEND 1142; GFX11-NEXT: v_cmpx_nlt_f32_e32 0, v0 1143; GFX11-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 1144; GFX11-NEXT: s_cbranch_execz .LBB9_2 1145; GFX11-NEXT: ; %bb.1: ; %ret 1146; GFX11-NEXT: v_mov_b32_e32 v0, 7 1147; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1148; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1149; GFX11-NEXT: .LBB9_2: ; %Flow1 1150; GFX11-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] 1151; GFX11-NEXT: s_cbranch_execnz .LBB9_3 1152; GFX11-NEXT: ; %bb.6: ; %Flow1 1153; GFX11-NEXT: s_getpc_b64 s[0:1] 1154; GFX11-NEXT: .Lpost_getpc9: 1155; GFX11-NEXT: s_waitcnt_depctr 0xfffe 1156; GFX11-NEXT: s_add_u32 s0, s0, (.LBB9_5-.Lpost_getpc9)&4294967295 1157; GFX11-NEXT: s_addc_u32 s1, s1, (.LBB9_5-.Lpost_getpc9)>>32 1158; GFX11-NEXT: s_waitcnt_depctr 0xfffe 1159; GFX11-NEXT: s_setpc_b64 s[0:1] 1160; GFX11-NEXT: .LBB9_3: ; %loop.preheader 1161; GFX11-NEXT: s_and_b64 vcc, exec, 0 1162; GFX11-NEXT: .p2align 6 1163; GFX11-NEXT: .LBB9_4: ; %loop 1164; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 1165; GFX11-NEXT: ;;#ASMSTART 1166; GFX11-NEXT: v_nop_e64 1167; GFX11-NEXT: v_nop_e64 1168; GFX11-NEXT: ;;#ASMEND 1169; GFX11-NEXT: ;;#ASMSTART 1170; GFX11-NEXT: v_nop_e64 1171; GFX11-NEXT: v_nop_e64 1172; GFX11-NEXT: v_nop_e64 1173; GFX11-NEXT: v_nop_e64 1174; GFX11-NEXT: ;;#ASMEND 1175; GFX11-NEXT: s_cbranch_vccnz .LBB9_5 1176; GFX11-NEXT: ; %bb.8: ; %loop 1177; GFX11-NEXT: ; in Loop: Header=BB9_4 Depth=1 1178; GFX11-NEXT: s_getpc_b64 s[0:1] 1179; GFX11-NEXT: .Lpost_getpc10: 1180; GFX11-NEXT: s_waitcnt_depctr 0xfffe 1181; GFX11-NEXT: s_add_u32 s0, s0, (.LBB9_4-.Lpost_getpc10)&4294967295 1182; GFX11-NEXT: s_addc_u32 s1, s1, (.LBB9_4-.Lpost_getpc10)>>32 1183; GFX11-NEXT: s_waitcnt_depctr 0xfffe 1184; GFX11-NEXT: s_setpc_b64 s[0:1] 1185; GFX11-NEXT: .LBB9_5: ; %UnifiedReturnBlock 1186; GFX11-NEXT: s_endpgm 1187; 1188; GFX12-LABEL: analyze_mask_branch: 1189; GFX12: ; %bb.0: ; %entry 1190; GFX12-NEXT: s_mov_b32 s0, exec_lo 1191; GFX12-NEXT: ;;#ASMSTART 1192; GFX12-NEXT: v_mov_b32_e64 v0, 0 1193; GFX12-NEXT: ;;#ASMEND 1194; GFX12-NEXT: v_cmpx_nlt_f32_e32 0, v0 1195; GFX12-NEXT: s_xor_b32 s0, exec_lo, s0 1196; GFX12-NEXT: s_cbranch_execz .LBB9_2 1197; GFX12-NEXT: ; %bb.1: ; %ret 1198; GFX12-NEXT: v_mov_b32_e32 v0, 7 1199; GFX12-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS 1200; GFX12-NEXT: s_wait_storecnt 0x0 1201; GFX12-NEXT: .LBB9_2: ; %Flow1 1202; GFX12-NEXT: s_and_not1_saveexec_b32 s0, s0 1203; GFX12-NEXT: s_cbranch_execnz .LBB9_3 1204; GFX12-NEXT: ; %bb.6: ; %Flow1 1205; GFX12-NEXT: s_getpc_b64 s[0:1] 1206; GFX12-NEXT: .Lpost_getpc10: 1207; GFX12-NEXT: s_wait_alu 0xfffe 1208; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB9_5-.Lpost_getpc10)&4294967295 1209; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB9_5-.Lpost_getpc10)>>32 1210; GFX12-NEXT: s_wait_alu 0xfffe 1211; GFX12-NEXT: s_setpc_b64 s[0:1] 1212; GFX12-NEXT: .LBB9_3: ; %loop.preheader 1213; GFX12-NEXT: s_mov_b32 vcc_lo, 0 1214; GFX12-NEXT: .LBB9_4: ; %loop 1215; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 1216; GFX12-NEXT: ;;#ASMSTART 1217; GFX12-NEXT: v_nop_e64 1218; GFX12-NEXT: v_nop_e64 1219; GFX12-NEXT: ;;#ASMEND 1220; GFX12-NEXT: ;;#ASMSTART 1221; GFX12-NEXT: v_nop_e64 1222; GFX12-NEXT: v_nop_e64 1223; GFX12-NEXT: v_nop_e64 1224; GFX12-NEXT: v_nop_e64 1225; GFX12-NEXT: ;;#ASMEND 1226; GFX12-NEXT: s_cbranch_vccnz .LBB9_5 1227; GFX12-NEXT: ; %bb.8: ; %loop 1228; GFX12-NEXT: ; in Loop: Header=BB9_4 Depth=1 1229; GFX12-NEXT: s_getpc_b64 s[0:1] 1230; GFX12-NEXT: .Lpost_getpc11: 1231; GFX12-NEXT: s_wait_alu 0xfffe 1232; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB9_4-.Lpost_getpc11)&4294967295 1233; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB9_4-.Lpost_getpc11)>>32 1234; GFX12-NEXT: s_wait_alu 0xfffe 1235; GFX12-NEXT: s_setpc_b64 s[0:1] 1236; GFX12-NEXT: .LBB9_5: ; %UnifiedReturnBlock 1237; GFX12-NEXT: s_endpgm 1238entry: 1239 %reg = call float asm sideeffect "v_mov_b32_e64 $0, 0", "=v"() 1240 %cmp0 = fcmp ogt float %reg, 0.000000e+00 1241 br i1 %cmp0, label %loop, label %ret 1242 1243loop: 1244 %phi = phi float [ 0.000000e+00, %loop_body ], [ 1.000000e+00, %entry ] 1245 call void asm sideeffect 1246 "v_nop_e64 1247 v_nop_e64", ""() #0 1248 %cmp1 = fcmp olt float %phi, 8.0 1249 br i1 %cmp1, label %loop_body, label %ret 1250 1251loop_body: 1252 call void asm sideeffect 1253 "v_nop_e64 1254 v_nop_e64 1255 v_nop_e64 1256 v_nop_e64", ""() #0 1257 br label %loop 1258 1259ret: 1260 store volatile i32 7, ptr addrspace(1) undef 1261 ret void 1262} 1263 1264define amdgpu_kernel void @long_branch_hang(ptr addrspace(1) nocapture %arg, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i64 %arg5) #0 { 1265; GCN-LABEL: long_branch_hang: 1266; GCN: ; %bb.0: ; %bb 1267; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb 1268; GCN-NEXT: s_waitcnt lgkmcnt(0) 1269; GCN-NEXT: s_cmp_eq_u32 s0, 0 1270; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0 1271; GCN-NEXT: s_cmp_lg_u32 s0, 0 1272; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0 1273; GCN-NEXT: s_cmp_lt_i32 s3, 6 1274; GCN-NEXT: s_cbranch_scc1 .LBB10_1 1275; GCN-NEXT: ; %bb.8: ; %bb 1276; GCN-NEXT: s_getpc_b64 s[8:9] 1277; GCN-NEXT: .Lpost_getpc12: 1278; GCN-NEXT: s_add_u32 s8, s8, (.LBB10_2-.Lpost_getpc12)&4294967295 1279; GCN-NEXT: s_addc_u32 s9, s9, (.LBB10_2-.Lpost_getpc12)>>32 1280; GCN-NEXT: s_setpc_b64 s[8:9] 1281; GCN-NEXT: .LBB10_1: ; %bb13 1282; GCN-NEXT: ;;#ASMSTART 1283; GCN-NEXT: v_nop_e64 1284; GCN-NEXT: v_nop_e64 1285; GCN-NEXT: v_nop_e64 1286; GCN-NEXT: v_nop_e64 1287; GCN-NEXT: ;;#ASMEND 1288; GCN-NEXT: s_cbranch_execz .LBB10_3 1289; GCN-NEXT: s_branch .LBB10_4 1290; GCN-NEXT: .LBB10_2: 1291; GCN-NEXT: s_mov_b64 s[8:9], 0 1292; GCN-NEXT: .LBB10_3: ; %bb9 1293; GCN-NEXT: s_cmp_lt_i32 s3, 11 1294; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0 1295; GCN-NEXT: s_cmp_ge_i32 s2, s3 1296; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0 1297; GCN-NEXT: s_and_b64 s[8:9], s[10:11], s[8:9] 1298; GCN-NEXT: .LBB10_4: ; %Flow5 1299; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9] 1300; GCN-NEXT: s_cbranch_vccz .LBB10_5 1301; GCN-NEXT: ; %bb.10: ; %Flow5 1302; GCN-NEXT: s_getpc_b64 s[0:1] 1303; GCN-NEXT: .Lpost_getpc13: 1304; GCN-NEXT: s_add_u32 s0, s0, (.LBB10_6-.Lpost_getpc13)&4294967295 1305; GCN-NEXT: s_addc_u32 s1, s1, (.LBB10_6-.Lpost_getpc13)>>32 1306; GCN-NEXT: s_setpc_b64 s[0:1] 1307; GCN-NEXT: .LBB10_5: ; %bb14 1308; GCN-NEXT: s_cmp_lt_i32 s1, 9 1309; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 1310; GCN-NEXT: s_cmp_lt_i32 s2, s3 1311; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0 1312; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1313; GCN-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1] 1314; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] 1315; GCN-NEXT: s_branch .LBB10_7 1316; GCN-NEXT: .LBB10_6: 1317; GCN-NEXT: ; implicit-def: $vgpr0 1318; GCN-NEXT: .LBB10_7: ; %bb19 1319; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xf 1320; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1321; GCN-NEXT: s_mov_b32 s3, 0xf000 1322; GCN-NEXT: s_mov_b32 s2, 0 1323; GCN-NEXT: s_waitcnt lgkmcnt(0) 1324; GCN-NEXT: s_lshl_b64 s[4:5], s[6:7], 2 1325; GCN-NEXT: v_mov_b32_e32 v1, s4 1326; GCN-NEXT: v_mov_b32_e32 v2, s5 1327; GCN-NEXT: buffer_store_dword v0, v[1:2], s[0:3], 0 addr64 1328; GCN-NEXT: s_endpgm 1329; 1330; GFX11-LABEL: long_branch_hang: 1331; GFX11: ; %bb.0: ; %bb 1332; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c 1333; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1334; GFX11-NEXT: s_cmp_eq_u32 s0, 0 1335; GFX11-NEXT: s_cselect_b64 s[6:7], -1, 0 1336; GFX11-NEXT: s_cmp_lg_u32 s0, 0 1337; GFX11-NEXT: s_cselect_b64 s[8:9], -1, 0 1338; GFX11-NEXT: s_cmp_lt_i32 s3, 6 1339; GFX11-NEXT: s_cbranch_scc1 .LBB10_1 1340; GFX11-NEXT: ; %bb.8: ; %bb 1341; GFX11-NEXT: s_getpc_b64 s[8:9] 1342; GFX11-NEXT: .Lpost_getpc11: 1343; GFX11-NEXT: s_waitcnt_depctr 0xfffe 1344; GFX11-NEXT: s_add_u32 s8, s8, (.LBB10_2-.Lpost_getpc11)&4294967295 1345; GFX11-NEXT: s_addc_u32 s9, s9, (.LBB10_2-.Lpost_getpc11)>>32 1346; GFX11-NEXT: s_waitcnt_depctr 0xfffe 1347; GFX11-NEXT: s_setpc_b64 s[8:9] 1348; GFX11-NEXT: .LBB10_1: ; %bb13 1349; GFX11-NEXT: ;;#ASMSTART 1350; GFX11-NEXT: v_nop_e64 1351; GFX11-NEXT: v_nop_e64 1352; GFX11-NEXT: v_nop_e64 1353; GFX11-NEXT: v_nop_e64 1354; GFX11-NEXT: ;;#ASMEND 1355; GFX11-NEXT: s_cbranch_execz .LBB10_3 1356; GFX11-NEXT: s_branch .LBB10_4 1357; GFX11-NEXT: .LBB10_2: 1358; GFX11-NEXT: s_mov_b64 s[8:9], 0 1359; GFX11-NEXT: .LBB10_3: ; %bb9 1360; GFX11-NEXT: s_cmp_lt_i32 s3, 11 1361; GFX11-NEXT: s_cselect_b64 s[8:9], -1, 0 1362; GFX11-NEXT: s_cmp_ge_i32 s2, s3 1363; GFX11-NEXT: s_cselect_b64 s[10:11], -1, 0 1364; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1365; GFX11-NEXT: s_and_b64 s[8:9], s[10:11], s[8:9] 1366; GFX11-NEXT: .LBB10_4: ; %Flow5 1367; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1368; GFX11-NEXT: s_and_not1_b64 vcc, exec, s[8:9] 1369; GFX11-NEXT: s_cbranch_vccnz .LBB10_6 1370; GFX11-NEXT: ; %bb.5: ; %bb14 1371; GFX11-NEXT: s_cmp_lt_i32 s1, 9 1372; GFX11-NEXT: s_cselect_b64 s[0:1], -1, 0 1373; GFX11-NEXT: s_cmp_lt_i32 s2, s3 1374; GFX11-NEXT: s_cselect_b64 s[2:3], -1, 0 1375; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1376; GFX11-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 1377; GFX11-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1] 1378; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1379; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] 1380; GFX11-NEXT: s_branch .LBB10_7 1381; GFX11-NEXT: .LBB10_6: 1382; GFX11-NEXT: ; implicit-def: $vgpr0 1383; GFX11-NEXT: .LBB10_7: ; %bb19 1384; GFX11-NEXT: s_clause 0x1 1385; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x3c 1386; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 1387; GFX11-NEXT: v_mov_b32_e32 v1, 0 1388; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1389; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 1390; GFX11-NEXT: s_waitcnt_depctr 0xfffe 1391; GFX11-NEXT: s_add_u32 s0, s2, s0 1392; GFX11-NEXT: s_addc_u32 s1, s3, s1 1393; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1394; GFX11-NEXT: s_endpgm 1395; 1396; GFX12-LABEL: long_branch_hang: 1397; GFX12: ; %bb.0: ; %bb 1398; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c 1399; GFX12-NEXT: s_mov_b32 s7, -1 1400; GFX12-NEXT: s_wait_kmcnt 0x0 1401; GFX12-NEXT: s_cmp_eq_u32 s0, 0 1402; GFX12-NEXT: s_cselect_b32 s6, -1, 0 1403; GFX12-NEXT: s_cmp_lg_u32 s0, 0 1404; GFX12-NEXT: s_mov_b32 s0, 0 1405; GFX12-NEXT: s_cselect_b32 s8, -1, 0 1406; GFX12-NEXT: s_cmp_lt_i32 s3, 6 1407; GFX12-NEXT: s_cbranch_scc0 .LBB10_1 1408; GFX12-NEXT: ; %bb.18: ; %bb 1409; GFX12-NEXT: s_getpc_b64 s[10:11] 1410; GFX12-NEXT: .Lpost_getpc17: 1411; GFX12-NEXT: s_wait_alu 0xfffe 1412; GFX12-NEXT: s_add_co_u32 s10, s10, (.LBB10_4-.Lpost_getpc17)&4294967295 1413; GFX12-NEXT: s_add_co_ci_u32 s11, s11, (.LBB10_4-.Lpost_getpc17)>>32 1414; GFX12-NEXT: s_wait_alu 0xfffe 1415; GFX12-NEXT: s_setpc_b64 s[10:11] 1416; GFX12-NEXT: .LBB10_1: ; %Flow 1417; GFX12-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s7 1418; GFX12-NEXT: s_cbranch_vccnz .LBB10_2 1419; GFX12-NEXT: ; %bb.10: ; %Flow 1420; GFX12-NEXT: s_getpc_b64 s[8:9] 1421; GFX12-NEXT: .Lpost_getpc13: 1422; GFX12-NEXT: s_wait_alu 0xfffe 1423; GFX12-NEXT: s_add_co_u32 s8, s8, (.LBB10_5-.Lpost_getpc13)&4294967295 1424; GFX12-NEXT: s_add_co_ci_u32 s9, s9, (.LBB10_5-.Lpost_getpc13)>>32 1425; GFX12-NEXT: s_wait_alu 0xfffe 1426; GFX12-NEXT: s_setpc_b64 s[8:9] 1427; GFX12-NEXT: .LBB10_2: ; %Flow5 1428; GFX12-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 1429; GFX12-NEXT: s_cbranch_vccz .LBB10_3 1430; GFX12-NEXT: ; %bb.12: ; %Flow5 1431; GFX12-NEXT: s_getpc_b64 s[0:1] 1432; GFX12-NEXT: .Lpost_getpc14: 1433; GFX12-NEXT: s_wait_alu 0xfffe 1434; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB10_6-.Lpost_getpc14)&4294967295 1435; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB10_6-.Lpost_getpc14)>>32 1436; GFX12-NEXT: s_wait_alu 0xfffe 1437; GFX12-NEXT: s_setpc_b64 s[0:1] 1438; GFX12-NEXT: .LBB10_3: ; %bb14 1439; GFX12-NEXT: s_cmp_lt_i32 s1, 9 1440; GFX12-NEXT: s_cselect_b32 s0, -1, 0 1441; GFX12-NEXT: s_cmp_lt_i32 s2, s3 1442; GFX12-NEXT: s_cselect_b32 s1, -1, 0 1443; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1444; GFX12-NEXT: s_or_b32 s0, s1, s0 1445; GFX12-NEXT: s_and_b32 s0, s6, s0 1446; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1447; GFX12-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1448; GFX12-NEXT: ; %bb.8: ; %bb14 1449; GFX12-NEXT: s_getpc_b64 s[0:1] 1450; GFX12-NEXT: .Lpost_getpc12: 1451; GFX12-NEXT: s_wait_alu 0xfffe 1452; GFX12-NEXT: s_add_co_u32 s0, s0, (.LBB10_7-.Lpost_getpc12)&4294967295 1453; GFX12-NEXT: s_add_co_ci_u32 s1, s1, (.LBB10_7-.Lpost_getpc12)>>32 1454; GFX12-NEXT: s_wait_alu 0xfffe 1455; GFX12-NEXT: s_setpc_b64 s[0:1] 1456; GFX12-NEXT: .LBB10_4: ; %bb13 1457; GFX12-NEXT: s_mov_b32 s0, s8 1458; GFX12-NEXT: ;;#ASMSTART 1459; GFX12-NEXT: v_nop_e64 1460; GFX12-NEXT: v_nop_e64 1461; GFX12-NEXT: v_nop_e64 1462; GFX12-NEXT: v_nop_e64 1463; GFX12-NEXT: ;;#ASMEND 1464; GFX12-NEXT: s_cbranch_execz .LBB10_5 1465; GFX12-NEXT: ; %bb.14: ; %bb13 1466; GFX12-NEXT: s_getpc_b64 s[8:9] 1467; GFX12-NEXT: .Lpost_getpc15: 1468; GFX12-NEXT: s_wait_alu 0xfffe 1469; GFX12-NEXT: s_add_co_u32 s8, s8, (.LBB10_2-.Lpost_getpc15)&4294967295 1470; GFX12-NEXT: s_add_co_ci_u32 s9, s9, (.LBB10_2-.Lpost_getpc15)>>32 1471; GFX12-NEXT: s_wait_alu 0xfffe 1472; GFX12-NEXT: s_setpc_b64 s[8:9] 1473; GFX12-NEXT: .LBB10_5: ; %bb9 1474; GFX12-NEXT: s_cmp_lt_i32 s3, 11 1475; GFX12-NEXT: s_cselect_b32 s0, -1, 0 1476; GFX12-NEXT: s_cmp_ge_i32 s2, s3 1477; GFX12-NEXT: s_cselect_b32 s7, -1, 0 1478; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1479; GFX12-NEXT: s_and_b32 s0, s7, s0 1480; GFX12-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0 1481; GFX12-NEXT: s_cbranch_vccnz .LBB10_6 1482; GFX12-NEXT: ; %bb.16: ; %bb9 1483; GFX12-NEXT: s_getpc_b64 s[8:9] 1484; GFX12-NEXT: .Lpost_getpc16: 1485; GFX12-NEXT: s_wait_alu 0xfffe 1486; GFX12-NEXT: s_add_co_u32 s8, s8, (.LBB10_3-.Lpost_getpc16)&4294967295 1487; GFX12-NEXT: s_add_co_ci_u32 s9, s9, (.LBB10_3-.Lpost_getpc16)>>32 1488; GFX12-NEXT: s_wait_alu 0xfffe 1489; GFX12-NEXT: s_setpc_b64 s[8:9] 1490; GFX12-NEXT: .LBB10_6: 1491; GFX12-NEXT: ; implicit-def: $vgpr0 1492; GFX12-NEXT: .LBB10_7: ; %bb19 1493; GFX12-NEXT: s_clause 0x1 1494; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x3c 1495; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 1496; GFX12-NEXT: v_mov_b32_e32 v1, 0 1497; GFX12-NEXT: s_wait_kmcnt 0x0 1498; GFX12-NEXT: s_wait_alu 0xfffe 1499; GFX12-NEXT: s_lshl_b64 s[0:1], s[0:1], 2 1500; GFX12-NEXT: s_wait_alu 0xfffe 1501; GFX12-NEXT: s_add_nc_u64 s[0:1], s[2:3], s[0:1] 1502; GFX12-NEXT: global_store_b32 v1, v0, s[0:1] 1503; GFX12-NEXT: s_endpgm 1504bb: 1505 %tmp = icmp slt i32 %arg2, 9 1506 %tmp6 = icmp eq i32 %arg1, 0 1507 %tmp8 = icmp sgt i32 %arg4, 5 1508 br i1 %tmp8, label %bb9, label %bb13 1509 1510bb9: ; preds = %bb 1511 %tmp7 = icmp sgt i32 %arg4, 10 ; avoid being optimized away through the domination 1512 %tmp11 = icmp slt i32 %arg3, %arg4 1513 %tmp12 = or i1 %tmp11, %tmp7 1514 br i1 %tmp12, label %bb19, label %bb14 1515 1516bb13: ; preds = %bb 1517 call void asm sideeffect 1518 "v_nop_e64 1519 v_nop_e64 1520 v_nop_e64 1521 v_nop_e64", ""() #0 1522 br i1 %tmp6, label %bb19, label %bb14 1523 1524bb14: ; preds = %bb13, %bb9 1525 %tmp15 = icmp slt i32 %arg3, %arg4 1526 %tmp16 = or i1 %tmp15, %tmp 1527 %tmp17 = and i1 %tmp6, %tmp16 1528 %tmp18 = zext i1 %tmp17 to i32 1529 br label %bb19 1530 1531bb19: ; preds = %bb14, %bb13, %bb9 1532 %tmp20 = phi i32 [ undef, %bb9 ], [ undef, %bb13 ], [ %tmp18, %bb14 ] 1533 %tmp21 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %arg5 1534 store i32 %tmp20, ptr addrspace(1) %tmp21, align 4 1535 ret void 1536} 1537 1538attributes #0 = { nounwind } 1539attributes #1 = { nounwind readnone } 1540