; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s

; Codegen test for the AMDGPU barrier intrinsics on gfx1200. The two llc
; invocations above compile this file once with -global-isel=0 and once with
; -global-isel=1; each has its own check prefix, so every function below
; carries two independent blocks of expected assembly.
;
; The expected-assembly comment blocks are produced by the update script named
; on the first line. Regenerate them with that script instead of editing them
; by hand.

; Three named-barrier objects in address space 3. They are only ever passed by
; address to the barrier intrinsics; their contents are poison.
@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison

; Non-kernel function: signal.var on @bar3 with i32 7, then join @bar3, then
; wait with i16 1.
; The expected output loads 0x70003 into m0 for the signal: the i32 7 argument
; sits above bit 16 and the low bits hold 3, the same value the join uses.
; NOTE(review): 3 is presumably the barrier ID the backend assigns to @bar3;
; confirm against the lowering pass.
; The two selectors differ on the join: one expectation moves 3 into m0 first,
; the other uses the immediate form.
define void @func1() {
; GFX12-SDAG-LABEL: func1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70003
; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 3
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: func1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70003
; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_barrier_join 3
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)
  ret void
}

; Same signal/join/wait sequence as @func1, but on @bar2. The expected m0
; value for the signal is 0x70001 and the join operand is 1.
define void @func2() {
; GFX12-SDAG-LABEL: func2:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70001
; GFX12-SDAG-NEXT:    s_wait_storecnt 0x0
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 1
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: func2:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    s_wait_expcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT:    s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70001
; GFX12-GISEL-NEXT:    s_wait_storecnt 0x0
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_barrier_join 1
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_setpc_b64 s[30:31]
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)
  ret void
}

; Kernel that calls every barrier intrinsic declared in this file. It uses
; both a constant barrier address (@bar) and a barrier pointer supplied as the
; kernel argument %in, then calls @func1 and @func2.
;
; Points visible in the expected output:
;  * For @bar the m0 operand is the constant 0xc0002 (init/signal with i32 12).
;  * For %in the operand is computed at run time: the loaded argument is
;    shifted right by 4, masked with 63, and OR-ed with 0x90000 (i32 9).
;  * Both callees are reached through a gotpcrel32 address load followed by
;    s_swappc_b64.
; %out and the %isfirst/%state* results are not used by the IR.
define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
; GFX12-SDAG-LABEL: kernel1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-SDAG-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-SDAG-NEXT:    s_barrier_init m0
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 48
; GFX12-SDAG-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-SDAG-NEXT:    s_mov_b32 s32, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_lshr_b32 s2, s2, 4
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_and_b32 s2, s2, 63
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_or_b32 s3, 0x90000, s2
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s3
; GFX12-SDAG-NEXT:    s_barrier_init m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s3
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s2
; GFX12-SDAG-NEXT:    s_barrier_signal -1
; GFX12-SDAG-NEXT:    s_barrier_signal_isfirst -1
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 2
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_barrier_leave
; GFX12-SDAG-NEXT:    s_get_barrier_state s3, m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s2
; GFX12-SDAG-NEXT:    s_get_barrier_state s2, m0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_getpc_b64 s[2:3]
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_sext_i32_i16 s3, s3
; GFX12-SDAG-NEXT:    s_add_co_u32 s2, s2, func1@gotpcrel32@lo+12
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s3, s3, func1@gotpcrel32@hi+24
; GFX12-SDAG-NEXT:    s_barrier_signal -1
; GFX12-SDAG-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-SDAG-NEXT:    s_barrier_wait -1
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX12-SDAG-NEXT:    s_getpc_b64 s[2:3]
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_sext_i32_i16 s3, s3
; GFX12-SDAG-NEXT:    s_add_co_u32 s2, s2, func2@gotpcrel32@lo+12
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s3, s3, func2@gotpcrel32@hi+24
; GFX12-SDAG-NEXT:    s_load_b64 s[2:3], s[2:3], 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX12-SDAG-NEXT:    s_get_barrier_state s0, -1
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: kernel1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_mov_b64 s[12:13], s[4:5]
; GFX12-GISEL-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-GISEL-NEXT:    s_load_b32 s0, s[12:13], 0x2c
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-GISEL-NEXT:    s_barrier_init m0
; GFX12-GISEL-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-GISEL-NEXT:    s_mov_b32 s32, 0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_lshr_b32 s0, s0, 4
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_and_b32 s0, s0, 63
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_or_b32 s1, s0, 0x90000
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_mov_b32 m0, s1
; GFX12-GISEL-NEXT:    s_barrier_init m0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0xc0002
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_mov_b32 m0, s1
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_barrier_signal -1
; GFX12-GISEL-NEXT:    s_barrier_signal_isfirst -1
; GFX12-GISEL-NEXT:    s_mov_b32 m0, s0
; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT:    s_barrier_join m0
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_barrier_leave
; GFX12-GISEL-NEXT:    s_get_barrier_state s0, 2
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_get_barrier_state s0, m0
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s13, 0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_getpc_b64 s[0:1]
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_sext_i32_i16 s1, s1
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, func1@gotpcrel32@lo+12
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, func1@gotpcrel32@hi+24
; GFX12-GISEL-NEXT:    s_barrier_signal -1
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-GISEL-NEXT:    s_barrier_wait -1
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s12, 48
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s13, 0
; GFX12-GISEL-NEXT:    s_getpc_b64 s[0:1]
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_sext_i32_i16 s1, s1
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, func2@gotpcrel32@lo+12
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, func2@gotpcrel32@hi+24
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX12-GISEL-NEXT:    s_get_barrier_state s0, -1
; GFX12-GISEL-NEXT:    s_endpgm
  ; Initialise and signal both the constant barrier and the one passed in %in.
  call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) @bar, i32 12)
  call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %in, i32 9)
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 12)
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %in, i32 9)
  ; Immediate-operand forms using barrier ID -1.
  call void @llvm.amdgcn.s.barrier.signal(i32 -1)
  %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)
  call void @llvm.amdgcn.s.barrier.leave(i16 1)
  ; State queries for a constant and a dynamic named barrier.
  %state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar)
  %state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in)
  ; NOTE(review): this call appears to correspond to the
  ; "s_barrier_signal -1" / "s_barrier_wait -1" pair in the expected output.
  call void @llvm.amdgcn.s.barrier()
  call void @func1()
  call void @func2()
  %state3 = call i32 @llvm.amdgcn.s.get.barrier.state(i32 -1)
  ret void
}

; Smaller kernel: signal/join/wait on @bar (i32 7, expected m0 value 0x70002),
; followed by a single call to @func2. %out and %in are not used by the IR.
define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
; GFX12-SDAG-LABEL: kernel2:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-SDAG-NEXT:    s_getpc_b64 s[6:7]
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_sext_i32_i16 s7, s7
; GFX12-SDAG-NEXT:    s_add_co_u32 s6, s6, func2@gotpcrel32@lo+12
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_add_co_ci_u32 s7, s7, func2@gotpcrel32@hi+24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-SDAG-NEXT:    s_load_b64 s[12:13], s[6:7], 0x0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 0x70002
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[8:9], s[4:5], 48
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    s_barrier_signal m0
; GFX12-SDAG-NEXT:    s_mov_b32 m0, 2
; GFX12-SDAG-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-SDAG-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-SDAG-NEXT:    s_mov_b32 s32, 0
; GFX12-SDAG-NEXT:    s_barrier_join m0
; GFX12-SDAG-NEXT:    s_barrier_wait 1
; GFX12-SDAG-NEXT:    s_wait_alu 0xfffe
; GFX12-SDAG-NEXT:    s_swappc_b64 s[30:31], s[12:13]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: kernel2:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_add_co_u32 s8, s4, 48
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s9, s5, 0
; GFX12-GISEL-NEXT:    s_getpc_b64 s[4:5]
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_sext_i32_i16 s5, s5
; GFX12-GISEL-NEXT:    s_add_co_u32 s4, s4, func2@gotpcrel32@lo+12
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s5, s5, func2@gotpcrel32@hi+24
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v31, v0
; GFX12-GISEL-NEXT:    s_load_b64 s[12:13], s[4:5], 0x0
; GFX12-GISEL-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX12-GISEL-NEXT:    s_mov_b32 m0, 0x70002
; GFX12-GISEL-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX12-GISEL-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX12-GISEL-NEXT:    s_mov_b32 s32, 0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_barrier_signal m0
; GFX12-GISEL-NEXT:    s_barrier_join 2
; GFX12-GISEL-NEXT:    s_barrier_wait 1
; GFX12-GISEL-NEXT:    s_wait_alu 0xfffe
; GFX12-GISEL-NEXT:    s_swappc_b64 s[30:31], s[12:13]
; GFX12-GISEL-NEXT:    s_endpgm
  call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 7)
  call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar)
  call void @llvm.amdgcn.s.barrier.wait(i16 1)

  call void @func2()
  ret void
}

; Barrier intrinsics exercised above. All are declared with attribute group #1
; (convergent nounwind).
declare void @llvm.amdgcn.s.barrier() #1
declare void @llvm.amdgcn.s.barrier.wait(i16) #1
declare void @llvm.amdgcn.s.barrier.signal(i32) #1
declare void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3), i32) #1
declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
declare void @llvm.amdgcn.s.barrier.leave(i16) #1
declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1

; #0 is used by the two kernels and #1 by the intrinsic declarations.
; NOTE(review): #2 is not referenced by anything in this file as shown.
attributes #0 = { nounwind }
attributes #1 = { convergent nounwind }
attributes #2 = { nounwind readnone }