; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=CHECK
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefix=CHECK

; Check that struct.atomic.buffer.load is treated as atomic: the load must
; stay inside the loop and be re-executed on every iteration, emitted with glc
; (or with dlc when the cachepolicy operand requests it).

define amdgpu_kernel void @struct_atomic_buffer_load_i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB0_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB0_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_i32_const_idx(<4 x i32> %addr) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_const_idx:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:  .LBB1_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB1_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 15, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_i32_off(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_off:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB2_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB2_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_i32_soff(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_soff:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB3_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 4 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB3_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 4, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_i32_dlc(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i32_dlc:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB4_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen offset:4 dlc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB4_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 4)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

; The plain (non-atomic) struct.buffer.load is loop-invariant, so it is
; hoisted out of the loop and executed only once.
define amdgpu_kernel void @struct_nonatomic_buffer_load_i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_nonatomic_buffer_load_i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_dual_mov_b32 v1, s6 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    buffer_load_b32 v1, v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_mov_b32 s0, 0
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v1, v0
; CHECK-NEXT:  .LBB5_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_and_b32 s1, exec_lo, vcc_lo
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; CHECK-NEXT:    s_or_b32 s0, s1, s0
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
; CHECK-NEXT:    s_cbranch_execnz .LBB5_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %cmp = icmp eq i32 %load, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_i64(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_i64:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v2, s6
; CHECK-NEXT:  .LBB6_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[3:4], v2, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, v[3:4], v[0:1]
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB6_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %id.zext = zext i32 %id to i64
  br label %bb1
bb1:
  %load = call i64 @llvm.amdgcn.struct.atomic.buffer.load.i64(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %cmp = icmp eq i64 %load, %id.zext
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_v2i16(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v2i16:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB7_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b32 v2, v1, s[0:3], 0 idxen glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB7_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <2 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v2i16(<4 x i32> %addr, i32 %index, i32 0, i32 0, i32 1)
  %bitcast = bitcast <2 x i16> %load to i32
  %cmp = icmp eq i32 %bitcast, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_v4i16(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v4i16:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB8_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[2:3], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; CHECK-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB8_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <4 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v4i16(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %shortened = shufflevector <4 x i16> %load, <4 x i16> poison, <2 x i32> <i32 0, i32 2>
  %bitcast = bitcast <2 x i16> %shortened to i32
  %cmp = icmp eq i32 %bitcast, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_v4i32(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_v4i32:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB9_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b128 v[2:5], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v5, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB9_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call <4 x i32> @llvm.amdgcn.struct.atomic.buffer.load.v4i32(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %extracted = extractelement <4 x i32> %load, i32 3
  %cmp = icmp eq i32 %extracted, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

define amdgpu_kernel void @struct_atomic_buffer_load_ptr(<4 x i32> %addr, i32 %index) {
; CHECK-LABEL: struct_atomic_buffer_load_ptr:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_clause 0x1
; CHECK-NEXT:    s_load_b32 s6, s[4:5], 0x34
; CHECK-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; CHECK-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s6
; CHECK-NEXT:  .LBB10_1: ; %bb1
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    buffer_load_b64 v[2:3], v1, s[0:3], 0 idxen offset:4 glc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    flat_load_b32 v2, v[2:3]
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v0
; CHECK-NEXT:    s_or_b32 s4, vcc_lo, s4
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
; CHECK-NEXT:    s_cbranch_execnz .LBB10_1
; CHECK-NEXT:  ; %bb.2: ; %bb2
; CHECK-NEXT:    s_endpgm
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  br label %bb1
bb1:
  %load = call ptr @llvm.amdgcn.struct.atomic.buffer.load.ptr(<4 x i32> %addr, i32 %index, i32 4, i32 0, i32 1)
  %elem = load i32, ptr %load
  %cmp = icmp eq i32 %elem, %id
  br i1 %cmp, label %bb1, label %bb2
bb2:
  ret void
}

declare i32 @llvm.amdgcn.struct.atomic.buffer.load.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i64 @llvm.amdgcn.struct.atomic.buffer.load.i64(<4 x i32>, i32, i32, i32, i32 immarg)
declare <2 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v2i16(<4 x i32>, i32, i32, i32, i32 immarg)
declare <4 x i16> @llvm.amdgcn.struct.atomic.buffer.load.v4i16(<4 x i32>, i32, i32, i32, i32 immarg)
declare <4 x i32> @llvm.amdgcn.struct.atomic.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare ptr @llvm.amdgcn.struct.atomic.buffer.load.ptr(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.workitem.id.x()