; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s

; LDS is allocated per-kernel. Module scope variables are gathered into a struct which is
; allocated at address zero, if used by the kernel. Kernel scope variables are gathered into
; a per-kernel struct and allocated immediately after the module scope.
; This test checks that the module and kernel scope variables are allocated in a deterministic
; order, without spurious alignment padding between the two.

; External LDS is checked because it influences LDS padding in general and because it will
; not be moved into either the module or the kernel struct.

; 2-byte LDS variable with default alignment; written by @use_module and by the module_1_* kernels.
@module_variable = addrspace(3) global i16 undef

; Variables are allocated into the module scope block when used by a non-kernel function.
; @use_module is that non-kernel user: it stores 0 to @module_variable.
define void @use_module() #0 {
; CHECK-LABEL: use_module:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    ds_write_b16 v0, v0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  store i16 0, ptr addrspace(3) @module_variable
  ret void
}

; Variables only used by kernels are specialised and allocated per-kernel.
; Both are i16; @kernel_overalign additionally requests 4-byte alignment.
@kernel_normal = addrspace(3) global i16 undef
@kernel_overalign = addrspace(3) global i16 undef, align 4

; External LDS shall not introduce padding between module and kernel scope variables.
; Both are zero-length float arrays; @extern_overalign additionally requests 8-byte alignment.
@extern_normal = external addrspace(3) global [0 x float]
@extern_overalign = external addrspace(3) global [0 x float], align 8


; External LDS does not influence the frame when called indirectly either.
; Non-kernel function that stores a float constant to element 0 of @extern_normal.
; The expected code loads the base address from llvm.amdgcn.dynlds.offset.table, indexed by s15.
define void @use_extern_normal() #0 {
; CHECK-LABEL: use_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_getpc_b64 s[6:7]
; CHECK-NEXT:    s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
; CHECK-NEXT:    s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
; CHECK-NEXT:    s_mov_b32 s4, s15
; CHECK-NEXT:    s_ashr_i32 s5, s15, 31
; CHECK-NEXT:    v_mov_b32_e32 v0, 0x4048f5c3
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
; CHECK-NEXT:    s_add_u32 s4, s4, s6
; CHECK-NEXT:    s_addc_u32 s5, s5, s7
; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s4
; CHECK-NEXT:    ds_write_b32 v1, v0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 0
  store float 0x40091EB860000000, ptr addrspace(3) %arrayidx
  ret void
}

; Non-kernel function that stores 42.0 to element 1 of @extern_overalign
; (hence the 4-byte offset on the expected ds_write_b32).
define void @use_extern_overalign() #0 {
; CHECK-LABEL: use_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_getpc_b64 s[6:7]
; CHECK-NEXT:    s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
; CHECK-NEXT:    s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
; CHECK-NEXT:    s_mov_b32 s4, s15
; CHECK-NEXT:    s_ashr_i32 s5, s15, 31
; CHECK-NEXT:    v_mov_b32_e32 v0, 0x42280000
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
; CHECK-NEXT:    s_add_u32 s4, s4, s6
; CHECK-NEXT:    s_addc_u32 s5, s5, s7
; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    v_mov_b32_e32 v1, s4
; CHECK-NEXT:    ds_write_b32 v1, v0 offset:4
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 1
  store float 4.200000e+01, ptr addrspace(3) %arrayidx
  ret void
}


; First 2^3 of 2^4 cases encoded into function names
; no use of extern variable from nested function
; module_variable used/not-used
; kernel variable normal/overaligned
; extern variable normal/overaligned
;
; Each kernel in this group stores 2 to its kernel variable and 0.0 to element %idx of its
; extern variable; the module_1_* kernels additionally call @use_module and store 1 to
; @module_variable.

; No module variable; @kernel_normal; @extern_normal indexed directly.
define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v0, 2
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
; CHECK-NEXT:    s_add_i32 s0, s0, 4
; CHECK-NEXT:    v_mov_b32_e32 v2, s0
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b32 v2, v1
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}

; Module variable used; @kernel_normal; @extern_normal indexed directly.
define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s12, s8, 8
; CHECK-NEXT:    s_addc_u32 s13, s9, 0
; CHECK-NEXT:    s_getpc_b64 s[18:19]
; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_add_i32 s4, s4, 4
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    v_mov_b32_e32 v3, s4
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
; CHECK-NEXT:    ds_write_b32 v3, v1
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}

; No module variable; @kernel_overalign; @extern_normal indexed directly.
define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v0, 2
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
; CHECK-NEXT:    s_add_i32 s0, s0, 4
; CHECK-NEXT:    v_mov_b32_e32 v2, s0
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b32 v2, v1
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}

; Module variable used; @kernel_overalign; @extern_normal indexed directly.
define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s12, s8, 8
; CHECK-NEXT:    s_addc_u32 s13, s9, 0
; CHECK-NEXT:    s_getpc_b64 s[18:19]
; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_add_i32 s4, s4, 8
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    v_mov_b32_e32 v3, s4
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
; CHECK-NEXT:    ds_write_b32 v3, v1
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}

; No module variable; @kernel_normal; @extern_overalign indexed directly.
define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v0, 2
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
; CHECK-NEXT:    s_add_i32 s0, s0, 8
; CHECK-NEXT:    v_mov_b32_e32 v2, s0
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b32 v2, v1
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}

; Module variable used; @kernel_normal; @extern_overalign indexed directly.
define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s12, s8, 8
; CHECK-NEXT:    s_addc_u32 s13, s9, 0
; CHECK-NEXT:    s_getpc_b64 s[18:19]
; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_add_i32 s4, s4, 8
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    v_mov_b32_e32 v3, s4
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
; CHECK-NEXT:    ds_write_b32 v3, v1
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}

; No module variable; @kernel_overalign; @extern_overalign indexed directly.
define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v0, 2
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
; CHECK-NEXT:    s_add_i32 s0, s0, 8
; CHECK-NEXT:    v_mov_b32_e32 v2, s0
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b32 v2, v1
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}

; Module variable used; @kernel_overalign; @extern_overalign indexed directly.
define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s12, s8, 8
; CHECK-NEXT:    s_addc_u32 s13, s9, 0
; CHECK-NEXT:    s_getpc_b64 s[18:19]
; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    s_add_i32 s4, s4, 8
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    v_mov_b32_e32 v3, s4
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
; CHECK-NEXT:    ds_write_b32 v3, v1
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
  store float 0.0, ptr addrspace(3) %arrayidx1
  ret void
}


; Second 2^3 of 2^4 cases encoded into function names
; with extern variable from nested function
; module_variable used/not-used
; kernel variable normal/overaligned
; extern variable normal/overaligned
;
; Each kernel in this group stores 2 to its kernel variable and reaches the extern variable
; only through a call to @use_extern_normal or @use_extern_overalign; %idx is unused.
; NOTE(review): the expected code moves a distinct constant (0-7) into s15 before each call,
; and the callees use s15 to index llvm.amdgcn.dynlds.offset.table; presumably this is a
; per-kernel id assigned by the LDS lowering - confirm against the pass.

; No module variable; @kernel_normal; extern reached via @use_extern_normal.
define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_indirect_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    v_mov_b32_e32 v3, 2
; CHECK-NEXT:    v_mov_b32_e32 v4, 0
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 0
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    ds_write_b16 v4, v3
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_normal()
  ret void
}

; Module variable used; @kernel_normal; extern reached via @use_extern_normal.
define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_indirect_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 4
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 4
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_normal()
  ret void
}

; No module variable; @kernel_overalign; extern reached via @use_extern_normal.
define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    v_mov_b32_e32 v3, 2
; CHECK-NEXT:    v_mov_b32_e32 v4, 0
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 2
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    ds_write_b16 v4, v3
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_normal()
  ret void
}

; Module variable used; @kernel_overalign; extern reached via @use_extern_normal.
define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_normal:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 6
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 6
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_normal()
  ret void
}

; No module variable; @kernel_normal; extern reached via @use_extern_overalign.
define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_normal_indirect_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    v_mov_b32_e32 v3, 2
; CHECK-NEXT:    v_mov_b32_e32 v4, 0
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 1
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    ds_write_b16 v4, v3
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_overalign()
  ret void
}

; Module variable used; @kernel_normal; extern reached via @use_extern_overalign.
define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_normal_indirect_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 5
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 5
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_normal

  call void @use_extern_overalign()
  ret void
}

; No module variable; @kernel_overalign; extern reached via @use_extern_overalign.
define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    v_mov_b32_e32 v3, 2
; CHECK-NEXT:    v_mov_b32_e32 v4, 0
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 3
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    ds_write_b16 v4, v3
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_overalign()
  ret void
}

; Module variable used; @kernel_overalign; extern reached via @use_extern_overalign.
define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i32 %idx) {
; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_overalign:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_add_u32 s12, s12, s17
; CHECK-NEXT:    s_mov_b32 s32, 0
; CHECK-NEXT:    s_addc_u32 s13, s13, 0
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
; CHECK-NEXT:    s_add_u32 s0, s0, s17
; CHECK-NEXT:    s_addc_u32 s1, s1, 0
; CHECK-NEXT:    s_add_u32 s8, s8, 8
; CHECK-NEXT:    s_addc_u32 s9, s9, 0
; CHECK-NEXT:    s_mov_b32 s13, s15
; CHECK-NEXT:    s_mov_b32 s12, s14
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 7
; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_getpc_b64 s[14:15]
; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
; CHECK-NEXT:    v_mov_b32_e32 v0, 1
; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    v_mov_b32_e32 v2, 2
; CHECK-NEXT:    s_mov_b32 s14, s16
; CHECK-NEXT:    s_mov_b32 s15, 7
; CHECK-NEXT:    ds_write_b16 v1, v0
; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; CHECK-NEXT:    s_endpgm
  call void @use_module()
  store i16 1, ptr addrspace(3) @module_variable

  store i16 2, ptr addrspace(3) @kernel_overalign

  call void @use_extern_overalign()
  ret void
}


; noinline is applied to the three non-kernel helper functions above.
attributes #0 = { noinline }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}