; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s

; Each function below performs a dynamically sized alloca of i32 elements in
; addrspace(5) and stores 0 through the resulting pointer. The cases differ in
; the requested alloca alignment (4, 16, 32) and in whether the element count
; arrives as a kernel argument or is loaded from the external constant @gv.
; The check lines are autogenerated; do not edit them by hand.

@gv = external addrspace(4) constant i32

; Kernel entry point; element count %n is a kernel argument; alloca align 4.
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) {
; GFX9-LABEL: kernel_dynamic_stackalloc_sgpr_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s5, s[8:9], 0x0
; GFX9-NEXT:    s_add_u32 s0, s0, s17
; GFX9-NEXT:    s_movk_i32 s32, 0x400
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    s_mov_b32 s4, s32
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_lshl2_add_u32 s5, s5, 15
; GFX9-NEXT:    s_and_b32 s5, s5, -16
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    s_lshl_b32 s5, s5, 6
; GFX9-NEXT:    s_mov_b32 s33, 0
; GFX9-NEXT:    s_add_u32 s32, s4, s5
; GFX9-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: kernel_dynamic_stackalloc_sgpr_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s5, s[8:9], 0x0
; GFX10-NEXT:    s_movk_i32 s32, 0x200
; GFX10-NEXT:    s_add_u32 s0, s0, s17
; GFX10-NEXT:    s_mov_b32 s4, s32
; GFX10-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-NEXT:    s_mov_b32 s33, 0
; GFX10-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshl2_add_u32 s5, s5, 15
; GFX10-NEXT:    s_and_b32 s5, s5, -16
; GFX10-NEXT:    s_lshl_b32 s5, s5, 5
; GFX10-NEXT:    s_add_u32 s32, s4, s5
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s1, s[4:5], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_mov_b32 s32, 16
; GFX11-NEXT:    s_mov_b32 s33, 0
; GFX11-NEXT:    s_mov_b32 s0, s32
; GFX11-NEXT:    scratch_store_b32 off, v0, s0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_lshl2_add_u32 s1, s1, 15
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s1, s1, -16
; GFX11-NEXT:    s_lshl_b32 s1, s1, 5
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_u32 s32, s0, s1
; GFX11-NEXT:    s_endpgm
  %alloca = alloca i32, i32 %n, align 4, addrspace(5)
  store i32 0, ptr addrspace(5) %alloca
  ret void
}

; Callable function; element count is loaded from @gv. The alloca carries no
; explicit alignment (presumably i32's default of 4 applies - confirm against
; the target data layout).
define void @func_dynamic_stackalloc_sgpr_align4() {
; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s7, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s6, s32
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s6
; GFX9-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX9-NEXT:    s_and_b32 s4, s4, -16
; GFX9-NEXT:    s_lshl_b32 s4, s4, 6
; GFX9-NEXT:    s_add_u32 s32, s6, s4
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    s_mov_b32 s33, s7
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s7, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX10-NEXT:    s_mov_b32 s6, s32
; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, s6
; GFX10-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX10-NEXT:    s_and_b32 s4, s4, -16
; GFX10-NEXT:    s_lshl_b32 s4, s4, 5
; GFX10-NEXT:    s_add_u32 s32, s6, s4
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    s_mov_b32 s33, s7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, gv@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s2, s32
; GFX11-NEXT:    scratch_store_b32 off, v0, s2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_lshl2_add_u32 s0, s0, 15
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s0, s0, -16
; GFX11-NEXT:    s_lshl_b32 s0, s0, 5
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_u32 s32, s2, s0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_mov_b32 s33, s3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %n = load i32, ptr addrspace(4) @gv, align 4
  %alloca = alloca i32, i32 %n, addrspace(5)
  store i32 0, ptr addrspace(5) %alloca
  ret void
}

; Kernel entry point; element count %n is a kernel argument; alloca align 16.
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) {
; GFX9-LABEL: kernel_dynamic_stackalloc_sgpr_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s5, s[8:9], 0x0
; GFX9-NEXT:    s_add_u32 s0, s0, s17
; GFX9-NEXT:    s_movk_i32 s32, 0x400
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    s_mov_b32 s4, s32
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_lshl2_add_u32 s5, s5, 15
; GFX9-NEXT:    s_and_b32 s5, s5, -16
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s4
; GFX9-NEXT:    s_lshl_b32 s5, s5, 6
; GFX9-NEXT:    s_mov_b32 s33, 0
; GFX9-NEXT:    s_add_u32 s32, s4, s5
; GFX9-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: kernel_dynamic_stackalloc_sgpr_align16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s5, s[8:9], 0x0
; GFX10-NEXT:    s_movk_i32 s32, 0x200
; GFX10-NEXT:    s_add_u32 s0, s0, s17
; GFX10-NEXT:    s_mov_b32 s4, s32
; GFX10-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, s4
; GFX10-NEXT:    s_mov_b32 s33, 0
; GFX10-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshl2_add_u32 s5, s5, 15
; GFX10-NEXT:    s_and_b32 s5, s5, -16
; GFX10-NEXT:    s_lshl_b32 s5, s5, 5
; GFX10-NEXT:    s_add_u32 s32, s4, s5
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s1, s[4:5], 0x0
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_mov_b32 s32, 16
; GFX11-NEXT:    s_mov_b32 s33, 0
; GFX11-NEXT:    s_mov_b32 s0, s32
; GFX11-NEXT:    scratch_store_b32 off, v0, s0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_lshl2_add_u32 s1, s1, 15
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s1, s1, -16
; GFX11-NEXT:    s_lshl_b32 s1, s1, 5
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_u32 s32, s0, s1
; GFX11-NEXT:    s_endpgm
  %alloca = alloca i32, i32 %n, align 16, addrspace(5)
  store i32 0, ptr addrspace(5) %alloca
  ret void
}

; Callable function; element count is loaded from @gv; alloca align 16.
; The alloca itself carries "align 16" so that this case requests the
; alignment its name advertises, matching the kernel variant of this test.
; NOTE(review): the alloca alignment is expected to leave the generated checks
; unchanged (the kernel align4 and align16 checks in this file are identical);
; re-run utils/update_llc_test_checks.py to confirm.
define void @func_dynamic_stackalloc_sgpr_align16() {
; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s7, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    s_mov_b32 s6, s32
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s6
; GFX9-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX9-NEXT:    s_and_b32 s4, s4, -16
; GFX9-NEXT:    s_lshl_b32 s4, s4, 6
; GFX9-NEXT:    s_add_u32 s32, s6, s4
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    s_mov_b32 s33, s7
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s7, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x200
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX10-NEXT:    s_mov_b32 s6, s32
; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, s6
; GFX10-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX10-NEXT:    s_and_b32 s4, s4, -16
; GFX10-NEXT:    s_lshl_b32 s4, s4, 5
; GFX10-NEXT:    s_add_u32 s32, s6, s4
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    s_mov_b32 s33, s7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, gv@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s2, s32
; GFX11-NEXT:    scratch_store_b32 off, v0, s2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_lshl2_add_u32 s0, s0, 15
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s0, s0, -16
; GFX11-NEXT:    s_lshl_b32 s0, s0, 5
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_u32 s32, s2, s0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_mov_b32 s33, s3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %n = load i32, ptr addrspace(4) @gv, align 16
  %alloca = alloca i32, i32 %n, align 16, addrspace(5)
  store i32 0, ptr addrspace(5) %alloca
  ret void
}

; Kernel entry point; element count %n is a kernel argument; alloca align 32.
; Unlike the align 4/16 cases, the checked output rounds the stack pointer up
; with an add/and pair before using it as the alloca address.
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) {
; GFX9-LABEL: kernel_dynamic_stackalloc_sgpr_align32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX9-NEXT:    s_movk_i32 s32, 0x800
; GFX9-NEXT:    s_add_u32 s0, s0, s17
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    s_add_u32 s5, s32, 0x7ff
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX9-NEXT:    s_and_b32 s5, s5, 0xfffff800
; GFX9-NEXT:    s_and_b32 s4, s4, -16
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    s_lshl_b32 s4, s4, 6
; GFX9-NEXT:    s_mov_b32 s33, 0
; GFX9-NEXT:    s_add_u32 s32, s5, s4
; GFX9-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: kernel_dynamic_stackalloc_sgpr_align32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dword s4, s[8:9], 0x0
; GFX10-NEXT:    s_movk_i32 s32, 0x400
; GFX10-NEXT:    s_add_u32 s0, s0, s17
; GFX10-NEXT:    s_addc_u32 s1, s1, 0
; GFX10-NEXT:    s_add_u32 s5, s32, 0x3ff
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_and_b32 s5, s5, 0xfffffc00
; GFX10-NEXT:    s_mov_b32 s33, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX10-NEXT:    s_and_b32 s4, s4, -16
; GFX10-NEXT:    s_lshl_b32 s4, s4, 5
; GFX10-NEXT:    s_add_u32 s32, s5, s4
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: kernel_dynamic_stackalloc_sgpr_align32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x0
; GFX11-NEXT:    s_mov_b32 s32, 32
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_add_u32 s1, s32, 0x3ff
; GFX11-NEXT:    s_mov_b32 s33, 0
; GFX11-NEXT:    s_and_b32 s1, s1, 0xfffffc00
; GFX11-NEXT:    scratch_store_b32 off, v0, s1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_lshl2_add_u32 s0, s0, 15
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s0, s0, -16
; GFX11-NEXT:    s_lshl_b32 s0, s0, 5
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_add_u32 s32, s1, s0
; GFX11-NEXT:    s_endpgm
  %alloca = alloca i32, i32 %n, align 32, addrspace(5)
  store i32 0, ptr addrspace(5) %alloca
  ret void
}

; Callable function; element count is loaded from @gv; alloca align 32.
; The %out parameter is not referenced by the body.
define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) {
; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s6, s33
; GFX9-NEXT:    s_add_i32 s33, s32, 0x7c0
; GFX9-NEXT:    s_and_b32 s33, s33, 0xfffff800
; GFX9-NEXT:    s_mov_b32 s7, s34
; GFX9-NEXT:    s_mov_b32 s34, s32
; GFX9-NEXT:    s_addk_i32 s32, 0x1000
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    s_mov_b32 s33, s6
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX9-NEXT:    s_add_u32 s5, s32, 0x7ff
; GFX9-NEXT:    s_and_b32 s5, s5, 0xfffff800
; GFX9-NEXT:    v_mov_b32_e32 v1, s5
; GFX9-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX9-NEXT:    s_and_b32 s4, s4, -16
; GFX9-NEXT:    s_lshl_b32 s4, s4, 6
; GFX9-NEXT:    s_add_u32 s32, s5, s4
; GFX9-NEXT:    s_mov_b32 s32, s34
; GFX9-NEXT:    s_mov_b32 s34, s7
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_dynamic_stackalloc_sgpr_align32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s6, s33
; GFX10-NEXT:    s_add_i32 s33, s32, 0x3e0
; GFX10-NEXT:    s_mov_b32 s7, s34
; GFX10-NEXT:    s_and_b32 s33, s33, 0xfffffc00
; GFX10-NEXT:    s_mov_b32 s34, s32
; GFX10-NEXT:    s_addk_i32 s32, 0x800
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, gv@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, gv@gotpcrel32@hi+12
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT:    s_mov_b32 s33, s6
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_load_dword s4, s[4:5], 0x0
; GFX10-NEXT:    s_add_u32 s5, s32, 0x3ff
; GFX10-NEXT:    s_and_b32 s5, s5, 0xfffffc00
; GFX10-NEXT:    v_mov_b32_e32 v1, s5
; GFX10-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_lshl2_add_u32 s4, s4, 15
; GFX10-NEXT:    s_and_b32 s4, s4, -16
; GFX10-NEXT:    s_lshl_b32 s4, s4, 5
; GFX10-NEXT:    s_add_u32 s32, s5, s4
; GFX10-NEXT:    s_mov_b32 s32, s34
; GFX10-NEXT:    s_mov_b32 s34, s7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_dynamic_stackalloc_sgpr_align32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_add_i32 s33, s32, 31
; GFX11-NEXT:    s_mov_b32 s3, s34
; GFX11-NEXT:    s_and_not1_b32 s33, s33, 31
; GFX11-NEXT:    s_mov_b32 s34, s32
; GFX11-NEXT:    s_add_i32 s32, s32, 64
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, gv@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, gv@gotpcrel32@hi+12
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_load_b32 s0, s[0:1], 0x0
; GFX11-NEXT:    s_add_u32 s1, s32, 0x3ff
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s1, s1, 0xfffffc00
; GFX11-NEXT:    scratch_store_b32 off, v0, s1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_lshl2_add_u32 s0, s0, 15
; GFX11-NEXT:    s_and_b32 s0, s0, -16
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_lshl_b32 s0, s0, 5
; GFX11-NEXT:    s_add_u32 s32, s1, s0
; GFX11-NEXT:    s_mov_b32 s32, s34
; GFX11-NEXT:    s_mov_b32 s34, s3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %n = load i32, ptr addrspace(4) @gv
  %alloca = alloca i32, i32 %n, align 32, addrspace(5)
  store i32 0, ptr addrspace(5) %alloca
  ret void
}