; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-GISEL %s

; Dynamic (runtime-sized) allocas inside amdgpu_kernel functions.
; Every test below allocates in addrspace(5) and performs a single volatile
; store to the start of the allocation. The llc invocations above cover both
; instruction selectors (-global-isel=0 and -global-isel=1) on gfx900 and
; gfx1100.
; The "A5" datalayout string sets the alloca address space to 5.
; The check lines are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
target datalayout = "A5"

; Element count comes from the kernel argument %n; i32 elements, no explicit
; alignment on the alloca. In the expected output the byte size is computed as
; (%n * 4 + 15) & -16 and then shifted left by 6 (gfx900) or 5 (gfx1100)
; before being added to s32.
define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform(i32 %n) {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x400
; GFX9-SDAG-NEXT: s_mov_b32 s5, s32
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2
; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15
; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 6
; GFX9-SDAG-NEXT: s_mov_b32 s33, 0
; GFX9-SDAG-NEXT: s_add_i32 s32, s5, s4
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s5
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17
; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x400
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX9-GISEL-NEXT: s_mov_b32 s4, s32
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_lshl2_add_u32 s5, s5, 15
; GFX9-GISEL-NEXT: s_and_b32 s5, s5, -16
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 6
; GFX9-GISEL-NEXT: s_mov_b32 s33, 0
; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-SDAG-NEXT: s_mov_b32 s32, 16
; GFX11-SDAG-NEXT: s_mov_b32 s33, 0
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15
; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-GISEL-NEXT: s_mov_b32 s32, 16
; GFX11-GISEL-NEXT: s_mov_b32 s33, 0
; GFX11-GISEL-NEXT: s_mov_b32 s0, s32
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl2_add_u32 s1, s1, 15
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_and_b32 s1, s1, -16
; GFX11-GISEL-NEXT: s_lshl_b32 s1, s1, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s0, s1
; GFX11-GISEL-NEXT: s_endpgm
  %alloca = alloca i32, i32 %n, addrspace(5)
  store volatile i32 123, ptr addrspace(5) %alloca
  ret void
}

; Same as the uniform test but the alloca requests align 128. The expected
; output additionally rounds the base taken from s32 up before use: add 0x1fff
; and mask with 0xffffe000 on gfx900, add 0xfff and mask with 0xfffff000 on
; gfx1100.
define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_over_aligned(i32 %n) {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17
; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x2000
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX9-SDAG-NEXT: s_add_i32 s5, s32, 0x1fff
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2
; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15
; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0xffffe000
; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 10
; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 6
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s5
; GFX9-SDAG-NEXT: s_mov_b32 s33, 0
; GFX9-SDAG-NEXT: s_add_i32 s32, s5, s4
; GFX9-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x2000
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX9-GISEL-NEXT: s_add_u32 s5, s32, 0x1fff
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_lshl2_add_u32 s4, s4, 15
; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffffe000
; GFX9-GISEL-NEXT: s_and_b32 s4, s4, -16
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 10
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX9-GISEL-NEXT: s_lshl_b32 s4, s4, 6
; GFX9-GISEL-NEXT: s_mov_b32 s33, 0
; GFX9-GISEL-NEXT: s_add_u32 s32, s5, s4
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-SDAG-NEXT: s_movk_i32 s32, 0x80
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 10
; GFX11-SDAG-NEXT: s_add_i32 s1, s32, 0xfff
; GFX11-SDAG-NEXT: s_mov_b32 s33, 0
; GFX11-SDAG-NEXT: s_and_b32 s1, s1, 0xfffff000
; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15
; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-GISEL-NEXT: s_movk_i32 s32, 0x80
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 10
; GFX11-GISEL-NEXT: s_add_u32 s1, s32, 0xfff
; GFX11-GISEL-NEXT: s_mov_b32 s33, 0
; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl2_add_u32 s0, s0, 15
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_and_b32 s0, s0, -16
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0
; GFX11-GISEL-NEXT: s_endpgm
  %alloca = alloca i32, i32 %n, align 128, addrspace(5)
  store volatile i32 10, ptr addrspace(5) %alloca
  ret void
}

; Same as the uniform test but the alloca requests align 2. The expected
; output has the same instruction sequence as the default-alignment test (no
; extra rounding of the base); only the stored constant differs.
define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_under_aligned(i32 %n) {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x400
; GFX9-SDAG-NEXT: s_mov_b32 s5, s32
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2
; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15
; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 22
; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 6
; GFX9-SDAG-NEXT: s_mov_b32 s33, 0
; GFX9-SDAG-NEXT: s_add_i32 s32, s5, s4
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s5
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dword s5, s[8:9], 0x0
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17
; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x400
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX9-GISEL-NEXT: s_mov_b32 s4, s32
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_lshl2_add_u32 s5, s5, 15
; GFX9-GISEL-NEXT: s_and_b32 s5, s5, -16
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 22
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 6
; GFX9-GISEL-NEXT: s_mov_b32 s33, 0
; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 22
; GFX11-SDAG-NEXT: s_mov_b32 s32, 16
; GFX11-SDAG-NEXT: s_mov_b32 s33, 0
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15
; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 22
; GFX11-GISEL-NEXT: s_mov_b32 s32, 16
; GFX11-GISEL-NEXT: s_mov_b32 s33, 0
; GFX11-GISEL-NEXT: s_mov_b32 s0, s32
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s0 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_lshl2_add_u32 s1, s1, 15
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_and_b32 s1, s1, -16
; GFX11-GISEL-NEXT: s_lshl_b32 s1, s1, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s0, s1
; GFX11-GISEL-NEXT: s_endpgm
  %alloca = alloca i32, i32 %n, align 2, addrspace(5)
  store volatile i32 22, ptr addrspace(5) %alloca
  ret void
}

; Element count comes from llvm.amdgcn.workitem.id.x, so it differs per lane.
; The expected output first runs a loop over a copy of the exec mask that reads
; each lane's size with v_readlane_b32 and keeps the maximum (s_max_u32); that
; maximum is then shifted and added to s32.
; Note the alloca element type is float while the store writes an i32.
define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent() {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
; GFX9-SDAG-NEXT: s_mov_b32 s6, 0
; GFX9-SDAG-NEXT: s_mov_b32 s33, 0
; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x400
; GFX9-SDAG-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8
; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9-SDAG-NEXT: ; %bb.2:
; GFX9-SDAG-NEXT: s_mov_b32 s4, s32
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17
; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9-GISEL-NEXT: s_mov_b32 s6, 0
; GFX9-GISEL-NEXT: s_mov_b32 s33, 0
; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x400
; GFX9-GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9-GISEL-NEXT: ; %bb.2:
; GFX9-GISEL-NEXT: s_mov_b32 s4, s32
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
; GFX11-SDAG-NEXT: s_mov_b32 s0, 0
; GFX11-SDAG-NEXT: s_mov_b32 s33, 0
; GFX11-SDAG-NEXT: s_mov_b32 s32, 16
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0
; GFX11-SDAG-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB3_1
; GFX11-SDAG-NEXT: ; %bb.2:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
; GFX11-GISEL-NEXT: s_mov_b32 s33, 0
; GFX11-GISEL-NEXT: s_mov_b32 s32, 16
; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3
; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX11-GISEL-NEXT: ; %bb.2:
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX11-GISEL-NEXT: s_mov_b32 s1, s32
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %alloca = alloca float, i32 %idx, addrspace(5)
  store volatile i32 123, ptr addrspace(5) %alloca
  ret void
}

; Per-lane element count (workitem id) combined with align 128. The expected
; output contains both the max-over-lanes loop and the rounding of the base
; taken from s32 (0x1fff / 0xffffe000 on gfx900, 0xfff / 0xfffff000 on
; gfx1100).
define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent_over_aligned() {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
; GFX9-SDAG-NEXT: s_mov_b32 s6, 0
; GFX9-SDAG-NEXT: s_mov_b32 s33, 0
; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x2000
; GFX9-SDAG-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8
; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9-SDAG-NEXT: ; %bb.2:
; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0x1fff
; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 0xffffe000
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s6, 6, v0
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x1bc
; GFX9-SDAG-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17
; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9-GISEL-NEXT: s_mov_b32 s6, 0
; GFX9-GISEL-NEXT: s_mov_b32 s33, 0
; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x2000
; GFX9-GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX9-GISEL-NEXT: ; %bb.2:
; GFX9-GISEL-NEXT: s_add_u32 s5, s32, 0x1fff
; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffffe000
; GFX9-GISEL-NEXT: s_lshl_b32 s4, s6, 6
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x1bc
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5
; GFX9-GISEL-NEXT: s_add_u32 s32, s5, s4
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_movk_i32 s32, 0x80
; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo
; GFX11-SDAG-NEXT: s_add_i32 s0, s32, 0xfff
; GFX11-SDAG-NEXT: s_mov_b32 s1, 0
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 0xfffff000
; GFX11-SDAG-NEXT: s_mov_b32 s33, 0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0
; GFX11-SDAG-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_readlane_b32 s4, v0, s3
; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB4_1
; GFX11-SDAG-NEXT: ; %bb.2:
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s0
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x1bc
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s0 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
; GFX11-GISEL-NEXT: s_mov_b32 s33, 0
; GFX11-GISEL-NEXT: s_movk_i32 s32, 0x80
; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3
; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB4_1
; GFX11-GISEL-NEXT: ; %bb.2:
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x1bc
; GFX11-GISEL-NEXT: s_add_u32 s1, s32, 0xfff
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %alloca = alloca i32, i32 %idx, align 128, addrspace(5)
  store volatile i32 444, ptr addrspace(5) %alloca
  ret void
}

; Per-lane element count (workitem id) with i128 elements and align 2. The
; expected output scales the index by 16 (shift left by 4) and contains the
; max-over-lanes loop, but no rounding of the base taken from s32.
define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent_under_aligned() {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
; GFX9-SDAG-NEXT: s_mov_b32 s6, 0
; GFX9-SDAG-NEXT: s_mov_b32 s33, 0
; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x400
; GFX9-SDAG-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1
; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8
; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB5_1
; GFX9-SDAG-NEXT: ; %bb.2:
; GFX9-SDAG-NEXT: s_mov_b32 s4, s32
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17
; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 4, 15
; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9-GISEL-NEXT: s_mov_b32 s6, 0
; GFX9-GISEL-NEXT: s_mov_b32 s33, 0
; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x400
; GFX9-GISEL-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB5_1
; GFX9-GISEL-NEXT: ; %bb.2:
; GFX9-GISEL-NEXT: s_mov_b32 s4, s32
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
; GFX11-SDAG-NEXT: s_mov_b32 s0, 0
; GFX11-SDAG-NEXT: s_mov_b32 s33, 0
; GFX11-SDAG-NEXT: s_mov_b32 s32, 16
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; GFX11-SDAG-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB5_1
; GFX11-SDAG-NEXT: ; %bb.2:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x29a
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
; GFX11-GISEL-NEXT: s_mov_b32 s33, 0
; GFX11-GISEL-NEXT: s_mov_b32 s32, 16
; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 4, 15
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-GISEL-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1
; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3
; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB5_1
; GFX11-GISEL-NEXT: ; %bb.2:
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX11-GISEL-NEXT: s_mov_b32 s1, s32
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_endpgm
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %alloca = alloca i128, i32 %idx, align 2, addrspace(5)
  store volatile i32 666, ptr addrspace(5) %alloca
  ret void
}

define amdgpu_kernel void @test_dynamic_stackalloc_kernel_multiple_allocas(i32 %n, i32 %m) {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_multiple_allocas:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17
; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0
; GFX9-SDAG-NEXT: s_mov_b32 s8, 0
; GFX9-SDAG-NEXT: s_mov_b32 s33, 0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_cmp_lg_u32 s4, 0
; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x2000
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB6_4
;
GFX9-SDAG-NEXT: ; %bb.1: ; %bb.0 619; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 2 620; GFX9-SDAG-NEXT: s_add_i32 s5, s5, 15 621; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff 622; GFX9-SDAG-NEXT: s_and_b32 s5, s5, -16 623; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 624; GFX9-SDAG-NEXT: s_and_b32 s9, s6, 0xfffff000 625; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 6 626; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 627; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec 628; GFX9-SDAG-NEXT: s_add_i32 s32, s9, s5 629; GFX9-SDAG-NEXT: .LBB6_2: ; =>This Inner Loop Header: Depth=1 630; GFX9-SDAG-NEXT: s_ff1_i32_b64 s5, s[6:7] 631; GFX9-SDAG-NEXT: v_readlane_b32 s10, v0, s5 632; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s5 633; GFX9-SDAG-NEXT: s_max_u32 s8, s8, s10 634; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0 635; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB6_2 636; GFX9-SDAG-NEXT: ; %bb.3: 637; GFX9-SDAG-NEXT: s_mov_b32 s5, s32 638; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s5 639; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s8, 6, v0 640; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0 641; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 3 642; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s9 643; GFX9-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 644; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 645; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 4 646; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s5 647; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 648; GFX9-SDAG-NEXT: .LBB6_4: ; %bb.1 649; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 2 650; GFX9-SDAG-NEXT: s_add_i32 s4, s4, 15 651; GFX9-SDAG-NEXT: s_and_b32 s4, s4, -16 652; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1 653; GFX9-SDAG-NEXT: s_lshl_b32 s4, s4, 6 654; GFX9-SDAG-NEXT: s_mov_b32 s5, s32 655; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s33 656; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 657; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2 658; GFX9-SDAG-NEXT: s_add_i32 s32, s5, s4 659; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s5 660; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 661; GFX9-SDAG-NEXT: s_endpgm 662; 663; GFX9-GISEL-LABEL: 
test_dynamic_stackalloc_kernel_multiple_allocas: 664; GFX9-GISEL: ; %bb.0: ; %entry 665; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 666; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17 667; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 668; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 669; GFX9-GISEL-NEXT: s_mov_b32 s33, 0 670; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 671; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 672; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x2000 673; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB6_4 674; GFX9-GISEL-NEXT: ; %bb.1: ; %bb.0 675; GFX9-GISEL-NEXT: s_lshl2_add_u32 s5, s5, 15 676; GFX9-GISEL-NEXT: s_and_b32 s5, s5, -16 677; GFX9-GISEL-NEXT: s_lshl_b32 s6, s5, 6 678; GFX9-GISEL-NEXT: s_add_u32 s5, s32, 0xfff 679; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xfffff000 680; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 681; GFX9-GISEL-NEXT: s_add_u32 s32, s5, s6 682; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 683; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], exec 684; GFX9-GISEL-NEXT: .LBB6_2: ; =>This Inner Loop Header: Depth=1 685; GFX9-GISEL-NEXT: s_ff1_i32_b64 s9, s[6:7] 686; GFX9-GISEL-NEXT: v_readlane_b32 s10, v0, s9 687; GFX9-GISEL-NEXT: s_bitset0_b64 s[6:7], s9 688; GFX9-GISEL-NEXT: s_max_u32 s8, s8, s10 689; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 690; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB6_2 691; GFX9-GISEL-NEXT: ; %bb.3: 692; GFX9-GISEL-NEXT: s_mov_b32 s6, s32 693; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 3 694; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 695; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, 6 696; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 697; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 698; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 4 699; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s6 700; GFX9-GISEL-NEXT: s_add_u32 s32, s6, s7 701; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 702; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 703; GFX9-GISEL-NEXT: .LBB6_4: ; %bb.1 704; GFX9-GISEL-NEXT: s_lshl2_add_u32 s4, s4, 15 705; GFX9-GISEL-NEXT: s_mov_b32 s5, s32 706; GFX9-GISEL-NEXT: s_and_b32 s4, s4, -16 
707; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 1 708; GFX9-GISEL-NEXT: s_lshl_b32 s4, s4, 6 709; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], s33 710; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 711; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 2 712; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 713; GFX9-GISEL-NEXT: s_add_u32 s32, s5, s4 714; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 715; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 716; GFX9-GISEL-NEXT: s_endpgm 717; 718; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_multiple_allocas: 719; GFX11-SDAG: ; %bb.0: ; %entry 720; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 721; GFX11-SDAG-NEXT: s_mov_b32 s2, 0 722; GFX11-SDAG-NEXT: s_mov_b32 s33, 0 723; GFX11-SDAG-NEXT: s_movk_i32 s32, 0x80 724; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 725; GFX11-SDAG-NEXT: s_cmp_lg_u32 s0, 0 726; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB6_4 727; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.0 728; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 729; GFX11-SDAG-NEXT: s_lshl_b32 s1, s1, 2 730; GFX11-SDAG-NEXT: s_add_i32 s3, s32, 0x7ff 731; GFX11-SDAG-NEXT: s_add_i32 s1, s1, 15 732; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) 733; GFX11-SDAG-NEXT: s_and_b32 s4, s1, -16 734; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 735; GFX11-SDAG-NEXT: s_and_b32 s1, s3, 0xfffff800 736; GFX11-SDAG-NEXT: s_lshl_b32 s3, s4, 5 737; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s3 738; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 739; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 740; GFX11-SDAG-NEXT: s_mov_b32 s3, exec_lo 741; GFX11-SDAG-NEXT: .LBB6_2: ; =>This Inner Loop Header: Depth=1 742; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 743; GFX11-SDAG-NEXT: s_ctz_i32_b32 s4, s3 744; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 745; GFX11-SDAG-NEXT: v_readlane_b32 s5, v0, s4 746; GFX11-SDAG-NEXT: s_bitset0_b32 s3, s4 747; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 748; GFX11-SDAG-NEXT: s_max_u32 s2, s2, 
s5 749; GFX11-SDAG-NEXT: s_cmp_lg_u32 s3, 0 750; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB6_2 751; GFX11-SDAG-NEXT: ; %bb.3: 752; GFX11-SDAG-NEXT: s_mov_b32 s3, s32 753; GFX11-SDAG-NEXT: v_dual_mov_b32 v1, 3 :: v_dual_mov_b32 v2, 4 754; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s2, 5, s3 755; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc 756; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 757; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s3 dlc 758; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 759; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 760; GFX11-SDAG-NEXT: .LBB6_4: ; %bb.1 761; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 762; GFX11-SDAG-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 763; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 764; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 765; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 766; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 767; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s33 dlc 768; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 769; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc 770; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 771; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 772; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 773; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 774; GFX11-SDAG-NEXT: s_endpgm 775; 776; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_multiple_allocas: 777; GFX11-GISEL: ; %bb.0: ; %entry 778; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 779; GFX11-GISEL-NEXT: s_mov_b32 s2, 0 780; GFX11-GISEL-NEXT: s_mov_b32 s33, 0 781; GFX11-GISEL-NEXT: s_movk_i32 s32, 0x80 782; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 783; GFX11-GISEL-NEXT: s_cmp_lg_u32 s0, 0 784; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB6_4 785; GFX11-GISEL-NEXT: ; %bb.1: ; %bb.0 786; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 787; GFX11-GISEL-NEXT: s_lshl2_add_u32 s1, s1, 15 788; GFX11-GISEL-NEXT: s_add_u32 s3, s32, 0x7ff 789; GFX11-GISEL-NEXT: s_and_b32 s1, s1, -16 790; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | 
instid1(VALU_DEP_1) 791; GFX11-GISEL-NEXT: s_lshl_b32 s4, s1, 5 792; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 793; GFX11-GISEL-NEXT: s_and_b32 s1, s3, 0xfffff800 794; GFX11-GISEL-NEXT: s_mov_b32 s3, exec_lo 795; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s4 796; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 797; GFX11-GISEL-NEXT: .LBB6_2: ; =>This Inner Loop Header: Depth=1 798; GFX11-GISEL-NEXT: s_ctz_i32_b32 s4, s3 799; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 800; GFX11-GISEL-NEXT: v_readlane_b32 s5, v0, s4 801; GFX11-GISEL-NEXT: s_bitset0_b32 s3, s4 802; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 803; GFX11-GISEL-NEXT: s_max_u32 s2, s2, s5 804; GFX11-GISEL-NEXT: s_cmp_lg_u32 s3, 0 805; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB6_2 806; GFX11-GISEL-NEXT: ; %bb.3: 807; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 808; GFX11-GISEL-NEXT: s_mov_b32 s3, s32 809; GFX11-GISEL-NEXT: s_lshl_b32 s2, s2, 5 810; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc 811; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 812; GFX11-GISEL-NEXT: scratch_store_b32 off, v1, s3 dlc 813; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 814; GFX11-GISEL-NEXT: s_add_u32 s32, s3, s2 815; GFX11-GISEL-NEXT: .LBB6_4: ; %bb.1 816; GFX11-GISEL-NEXT: s_lshl2_add_u32 s0, s0, 15 817; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 818; GFX11-GISEL-NEXT: s_and_b32 s0, s0, -16 819; GFX11-GISEL-NEXT: s_mov_b32 s1, s32 820; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 821; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s33 dlc 822; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 823; GFX11-GISEL-NEXT: scratch_store_b32 off, v1, s1 dlc 824; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 825; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 826; GFX11-GISEL-NEXT: s_endpgm 827entry: 828 %cond = icmp eq i32 %n, 0 829 %alloca1 = alloca i32, i32 8, addrspace(5) 830 %alloca2 = alloca i17, i32 %n, addrspace(5) 831 br i1 %cond, label %bb.0, label %bb.1 832bb.0: 833 %idx = call 
i32 @llvm.amdgcn.workitem.id.x() 834 %alloca3 = alloca i32, i32 %m, align 64, addrspace(5) 835 %alloca4 = alloca i32, i32 %idx, align 4, addrspace(5) 836 store volatile i32 3, ptr addrspace(5) %alloca3 837 store volatile i32 4, ptr addrspace(5) %alloca4 838 br label %bb.1 839bb.1: 840 store volatile i32 1, ptr addrspace(5) %alloca1 841 store volatile i32 2, ptr addrspace(5) %alloca2 842 ret void 843} 844 845define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i32 %m) { 846; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_control_flow: 847; GFX9-SDAG: ; %bb.0: ; %entry 848; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 849; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s17 850; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0 851; GFX9-SDAG-NEXT: s_mov_b32 s33, 0 852; GFX9-SDAG-NEXT: s_movk_i32 s32, 0x1000 853; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 854; GFX9-SDAG-NEXT: s_cmp_lg_u32 s4, 0 855; GFX9-SDAG-NEXT: s_mov_b32 s4, 0 856; GFX9-SDAG-NEXT: s_cbranch_scc0 .LBB7_6 857; GFX9-SDAG-NEXT: ; %bb.1: ; %bb.1 858; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 859; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 860; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec 861; GFX9-SDAG-NEXT: .LBB7_2: ; =>This Inner Loop Header: Depth=1 862; GFX9-SDAG-NEXT: s_ff1_i32_b64 s8, s[6:7] 863; GFX9-SDAG-NEXT: v_readlane_b32 s9, v0, s8 864; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s8 865; GFX9-SDAG-NEXT: s_max_u32 s4, s4, s9 866; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0 867; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB7_2 868; GFX9-SDAG-NEXT: ; %bb.3: 869; GFX9-SDAG-NEXT: s_mov_b32 s6, s32 870; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 871; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s4, 6, v0 872; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0 873; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1 874; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s6 875; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 876; GFX9-SDAG-NEXT: s_cbranch_execnz .LBB7_5 877; GFX9-SDAG-NEXT: .LBB7_4: ; %bb.0 878; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 2 879; 
GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0xfff 880; GFX9-SDAG-NEXT: s_add_i32 s5, s5, 15 881; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 0xfffff000 882; GFX9-SDAG-NEXT: s_and_b32 s5, s5, -16 883; GFX9-SDAG-NEXT: s_lshl_b32 s5, s5, 6 884; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2 885; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s4 886; GFX9-SDAG-NEXT: s_add_i32 s32, s4, s5 887; GFX9-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 888; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 889; GFX9-SDAG-NEXT: .LBB7_5: ; %bb.2 890; GFX9-SDAG-NEXT: s_endpgm 891; GFX9-SDAG-NEXT: .LBB7_6: 892; GFX9-SDAG-NEXT: s_branch .LBB7_4 893; 894; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_control_flow: 895; GFX9-GISEL: ; %bb.0: ; %entry 896; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 897; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s17 898; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 899; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 900; GFX9-GISEL-NEXT: s_mov_b32 s33, 0 901; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 902; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 903; GFX9-GISEL-NEXT: s_mov_b32 s4, 1 904; GFX9-GISEL-NEXT: s_movk_i32 s32, 0x1000 905; GFX9-GISEL-NEXT: s_cbranch_scc0 .LBB7_4 906; GFX9-GISEL-NEXT: ; %bb.1: ; %bb.1 907; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 908; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 909; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], exec 910; GFX9-GISEL-NEXT: .LBB7_2: ; =>This Inner Loop Header: Depth=1 911; GFX9-GISEL-NEXT: s_ff1_i32_b64 s4, s[6:7] 912; GFX9-GISEL-NEXT: v_readlane_b32 s9, v0, s4 913; GFX9-GISEL-NEXT: s_bitset0_b64 s[6:7], s4 914; GFX9-GISEL-NEXT: s_max_u32 s8, s8, s9 915; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 916; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB7_2 917; GFX9-GISEL-NEXT: ; %bb.3: 918; GFX9-GISEL-NEXT: s_mov_b32 s4, s32 919; GFX9-GISEL-NEXT: s_lshl_b32 s6, s8, 6 920; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 1 921; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 922; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s6 923; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 924; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 
925; GFX9-GISEL-NEXT: s_mov_b32 s4, 0 926; GFX9-GISEL-NEXT: .LBB7_4: ; %Flow 927; GFX9-GISEL-NEXT: s_xor_b32 s4, s4, 1 928; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 1 929; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 930; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB7_6 931; GFX9-GISEL-NEXT: ; %bb.5: ; %bb.0 932; GFX9-GISEL-NEXT: s_lshl2_add_u32 s4, s5, 15 933; GFX9-GISEL-NEXT: s_add_u32 s5, s32, 0xfff 934; GFX9-GISEL-NEXT: s_and_b32 s4, s4, -16 935; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xfffff000 936; GFX9-GISEL-NEXT: s_lshl_b32 s4, s4, 6 937; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 2 938; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 939; GFX9-GISEL-NEXT: s_add_u32 s32, s5, s4 940; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 941; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 942; GFX9-GISEL-NEXT: .LBB7_6: ; %bb.2 943; GFX9-GISEL-NEXT: s_endpgm 944; 945; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_control_flow: 946; GFX11-SDAG: ; %bb.0: ; %entry 947; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 948; GFX11-SDAG-NEXT: s_mov_b32 s33, 0 949; GFX11-SDAG-NEXT: s_mov_b32 s32, 64 950; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 951; GFX11-SDAG-NEXT: s_cmp_lg_u32 s0, 0 952; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 953; GFX11-SDAG-NEXT: s_cbranch_scc0 .LBB7_6 954; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.1 955; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 956; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo 957; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 958; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 959; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 960; GFX11-SDAG-NEXT: .LBB7_2: ; =>This Inner Loop Header: Depth=1 961; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2 962; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 963; GFX11-SDAG-NEXT: v_readlane_b32 s4, v0, s3 964; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3 965; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 966; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s4 967; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0 968; GFX11-SDAG-NEXT: 
s_cbranch_scc1 .LBB7_2 969; GFX11-SDAG-NEXT: ; %bb.3: 970; GFX11-SDAG-NEXT: s_mov_b32 s2, s32 971; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1 972; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s2 973; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s2 dlc 974; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 975; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 976; GFX11-SDAG-NEXT: s_cbranch_execnz .LBB7_5 977; GFX11-SDAG-NEXT: .LBB7_4: ; %bb.0 978; GFX11-SDAG-NEXT: s_lshl_b32 s0, s1, 2 979; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2 980; GFX11-SDAG-NEXT: s_add_i32 s0, s0, 15 981; GFX11-SDAG-NEXT: s_add_i32 s1, s32, 0x7ff 982; GFX11-SDAG-NEXT: s_and_b32 s0, s0, -16 983; GFX11-SDAG-NEXT: s_and_b32 s1, s1, 0xfffff800 984; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 5 985; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc 986; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 987; GFX11-SDAG-NEXT: s_add_i32 s32, s1, s0 988; GFX11-SDAG-NEXT: .LBB7_5: ; %bb.2 989; GFX11-SDAG-NEXT: s_endpgm 990; GFX11-SDAG-NEXT: .LBB7_6: 991; GFX11-SDAG-NEXT: s_branch .LBB7_4 992; 993; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_control_flow: 994; GFX11-GISEL: ; %bb.0: ; %entry 995; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 996; GFX11-GISEL-NEXT: s_mov_b32 s2, 0 997; GFX11-GISEL-NEXT: s_mov_b32 s33, 0 998; GFX11-GISEL-NEXT: s_mov_b32 s32, 64 999; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1000; GFX11-GISEL-NEXT: s_cmp_lg_u32 s0, 0 1001; GFX11-GISEL-NEXT: s_mov_b32 s0, 1 1002; GFX11-GISEL-NEXT: s_cbranch_scc0 .LBB7_4 1003; GFX11-GISEL-NEXT: ; %bb.1: ; %bb.1 1004; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1005; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo 1006; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1007; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1008; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1009; GFX11-GISEL-NEXT: .LBB7_2: ; =>This Inner Loop Header: Depth=1 1010; GFX11-GISEL-NEXT: s_ctz_i32_b32 s3, s0 1011; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instid1(SALU_CYCLE_1) 1012; GFX11-GISEL-NEXT: v_readlane_b32 s4, v0, s3 1013; GFX11-GISEL-NEXT: s_bitset0_b32 s0, s3 1014; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1015; GFX11-GISEL-NEXT: s_max_u32 s2, s2, s4 1016; GFX11-GISEL-NEXT: s_cmp_lg_u32 s0, 0 1017; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB7_2 1018; GFX11-GISEL-NEXT: ; %bb.3: 1019; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 1 1020; GFX11-GISEL-NEXT: s_mov_b32 s3, s32 1021; GFX11-GISEL-NEXT: s_lshl_b32 s0, s2, 5 1022; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1023; GFX11-GISEL-NEXT: s_add_u32 s32, s3, s0 1024; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 1025; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s3 dlc 1026; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1027; GFX11-GISEL-NEXT: .LBB7_4: ; %Flow 1028; GFX11-GISEL-NEXT: s_xor_b32 s0, s0, 1 1029; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 1030; GFX11-GISEL-NEXT: s_and_b32 s0, s0, 1 1031; GFX11-GISEL-NEXT: s_cmp_lg_u32 s0, 0 1032; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB7_6 1033; GFX11-GISEL-NEXT: ; %bb.5: ; %bb.0 1034; GFX11-GISEL-NEXT: s_lshl2_add_u32 s0, s1, 15 1035; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 2 1036; GFX11-GISEL-NEXT: s_add_u32 s1, s32, 0x7ff 1037; GFX11-GISEL-NEXT: s_and_b32 s0, s0, -16 1038; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff800 1039; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 1040; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc 1041; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1042; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 1043; GFX11-GISEL-NEXT: .LBB7_6: ; %bb.2 1044; GFX11-GISEL-NEXT: s_endpgm 1045entry: 1046 %cond = icmp eq i32 %n, 0 1047 br i1 %cond, label %bb.0, label %bb.1 1048bb.0: 1049 %alloca2 = alloca i32, i32 %m, align 64, addrspace(5) 1050 store volatile i32 2, ptr addrspace(5) %alloca2 1051 br label %bb.2 1052bb.1: 1053 %idx = call i32 @llvm.amdgcn.workitem.id.x() 1054 %alloca1 = alloca i32, i32 %idx, align 4, addrspace(5) 1055 store volatile i32 1, ptr addrspace(5) %alloca1 
1056 br label %bb.2 1057bb.2: 1058 ret void 1059} 1060 1061define void @test_dynamic_stackalloc_device_uniform(i32 %n) { 1062; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform: 1063; GFX9-SDAG: ; %bb.0: 1064; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1065; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1066; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 1067; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 1068; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec 1069; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 1070; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 1071; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 1072; GFX9-SDAG-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 1073; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] 1074; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7 1075; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7 1076; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8 1077; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 1078; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB8_1 1079; GFX9-SDAG-NEXT: ; %bb.2: 1080; GFX9-SDAG-NEXT: s_mov_b32 s4, s32 1081; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 1082; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0 1083; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1084; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7b 1085; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4 1086; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1087; GFX9-SDAG-NEXT: s_mov_b32 s32, s33 1088; GFX9-SDAG-NEXT: s_mov_b32 s33, s9 1089; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1090; 1091; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform: 1092; GFX9-GISEL: ; %bb.0: 1093; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1094; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1095; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 1096; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1097; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec 1098; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 1099; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 1100; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 1101; GFX9-GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 1102; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] 
1103; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7 1104; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7 1105; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8 1106; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0 1107; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB8_1 1108; GFX9-GISEL-NEXT: ; %bb.2: 1109; GFX9-GISEL-NEXT: s_mov_b32 s4, s32 1110; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6 1111; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5 1112; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 1113; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 1114; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 1115; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1116; GFX9-GISEL-NEXT: s_mov_b32 s32, s33 1117; GFX9-GISEL-NEXT: s_mov_b32 s33, s9 1118; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1119; 1120; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform: 1121; GFX11-SDAG: ; %bb.0: 1122; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1123; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1124; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 1125; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo 1126; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 1127; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 1128; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 1129; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 1130; GFX11-SDAG-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 1131; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 1132; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1133; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 1134; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 1135; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1136; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 1137; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 1138; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB8_1 1139; GFX11-SDAG-NEXT: ; %bb.2: 1140; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 1141; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x7b 1142; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 1143; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc 1144; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1145; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1146; 
GFX11-SDAG-NEXT: s_mov_b32 s32, s33 1147; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 1148; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1149; 1150; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform: 1151; GFX11-GISEL: ; %bb.0: 1152; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1153; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1154; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 1155; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 1156; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 1157; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 1158; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1159; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 1160; GFX11-GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 1161; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 1162; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1163; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 1164; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 1165; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1166; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 1167; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0 1168; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB8_1 1169; GFX11-GISEL-NEXT: ; %bb.2: 1170; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 1171; GFX11-GISEL-NEXT: s_mov_b32 s1, s32 1172; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 1173; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1174; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 1175; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc 1176; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1177; GFX11-GISEL-NEXT: s_mov_b32 s32, s33 1178; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 1179; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1180 %alloca = alloca i32, i32 %n, addrspace(5) 1181 store volatile i32 123, ptr addrspace(5) %alloca 1182 ret void 1183} 1184 1185define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) { 1186; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: 1187; GFX9-SDAG: ; %bb.0: 1188; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1189; GFX9-SDAG-NEXT: 
v_lshl_add_u32 v0, v0, 2, 15 1190; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 1191; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0x1fc0 1192; GFX9-SDAG-NEXT: s_mov_b32 s10, s34 1193; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 1194; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec 1195; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 1196; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 1197; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 1198; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 1199; GFX9-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 1200; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] 1201; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7 1202; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7 1203; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8 1204; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 1205; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB9_1 1206; GFX9-SDAG-NEXT: ; %bb.2: 1207; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0x1fff 1208; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 0xffffe000 1209; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 1210; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s6, 6, v0 1211; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 1212; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 10 1213; GFX9-SDAG-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 1214; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1215; GFX9-SDAG-NEXT: s_mov_b32 s32, s34 1216; GFX9-SDAG-NEXT: s_mov_b32 s34, s10 1217; GFX9-SDAG-NEXT: s_mov_b32 s33, s9 1218; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1219; 1220; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: 1221; GFX9-GISEL: ; %bb.0: 1222; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1223; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1224; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 1225; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0x1fc0 1226; GFX9-GISEL-NEXT: s_mov_b32 s10, s34 1227; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1228; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec 1229; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 1230; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 1231; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 1232; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x4000 1233; 
GFX9-GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 1234; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] 1235; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7 1236; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7 1237; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8 1238; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0 1239; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB9_1 1240; GFX9-GISEL-NEXT: ; %bb.2: 1241; GFX9-GISEL-NEXT: s_add_u32 s5, s32, 0x1fff 1242; GFX9-GISEL-NEXT: s_lshl_b32 s4, s6, 6 1243; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffffe000 1244; GFX9-GISEL-NEXT: s_add_u32 s32, s5, s4 1245; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 10 1246; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 1247; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 1248; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1249; GFX9-GISEL-NEXT: s_mov_b32 s32, s34 1250; GFX9-GISEL-NEXT: s_mov_b32 s34, s10 1251; GFX9-GISEL-NEXT: s_mov_b32 s33, s9 1252; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1253; 1254; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: 1255; GFX11-SDAG: ; %bb.0: 1256; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1257; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1258; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 1259; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 0x7f 1260; GFX11-SDAG-NEXT: s_mov_b32 s5, s34 1261; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo 1262; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 1263; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 1264; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 1265; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 1266; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x100 1267; GFX11-SDAG-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 1268; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 1269; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 1270; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 1271; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 1272; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 1273; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 1274; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB9_1 1275; 
GFX11-SDAG-NEXT: ; %bb.2: 1276; GFX11-SDAG-NEXT: s_add_i32 s1, s32, 0xfff 1277; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 10 1278; GFX11-SDAG-NEXT: s_and_b32 s1, s1, 0xfffff000 1279; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 1280; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 1281; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc 1282; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1283; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1284; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 1285; GFX11-SDAG-NEXT: s_mov_b32 s34, s5 1286; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1287; 1288; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned: 1289; GFX11-GISEL: ; %bb.0: 1290; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1291; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1292; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 1293; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 0x7f 1294; GFX11-GISEL-NEXT: s_mov_b32 s5, s34 1295; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 1296; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1297; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 1298; GFX11-GISEL-NEXT: s_and_b32 s33, s33, 0xffffff80 1299; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 1300; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x100 1301; GFX11-GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 1302; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 1303; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 1304; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 1305; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 1306; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 1307; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0 1308; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB9_1 1309; GFX11-GISEL-NEXT: ; %bb.2: 1310; GFX11-GISEL-NEXT: s_add_u32 s1, s32, 0xfff 1311; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 10 1312; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 1313; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000 1314; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 1315; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 1316; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 
dlc 1317; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1318; GFX11-GISEL-NEXT: s_mov_b32 s32, s34 1319; GFX11-GISEL-NEXT: s_mov_b32 s34, s5 1320; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1321 %alloca = alloca i32, i32 %n, align 128, addrspace(5) 1322 store volatile i32 10, ptr addrspace(5) %alloca 1323 ret void 1324} 1325 1326define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) { 1327; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: 1328; GFX9-SDAG: ; %bb.0: 1329; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1331; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 1332; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 1333; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec 1334; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 1335; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 1336; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 1337; GFX9-SDAG-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 1338; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] 1339; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7 1340; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7 1341; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8 1342; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 1343; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB10_1 1344; GFX9-SDAG-NEXT: ; %bb.2: 1345; GFX9-SDAG-NEXT: s_mov_b32 s4, s32 1346; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 1347; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0 1348; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1349; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 22 1350; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4 1351; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1352; GFX9-SDAG-NEXT: s_mov_b32 s32, s33 1353; GFX9-SDAG-NEXT: s_mov_b32 s33, s9 1354; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1355; 1356; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: 1357; GFX9-GISEL: ; %bb.0: 1358; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1359; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1360; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 1361; GFX9-GISEL-NEXT: 
v_and_b32_e32 v0, -16, v0 1362; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec 1363; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 1364; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 1365; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 1366; GFX9-GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 1367; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] 1368; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7 1369; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7 1370; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8 1371; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0 1372; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB10_1 1373; GFX9-GISEL-NEXT: ; %bb.2: 1374; GFX9-GISEL-NEXT: s_mov_b32 s4, s32 1375; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6 1376; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5 1377; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 22 1378; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 1379; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 1380; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1381; GFX9-GISEL-NEXT: s_mov_b32 s32, s33 1382; GFX9-GISEL-NEXT: s_mov_b32 s33, s9 1383; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1384; 1385; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: 1386; GFX11-SDAG: ; %bb.0: 1387; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1388; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1389; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 1390; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo 1391; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 1392; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 1393; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 1394; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 1395; GFX11-SDAG-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 1396; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 1397; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1398; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 1399; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 1400; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1401; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 1402; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 1403; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB10_1 
1404; GFX11-SDAG-NEXT: ; %bb.2: 1405; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 1406; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 22 1407; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 1408; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc 1409; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1410; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1411; GFX11-SDAG-NEXT: s_mov_b32 s32, s33 1412; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 1413; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1414; 1415; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned: 1416; GFX11-GISEL: ; %bb.0: 1417; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1418; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1419; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 1420; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 1421; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 1422; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 1423; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1424; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 1425; GFX11-GISEL-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 1426; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 1427; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1428; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 1429; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 1430; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1431; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 1432; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0 1433; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB10_1 1434; GFX11-GISEL-NEXT: ; %bb.2: 1435; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 22 1436; GFX11-GISEL-NEXT: s_mov_b32 s1, s32 1437; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 1438; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1439; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 1440; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc 1441; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1442; GFX11-GISEL-NEXT: s_mov_b32 s32, s33 1443; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 1444; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1445 %alloca = alloca i32, i32 %n, align 2, addrspace(5) 
1446 store volatile i32 22, ptr addrspace(5) %alloca 1447 ret void 1448} 1449 1450define void @test_dynamic_stackalloc_device_divergent() { 1451; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent: 1452; GFX9-SDAG: ; %bb.0: 1453; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1454; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1455; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1456; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 1457; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 1458; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec 1459; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 1460; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 1461; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 1462; GFX9-SDAG-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 1463; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] 1464; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7 1465; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7 1466; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8 1467; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 1468; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB11_1 1469; GFX9-SDAG-NEXT: ; %bb.2: 1470; GFX9-SDAG-NEXT: s_mov_b32 s4, s32 1471; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 1472; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0 1473; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1474; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x7b 1475; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4 1476; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1477; GFX9-SDAG-NEXT: s_mov_b32 s32, s33 1478; GFX9-SDAG-NEXT: s_mov_b32 s33, s9 1479; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1480; 1481; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent: 1482; GFX9-GISEL: ; %bb.0: 1483; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1484; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1485; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1486; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 1487; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1488; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec 1489; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 1490; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 1491; GFX9-GISEL-NEXT: 
s_addk_i32 s32, 0x400 1492; GFX9-GISEL-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 1493; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] 1494; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7 1495; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7 1496; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8 1497; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0 1498; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB11_1 1499; GFX9-GISEL-NEXT: ; %bb.2: 1500; GFX9-GISEL-NEXT: s_mov_b32 s4, s32 1501; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6 1502; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5 1503; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 1504; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 1505; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 1506; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1507; GFX9-GISEL-NEXT: s_mov_b32 s32, s33 1508; GFX9-GISEL-NEXT: s_mov_b32 s33, s9 1509; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1510; 1511; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent: 1512; GFX11-SDAG: ; %bb.0: 1513; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1514; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1515; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 1516; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo 1517; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 1518; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 1519; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1520; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 1521; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1522; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 1523; GFX11-SDAG-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 1524; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 1525; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1526; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 1527; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 1528; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1529; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 1530; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 1531; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB11_1 1532; GFX11-SDAG-NEXT: ; %bb.2: 1533; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 1534; 
GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x7b 1535; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 1536; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc 1537; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1538; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1539; GFX11-SDAG-NEXT: s_mov_b32 s32, s33 1540; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 1541; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1542; 1543; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent: 1544; GFX11-GISEL: ; %bb.0: 1545; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1546; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1547; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 1548; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 1549; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 1550; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 1551; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1552; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 1553; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1554; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1555; GFX11-GISEL-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 1556; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 1557; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1558; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 1559; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 1560; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1561; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 1562; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0 1563; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB11_1 1564; GFX11-GISEL-NEXT: ; %bb.2: 1565; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 1566; GFX11-GISEL-NEXT: s_mov_b32 s1, s32 1567; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 1568; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1569; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 1570; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc 1571; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1572; GFX11-GISEL-NEXT: s_mov_b32 s32, s33 1573; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 1574; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1575 %idx = call i32 
@llvm.amdgcn.workitem.id.x() 1576 %alloca = alloca i32, i32 %idx, addrspace(5) 1577 store volatile i32 123, ptr addrspace(5) %alloca 1578 ret void 1579} 1580 1581define void @test_dynamic_stackalloc_device_divergent_over_aligned() { 1582; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: 1583; GFX9-SDAG: ; %bb.0: 1584; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1585; GFX9-SDAG-NEXT: s_mov_b32 s10, s33 1586; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0x1fc0 1587; GFX9-SDAG-NEXT: s_mov_b32 s11, s34 1588; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 1589; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x4000 1590; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1591; GFX9-SDAG-NEXT: s_add_i32 s4, s32, 0x1fff 1592; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1593; GFX9-SDAG-NEXT: s_and_b32 s6, s4, 0xffffe000 1594; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 1595; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec 1596; GFX9-SDAG-NEXT: s_mov_b32 s7, 0 1597; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xffffe000 1598; GFX9-SDAG-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 1599; GFX9-SDAG-NEXT: s_ff1_i32_b64 s8, s[4:5] 1600; GFX9-SDAG-NEXT: v_readlane_b32 s9, v0, s8 1601; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s8 1602; GFX9-SDAG-NEXT: s_max_u32 s7, s7, s9 1603; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 1604; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB12_1 1605; GFX9-SDAG-NEXT: ; %bb.2: 1606; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 1607; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s7, 6, v0 1608; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 1609; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0x1bc 1610; GFX9-SDAG-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 1611; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1612; GFX9-SDAG-NEXT: s_mov_b32 s32, s34 1613; GFX9-SDAG-NEXT: s_mov_b32 s34, s11 1614; GFX9-SDAG-NEXT: s_mov_b32 s33, s10 1615; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1616; 1617; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: 1618; GFX9-GISEL: ; %bb.0: 1619; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1620; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1621; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1622; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 1623; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0x1fc0 1624; GFX9-GISEL-NEXT: s_mov_b32 s10, s34 1625; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1626; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec 1627; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 1628; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xffffe000 1629; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 1630; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x4000 1631; GFX9-GISEL-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 1632; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] 1633; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7 1634; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7 1635; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8 1636; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0 1637; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB12_1 1638; GFX9-GISEL-NEXT: ; %bb.2: 1639; GFX9-GISEL-NEXT: s_add_u32 s5, s32, 0x1fff 1640; GFX9-GISEL-NEXT: s_lshl_b32 s4, s6, 6 1641; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffffe000 1642; GFX9-GISEL-NEXT: s_add_u32 s32, s5, s4 1643; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x1bc 1644; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s5 1645; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 1646; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1647; GFX9-GISEL-NEXT: s_mov_b32 s32, s34 1648; GFX9-GISEL-NEXT: s_mov_b32 s34, s10 1649; GFX9-GISEL-NEXT: s_mov_b32 s33, s9 1650; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1651; 1652; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: 1653; GFX11-SDAG: ; %bb.0: 1654; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1655; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1656; GFX11-SDAG-NEXT: s_mov_b32 s5, s33 1657; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 0x7f 1658; GFX11-SDAG-NEXT: s_mov_b32 s6, s34 1659; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 1660; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1661; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x100 1662; GFX11-SDAG-NEXT: 
s_mov_b32 s2, exec_lo 1663; GFX11-SDAG-NEXT: s_add_i32 s0, s32, 0xfff 1664; GFX11-SDAG-NEXT: s_mov_b32 s1, 0 1665; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 1666; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 0xfffff000 1667; GFX11-SDAG-NEXT: s_and_b32 s33, s33, 0xffffff80 1668; GFX11-SDAG-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 1669; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2 1670; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1671; GFX11-SDAG-NEXT: v_readlane_b32 s4, v0, s3 1672; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3 1673; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1674; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4 1675; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0 1676; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB12_1 1677; GFX11-SDAG-NEXT: ; %bb.2: 1678; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s0 1679; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x1bc 1680; GFX11-SDAG-NEXT: s_mov_b32 s33, s5 1681; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 1682; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1683; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s0 dlc 1684; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1685; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 1686; GFX11-SDAG-NEXT: s_mov_b32 s34, s6 1687; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1688; 1689; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned: 1690; GFX11-GISEL: ; %bb.0: 1691; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1692; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1693; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 1694; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 0x7f 1695; GFX11-GISEL-NEXT: s_mov_b32 s5, s34 1696; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 1697; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1698; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 1699; GFX11-GISEL-NEXT: s_and_b32 s33, s33, 0xffffff80 1700; GFX11-GISEL-NEXT: s_mov_b32 s34, s32 1701; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x100 1702; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1703; GFX11-GISEL-NEXT: .LBB12_1: ; =>This Inner 
Loop Header: Depth=1 1704; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 1705; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1706; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 1707; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 1708; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1709; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 1710; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0 1711; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB12_1 1712; GFX11-GISEL-NEXT: ; %bb.2: 1713; GFX11-GISEL-NEXT: s_add_u32 s1, s32, 0xfff 1714; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x1bc 1715; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 1716; GFX11-GISEL-NEXT: s_and_b32 s1, s1, 0xfffff000 1717; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 1718; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 1719; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc 1720; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1721; GFX11-GISEL-NEXT: s_mov_b32 s32, s34 1722; GFX11-GISEL-NEXT: s_mov_b32 s34, s5 1723; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1724 %idx = call i32 @llvm.amdgcn.workitem.id.x() 1725 %alloca = alloca i32, i32 %idx, align 128, addrspace(5) 1726 store volatile i32 444, ptr addrspace(5) %alloca 1727 ret void 1728} 1729 1730define void @test_dynamic_stackalloc_device_divergent_under_aligned() { 1731; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: 1732; GFX9-SDAG: ; %bb.0: 1733; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1734; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1735; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1736; GFX9-SDAG-NEXT: s_mov_b32 s9, s33 1737; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 1738; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec 1739; GFX9-SDAG-NEXT: s_mov_b32 s6, 0 1740; GFX9-SDAG-NEXT: s_mov_b32 s33, s32 1741; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400 1742; GFX9-SDAG-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 1743; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5] 1744; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7 1745; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7 1746; 
GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8 1747; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 1748; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB13_1 1749; GFX9-SDAG-NEXT: ; %bb.2: 1750; GFX9-SDAG-NEXT: s_mov_b32 s4, s32 1751; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 1752; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0 1753; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1754; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x29a 1755; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4 1756; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1757; GFX9-SDAG-NEXT: s_mov_b32 s32, s33 1758; GFX9-SDAG-NEXT: s_mov_b32 s33, s9 1759; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1760; 1761; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: 1762; GFX9-GISEL: ; %bb.0: 1763; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1764; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1765; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1766; GFX9-GISEL-NEXT: s_mov_b32 s9, s33 1767; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1768; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec 1769; GFX9-GISEL-NEXT: s_mov_b32 s6, 0 1770; GFX9-GISEL-NEXT: s_mov_b32 s33, s32 1771; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400 1772; GFX9-GISEL-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 1773; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5] 1774; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7 1775; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7 1776; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8 1777; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0 1778; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB13_1 1779; GFX9-GISEL-NEXT: ; %bb.2: 1780; GFX9-GISEL-NEXT: s_mov_b32 s4, s32 1781; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6 1782; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5 1783; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a 1784; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 1785; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 1786; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1787; GFX9-GISEL-NEXT: s_mov_b32 s32, s33 1788; GFX9-GISEL-NEXT: s_mov_b32 s33, s9 1789; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 
1790; 1791; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: 1792; GFX11-SDAG: ; %bb.0: 1793; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1794; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1795; GFX11-SDAG-NEXT: s_mov_b32 s4, s33 1796; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo 1797; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 1798; GFX11-SDAG-NEXT: s_mov_b32 s33, s32 1799; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1800; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16 1801; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1802; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0 1803; GFX11-SDAG-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 1804; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 1805; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1806; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2 1807; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 1808; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1809; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 1810; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 1811; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB13_1 1812; GFX11-SDAG-NEXT: ; %bb.2: 1813; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 1814; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x29a 1815; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1 1816; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc 1817; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 1818; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1819; GFX11-SDAG-NEXT: s_mov_b32 s32, s33 1820; GFX11-SDAG-NEXT: s_mov_b32 s33, s4 1821; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1822; 1823; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned: 1824; GFX11-GISEL: ; %bb.0: 1825; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1826; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31 1827; GFX11-GISEL-NEXT: s_mov_b32 s4, s33 1828; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 1829; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 1830; GFX11-GISEL-NEXT: s_mov_b32 s33, s32 1831; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1832; 
GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16 1833; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1834; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 1835; GFX11-GISEL-NEXT: .LBB13_1: ; =>This Inner Loop Header: Depth=1 1836; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 1837; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 1838; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 1839; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 1840; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1841; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 1842; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0 1843; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB13_1 1844; GFX11-GISEL-NEXT: ; %bb.2: 1845; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a 1846; GFX11-GISEL-NEXT: s_mov_b32 s1, s32 1847; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 1848; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1849; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 1850; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc 1851; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 1852; GFX11-GISEL-NEXT: s_mov_b32 s32, s33 1853; GFX11-GISEL-NEXT: s_mov_b32 s33, s4 1854; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1855 %idx = call i32 @llvm.amdgcn.workitem.id.x() 1856 %alloca = alloca i32, i32 %idx, align 2, addrspace(5) 1857 store volatile i32 666, ptr addrspace(5) %alloca 1858 ret void 1859} 1860 1861define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) { 1862; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_multiple_allocas: 1863; GFX9-SDAG: ; %bb.0: ; %entry 1864; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1865; GFX9-SDAG-NEXT: s_mov_b32 s13, s33 1866; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0xfc0 1867; GFX9-SDAG-NEXT: s_mov_b32 s14, s34 1868; GFX9-SDAG-NEXT: s_mov_b32 s8, 0 1869; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1870; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xfffff000 1871; GFX9-SDAG-NEXT: s_mov_b32 s34, s32 1872; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x3000 1873; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc 1874; 
GFX9-SDAG-NEXT: s_cbranch_execz .LBB14_6 1875; GFX9-SDAG-NEXT: ; %bb.1: ; %bb.0 1876; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 1877; GFX9-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 1878; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec 1879; GFX9-SDAG-NEXT: s_mov_b32 s10, 0 1880; GFX9-SDAG-NEXT: .LBB14_2: ; =>This Inner Loop Header: Depth=1 1881; GFX9-SDAG-NEXT: s_ff1_i32_b64 s9, s[6:7] 1882; GFX9-SDAG-NEXT: v_readlane_b32 s11, v1, s9 1883; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s9 1884; GFX9-SDAG-NEXT: s_max_u32 s10, s10, s11 1885; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0 1886; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB14_2 1887; GFX9-SDAG-NEXT: ; %bb.3: 1888; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff 1889; GFX9-SDAG-NEXT: s_and_b32 s9, s6, 0xfffff000 1890; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s9 1891; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s10, 6, v1 1892; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 1893; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v31 1894; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 1895; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0x1ff0, v1 1896; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec 1897; GFX9-SDAG-NEXT: s_mov_b32 s10, 0 1898; GFX9-SDAG-NEXT: .LBB14_4: ; =>This Inner Loop Header: Depth=1 1899; GFX9-SDAG-NEXT: s_ff1_i32_b64 s11, s[6:7] 1900; GFX9-SDAG-NEXT: v_readlane_b32 s12, v1, s11 1901; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s11 1902; GFX9-SDAG-NEXT: s_max_u32 s10, s10, s12 1903; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0 1904; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB14_4 1905; GFX9-SDAG-NEXT: ; %bb.5: 1906; GFX9-SDAG-NEXT: s_mov_b32 s6, s32 1907; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s6 1908; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, s10, 6, v1 1909; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v1 1910; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 3 1911; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s9 1912; GFX9-SDAG-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1913; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1914; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 4 1915; GFX9-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s6 1916; 
GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1917; GFX9-SDAG-NEXT: .LBB14_6: ; %bb.1 1918; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] 1919; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15 1920; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 2 1921; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0 1922; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec 1923; GFX9-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1 1924; GFX9-SDAG-NEXT: s_ff1_i32_b64 s6, s[4:5] 1925; GFX9-SDAG-NEXT: v_readlane_b32 s7, v0, s6 1926; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s6 1927; GFX9-SDAG-NEXT: s_max_u32 s8, s8, s7 1928; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 1929; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB14_7 1930; GFX9-SDAG-NEXT: ; %bb.8: 1931; GFX9-SDAG-NEXT: s_mov_b32 s4, s32 1932; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 1933; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s8, 6, v0 1934; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0 1935; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1 1936; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s33 1937; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1938; GFX9-SDAG-NEXT: buffer_store_dword v1, off, s[0:3], s4 1939; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1940; GFX9-SDAG-NEXT: s_mov_b32 s32, s34 1941; GFX9-SDAG-NEXT: s_mov_b32 s34, s14 1942; GFX9-SDAG-NEXT: s_mov_b32 s33, s13 1943; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1944; 1945; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas: 1946; GFX9-GISEL: ; %bb.0: ; %entry 1947; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1948; GFX9-GISEL-NEXT: s_mov_b32 s13, s33 1949; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0xfc0 1950; GFX9-GISEL-NEXT: s_mov_b32 s14, s34 1951; GFX9-GISEL-NEXT: s_mov_b32 s8, 0 1952; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1953; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xfffff000 1954; GFX9-GISEL-NEXT: s_mov_b32 s34, s32 1955; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x3000 1956; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc 1957; GFX9-GISEL-NEXT: s_cbranch_execz .LBB14_6 1958; GFX9-GISEL-NEXT: ; %bb.1: ; %bb.0 1959; GFX9-GISEL-NEXT: 
v_lshl_add_u32 v1, v1, 2, 15 1960; GFX9-GISEL-NEXT: v_and_b32_e32 v2, 0x3ff, v31 1961; GFX9-GISEL-NEXT: v_and_b32_e32 v1, -16, v1 1962; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], exec 1963; GFX9-GISEL-NEXT: s_mov_b32 s9, 0 1964; GFX9-GISEL-NEXT: .LBB14_2: ; =>This Inner Loop Header: Depth=1 1965; GFX9-GISEL-NEXT: s_ff1_i32_b64 s10, s[6:7] 1966; GFX9-GISEL-NEXT: v_readlane_b32 s11, v1, s10 1967; GFX9-GISEL-NEXT: s_bitset0_b64 s[6:7], s10 1968; GFX9-GISEL-NEXT: s_max_u32 s9, s9, s11 1969; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 1970; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB14_2 1971; GFX9-GISEL-NEXT: ; %bb.3: 1972; GFX9-GISEL-NEXT: s_add_u32 s7, s32, 0xfff 1973; GFX9-GISEL-NEXT: s_lshl_b32 s6, s9, 6 1974; GFX9-GISEL-NEXT: s_and_b32 s9, s7, 0xfffff000 1975; GFX9-GISEL-NEXT: v_lshl_add_u32 v1, v2, 2, 15 1976; GFX9-GISEL-NEXT: s_add_u32 s32, s9, s6 1977; GFX9-GISEL-NEXT: v_and_b32_e32 v1, -16, v1 1978; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], exec 1979; GFX9-GISEL-NEXT: s_mov_b32 s10, 0 1980; GFX9-GISEL-NEXT: .LBB14_4: ; =>This Inner Loop Header: Depth=1 1981; GFX9-GISEL-NEXT: s_ff1_i32_b64 s11, s[6:7] 1982; GFX9-GISEL-NEXT: v_readlane_b32 s12, v1, s11 1983; GFX9-GISEL-NEXT: s_bitset0_b64 s[6:7], s11 1984; GFX9-GISEL-NEXT: s_max_u32 s10, s10, s12 1985; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0 1986; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB14_4 1987; GFX9-GISEL-NEXT: ; %bb.5: 1988; GFX9-GISEL-NEXT: s_mov_b32 s6, s32 1989; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 3 1990; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s9 1991; GFX9-GISEL-NEXT: s_lshl_b32 s7, s10, 6 1992; GFX9-GISEL-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1993; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1994; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 4 1995; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s6 1996; GFX9-GISEL-NEXT: s_add_u32 s32, s6, s7 1997; GFX9-GISEL-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1998; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1999; GFX9-GISEL-NEXT: .LBB14_6: ; %bb.1 2000; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] 2001; 
GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 2002; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 2003; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec 2004; GFX9-GISEL-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1 2005; GFX9-GISEL-NEXT: s_ff1_i32_b64 s6, s[4:5] 2006; GFX9-GISEL-NEXT: v_readlane_b32 s7, v0, s6 2007; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s6 2008; GFX9-GISEL-NEXT: s_max_u32 s8, s8, s7 2009; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0 2010; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB14_7 2011; GFX9-GISEL-NEXT: ; %bb.8: 2012; GFX9-GISEL-NEXT: s_mov_b32 s4, s32 2013; GFX9-GISEL-NEXT: s_lshl_b32 s5, s8, 6 2014; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 1 2015; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5 2016; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], s33 2017; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 2018; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 2 2019; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4 2020; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen 2021; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 2022; GFX9-GISEL-NEXT: s_mov_b32 s32, s34 2023; GFX9-GISEL-NEXT: s_mov_b32 s34, s14 2024; GFX9-GISEL-NEXT: s_mov_b32 s33, s13 2025; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 2026; 2027; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_multiple_allocas: 2028; GFX11-SDAG: ; %bb.0: ; %entry 2029; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2030; GFX11-SDAG-NEXT: s_mov_b32 s7, s33 2031; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 63 2032; GFX11-SDAG-NEXT: s_mov_b32 s8, s34 2033; GFX11-SDAG-NEXT: s_mov_b32 s0, 0 2034; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo 2035; GFX11-SDAG-NEXT: s_and_not1_b32 s33, s33, 63 2036; GFX11-SDAG-NEXT: s_mov_b32 s34, s32 2037; GFX11-SDAG-NEXT: s_addk_i32 s32, 0xc0 2038; GFX11-SDAG-NEXT: v_cmpx_eq_u32_e32 0, v0 2039; GFX11-SDAG-NEXT: s_cbranch_execz .LBB14_6 2040; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.0 2041; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 2042; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo 2043; GFX11-SDAG-NEXT: s_mov_b32 s3, 0 2044; GFX11-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 2045; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 2046; GFX11-SDAG-NEXT: .LBB14_2: ; =>This Inner Loop Header: Depth=1 2047; GFX11-SDAG-NEXT: s_ctz_i32_b32 s4, s2 2048; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2049; GFX11-SDAG-NEXT: v_readlane_b32 s5, v1, s4 2050; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s4 2051; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2052; GFX11-SDAG-NEXT: s_max_u32 s3, s3, s5 2053; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0 2054; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_2 2055; GFX11-SDAG-NEXT: ; %bb.3: 2056; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v31 2057; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff 2058; GFX11-SDAG-NEXT: s_mov_b32 s4, exec_lo 2059; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800 2060; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 2061; GFX11-SDAG-NEXT: v_lshl_add_u32 v2, s3, 5, s2 2062; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15 2063; GFX11-SDAG-NEXT: s_mov_b32 s3, 0 2064; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v2 2065; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 2066; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0x1ff0, v1 2067; GFX11-SDAG-NEXT: .LBB14_4: ; =>This Inner Loop Header: Depth=1 2068; GFX11-SDAG-NEXT: s_ctz_i32_b32 s5, s4 2069; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2070; GFX11-SDAG-NEXT: v_readlane_b32 s6, v1, s5 2071; GFX11-SDAG-NEXT: s_bitset0_b32 s4, s5 2072; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2073; GFX11-SDAG-NEXT: s_max_u32 s3, s3, s6 2074; GFX11-SDAG-NEXT: s_cmp_lg_u32 s4, 0 2075; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_4 2076; GFX11-SDAG-NEXT: ; %bb.5: 2077; GFX11-SDAG-NEXT: s_mov_b32 s4, s32 2078; GFX11-SDAG-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 2079; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s3, 5, s4 2080; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s2 dlc 2081; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 2082; GFX11-SDAG-NEXT: scratch_store_b32 off, v3, s4 dlc 
2083; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 2084; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1 2085; GFX11-SDAG-NEXT: .LBB14_6: ; %bb.1 2086; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1 2087; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v0, 2, 15 2088; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2 2089; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo 2090; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 2091; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1 2092; GFX11-SDAG-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1 2093; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1 2094; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2095; GFX11-SDAG-NEXT: v_readlane_b32 s3, v1, s2 2096; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2 2097; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2098; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3 2099; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0 2100; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB14_7 2101; GFX11-SDAG-NEXT: ; %bb.8: 2102; GFX11-SDAG-NEXT: s_mov_b32 s1, s32 2103; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 1 2104; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s0, 5, s1 2105; GFX11-SDAG-NEXT: scratch_store_b32 off, v2, s33 dlc 2106; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 2107; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s1 dlc 2108; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 2109; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1 2110; GFX11-SDAG-NEXT: s_mov_b32 s32, s34 2111; GFX11-SDAG-NEXT: s_mov_b32 s34, s8 2112; GFX11-SDAG-NEXT: s_mov_b32 s33, s7 2113; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 2114; 2115; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas: 2116; GFX11-GISEL: ; %bb.0: ; %entry 2117; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2118; GFX11-GISEL-NEXT: s_mov_b32 s7, s33 2119; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 63 2120; GFX11-GISEL-NEXT: s_mov_b32 s8, s34 2121; GFX11-GISEL-NEXT: s_mov_b32 s0, 0 2122; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 2123; GFX11-GISEL-NEXT: s_and_not1_b32 s33, s33, 63 2124; GFX11-GISEL-NEXT: s_mov_b32 s34, 
s32 2125; GFX11-GISEL-NEXT: s_addk_i32 s32, 0xc0 2126; GFX11-GISEL-NEXT: v_cmpx_eq_u32_e32 0, v0 2127; GFX11-GISEL-NEXT: s_cbranch_execz .LBB14_6 2128; GFX11-GISEL-NEXT: ; %bb.1: ; %bb.0 2129; GFX11-GISEL-NEXT: v_lshl_add_u32 v2, v1, 2, 15 2130; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v31 2131; GFX11-GISEL-NEXT: s_mov_b32 s3, exec_lo 2132; GFX11-GISEL-NEXT: s_mov_b32 s2, 0 2133; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 2134; GFX11-GISEL-NEXT: v_and_b32_e32 v2, -16, v2 2135; GFX11-GISEL-NEXT: .LBB14_2: ; =>This Inner Loop Header: Depth=1 2136; GFX11-GISEL-NEXT: s_ctz_i32_b32 s4, s3 2137; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2138; GFX11-GISEL-NEXT: v_readlane_b32 s5, v2, s4 2139; GFX11-GISEL-NEXT: s_bitset0_b32 s3, s4 2140; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 2141; GFX11-GISEL-NEXT: s_max_u32 s2, s2, s5 2142; GFX11-GISEL-NEXT: s_cmp_lg_u32 s3, 0 2143; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB14_2 2144; GFX11-GISEL-NEXT: ; %bb.3: 2145; GFX11-GISEL-NEXT: v_lshl_add_u32 v1, v1, 2, 15 2146; GFX11-GISEL-NEXT: s_lshl_b32 s5, s2, 5 2147; GFX11-GISEL-NEXT: s_add_u32 s2, s32, 0x7ff 2148; GFX11-GISEL-NEXT: s_mov_b32 s4, exec_lo 2149; GFX11-GISEL-NEXT: s_and_b32 s2, s2, 0xfffff800 2150; GFX11-GISEL-NEXT: v_and_b32_e32 v1, -16, v1 2151; GFX11-GISEL-NEXT: s_mov_b32 s3, 0 2152; GFX11-GISEL-NEXT: s_add_u32 s32, s2, s5 2153; GFX11-GISEL-NEXT: .LBB14_4: ; =>This Inner Loop Header: Depth=1 2154; GFX11-GISEL-NEXT: s_ctz_i32_b32 s5, s4 2155; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2156; GFX11-GISEL-NEXT: v_readlane_b32 s6, v1, s5 2157; GFX11-GISEL-NEXT: s_bitset0_b32 s4, s5 2158; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 2159; GFX11-GISEL-NEXT: s_max_u32 s3, s3, s6 2160; GFX11-GISEL-NEXT: s_cmp_lg_u32 s4, 0 2161; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB14_4 2162; GFX11-GISEL-NEXT: ; %bb.5: 2163; GFX11-GISEL-NEXT: v_dual_mov_b32 v1, 3 :: v_dual_mov_b32 v2, 4 2164; GFX11-GISEL-NEXT: s_mov_b32 
s4, s32 2165; GFX11-GISEL-NEXT: s_lshl_b32 s3, s3, 5 2166; GFX11-GISEL-NEXT: scratch_store_b32 off, v1, s2 dlc 2167; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 2168; GFX11-GISEL-NEXT: scratch_store_b32 off, v2, s4 dlc 2169; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 2170; GFX11-GISEL-NEXT: s_add_u32 s32, s4, s3 2171; GFX11-GISEL-NEXT: .LBB14_6: ; %bb.1 2172; GFX11-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 2173; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15 2174; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo 2175; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 2176; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0 2177; GFX11-GISEL-NEXT: .LBB14_7: ; =>This Inner Loop Header: Depth=1 2178; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1 2179; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2180; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2 2181; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2 2182; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 2183; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3 2184; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0 2185; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB14_7 2186; GFX11-GISEL-NEXT: ; %bb.8: 2187; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 2188; GFX11-GISEL-NEXT: s_mov_b32 s1, s32 2189; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5 2190; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s33 dlc 2191; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 2192; GFX11-GISEL-NEXT: scratch_store_b32 off, v1, s1 dlc 2193; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 2194; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0 2195; GFX11-GISEL-NEXT: s_mov_b32 s32, s34 2196; GFX11-GISEL-NEXT: s_mov_b32 s34, s8 2197; GFX11-GISEL-NEXT: s_mov_b32 s33, s7 2198; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 2199entry: 2200 %cond = icmp eq i32 %n, 0 2201 %alloca1 = alloca i32, i32 8, addrspace(5) 2202 %alloca2 = alloca i32, i32 %n, addrspace(5) 2203 br i1 %cond, label %bb.0, label %bb.1 2204bb.0: 2205 %idx = call i32 @llvm.amdgcn.workitem.id.x() 2206 %alloca3 = alloca i32, i32 %m, 
align 64, addrspace(5)
  %alloca4 = alloca i32, i32 %idx, align 4, addrspace(5)
  store volatile i32 3, ptr addrspace(5) %alloca3
  store volatile i32 4, ptr addrspace(5) %alloca4
  br label %bb.1
bb.1:
  store volatile i32 1, ptr addrspace(5) %alloca1
  store volatile i32 2, ptr addrspace(5) %alloca2
  ret void
}

; Non-kernel function with one dynamic alloca on each side of a branch on
; %n == 0: %bb.0 allocates %idx (workitem id x) i32 elements with align 4,
; %bb.1 allocates %m i32 elements with align 64, and both arms join in %bb.2.
; In every run configuration the expected code reduces the per-lane byte size
; to its maximum over the lanes set in exec (readlane / s_max_u32 loop) before
; s32 is advanced. Only the align-64 arm first rounds s32 up (add + and with a
; mask), and s32 is reloaded from s34 just before the return.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_control_flow:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: s_mov_b32 s11, s33
; GFX9-SDAG-NEXT: s_add_i32 s33, s32, 0xfc0
; GFX9-SDAG-NEXT: s_mov_b32 s12, s34
; GFX9-SDAG-NEXT: s_mov_b32 s8, 0
; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-SDAG-NEXT: s_and_b32 s33, s33, 0xfffff000
; GFX9-SDAG-NEXT: s_mov_b32 s34, s32
; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x2000
; GFX9-SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-SDAG-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-SDAG-NEXT: s_cbranch_execz .LBB15_4
; GFX9-SDAG-NEXT: ; %bb.1: ; %bb.1
; GFX9-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 2
; GFX9-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec
; GFX9-SDAG-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1
; GFX9-SDAG-NEXT: s_ff1_i32_b64 s9, s[6:7]
; GFX9-SDAG-NEXT: v_readlane_b32 s10, v1, s9
; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s9
; GFX9-SDAG-NEXT: s_max_u32 s8, s8, s10
; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB15_2
; GFX9-SDAG-NEXT: ; %bb.3:
; GFX9-SDAG-NEXT: s_add_i32 s6, s32, 0xfff
; GFX9-SDAG-NEXT: s_and_b32 s6, s6, 0xfffff000
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s6
; GFX9-SDAG-NEXT: v_lshl_add_u32 v2, s8, 6, v1
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v2
; GFX9-SDAG-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: ; implicit-def: $vgpr31
; GFX9-SDAG-NEXT: .LBB15_4: ; %Flow
; GFX9-SDAG-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX9-SDAG-NEXT: s_cbranch_execz .LBB15_8
; GFX9-SDAG-NEXT: ; %bb.5: ; %bb.0
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], exec
; GFX9-SDAG-NEXT: s_mov_b32 s8, 0
; GFX9-SDAG-NEXT: .LBB15_6: ; =>This Inner Loop Header: Depth=1
; GFX9-SDAG-NEXT: s_ff1_i32_b64 s9, s[6:7]
; GFX9-SDAG-NEXT: v_readlane_b32 s10, v0, s9
; GFX9-SDAG-NEXT: s_bitset0_b64 s[6:7], s9
; GFX9-SDAG-NEXT: s_max_u32 s8, s8, s10
; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[6:7], 0
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB15_6
; GFX9-SDAG-NEXT: ; %bb.7:
; GFX9-SDAG-NEXT: s_mov_b32 s6, s32
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s8, 6, v0
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 1
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s6
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: .LBB15_8: ; %bb.2
; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-SDAG-NEXT: s_mov_b32 s32, s34
; GFX9-SDAG-NEXT: s_mov_b32 s34, s12
; GFX9-SDAG-NEXT: s_mov_b32 s33, s11
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_control_flow:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_mov_b32 s11, s33
; GFX9-GISEL-NEXT: s_add_i32 s33, s32, 0xfc0
; GFX9-GISEL-NEXT: s_mov_b32 s12, s34
; GFX9-GISEL-NEXT: s_mov_b32 s8, 0
; GFX9-GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-GISEL-NEXT: s_and_b32 s33, s33, 0xfffff000
; GFX9-GISEL-NEXT: s_mov_b32 s34, s32
; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x2000
; GFX9-GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
; GFX9-GISEL-NEXT: s_cbranch_execz .LBB15_4
; GFX9-GISEL-NEXT: ; %bb.1: ; %bb.1
; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v1, 2, 15
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9-GISEL-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_ff1_i32_b64 s9, s[6:7]
; GFX9-GISEL-NEXT: v_readlane_b32 s10, v0, s9
; GFX9-GISEL-NEXT: s_bitset0_b64 s[6:7], s9
; GFX9-GISEL-NEXT: s_max_u32 s8, s8, s10
; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB15_2
; GFX9-GISEL-NEXT: ; %bb.3:
; GFX9-GISEL-NEXT: s_add_u32 s7, s32, 0xfff
; GFX9-GISEL-NEXT: s_and_b32 s7, s7, 0xfffff000
; GFX9-GISEL-NEXT: s_lshl_b32 s6, s8, 6
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s7
; GFX9-GISEL-NEXT: s_add_u32 s32, s7, s6
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: ; implicit-def: $vgpr31
; GFX9-GISEL-NEXT: .LBB15_4: ; %Flow
; GFX9-GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX9-GISEL-NEXT: s_cbranch_execz .LBB15_8
; GFX9-GISEL-NEXT: ; %bb.5: ; %bb.0
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], exec
; GFX9-GISEL-NEXT: s_mov_b32 s8, 0
; GFX9-GISEL-NEXT: .LBB15_6: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_ff1_i32_b64 s9, s[6:7]
; GFX9-GISEL-NEXT: v_readlane_b32 s10, v0, s9
; GFX9-GISEL-NEXT: s_bitset0_b64 s[6:7], s9
; GFX9-GISEL-NEXT: s_max_u32 s8, s8, s10
; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[6:7], 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB15_6
; GFX9-GISEL-NEXT: ; %bb.7:
; GFX9-GISEL-NEXT: s_mov_b32 s6, s32
; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, 6
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s6
; GFX9-GISEL-NEXT: s_add_u32 s32, s6, s7
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: .LBB15_8: ; %bb.2
; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-GISEL-NEXT: s_mov_b32 s32, s34
; GFX9-GISEL-NEXT: s_mov_b32 s34, s12
; GFX9-GISEL-NEXT: s_mov_b32 s33, s11
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_control_flow:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: s_mov_b32 s5, s33
; GFX11-SDAG-NEXT: s_add_i32 s33, s32, 63
; GFX11-SDAG-NEXT: s_mov_b32 s6, s34
; GFX11-SDAG-NEXT: s_mov_b32 s1, 0
; GFX11-SDAG-NEXT: s_mov_b32 s0, exec_lo
; GFX11-SDAG-NEXT: s_and_not1_b32 s33, s33, 63
; GFX11-SDAG-NEXT: s_mov_b32 s34, s32
; GFX11-SDAG-NEXT: s_addk_i32 s32, 0x80
; GFX11-SDAG-NEXT: v_cmpx_ne_u32_e32 0, v0
; GFX11-SDAG-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_4
; GFX11-SDAG-NEXT: ; %bb.1: ; %bb.1
; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, v1, 2, 15
; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 2
; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_and_b32_e32 v1, -16, v1
; GFX11-SDAG-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_readlane_b32 s4, v1, s3
; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB15_2
; GFX11-SDAG-NEXT: ; %bb.3:
; GFX11-SDAG-NEXT: s_add_i32 s2, s32, 0x7ff
; GFX11-SDAG-NEXT: ; implicit-def: $vgpr31
; GFX11-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: s_and_b32 s2, s2, 0xfffff800
; GFX11-SDAG-NEXT: v_lshl_add_u32 v1, s1, 5, s2
; GFX11-SDAG-NEXT: scratch_store_b32 off, v0, s2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v1
; GFX11-SDAG-NEXT: .LBB15_4: ; %Flow
; GFX11-SDAG-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX11-SDAG-NEXT: s_cbranch_execz .LBB15_8
; GFX11-SDAG-NEXT: ; %bb.5: ; %bb.0
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GFX11-SDAG-NEXT: s_mov_b32 s2, exec_lo
; GFX11-SDAG-NEXT: s_mov_b32 s1, 0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x1ff0, v0
; GFX11-SDAG-NEXT: .LBB15_6: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s3, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_readlane_b32 s4, v0, s3
; GFX11-SDAG-NEXT: s_bitset0_b32 s2, s3
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s1, s1, s4
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s2, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB15_6
; GFX11-SDAG-NEXT: ; %bb.7:
; GFX11-SDAG-NEXT: s_mov_b32 s2, s32
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 1
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s1, 5, s2
; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s2 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: .LBB15_8: ; %bb.2
; GFX11-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-SDAG-NEXT: s_mov_b32 s32, s34
; GFX11-SDAG-NEXT: s_mov_b32 s34, s6
; GFX11-SDAG-NEXT: s_mov_b32 s33, s5
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_control_flow:
; GFX11-GISEL: ; %bb.0: ; %entry
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: s_mov_b32 s5, s33
; GFX11-GISEL-NEXT: s_add_i32 s33, s32, 63
; GFX11-GISEL-NEXT: s_mov_b32 s6, s34
; GFX11-GISEL-NEXT: s_mov_b32 s1, 0
; GFX11-GISEL-NEXT: s_mov_b32 s0, exec_lo
; GFX11-GISEL-NEXT: s_and_not1_b32 s33, s33, 63
; GFX11-GISEL-NEXT: s_mov_b32 s34, s32
; GFX11-GISEL-NEXT: s_addk_i32 s32, 0x80
; GFX11-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v0
; GFX11-GISEL-NEXT: s_xor_b32 s0, exec_lo, s0
; GFX11-GISEL-NEXT: s_cbranch_execz .LBB15_4
; GFX11-GISEL-NEXT: ; %bb.1: ; %bb.1
; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v1, 2, 15
; GFX11-GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-GISEL-NEXT: .LBB15_2: ; =>This Inner Loop Header: Depth=1
; GFX11-GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_readlane_b32 s4, v0, s3
; GFX11-GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: s_max_u32 s1, s1, s4
; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB15_2
; GFX11-GISEL-NEXT: ; %bb.3:
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 2
; GFX11-GISEL-NEXT: s_add_u32 s2, s32, 0x7ff
; GFX11-GISEL-NEXT: s_lshl_b32 s1, s1, 5
; GFX11-GISEL-NEXT: s_and_b32 s2, s2, 0xfffff800
; GFX11-GISEL-NEXT: ; implicit-def: $vgpr31
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s2, s1
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: .LBB15_4: ; %Flow
; GFX11-GISEL-NEXT: s_and_not1_saveexec_b32 s0, s0
; GFX11-GISEL-NEXT: s_cbranch_execz .LBB15_8
; GFX11-GISEL-NEXT: ; %bb.5: ; %bb.0
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v31
; GFX11-GISEL-NEXT: s_mov_b32 s2, exec_lo
; GFX11-GISEL-NEXT: s_mov_b32 s1, 0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-GISEL-NEXT: .LBB15_6: ; =>This Inner Loop Header: Depth=1
; GFX11-GISEL-NEXT: s_ctz_i32_b32 s3, s2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_readlane_b32 s4, v0, s3
; GFX11-GISEL-NEXT: s_bitset0_b32 s2, s3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: s_max_u32 s1, s1, s4
; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB15_6
; GFX11-GISEL-NEXT: ; %bb.7:
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 1
; GFX11-GISEL-NEXT: s_mov_b32 s2, s32
; GFX11-GISEL-NEXT: s_lshl_b32 s1, s1, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s2, s1
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s2 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: .LBB15_8: ; %bb.2
; GFX11-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX11-GISEL-NEXT: s_mov_b32 s32, s34
; GFX11-GISEL-NEXT: s_mov_b32 s34, s6
; GFX11-GISEL-NEXT: s_mov_b32 s33, s5
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
entry:
  %cond = icmp eq i32 %n, 0
  br i1 %cond, label %bb.0, label %bb.1
bb.0:
  ; Element count is the workitem id; alignment is the plain i32 alignment.
  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  %alloca1 = alloca i32, i32 %idx, align 4, addrspace(5)
  store volatile i32 1, ptr addrspace(5) %alloca1
  br label %bb.2
bb.1:
  ; Element count is the %m argument; this is the over-aligned (64) alloca.
  %alloca2 = alloca i32, i32 %m, align 64, addrspace(5)
  store volatile i32 2, ptr addrspace(5) %alloca2
  br label %bb.2
bb.2:
  ret void
}

; Dynamic alloca whose element count is an i16 instead of an i32, with a
; requested alignment of 2. The expected code masks the count with 0xffff,
; then computes count * 4 + 15 and clears the low four bits before the lane
; reduction loop. The global-isel=0 runs expect the mask 0x7fff0 for that last
; step, the global-isel=1 runs expect -16. No rounding of s32 is expected
; before the allocation.
define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 %n) {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-SDAG-NEXT: s_mov_b32 s9, s33
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff0, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
; GFX9-SDAG-NEXT: s_mov_b32 s6, 0
; GFX9-SDAG-NEXT: s_mov_b32 s33, s32
; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400
; GFX9-SDAG-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1
; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8
; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB16_1
; GFX9-SDAG-NEXT: ; %bb.2:
; GFX9-SDAG-NEXT: s_mov_b32 s4, s32
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_mov_b32 s32, s33
; GFX9-SDAG-NEXT: s_mov_b32 s33, s9
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-GISEL-NEXT: s_mov_b32 s9, s33
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9-GISEL-NEXT: s_mov_b32 s6, 0
; GFX9-GISEL-NEXT: s_mov_b32 s33, s32
; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400
; GFX9-GISEL-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB16_1
; GFX9-GISEL-NEXT: ; %bb.2:
; GFX9-GISEL-NEXT: s_mov_b32 s4, s32
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6
; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_mov_b32 s32, s33
; GFX9-GISEL-NEXT: s_mov_b32 s33, s9
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-SDAG-NEXT: s_mov_b32 s4, s33
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
; GFX11-SDAG-NEXT: s_mov_b32 s0, 0
; GFX11-SDAG-NEXT: s_mov_b32 s33, s32
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff0, v0
; GFX11-SDAG-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB16_1
; GFX11-SDAG-NEXT: ; %bb.2:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x29a
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: s_mov_b32 s32, s33
; GFX11-SDAG-NEXT: s_mov_b32 s33, s4
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-GISEL-NEXT: s_mov_b32 s4, s33
; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
; GFX11-GISEL-NEXT: s_mov_b32 s33, s32
; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-GISEL-NEXT: .LBB16_1: ; =>This Inner Loop Header: Depth=1
; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3
; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB16_1
; GFX11-GISEL-NEXT: ; %bb.2:
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX11-GISEL-NEXT: s_mov_b32 s1, s32
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b32 s32, s33
; GFX11-GISEL-NEXT: s_mov_b32 s33, s4
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  ; i16 element count; 666 is the 0x29a constant stored in the checks above.
  %alloca = alloca i32, i16 %n, align 2, addrspace(5)
  store volatile i32 666, ptr addrspace(5) %alloca
  ret void
}

; Dynamic alloca whose element count is an i64 instead of an i32, with a
; requested alignment of 2. The expected code computes the size from v0 alone
; (v0 * 4 + 15, low four bits cleared) in all four run configurations; no
; other argument register appears in the checks.
; NOTE(review): presumably v0 carries the low 32 bits of %n, i.e. the count is
; truncated to 32 bits - confirm against the AMDGPU calling convention.
define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 %n) {
; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-SDAG-NEXT: s_mov_b32 s9, s33
; GFX9-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], exec
; GFX9-SDAG-NEXT: s_mov_b32 s6, 0
; GFX9-SDAG-NEXT: s_mov_b32 s33, s32
; GFX9-SDAG-NEXT: s_addk_i32 s32, 0x400
; GFX9-SDAG-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1
; GFX9-SDAG-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-SDAG-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-SDAG-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-SDAG-NEXT: s_max_u32 s6, s6, s8
; GFX9-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-SDAG-NEXT: s_cbranch_scc1 .LBB17_1
; GFX9-SDAG-NEXT: ; %bb.2:
; GFX9-SDAG-NEXT: s_mov_b32 s4, s32
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
; GFX9-SDAG-NEXT: v_lshl_add_u32 v0, s6, 6, v0
; GFX9-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], s4
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_mov_b32 s32, s33
; GFX9-SDAG-NEXT: s_mov_b32 s33, s9
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX9-GISEL-NEXT: s_mov_b32 s9, s33
; GFX9-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], exec
; GFX9-GISEL-NEXT: s_mov_b32 s6, 0
; GFX9-GISEL-NEXT: s_mov_b32 s33, s32
; GFX9-GISEL-NEXT: s_addk_i32 s32, 0x400
; GFX9-GISEL-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1
; GFX9-GISEL-NEXT: s_ff1_i32_b64 s7, s[4:5]
; GFX9-GISEL-NEXT: v_readlane_b32 s8, v0, s7
; GFX9-GISEL-NEXT: s_bitset0_b64 s[4:5], s7
; GFX9-GISEL-NEXT: s_max_u32 s6, s6, s8
; GFX9-GISEL-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB17_1
; GFX9-GISEL-NEXT: ; %bb.2:
; GFX9-GISEL-NEXT: s_mov_b32 s4, s32
; GFX9-GISEL-NEXT: s_lshl_b32 s5, s6, 6
; GFX9-GISEL-NEXT: s_add_u32 s32, s4, s5
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s4
; GFX9-GISEL-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_mov_b32 s32, s33
; GFX9-GISEL-NEXT: s_mov_b32 s33, s9
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-SDAG-NEXT: s_mov_b32 s4, s33
; GFX11-SDAG-NEXT: s_mov_b32 s1, exec_lo
; GFX11-SDAG-NEXT: s_mov_b32 s0, 0
; GFX11-SDAG-NEXT: s_mov_b32 s33, s32
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-SDAG-NEXT: s_add_i32 s32, s32, 16
; GFX11-SDAG-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1
; GFX11-SDAG-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-SDAG-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-SDAG-NEXT: s_bitset0_b32 s1, s2
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: s_max_u32 s0, s0, s3
; GFX11-SDAG-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-SDAG-NEXT: s_cbranch_scc1 .LBB17_1
; GFX11-SDAG-NEXT: ; %bb.2:
; GFX11-SDAG-NEXT: s_mov_b32 s1, s32
; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x29a
; GFX11-SDAG-NEXT: v_lshl_add_u32 v0, s0, 5, s1
; GFX11-SDAG-NEXT: scratch_store_b32 off, v1, s1 dlc
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT: v_readfirstlane_b32 s32, v0
; GFX11-SDAG-NEXT: s_mov_b32 s32, s33
; GFX11-SDAG-NEXT: s_mov_b32 s33, s4
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_lshl_add_u32 v0, v0, 2, 15
; GFX11-GISEL-NEXT: s_mov_b32 s4, s33
; GFX11-GISEL-NEXT: s_mov_b32 s1, exec_lo
; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
; GFX11-GISEL-NEXT: s_mov_b32 s33, s32
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, -16, v0
; GFX11-GISEL-NEXT: s_add_i32 s32, s32, 16
; GFX11-GISEL-NEXT: .LBB17_1: ; =>This Inner Loop Header: Depth=1
; GFX11-GISEL-NEXT: s_ctz_i32_b32 s2, s1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_readlane_b32 s3, v0, s2
; GFX11-GISEL-NEXT: s_bitset0_b32 s1, s2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: s_max_u32 s0, s0, s3
; GFX11-GISEL-NEXT: s_cmp_lg_u32 s1, 0
; GFX11-GISEL-NEXT: s_cbranch_scc1 .LBB17_1
; GFX11-GISEL-NEXT: ; %bb.2:
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0x29a
; GFX11-GISEL-NEXT: s_mov_b32 s1, s32
; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 5
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_add_u32 s32, s1, s0
; GFX11-GISEL-NEXT: scratch_store_b32 off, v0, s1 dlc
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT: s_mov_b32 s32, s33
; GFX11-GISEL-NEXT: s_mov_b32 s33, s4
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
  ; i64 element count; 666 is the 0x29a constant stored in the checks above.
  %alloca = alloca i32, i64 %n, align 2, addrspace(5)
  store volatile i32 666, ptr addrspace(5) %alloca
  ret void
}