; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Make sure the stack is never realigned for entry functions.
;
; The file is compiled twice (fiji and gfx900) and each run is verified with its
; own check prefix. Every kernel below first stores to a one-byte alloca so the
; following alloca cannot sit at offset 0, then stores to a second alloca. The
; expectations pin the scratch offset used for that second store and the
; private segment size recorded in the kernel descriptor.
;
; NOTE(review): the check lines follow the layout normally produced by
; utils/update_llc_test_checks.py - confirm, and regenerate rather than
; hand-edit if so.

; Attribute group #0 (nounwind only). The second alloca asks for 128-byte
; alignment: the expected dword store uses offset 128 and the expected private
; segment size is 256.
define amdgpu_kernel void @max_alignment_128() #0 {
; VI-LABEL: max_alignment_128:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s17
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 3
; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,"a"
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel max_alignment_128
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
; VI-NEXT:     .amdhsa_kernarg_size 56
; VI-NEXT:     .amdhsa_user_sgpr_count 14
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 1
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 2
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 18
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_reserve_flat_scratch 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: max_alignment_128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s17
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 3
; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,"a"
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT:     .amdhsa_kernarg_size 56
; GFX9-NEXT:     .amdhsa_user_sgpr_count 14
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 1
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 2
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 18
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:     .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
  store volatile i8 3, ptr addrspace(5) %clutter
  %alloca.align = alloca i32, align 128, addrspace(5)
  store volatile i32 9, ptr addrspace(5) %alloca.align, align 128
  ret void
}

; Attribute group #1 adds "stackrealign". The second alloca only needs 4-byte
; alignment: the expected dword store uses offset 4 and the expected private
; segment size is 12.
define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-LABEL: stackrealign_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s17
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 3
; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,"a"
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel stackrealign_attr
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 12
; VI-NEXT:     .amdhsa_kernarg_size 56
; VI-NEXT:     .amdhsa_user_sgpr_count 14
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 1
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 2
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 18
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_reserve_flat_scratch 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: stackrealign_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s17
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 3
; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,"a"
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 12
; GFX9-NEXT:     .amdhsa_kernarg_size 56
; GFX9-NEXT:     .amdhsa_user_sgpr_count 14
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 1
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 2
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 18
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:     .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
  store volatile i8 3, ptr addrspace(5) %clutter
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, ptr addrspace(5) %alloca.align, align 4
  ret void
}

; Attribute group #2 sets alignstack=128. The second alloca only needs 4-byte
; alignment: the expected dword store still uses offset 4, while the expected
; private segment size is 128.
define amdgpu_kernel void @alignstack_attr() #2 {
; VI-LABEL: alignstack_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s17
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 3
; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,"a"
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel alignstack_attr
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
; VI-NEXT:     .amdhsa_kernarg_size 56
; VI-NEXT:     .amdhsa_user_sgpr_count 14
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 1
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 2
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 18
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_reserve_flat_scratch 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: alignstack_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s17
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 3
; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,"a"
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel alignstack_attr
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT:     .amdhsa_kernarg_size 56
; GFX9-NEXT:     .amdhsa_user_sgpr_count 14
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 1
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 2
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 18
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:     .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %clutter = alloca i8, addrspace(5) ; Force non-zero offset for next alloca
  store volatile i8 3, ptr addrspace(5) %clutter
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, ptr addrspace(5) %alloca.align, align 4
  ret void
}

; Attribute groups referenced by the three kernels above.
attributes #0 = { nounwind }
attributes #1 = { nounwind "stackrealign" }
attributes #2 = { nounwind alignstack=128 }

; Module flag requesting amdhsa_code_object_version 400.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}