; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s

; CHECK: .amdgpu_pal_metadata
; CHECK-NEXT: ---
; CHECK-NEXT: amdpal.pipelines:
; CHECK-NEXT: - .api: Vulkan
; CHECK-NEXT: .compute_registers:
; CHECK-NEXT: .tg_size_en: true
; CHECK-NEXT: .tgid_x_en: false
; CHECK-NEXT: .tgid_y_en: false
; CHECK-NEXT: .tgid_z_en: false
; CHECK-NEXT: .tidig_comp_cnt: 0x1
; CHECK-NEXT: .hardware_stages:
; CHECK-NEXT: .cs:
; CHECK-NEXT: .checksum_value: 0x9444d7d0
; CHECK-NEXT: .debug_mode: 0
; CHECK-NEXT: .excp_en: 0
; CHECK-NEXT: .float_mode: 0xc0
; CHECK-NEXT: .ieee_mode: true
; CHECK-NEXT: .image_op: false
; CHECK-NEXT: .lds_size: 0x200
; CHECK-NEXT: .mem_ordered: true
; CHECK-NEXT: .sgpr_limit: 0x6a
; CHECK-NEXT: .threadgroup_dimensions:
; CHECK-NEXT: - 0x1
; CHECK-NEXT: - 0x400
; CHECK-NEXT: - 0x1
; CHECK-NEXT: .trap_present: false
; CHECK-NEXT: .user_data_reg_map:
; CHECK-NEXT: - 0x10000000
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: - 0xffffffff
; CHECK-NEXT: .user_sgprs: 0x3
; CHECK-NEXT: .vgpr_limit: 0x100
; CHECK-NEXT: .wavefront_size: 0x40
; CHECK-NEXT: .wgp_mode: true
; CHECK: .registers: {}
; CHECK-NEXT: .shader_functions:
; CHECK-NEXT: dynamic_stack:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x22
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x2
; CHECK-NEXT: dynamic_stack_loop:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x22
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x3
; CHECK-NEXT: multiple_stack:
; CHECK-NEXT: .backend_stack_size: 0x24
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x21
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x24
; CHECK-NEXT: .vgpr_count: 0x3
; CHECK-NEXT: no_stack:
; CHECK-NEXT: .backend_stack_size: 0
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x20
; CHECK-NEXT: .stack_frame_size_in_bytes: 0
; CHECK-NEXT: .vgpr_count: 0x1
; CHECK-NEXT: no_stack_call:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x22
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x3
; CHECK-NEXT: no_stack_extern_call:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x29
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: no_stack_extern_call_many_args:
; CHECK-NEXT: .backend_stack_size: 0x90
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x29
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x90
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: no_stack_indirect_call:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x29
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_lds:
; CHECK-NEXT: .backend_stack_size: 0
; CHECK-NEXT: .lds_size: 0x100
; CHECK-NEXT: .sgpr_count: 0x20
; CHECK-NEXT: .stack_frame_size_in_bytes: 0
; CHECK-NEXT: .vgpr_count: 0x1
; CHECK-NEXT: simple_lds_recurse:
; CHECK-NEXT: .backend_stack_size: 0x10
; CHECK-NEXT: .lds_size: 0x100
; CHECK-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x10
; CHECK-NEXT: .vgpr_count: 0x29
; CHECK-NEXT: simple_stack:
; CHECK-NEXT: .backend_stack_size: 0x14
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x21
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x14
; CHECK-NEXT: .vgpr_count: 0x2
; CHECK-NEXT: simple_stack_call:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x22
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x4
; CHECK-NEXT: simple_stack_extern_call:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x29
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_stack_indirect_call:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x29
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x58
; CHECK-NEXT: simple_stack_recurse:
; CHECK-NEXT: .backend_stack_size: 0x20
; CHECK-NEXT: .lds_size: 0
; CHECK-NEXT: .sgpr_count: 0x24
; CHECK-NEXT: .stack_frame_size_in_bytes: 0x20
; CHECK-NEXT: .vgpr_count: 0x2a
; CHECK:amdpal.version:
; CHECK-NEXT: - 0x3
; CHECK-NEXT: - 0
; CHECK-NEXT:...
162; CHECK-NEXT: .end_amdgpu_pal_metadata 163 164declare amdgpu_gfx float @extern_func(float) #0 165declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0 166 167@funcptr = external hidden unnamed_addr addrspace(4) constant ptr, align 4 168 169define amdgpu_gfx float @no_stack(float %arg0) #0 { 170 %add = fadd float %arg0, 1.0 171 ret float %add 172} 173 174define amdgpu_gfx float @simple_stack(float %arg0) #0 { 175 %stack = alloca float, i32 4, align 4, addrspace(5) 176 store volatile float 2.0, ptr addrspace(5) %stack 177 %val = load volatile float, ptr addrspace(5) %stack 178 %add = fadd float %arg0, %val 179 ret float %add 180} 181 182define amdgpu_gfx float @multiple_stack(float %arg0) #0 { 183 %stack = alloca float, i32 4, align 4, addrspace(5) 184 store volatile float 2.0, ptr addrspace(5) %stack 185 %val = load volatile float, ptr addrspace(5) %stack 186 %add = fadd float %arg0, %val 187 %stack2 = alloca float, i32 4, align 4, addrspace(5) 188 store volatile float 2.0, ptr addrspace(5) %stack2 189 %val2 = load volatile float, ptr addrspace(5) %stack2 190 %add2 = fadd float %add, %val2 191 ret float %add2 192} 193 194define amdgpu_gfx float @dynamic_stack(float %arg0) #0 { 195bb0: 196 %cmp = fcmp ogt float %arg0, 0.0 197 br i1 %cmp, label %bb1, label %bb2 198 199bb1: 200 %stack = alloca float, i32 4, align 4, addrspace(5) 201 store volatile float 2.0, ptr addrspace(5) %stack 202 %val = load volatile float, ptr addrspace(5) %stack 203 %add = fadd float %arg0, %val 204 br label %bb2 205 206bb2: 207 %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ] 208 ret float %res 209} 210 211define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 { 212bb0: 213 br label %bb1 214 215bb1: 216 %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ] 217 %stack = alloca float, i32 4, align 4, addrspace(5) 218 store volatile float 2.0, ptr addrspace(5) %stack 219 %val = load volatile float, ptr addrspace(5) %stack 220 %add = fadd float %arg0, %val 221 %cmp = icmp sgt i32 %ctr, 0 
222 %newctr = sub i32 %ctr, 1 223 br i1 %cmp, label %bb1, label %bb2 224 225bb2: 226 ret float %add 227} 228 229define amdgpu_gfx float @no_stack_call(float %arg0) #0 { 230 %res = call amdgpu_gfx float @simple_stack(float %arg0) 231 ret float %res 232} 233 234define amdgpu_gfx float @simple_stack_call(float %arg0) #0 { 235 %stack = alloca float, i32 4, align 4, addrspace(5) 236 store volatile float 2.0, ptr addrspace(5) %stack 237 %val = load volatile float, ptr addrspace(5) %stack 238 %res = call amdgpu_gfx float @simple_stack(float %arg0) 239 %add = fadd float %res, %val 240 ret float %add 241} 242 243define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 { 244 %res = call amdgpu_gfx float @extern_func(float %arg0) 245 ret float %res 246} 247 248define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 { 249 %stack = alloca float, i32 4, align 4, addrspace(5) 250 store volatile float 2.0, ptr addrspace(5) %stack 251 %val = load volatile float, ptr addrspace(5) %stack 252 %res = call amdgpu_gfx float @extern_func(float %arg0) 253 %add = fadd float %res, %val 254 ret float %add 255} 256 257define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 { 258 %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0) 259 ret float %res 260} 261 262define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 { 263 %fptr = load ptr, ptr addrspace(4) @funcptr 264 call amdgpu_gfx void %fptr() 265 ret float %arg0 266} 267 268define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 { 269 %stack = alloca float, i32 4, align 4, addrspace(5) 270 store volatile float 2.0, ptr addrspace(5) %stack 271 %val = load volatile float, ptr addrspace(5) %stack 272 %fptr = load ptr, ptr addrspace(4) @funcptr 273 call amdgpu_gfx void %fptr() 274 %add = fadd float %arg0, %val 275 ret float %add 276} 277 278define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 { 279 %stack = alloca float, i32 4, align 4, addrspace(5) 280 
store volatile float 2.0, ptr addrspace(5) %stack 281 %val = load volatile float, ptr addrspace(5) %stack 282 %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0) 283 %add = fadd float %res, %val 284 ret float %add 285} 286 287@lds = internal addrspace(3) global [64 x float] undef 288 289define amdgpu_gfx float @simple_lds(float %arg0) #0 { 290 %val = load float, ptr addrspace(3) @lds 291 ret float %val 292} 293 294define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 { 295 %val = load float, ptr addrspace(3) @lds 296 %res = call amdgpu_gfx float @simple_lds_recurse(float %val) 297 ret float %res 298} 299 300attributes #0 = { nounwind } 301 302!amdgpu.pal.metadata.msgpack = !{!0} 303 304!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 
\CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"} 305!1 = !{i32 7} 306