; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s

; CHECK-LABEL: {{^}}_amdgpu_cs_main:
; CHECK: ; TotalNumSgprs: 4
; CHECK: ; NumVgprs: 2
; CHECK: .amdgpu_pal_metadata
; CHECK-NEXT: ---
; CHECK-NEXT: amdpal.pipelines:
; CHECK-NEXT:   - .api: Vulkan
; CHECK-NEXT:     .compute_registers:
; CHECK-NEXT:       .tg_size_en: true
; CHECK-NEXT:       .tgid_x_en: false
; CHECK-NEXT:       .tgid_y_en: false
; CHECK-NEXT:       .tgid_z_en: false
; CHECK-NEXT:       .tidig_comp_cnt: 0x1
; CHECK-NEXT:     .graphics_registers:
; CHECK-NEXT:       .ps_extra_lds_size: 0
; CHECK-NEXT:       .spi_ps_input_addr:
; CHECK-NEXT:         .ancillary_ena: false
; CHECK-NEXT:         .front_face_ena: true
; CHECK-NEXT:         .line_stipple_tex_ena: false
; CHECK-NEXT:         .linear_center_ena: true
; CHECK-NEXT:         .linear_centroid_ena: true
; CHECK-NEXT:         .linear_sample_ena: true
; CHECK-NEXT:         .persp_center_ena: true
; CHECK-NEXT:         .persp_centroid_ena: true
; CHECK-NEXT:         .persp_pull_model_ena: false
; CHECK-NEXT:         .persp_sample_ena: true
; CHECK-NEXT:         .pos_fixed_pt_ena: true
; CHECK-NEXT:         .pos_w_float_ena: false
; CHECK-NEXT:         .pos_x_float_ena: false
; CHECK-NEXT:         .pos_y_float_ena: false
; CHECK-NEXT:         .pos_z_float_ena: false
; CHECK-NEXT:         .sample_coverage_ena: false
; CHECK-NEXT:       .spi_ps_input_ena:
; CHECK-NEXT:         .ancillary_ena: false
; CHECK-NEXT:         .front_face_ena: false
; CHECK-NEXT:         .line_stipple_tex_ena: false
; CHECK-NEXT:         .linear_center_ena: false
; CHECK-NEXT:         .linear_centroid_ena: false
; CHECK-NEXT:         .linear_sample_ena: false
; CHECK-NEXT:         .persp_center_ena: false
; CHECK-NEXT:         .persp_centroid_ena: false
; CHECK-NEXT:         .persp_pull_model_ena: false
; CHECK-NEXT:         .persp_sample_ena: true
; CHECK-NEXT:         .pos_fixed_pt_ena: false
; CHECK-NEXT:         .pos_w_float_ena: false
; CHECK-NEXT:         .pos_x_float_ena: false
; CHECK-NEXT:         .pos_y_float_ena: false
; CHECK-NEXT:         .pos_z_float_ena: false
; CHECK-NEXT:         .sample_coverage_ena: false
; CHECK-NEXT:     .hardware_stages:
; CHECK-NEXT:       .cs:
; CHECK-NEXT:         .checksum_value: 0x9444d7d0
; CHECK-NEXT:         .debug_mode: false
; CHECK-NEXT:         .entry_point_symbol: _amdgpu_cs_main
; CHECK-NEXT:         .excp_en: 0
; CHECK-NEXT:         .float_mode: 0xc0
; CHECK-NEXT:         .ieee_mode: false
; CHECK-NEXT:         .image_op: false
; CHECK-NEXT:         .lds_size: 0
; CHECK-NEXT:         .mem_ordered: true
; CHECK-NEXT:         .scratch_en: false
; CHECK-NEXT:         .scratch_memory_size: 0
; CHECK-NEXT:         .sgpr_count: 0x4
; CHECK-NEXT:         .sgpr_limit: 0x6a
; CHECK-NEXT:         .threadgroup_dimensions:
; CHECK-NEXT:           - 0x1
; CHECK-NEXT:           - 0x400
; CHECK-NEXT:           - 0x1
; CHECK-NEXT:         .trap_present: false
; CHECK-NEXT:         .user_data_reg_map:
; CHECK-NEXT:           - 0x10000000
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:           - 0xffffffff
; CHECK-NEXT:         .user_sgprs: 0x3
; CHECK-NEXT:         .vgpr_count: 0x2
; CHECK-NEXT:         .vgpr_limit: 0x100
; CHECK-NEXT:         .wavefront_size: 0x40
; CHECK-NEXT:         .wgp_mode: false
; CHECK-NEXT:       .gs:
; CHECK-NEXT:         .debug_mode: false
; CHECK-NEXT:         .entry_point_symbol: gs_shader
; CHECK-NEXT:         .ieee_mode: false
; CHECK-NEXT:         .lds_size: 0x200
; CHECK-NEXT:         .mem_ordered: true
; CHECK-NEXT:         .scratch_en: false
; CHECK-NEXT:         .scratch_memory_size: 0
; CHECK-NEXT:         .sgpr_count: 0x1
; CHECK-NEXT:         .vgpr_count: 0x1
; CHECK-NEXT:         .wgp_mode: true
; CHECK-NEXT:       .hs:
; CHECK-NEXT:         .debug_mode: false
; CHECK-NEXT:         .entry_point_symbol: hs_shader
; CHECK-NEXT:         .ieee_mode: false
; CHECK-NEXT:         .lds_size: 0x1000
; CHECK-NEXT:         .mem_ordered: true
; CHECK-NEXT:         .scratch_en: false
; CHECK-NEXT:         .scratch_memory_size: 0
; CHECK-NEXT:         .sgpr_count: 0x1
; CHECK-NEXT:         .vgpr_count: 0x1
; CHECK-NEXT:         .wgp_mode: true
; CHECK-NEXT:       .ps:
; CHECK-NEXT:         .debug_mode: false
; CHECK-NEXT:         .entry_point_symbol: ps_shader
; CHECK-NEXT:         .ieee_mode: false
; CHECK-NEXT:         .lds_size: 0
; CHECK-NEXT:         .mem_ordered: true
; CHECK-NEXT:         .scratch_en: false
; CHECK-NEXT:         .scratch_memory_size: 0
; CHECK-NEXT:         .sgpr_count: 0x1
; CHECK-NEXT:         .vgpr_count: 0x1
; CHECK-NEXT:         .wgp_mode: true
; CHECK: .registers: {}
; CHECK:amdpal.version:
; CHECK-NEXT: - 0x3
; CHECK-NEXT: - 0
; CHECK-NEXT:...
148; CHECK-NEXT: .end_amdgpu_pal_metadata 149 150define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg1, i32 %arg2) #0 !lgc.shaderstage !1 { 151.entry: 152 %i = call i64 @llvm.amdgcn.s.getpc() 153 %i1 = and i64 %i, -4294967296 154 %i2 = zext i32 %arg1 to i64 155 %i3 = or i64 %i1, %i2 156 %i4 = inttoptr i64 %i3 to ptr addrspace(4) 157 %i5 = and i32 %arg2, 1023 158 %i6 = lshr i32 %arg2, 10 159 %i7 = and i32 %i6, 1023 160 %i8 = add nuw nsw i32 %i7, %i5 161 %i9 = load <4 x i32>, ptr addrspace(4) %i4, align 16 162 %.idx = shl nuw nsw i32 %i8, 2 163 call void @llvm.amdgcn.raw.buffer.store.i32(i32 1, <4 x i32> %i9, i32 %.idx, i32 0, i32 0) 164 ret void 165} 166 167define dllexport amdgpu_ps void @ps_shader() #1 { 168 ret void 169} 170 171@LDS.GS = external addrspace(3) global [1 x i32], align 4 172 173define dllexport amdgpu_gs void @gs_shader() #2 { 174 %ptr = getelementptr i32, ptr addrspace(3) @LDS.GS, i32 0 175 store i32 0, ptr addrspace(3) %ptr, align 4 176 ret void 177} 178 179@LDS.HS = external addrspace(3) global [1024 x i32], align 4 180 181define dllexport amdgpu_hs void @hs_shader() #2 { 182 %ptr = getelementptr i32, ptr addrspace(3) @LDS.HS, i32 0 183 store i32 0, ptr addrspace(3) %ptr, align 4 184 ret void 185} 186 187!amdgpu.pal.metadata.msgpack = !{!0} 188 189; Function Attrs: nounwind willreturn memory(none) 190declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1 191 192; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) 193declare i64 @llvm.amdgcn.s.getpc() #2 194 195; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write) 196declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #3 197 198attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" 
"target-features"=",+wavefrontsize64,+cumode" } 199 200attributes #1 = { nounwind memory(readwrite) "InitialPSInputAddr"="36983" } 201 202!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"} 203!1 = !{i32 7} 204