xref: /llvm-project/llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll (revision c93e001ca695e905cb965b36d63f7a348d1dd809)
1954ab83eSAustin Kerbow; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck --check-prefix=OBJDUMP %s
2954ab83eSAustin Kerbow; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s
3954ab83eSAustin Kerbow
4954ab83eSAustin Kerbow; OBJDUMP: Contents of section .rodata:
5954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0000 00000000 00000000 10010000 00000000  ................
6954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000  ................
7954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000  ................
8954ab83eSAustin Kerbow; OBJDUMP-NOT:  0030 0000af00 94130000 1a000400 00000000  ................
9*c93e001cSShilei Tian; OBJDUMP-NEXT: 0030 8000af00 98130000 1e000400 00000000  ................
10954ab83eSAustin Kerbow
11954ab83eSAustin Kerbow; ASM-LABEL: amdhsa_kernarg_preload_4_implicit_6:
126548b635SShilei Tian; ASM: .amdhsa_user_sgpr_count 12
136548b635SShilei Tian; ASM: .amdhsa_next_free_sgpr 12
146548b635SShilei Tian; ASM: ; TotalNumSgprs: 18
156548b635SShilei Tian; ASM: ; NumSGPRsForWavesPerEU: 18
16954ab83eSAustin Kerbow
17954ab83eSAustin Kerbow; Test that we include preloaded SGPRs in the GRANULATED_WAVEFRONT_SGPR_COUNT
18954ab83eSAustin Kerbow; field that are not explicitly referenced in the kernel. This test has 6 implicit
19954ab83eSAustin Kerbow; user SGPRs enabled, 4 preloaded kernarg SGPRs, plus 6 extra SGPRs allocated
20954ab83eSAustin Kerbow; for flat scratch, etc. The total number of allocated SGPRs encoded in the
21954ab83eSAustin Kerbow; kernel descriptor should be 16. That's a 1 in the KD field since the granule
22954ab83eSAustin Kerbow; size is 8 and it's NumGranules - 1. The encoding for that looks like '40'.
; NOTE(review): the checks for this kernel now expect .amdhsa_user_sgpr_count 12,
; TotalNumSgprs 18 and a .rodata row at 0030 that starts with '80', so the
; "16" / '40' figures in this paragraph look stale -- confirm and update.
23954ab83eSAustin Kerbow
; Kernel under test: a single i128 inreg argument and an empty body (ret void
; only), so the body itself references no registers. No attribute group is attached.
24954ab83eSAustin Kerbowdefine amdgpu_kernel void @amdhsa_kernarg_preload_4_implicit_6(i128 inreg) { ret void }
25954ab83eSAustin Kerbow
26954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0040 00000000 00000000 20010000 00000000  ........ .......
27954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000  ................
28954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000  ................
29954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0070 4000af00 94000000 08000800 00000000  @...............
30954ab83eSAustin Kerbow
31954ab83eSAustin Kerbow; ASM-LABEL: amdhsa_kernarg_preload_8_implicit_2:
32954ab83eSAustin Kerbow; ASM: .amdhsa_user_sgpr_count 10
33954ab83eSAustin Kerbow; ASM: .amdhsa_next_free_sgpr 10
34c897c13dSJanek van Oirschot; ASM: ; TotalNumSgprs: 16
35954ab83eSAustin Kerbow; ASM: ; NumSGPRsForWavesPerEU: 16
36954ab83eSAustin Kerbow
37954ab83eSAustin Kerbow; Only the kernarg_ptr is enabled so we should have 8 preload kernarg SGPRs, 2
38954ab83eSAustin Kerbow; implicit, and 6 extra.
39954ab83eSAustin Kerbow
; Kernel under test: a single i256 inreg argument, attribute group #0, and an
; empty body (ret void only).
40954ab83eSAustin Kerbowdefine amdgpu_kernel void @amdhsa_kernarg_preload_8_implicit_2(i256 inreg) #0 { ret void }
41954ab83eSAustin Kerbow
42954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0080 00000000 00000000 08010000 00000000  ................
43954ab83eSAustin Kerbow; OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000  ................
44954ab83eSAustin Kerbow; OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000  ................
45954ab83eSAustin Kerbow; OBJDUMP-NEXT: 00b0 4000af00 86000000 08000100 00000000  @...............
46954ab83eSAustin Kerbow
47954ab83eSAustin Kerbow; ASM-LABEL: amdhsa_kernarg_preload_1_implicit_2:
48954ab83eSAustin Kerbow; ASM: .amdhsa_user_sgpr_count 3
49954ab83eSAustin Kerbow; ASM: .amdhsa_next_free_sgpr 3
50c897c13dSJanek van Oirschot; ASM: ; TotalNumSgprs: 9
51954ab83eSAustin Kerbow; ASM: ; NumSGPRsForWavesPerEU: 9
52954ab83eSAustin Kerbow
53954ab83eSAustin Kerbow; 1 preload, 2 implicit, 6 extra. Rounds up to 16 SGPRs in the KD.
54954ab83eSAustin Kerbow
; Kernel under test: a single i32 inreg argument, attribute group #0, and an
; empty body (ret void only).
55954ab83eSAustin Kerbowdefine amdgpu_kernel void @amdhsa_kernarg_preload_1_implicit_2(i32 inreg) #0 { ret void }
56954ab83eSAustin Kerbow
57954ab83eSAustin Kerbow; OBJDUMP-NEXT: 00c0 00000000 00000000 08010000 00000000  ................
58954ab83eSAustin Kerbow; OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000  ................
59954ab83eSAustin Kerbow; OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000  ................
60954ab83eSAustin Kerbow; OBJDUMP-NEXT: 00f0 0000af00 84000000 08000000 00000000  ................
61954ab83eSAustin Kerbow
62954ab83eSAustin Kerbow; ASM-LABEL: amdhsa_kernarg_preload_0_implicit_2:
63954ab83eSAustin Kerbow; ASM: .amdhsa_user_sgpr_count 2
64954ab83eSAustin Kerbow; ASM: .amdhsa_next_free_sgpr 0
65c897c13dSJanek van Oirschot; ASM: ; TotalNumSgprs: 6
66954ab83eSAustin Kerbow; ASM: ; NumSGPRsForWavesPerEU: 6
67954ab83eSAustin Kerbow
68954ab83eSAustin Kerbow; 0 preload kernarg SGPRs, 2 implicit, 6 extra. Rounds up to 8 SGPRs in the KD.
69954ab83eSAustin Kerbow; Encoded like '00'.
70954ab83eSAustin Kerbow
; Kernel under test: a single i32 argument that is NOT marked inreg (the other
; kernels in this file mark theirs inreg), attribute group #0, and an empty
; body (ret void only).
71954ab83eSAustin Kerbowdefine amdgpu_kernel void @amdhsa_kernarg_preload_0_implicit_2(i32) #0 { ret void }
72954ab83eSAustin Kerbow
; Attribute group #0, attached to the three kernels above that are declared
; with #0. It lists "amdgpu-no-*" string attributes plus
; "uniform-work-group-size"="false".
; NOTE(review): presumably these turn off the corresponding implicit kernel
; inputs so that fewer implicit user SGPRs are enabled (the comments above say
; "2 implicit" for the #0 kernels) -- confirm against the AMDGPU backend docs.
73954ab83eSAustin Kerbowattributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
74