xref: /llvm-project/llvm/test/CodeGen/AMDGPU/amdgpu-max-num-workgroups-load-annotate.ll (revision 009368f13053dd11515f583fe36b34b15b356593)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-kernel-attributes %s | FileCheck %s
3
; Baseline x-dimension case: loads an i32 directly from the pointer returned
; by llvm.amdgcn.implicitarg.ptr (offset 0) in a function carrying attribute
; group #0 ("amdgpu-max-num-workgroups"="36,42,89").
; Expected output: the load gains !range metadata; the node it refers to is
; asserted at the end of the file as !{i32 1, i32 37}.
4define i32 @use_grid_size_x_max_num_workgroups() #0 {
5; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups(
6; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
8; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0:![0-9]+]]
9; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
10;
11  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
12  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
13  ret i32 %grid.size.x
14}
15
; Same offset-0 i32 load under attribute group #0, but the input load already
; carries !range !0 (defined near the end of the file as !{i32 0, i32 -1}).
; Expected output: the load references the same range node as the baseline
; x-dimension test (!{i32 1, i32 37}), i.e. the incoming range is not kept
; unchanged.
16define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range() #0 {
17; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range(
18; CHECK-SAME: ) #[[ATTR0]] {
19; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
20; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0]]
21; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
22;
23  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
24  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4, !range !0
25  ret i32 %grid.size.x
26}
27
; y-dimension case: loads an i32 from byte offset 4 of the implicit argument
; pointer in a function carrying attribute group #0 (second value: 42).
; Expected output: the GEP is left as written and the load gains !range
; metadata, asserted at the end of the file as !{i32 1, i32 43}.
28define i32 @use_grid_size_y_max_num_workgroups() #0 {
29; CHECK-LABEL: define i32 @use_grid_size_y_max_num_workgroups(
30; CHECK-SAME: ) #[[ATTR0]] {
31; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
32; CHECK-NEXT:    [[GEP_GRID_SIZE_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 4
33; CHECK-NEXT:    [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Y]], align 4, !range [[RNG1:![0-9]+]]
34; CHECK-NEXT:    ret i32 [[GRID_SIZE_Y]]
35;
36  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
37  %gep.grid.size.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 4
38  %grid.size.y = load i32, ptr addrspace(4) %gep.grid.size.y, align 4
39  ret i32 %grid.size.y
40}
41
; z-dimension case: loads an i32 from byte offset 8 of the implicit argument
; pointer in a function carrying attribute group #0 (third value: 89).
; Expected output: the GEP is left as written and the load gains !range
; metadata, asserted at the end of the file as !{i32 1, i32 90}.
42define i32 @use_grid_size_z_max_num_workgroups() #0 {
43; CHECK-LABEL: define i32 @use_grid_size_z_max_num_workgroups(
44; CHECK-SAME: ) #[[ATTR0]] {
45; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
46; CHECK-NEXT:    [[GEP_GRID_SIZE_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 8
47; CHECK-NEXT:    [[GRID_SIZE_Z:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Z]], align 4, !range [[RNG2:![0-9]+]]
48; CHECK-NEXT:    ret i32 [[GRID_SIZE_Z]]
49;
50  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
51  %gep.grid.size.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 8
52  %grid.size.z = load i32, ptr addrspace(4) %gep.grid.size.z, align 4
53  ret i32 %grid.size.z
54}
55
; Negative case: the offset-0 location is loaded as <2 x i16> instead of i32,
; still under attribute group #0.
; Expected output: the load is unchanged and carries no !range metadata.
56define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type() #0 {
57; CHECK-LABEL: define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type(
58; CHECK-SAME: ) #[[ATTR0]] {
59; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
60; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load <2 x i16>, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
61; CHECK-NEXT:    ret <2 x i16> [[GRID_SIZE_X]]
62;
63  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
64  %grid.size.x = load <2 x i16>, ptr addrspace(4) %implicitarg.ptr, align 4
65  ret <2 x i16> %grid.size.x
66}
67
; Boundary case: attribute group #1 sets the first max-num-workgroups value to
; 4294967294 (one below the largest unsigned 32-bit value).
; Expected output: the offset-0 i32 load still gains !range metadata, asserted
; at the end of the file as !{i32 1, i32 -1}.
68define i32 @use_grid_size_x_max_num_workgroups_max_minus_1() #1 {
69; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max_minus_1(
70; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
71; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
72; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG3:![0-9]+]]
73; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
74;
75  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
76  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
77  ret i32 %grid.size.x
78}
79
; Boundary case: attribute group #2 sets the first max-num-workgroups value to
; 4294967295 (the largest unsigned 32-bit value).
; Expected output: the offset-0 i32 load is unchanged and carries no !range
; metadata.
80define i32 @use_grid_size_x_max_num_workgroups_max() #2 {
81; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max(
82; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
83; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
84; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
85; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
86;
87  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
88  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
89  ret i32 %grid.size.x
90}
91
; Boundary case: attribute group #3 sets the first max-num-workgroups value
; to 0.
; Expected output: the offset-0 i32 load is unchanged and carries no !range
; metadata.
92define i32 @use_grid_size_x_max_num_workgroups_zero() #3 {
93; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_zero(
94; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
95; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
96; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
97; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
98;
99  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
100  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
101  ret i32 %grid.size.x
102}
103
; Declaration of the intrinsic called by every test function in this file.
; NOTE(review): this declaration references attribute group #3, which in this
; file is the "amdgpu-max-num-workgroups"="0,42,89" group used by the _zero
; test. The expected output lists a fifth, separate group
; (nocallback nofree nosync nounwind speculatable willreturn memory(none)),
; so the #3 here looks like a leftover rather than intentional -- confirm.
104declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
105
; Attribute groups under test. Each supplies three comma-separated values; in
; this file they line up with the loads at byte offsets 0, 4 and 8 (named x, y
; and z by the tests).
;   #0: ordinary limits; the expected ranges are [1, 37), [1, 43), [1, 90).
;   #1: first value is 4294967294; a range is still expected.
;   #2: first value is 4294967295; no range is expected on the x load.
;   #3: first value is 0; no range is expected on the x load.
106attributes #0 = { "amdgpu-max-num-workgroups"="36,42,89" }
107attributes #1 = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
108attributes #2 = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
109attributes #3 = { "amdgpu-max-num-workgroups"="0,42,89" }
110
; Pre-existing range operand attached to the input load in
; @use_grid_size_x_max_num_workgroups_existing_nonzero_range.
111!0 = !{i32 0, i32 -1}
112
113;.
114; CHECK: attributes #[[ATTR0]] = { "amdgpu-max-num-workgroups"="36,42,89" }
115; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
116; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
117; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="0,42,89" }
118; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
119;.
120; CHECK: [[RNG0]] = !{i32 1, i32 37}
121; CHECK: [[RNG1]] = !{i32 1, i32 43}
122; CHECK: [[RNG2]] = !{i32 1, i32 90}
123; CHECK: [[RNG3]] = !{i32 1, i32 -1}
124;.
125