; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-kernel-attributes %s | FileCheck %s

; Exercises amdgpu-lower-kernel-attributes on loads through
; llvm.amdgcn.implicitarg.ptr in functions that carry the
; "amdgpu-max-num-workgroups" attribute. In the expected output, i32 loads at
; byte offsets 0, 4 and 8 from the implicit argument pointer (named
; grid.size.x/y/z in this file) gain !range metadata whose bounds are
; [1, N + 1), where N is the first, second or third component of the attribute
; respectively.

; Offset 0 with "36,42,89": the expected load carries !range !{i32 1, i32 37}.
define i32 @use_grid_size_x_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; Same load, but the input already has !range !0 (!{i32 0, i32 -1}). The
; expected output refers to the same range node as the test above, i.e. the
; pre-existing range is not kept as-is.
define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range() #0 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_existing_nonzero_range(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG0]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4, !range !0
  ret i32 %grid.size.x
}

; Offset 4 with "36,42,89": the expected load carries !range !{i32 1, i32 43}.
define i32 @use_grid_size_y_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_y_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GEP_GRID_SIZE_Y:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 4
; CHECK-NEXT:    [[GRID_SIZE_Y:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Y]], align 4, !range [[RNG1:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_Y]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.grid.size.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 4
  %grid.size.y = load i32, ptr addrspace(4) %gep.grid.size.y, align 4
  ret i32 %grid.size.y
}

; Offset 8 with "36,42,89": the expected load carries !range !{i32 1, i32 90}.
define i32 @use_grid_size_z_max_num_workgroups() #0 {
; CHECK-LABEL: define i32 @use_grid_size_z_max_num_workgroups(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GEP_GRID_SIZE_Z:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 8
; CHECK-NEXT:    [[GRID_SIZE_Z:%.*]] = load i32, ptr addrspace(4) [[GEP_GRID_SIZE_Z]], align 4, !range [[RNG2:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_Z]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.grid.size.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 8
  %grid.size.z = load i32, ptr addrspace(4) %gep.grid.size.z, align 4
  ret i32 %grid.size.z
}

; Negative test: the offset-0 load has type <2 x i16> instead of i32, and the
; expected output leaves it without !range metadata.
define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type() #0 {
; CHECK-LABEL: define <2 x i16> @use_grid_size_x_max_num_workgroups_load_wrong_type(
; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load <2 x i16>, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT:    ret <2 x i16> [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load <2 x i16>, ptr addrspace(4) %implicitarg.ptr, align 4
  ret <2 x i16> %grid.size.x
}

; Boundary test: first component is 4294967294. The expected load carries
; !range !{i32 1, i32 -1}; as an unsigned i32, -1 is 4294967295 = N + 1.
define i32 @use_grid_size_x_max_num_workgroups_max_minus_1() #1 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max_minus_1(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4, !range [[RNG3:![0-9]+]]
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; Boundary test: first component is 4294967295. The expected output leaves the
; load without !range metadata.
; NOTE(review): presumably because N + 1 is not representable as an i32 upper
; bound - confirm against the pass implementation.
define i32 @use_grid_size_x_max_num_workgroups_max() #2 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_max(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; First component is 0. The expected output leaves the load without !range
; metadata.
; NOTE(review): presumably 0 is not treated as a usable upper bound - confirm
; against the pass implementation.
define i32 @use_grid_size_x_max_num_workgroups_zero() #3 {
; CHECK-LABEL: define i32 @use_grid_size_x_max_num_workgroups_zero(
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; CHECK-NEXT:    [[GRID_SIZE_X:%.*]] = load i32, ptr addrspace(4) [[IMPLICITARG_PTR]], align 4
; CHECK-NEXT:    ret i32 [[GRID_SIZE_X]]
;
  %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %grid.size.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  ret i32 %grid.size.x
}

; NOTE(review): this declaration references attribute group #3, the same group
; used by the zero test above. The expected output lists the intrinsic's
; attributes as a separate group (ATTR4) without "amdgpu-max-num-workgroups",
; so the reference looks incidental - confirm before changing it.
declare noundef align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3

; One attribute group per scenario; only the first component varies.
attributes #0 = { "amdgpu-max-num-workgroups"="36,42,89" }
attributes #1 = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
attributes #2 = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
attributes #3 = { "amdgpu-max-num-workgroups"="0,42,89" }

; Pre-existing range used by the existing_nonzero_range test.
!0 = !{i32 0, i32 -1}

;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-max-num-workgroups"="36,42,89" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-max-num-workgroups"="4294967294,42,89" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-max-num-workgroups"="4294967295,42,89" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-max-num-workgroups"="0,42,89" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; CHECK: [[RNG0]] = !{i32 1, i32 37}
; CHECK: [[RNG1]] = !{i32 1, i32 43}
; CHECK: [[RNG2]] = !{i32 1, i32 90}
; CHECK: [[RNG3]] = !{i32 1, i32 -1}
;.