; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=amdgpu-lower-kernel-attributes,instcombine,infer-alignment %s | FileCheck -enable-var-scope -check-prefix=GCN %s
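
; The byte offsets used throughout this test follow the code object v5 hidden
; (implicit) kernel argument layout selected by the amdhsa_code_object_version
; module flag below: hidden_block_count_x/y/z are i32 values at offsets 0/4/8,
; hidden_group_size_x/y/z are i16 values at offsets 12/14/16, and
; hidden_remainder_x/y/z are i16 values at offsets 18/20/22 from
; @llvm.amdgcn.implicitarg.ptr().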

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_local_size_x(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_local_size_x(
; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
; GCN-NEXT:    [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
; GCN-NEXT:    store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %block.count.x = load i32, ptr addrspace(4) %implicitarg.ptr, align 4
  %cmp.id.count = icmp ult i32 %group.id, %block.count.x
  %local.size.offset = select i1 %cmp.id.count, i64 12, i64 18
  %gep.local.size = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 %local.size.offset
  %local.size = load i16, ptr addrspace(4) %gep.local.size, align 2
  store i16 %local.size, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_local_size_y(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_local_size_y(
; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
; GCN-NEXT:    [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 2
; GCN-NEXT:    store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %group.id = tail call i32 @llvm.amdgcn.workgroup.id.y()
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.block.count.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 4
  %block.count.y = load i32, ptr addrspace(4) %gep.block.count.y, align 4
  %cmp.id.count = icmp ult i32 %group.id, %block.count.y
  %local.size.offset = select i1 %cmp.id.count, i64 14, i64 20
  %gep.local.size = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 %local.size.offset
  %local.size = load i16, ptr addrspace(4) %gep.local.size, align 2
  store i16 %local.size, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_local_size_z(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_local_size_z(
; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT:    [[GEP_LOCAL_SIZE:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
; GCN-NEXT:    [[LOCAL_SIZE:%.*]] = load i16, ptr addrspace(4) [[GEP_LOCAL_SIZE]], align 4
; GCN-NEXT:    store i16 [[LOCAL_SIZE]], ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %group.id = tail call i32 @llvm.amdgcn.workgroup.id.z()
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.block.count.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 8
  %block.count.z = load i32, ptr addrspace(4) %gep.block.count.z, align 4
  %cmp.id.count = icmp ult i32 %group.id, %block.count.z
  %local.size.offset = select i1 %cmp.id.count, i64 16, i64 22
  %gep.local.size = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 %local.size.offset
  %local.size = load i16, ptr addrspace(4) %gep.local.size, align 2
  store i16 %local.size, ptr addrspace(1) %out
  ret void
}
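
; Because "uniform-work-group-size"="true" guarantees that every launched work
; group is the full work-group size, the hidden remainder arguments are known
; to be zero and the loads below fold to a constant 0.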

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_remainder_x(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_remainder_x(
; GCN-NEXT:    store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.x = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 18
  %remainder.x = load i16, ptr addrspace(4) %gep.x, align 2
  store i16 %remainder.x, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_remainder_y(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_remainder_y(
; GCN-NEXT:    store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 20
  %remainder.y = load i16, ptr addrspace(4) %gep.y, align 2
  store i16 %remainder.y, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_remainder_z(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_remainder_z(
; GCN-NEXT:    store i16 0, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 22
  %remainder.z = load i16, ptr addrspace(4) %gep.z, align 2
  store i16 %remainder.z, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_work_group_size_x(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_work_group_size_x(
; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT:    [[GEP_X:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 12
; GCN-NEXT:    [[GROUP_SIZE_X:%.*]] = load i16, ptr addrspace(4) [[GEP_X]], align 4
; GCN-NEXT:    store i16 [[GROUP_SIZE_X]], ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.x = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 12
  %group.size.x = load i16, ptr addrspace(4) %gep.x, align 2
  store i16 %group.size.x, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_work_group_size_y(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_work_group_size_y(
; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT:    [[GEP_Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 14
; GCN-NEXT:    [[GROUP_SIZE_Y:%.*]] = load i16, ptr addrspace(4) [[GEP_Y]], align 2
; GCN-NEXT:    store i16 [[GROUP_SIZE_Y]], ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 14
  %group.size.y = load i16, ptr addrspace(4) %gep.y, align 2
  store i16 %group.size.y, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_work_group_size_z(ptr addrspace(1) %out) #0 {
; GCN-LABEL: @get_work_group_size_z(
; GCN-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
; GCN-NEXT:    [[GEP_Z:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 16
; GCN-NEXT:    [[GROUP_SIZE_Z:%.*]] = load i16, ptr addrspace(4) [[GEP_Z]], align 4
; GCN-NEXT:    store i16 [[GROUP_SIZE_Z]], ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 16
  %group.size.z = load i16, ptr addrspace(4) %gep.z, align 2
  store i16 %group.size.z, ptr addrspace(1) %out
  ret void
}
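
; With !reqd_work_group_size metadata the work-group size is a compile-time
; constant, so the group-size loads below fold directly to the metadata values
; (8, 16, 2).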

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_work_group_size_x_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
; GCN-LABEL: @get_work_group_size_x_reqd(
; GCN-NEXT:    store i16 8, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.x = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 12
  %group.size.x = load i16, ptr addrspace(4) %gep.x, align 2
  store i16 %group.size.x, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_work_group_size_y_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
; GCN-LABEL: @get_work_group_size_y_reqd(
; GCN-NEXT:    store i16 16, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.y = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 14
  %group.size.y = load i16, ptr addrspace(4) %gep.y, align 2
  store i16 %group.size.y, ptr addrspace(1) %out
  ret void
}

; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
define amdgpu_kernel void @get_work_group_size_z_reqd(ptr addrspace(1) %out) #0 !reqd_work_group_size !0 {
; GCN-LABEL: @get_work_group_size_z_reqd(
; GCN-NEXT:    store i16 2, ptr addrspace(1) [[OUT:%.*]], align 2
; GCN-NEXT:    ret void
;
  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %gep.z = getelementptr inbounds i8, ptr addrspace(4) %implicitarg.ptr, i64 16
  %group.size.z = load i16, ptr addrspace(4) %gep.z, align 2
  store i16 %group.size.z, ptr addrspace(1) %out
  ret void
}

declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
declare i32 @llvm.amdgcn.workgroup.id.x() #1
declare i32 @llvm.amdgcn.workgroup.id.y() #1
declare i32 @llvm.amdgcn.workgroup.id.z() #1

!llvm.module.flags = !{!1}

attributes #0 = { nounwind "uniform-work-group-size"="true" }
attributes #1 = { nounwind readnone speculatable }

!0 = !{i32 8, i32 16, i32 2}
!1 = !{i32 1, !"amdhsa_code_object_version", i32 500}