; See ./README.md for how to maintain the LLVM IR in this test.

; REQUIRES: amdgpu-registered-target

; RUN: opt -pass-remarks=kernel-info -passes=kernel-info \
; RUN:     -disable-output %s 2>&1 | \
; RUN:   FileCheck -match-full-lines %s

; The directives below pin the ordered kernel-info remarks for the three
; functions defined in this module.  No remark may precede the first expected
; one, every later remark must be on the very next output line, and no output
; at all may follow the last one.

; Expected remarks for the outlined '<kernel>_debug__' function.
; CHECK-NOT: remark:
;     CHECK: remark: test.c:0:0: in artificial function '[[OFF_FUNC:__omp_offloading_[a-f0-9_]*_h_l12]]_debug__', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes
; CHECK-NEXT: remark: test.c:14:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'i' with static size of 4 bytes
; CHECK-NEXT: remark: test.c:15:9: in artificial function '[[OFF_FUNC]]_debug__', alloca ('%[[#]]') for 'a' with static size of 8 bytes
; CHECK-NEXT: remark: <unknown>:0:0: in artificial function '[[OFF_FUNC]]_debug__', 'store' instruction accesses memory in flat address space
; CHECK-NEXT: remark: test.c:13:3: in artificial function '[[OFF_FUNC]]_debug__', direct call, callee is '@__kmpc_target_init'
; CHECK-NEXT: remark: test.c:16:5: in artificial function '[[OFF_FUNC]]_debug__', direct call, callee is '@f'
; CHECK-NEXT: remark: test.c:17:5: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is 'g'
; CHECK-NEXT: remark: test.c:18:3: in artificial function '[[OFF_FUNC]]_debug__', direct call, callee is '@__kmpc_target_deinit'
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', ExternalNotKernel = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-max-num-workgroups[0] = 4294967295
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-max-num-workgroups[1] = 4294967295
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-max-num-workgroups[2] = 4294967295
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-flat-work-group-size[0] = 1
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-flat-work-group-size[1] = 1024
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-waves-per-eu[0] = 4
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-waves-per-eu[1] = 10
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Allocas = 3
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasStaticSizeSum = 20
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasDyn = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCalls = 4
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', IndirectCalls = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', DirectCallsToDefinedFunctions = 1
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', InlineAssemblyCalls = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Invokes = 0
; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', FlatAddrspaceAccesses = 1

; Expected remarks for the amdgpu_kernel entry point.
; CHECK-NEXT: remark: test.c:0:0: in artificial function '[[OFF_FUNC]]', artificial alloca ('%[[#]]') for 'dyn_ptr' with static size of 8 bytes
; CHECK-NEXT: remark: <unknown>:0:0: in artificial function '[[OFF_FUNC]]', 'store' instruction accesses memory in flat address space
; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', 'load' instruction ('%[[#]]') accesses memory in flat address space
; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', direct call to defined function, callee is artificial '[[OFF_FUNC]]_debug__'
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', ExternalNotKernel = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', omp_target_thread_limit = 256
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-max-num-workgroups[0] = 4294967295
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-max-num-workgroups[1] = 4294967295
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-max-num-workgroups[2] = 4294967295
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-flat-work-group-size[0] = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-flat-work-group-size[1] = 256
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-waves-per-eu[0] = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-waves-per-eu[1] = 10
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Allocas = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasStaticSizeSum = 8
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasDyn = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCalls = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', IndirectCalls = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', DirectCallsToDefinedFunctions = 1
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', InlineAssemblyCalls = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Invokes = 0
; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', FlatAddrspaceAccesses = 2

; Expected remarks for the non-kernel function 'g'.
; CHECK-NEXT: remark: test.c:4:7: in function 'g', alloca ('%[[#]]') for 'i' with static size of 4 bytes
; CHECK-NEXT: remark: test.c:5:7: in function 'g', alloca ('%[[#]]') for 'a' with static size of 8 bytes
; CHECK-NEXT: remark: test.c:6:3: in function 'g', direct call, callee is '@f'
; CHECK-NEXT: remark: test.c:7:3: in function 'g', direct call to defined function, callee is 'g'
; CHECK-NEXT: remark: test.c:3:0: in function 'g', ExternalNotKernel = 1
; CHECK-NEXT: remark: test.c:3:0: in function 'g', amdgpu-max-num-workgroups[0] = 4294967295
; CHECK-NEXT: remark: test.c:3:0: in function 'g', amdgpu-max-num-workgroups[1] = 4294967295
; CHECK-NEXT: remark: test.c:3:0: in function 'g', amdgpu-max-num-workgroups[2] = 4294967295
; CHECK-NEXT: remark: test.c:3:0: in function 'g', amdgpu-flat-work-group-size[0] = 1
; CHECK-NEXT: remark: test.c:3:0: in function 'g', amdgpu-flat-work-group-size[1] = 1024
; CHECK-NEXT: remark: test.c:3:0: in function 'g', amdgpu-waves-per-eu[0] = 4
; CHECK-NEXT: remark: test.c:3:0: in function 'g', amdgpu-waves-per-eu[1] = 10
; CHECK-NEXT: remark: test.c:3:0: in function 'g', Allocas = 2
; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasStaticSizeSum = 12
; CHECK-NEXT: remark: test.c:3:0: in function 'g', AllocasDyn = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCalls = 2
; CHECK-NEXT: remark: test.c:3:0: in function 'g', IndirectCalls = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', DirectCallsToDefinedFunctions = 1
; CHECK-NEXT: remark: test.c:3:0: in function 'g', InlineAssemblyCalls = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', Invokes = 0
; CHECK-NEXT: remark: test.c:3:0: in function 'g', FlatAddrspaceAccesses = 0
; CHECK-NOT: {{.}}

; ModuleID = 'test-openmp-amdgcn-amd-amdhsa-gfx906.bc'
source_filename = "test.c"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

%struct.ident_t = type { i32, i32, i32, i32, ptr }
%struct.DynamicEnvironmentTy = type { i16 }
%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }

@__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0
@__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0
@__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
@__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
@__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0
@0 = private unnamed_addr constant [57 x i8] c";test.c;__omp_offloading_fd02_727e9_h_l12_debug__;13;3;;\00", align 1
@1 = private unnamed_addr addrspace(1) constant %struct.ident_t { i32 0, i32 2, i32 0, i32 56, ptr @0 }, align 8
@__omp_offloading_fd02_727e9_h_l12_dynamic_environment = weak_odr protected addrspace(1) global %struct.DynamicEnvironmentTy zeroinitializer
@__omp_offloading_fd02_727e9_h_l12_kernel_environment = weak_odr protected addrspace(1) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0, i32 0 }, ptr addrspacecast (ptr addrspace(1) @1 to ptr), ptr addrspacecast (ptr addrspace(1) @__omp_offloading_fd02_727e9_h_l12_dynamic_environment to ptr) }
@__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500

; Internal function called by the kernel below.  It stores its argument through
; a flat pointer, calls @__kmpc_target_init, and only when that call returns -1
; calls @f, @g and @__kmpc_target_deinit; otherwise it returns immediately.
; Values are deliberately left unnamed: the expected remarks match numeric
; names such as '%2'.
; Function Attrs: convergent noinline norecurse nounwind optnone
define internal void @__omp_offloading_fd02_727e9_h_l12_debug__(ptr noalias noundef %0) #0 !dbg !15 {
  %2 = alloca ptr, align 8, addrspace(5)
  %3 = alloca i32, align 4, addrspace(5)
  %4 = alloca [2 x i32], align 4, addrspace(5)
  %5 = addrspacecast ptr addrspace(5) %2 to ptr
  %6 = addrspacecast ptr addrspace(5) %3 to ptr
  %7 = addrspacecast ptr addrspace(5) %4 to ptr
  store ptr %0, ptr %5, align 8
    #dbg_declare(ptr addrspace(5) %2, !23, !DIExpression(), !24)
  %8 = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @__omp_offloading_fd02_727e9_h_l12_kernel_environment to ptr), ptr %0), !dbg !25
  %9 = icmp eq i32 %8, -1, !dbg !25
  br i1 %9, label %10, label %11, !dbg !25

10:                                               ; preds = %1
    #dbg_declare(ptr addrspace(5) %3, !26, !DIExpression(), !29)
    #dbg_declare(ptr addrspace(5) %4, !30, !DIExpression(), !34)
  call void @f() #4, !dbg !35
  call void @g() #4, !dbg !36
  call void @__kmpc_target_deinit(), !dbg !37
  ret void, !dbg !38

11:                                               ; preds = %1
  ret void, !dbg !25
}

; Kernel entry point (amdgpu_kernel calling convention).  It spills its
; argument to a stack slot through a flat pointer, reloads it, and forwards it
; to the '_debug__' function above.
; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
define weak_odr protected amdgpu_kernel void @__omp_offloading_fd02_727e9_h_l12(ptr noalias noundef %0) #1 !dbg !39 {
  %2 = alloca ptr, align 8, addrspace(5)
  %3 = addrspacecast ptr addrspace(5) %2 to ptr
  store ptr %0, ptr %3, align 8
    #dbg_declare(ptr addrspace(5) %2, !40, !DIExpression(), !41)
  %4 = load ptr, ptr %3, align 8, !dbg !42
  call void @__omp_offloading_fd02_727e9_h_l12_debug__(ptr %4) #5, !dbg !42
  ret void, !dbg !42
}

declare i32 @__kmpc_target_init(ptr, ptr)

; Function Attrs: convergent
declare void @f(...) #2

declare void @__kmpc_target_deinit()

; Externally visible non-kernel function with two stack allocations and no
; memory accesses; it calls the undefined @f and then itself.
; Function Attrs: convergent noinline nounwind optnone
define hidden void @g() #3 !dbg !43 {
  %1 = alloca i32, align 4, addrspace(5)
  %2 = alloca [2 x i32], align 4, addrspace(5)
  %3 = addrspacecast ptr addrspace(5) %1 to ptr
  %4 = addrspacecast ptr addrspace(5) %2 to ptr
    #dbg_declare(ptr addrspace(5) %1, !46, !DIExpression(), !47)
    #dbg_declare(ptr addrspace(5) %2, !48, !DIExpression(), !49)
  call void @f() #4, !dbg !50
  call void @g() #4, !dbg !51
  ret void, !dbg !52
}

attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,256" "frame-pointer"="all" "kernel" "no-trapping-math"="true" "omp_target_thread_limit"="256" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size"="true" }
attributes #2 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
attributes #3 = { convergent noinline nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx906" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
attributes #4 = { convergent }
attributes #5 = { nounwind }

!llvm.dbg.cu = !{!0}
!omp_offload.info = !{!2}
!llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9, !10, !11}
!llvm.ident = !{!12, !13, !13, !13, !13, !13, !13, !13, !13, !13, !13, !13, !13, !13, !13, !13, !13}
!opencl.ocl.version = !{!14, !14, !14, !14, !14, !14, !14, !14, !14, !14, !14, !14, !14, !14, !14, !14}

!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "/tmp", checksumkind: CSK_MD5, checksum: "27a878d5e894ab6d41bfe96f997f8821")
!2 = !{i32 0, i32 64770, i32 468969, !"h", i32 12, i32 0, i32 0}
!3 = !{i32 1, !"amdhsa_code_object_version", i32 500}
!4 = !{i32 7, !"Dwarf Version", i32 5}
!5 = !{i32 2, !"Debug Info Version", i32 3}
!6 = !{i32 1, !"wchar_size", i32 4}
!7 = !{i32 7, !"openmp", i32 51}
!8 = !{i32 7, !"openmp-device", i32 51}
!9 = !{i32 8, !"PIC Level", i32 2}
!10 = !{i32 7, !"frame-pointer", i32 2}
!11 = !{i32 4, !"amdgpu_hostcall", i32 1}
!12 = !{!"clang version 20.0.0git (/tmp/llvm/clang b9447c03a9ef2eed55b685a33511df86f7f94e89)"}
!13 = !{!"AMD clang version 17.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-6.0.2 24012 af27734ed982b52a9f1be0f035ac91726fc697e4)"}
!14 = !{i32 2, i32 0}
!15 = distinct !DISubprogram(name: "__omp_offloading_fd02_727e9_h_l12_debug__", scope: !16, file: !16, line: 13, type: !17, scopeLine: 13, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !22)
!16 = !DIFile(filename: "test.c", directory: "/tmp")
!17 = !DISubroutineType(types: !18)
!18 = !{null, !19}
!19 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !20)
!20 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !21)
!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64)
!22 = !{}
!23 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !15, type: !19, flags: DIFlagArtificial)
!24 = !DILocation(line: 0, scope: !15)
!25 = !DILocation(line: 13, column: 3, scope: !15)
!26 = !DILocalVariable(name: "i", scope: !27, file: !16, line: 14, type: !28)
!27 = distinct !DILexicalBlock(scope: !15, file: !16, line: 13, column: 3)
!28 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!29 = !DILocation(line: 14, column: 9, scope: !27)
!30 = !DILocalVariable(name: "a", scope: !27, file: !16, line: 15, type: !31)
!31 = !DICompositeType(tag: DW_TAG_array_type, baseType: !28, size: 64, elements: !32)
!32 = !{!33}
!33 = !DISubrange(count: 2)
!34 = !DILocation(line: 15, column: 9, scope: !27)
!35 = !DILocation(line: 16, column: 5, scope: !27)
!36 = !DILocation(line: 17, column: 5, scope: !27)
!37 = !DILocation(line: 18, column: 3, scope: !27)
!38 = !DILocation(line: 18, column: 3, scope: !15)
!39 = distinct !DISubprogram(name: "__omp_offloading_fd02_727e9_h_l12", scope: !16, file: !16, line: 12, type: !17, scopeLine: 12, flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !22)
!40 = !DILocalVariable(name: "dyn_ptr", arg: 1, scope: !39, type: !19, flags: DIFlagArtificial)
!41 = !DILocation(line: 0, scope: !39)
!42 = !DILocation(line: 12, column: 1, scope: !39)
!43 = distinct !DISubprogram(name: "g", scope: !16, file: !16, line: 3, type: !44, scopeLine: 3, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !22)
!44 = !DISubroutineType(types: !45)
!45 = !{null}
!46 = !DILocalVariable(name: "i", scope: !43, file: !16, line: 4, type: !28)
!47 = !DILocation(line: 4, column: 7, scope: !43)
!48 = !DILocalVariable(name: "a", scope: !43, file: !16, line: 5, type: !31)
!49 = !DILocation(line: 5, column: 7, scope: !43)
!50 = !DILocation(line: 6, column: 3, scope: !43)
!51 = !DILocation(line: 7, column: 3, scope: !43)
!52 = !DILocation(line: 8, column: 1, scope: !43)