1; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s 2target triple = "nvptx64" 3 4; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Value has potential side effects preventing SPMD-mode execution. Add `[[omp::assume("ompx_spmd_amenable")]]` to the called function to override. 5; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Value has potential side effects preventing SPMD-mode execution. Add `[[omp::assume("ompx_spmd_amenable")]]` to the called function to override. 6; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback. 7; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Call may contain unknown parallel regions. Use `[[omp::assume("omp_no_parallelism")]]` to override. 8; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Call may contain unknown parallel regions. Use `[[omp::assume("omp_no_parallelism")]]` to override. 9; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:20:1: Transformed generic-mode kernel to SPMD-mode. 10 11 12;; void unknown(void); 13;; void known(void) { 14;; #pragma omp parallel 15;; { 16;; unknown(); 17;; } 18;; } 19;; 20;; void test_fallback(void) { 21;; #pragma omp target teams 22;; { 23;; unknown(); 24;; known(); 25;; unknown(); 26;; } 27;; } 28;; 29;; void no_openmp(void) [[omp::assume("omp_no_openmp")]]; 30;; void test_no_fallback(void) { 31;; #pragma omp target teams 32;; { 33;; known(); 34;; known(); 35;; known(); 36;; spmd_amenable(); 37;; } 38;; } 39 40%struct.ident_t = type { i32, i32, i32, i32, ptr } 41%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 } 42%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr } 43 44@0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1 45@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8 46@2 = private unnamed_addr constant [72 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_fallback;11;1;;\00", align 1 47@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8 48@4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1 49@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8 50@6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1 51@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8 52@8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1 53@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8 54@10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1 55@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8 56@12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1 57@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8 58@G = external global i32 59 60@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 61@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null } 62 63 64; Function Attrs: convergent norecurse nounwind 65define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 { 66entry: 67 %captured_vars_addrs.i.i = alloca [0 x ptr], align 8 68 %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment, ptr %dyn) #3, !dbg !18 69 %exec_user_code = icmp eq i32 %0, -1, !dbg !18 70 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18 71 72common.ret: ; preds = %entry, %user_code.entry 73 ret void, !dbg !19 74 75user_code.entry: ; preds = %entry 76 %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @3) #3 77 call void @unknown() #6, !dbg !20 78 call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3 79 %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 80 call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23 81 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26 82 call void @unknown() #6, !dbg !27 83 call void @__kmpc_target_deinit() #3, !dbg !28 84 br label %common.ret 85} 86 87define weak i32 @__kmpc_target_init(ptr, ptr) { 88 ret i32 0 89} 90 91 92; Function Attrs: convergent 93declare void @unknown() local_unnamed_addr #1 94 95; Function Attrs: nounwind 96define hidden void @known() local_unnamed_addr #2 !dbg !29 { 97entry: 98 %captured_vars_addrs = alloca [0 x ptr], align 8 99 %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @13) 100 call void @__kmpc_parallel_51(ptr nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3, !dbg !30 101 ret void, !dbg !31 102} 103 104; Function Attrs: nounwind 105declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3 106 107declare void @__kmpc_target_deinit() local_unnamed_addr 108 109; Function Attrs: norecurse nounwind 110define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 { 111entry: 112 %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8 113 %0 = call i32 @__kmpc_target_init(ptr nonnull @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment, ptr %dyn) #3, !dbg !33 114 %exec_user_code = icmp eq i32 %0, -1, !dbg !33 115 br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33 116 117common.ret: ; preds = %entry, %user_code.entry 118 ret void, !dbg !34 119 120user_code.entry: ; preds = %entry 121 %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @9) #3 122 call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 123 %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 124 call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !35 125 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !39 126 call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 127 %3 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 128 call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !40 129 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !42 130 call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3 131 %4 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3 132 call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43 133 call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45 134 call void @spmd_amenable() 135 call void @__kmpc_target_deinit() #3, !dbg !46 136 br label %common.ret 137} 138 139; Function Attrs: convergent norecurse nounwind 140define internal void @__omp_outlined__2(ptr noalias nocapture nofree readnone %.global_tid., ptr noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 { 141entry: 142 call void @unknown() #6, !dbg !48 143 ret void, !dbg !49 144} 145 146; Function Attrs: convergent norecurse nounwind 147define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 { 148entry: 149 %global_args = alloca ptr, align 8 150 call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3, !dbg !51 151 call void @unknown() #6, !dbg !52 152 ret void, !dbg !51 153} 154 155declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr 156 157declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr 158 159; Function Attrs: argmemonly nofree nosync nounwind willreturn 160declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5 161 162; Function Attrs: argmemonly nofree nosync nounwind willreturn 163declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5 164 165declare void @spmd_amenable() #7 166 167attributes #0 = { convergent norecurse nounwind "kernel" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 168attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 169attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 170attributes #3 = { nounwind } 171attributes #4 = { norecurse nounwind "kernel" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } 172attributes #5 = { argmemonly nofree nosync nounwind willreturn } 173attributes #6 = { convergent nounwind } 174attributes #7 = { "llvm.assume"="ompx_spmd_amenable" } 175 176!llvm.dbg.cu = !{!0} 177!omp_offload.info = !{!3, !4} 178!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13} 179!llvm.ident = !{!14} 180 181!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: DebugDirectivesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None) 182!1 = !DIFile(filename: "spmdization_remarks.c", directory: "/data/src/llvm-project") 183!2 = !{} 184!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1} 185!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0} 186!7 = !{i32 7, !"Dwarf Version", i32 2} 187!8 = !{i32 2, !"Debug Info Version", i32 3} 188!9 = !{i32 1, !"wchar_size", i32 4} 189!10 = !{i32 7, !"openmp", i32 50} 190!11 = !{i32 7, !"openmp-device", i32 50} 191!12 = !{i32 7, !"PIC Level", i32 2} 192!13 = !{i32 7, !"frame-pointer", i32 2} 193!14 = !{!"clang version 13.0.0"} 194!15 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_fallback_l11", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 195!16 = !DIFile(filename: "llvm/test/Transforms/OpenMP/spmdization_remarks.c", directory: "/data/src/llvm-project") 196!17 = !DISubroutineType(types: !2) 197!18 = !DILocation(line: 11, column: 1, scope: !15) 198!19 = !DILocation(line: 0, scope: !15) 199!20 = !DILocation(line: 13, column: 5, scope: !21, inlinedAt: !22) 200!21 = distinct !DISubprogram(name: "__omp_outlined__", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 201!22 = distinct !DILocation(line: 11, column: 1, scope: !15) 202!23 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !25) 203!24 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 204!25 = distinct !DILocation(line: 14, column: 5, scope: !21, inlinedAt: !22) 205!26 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !25) 206!27 = !DILocation(line: 15, column: 5, scope: !21, inlinedAt: !22) 207!28 = !DILocation(line: 11, column: 25, scope: !15) 208!29 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 209!30 = !DILocation(line: 4, column: 1, scope: !29) 210!31 = !DILocation(line: 8, column: 1, scope: !29) 211!32 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_no_fallback_l20", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 212!33 = !DILocation(line: 20, column: 1, scope: !32) 213!34 = !DILocation(line: 0, scope: !32) 214!35 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !36) 215!36 = distinct !DILocation(line: 22, column: 5, scope: !37, inlinedAt: !38) 216!37 = distinct !DISubprogram(name: "__omp_outlined__1", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 217!38 = distinct !DILocation(line: 20, column: 1, scope: !32) 218!39 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !36) 219!40 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !41) 220!41 = distinct !DILocation(line: 23, column: 5, scope: !37, inlinedAt: !38) 221!42 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !41) 222!43 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !44) 223!44 = distinct !DILocation(line: 24, column: 5, scope: !37, inlinedAt: !38) 224!45 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !44) 225!46 = !DILocation(line: 20, column: 25, scope: !32) 226!47 = distinct !DISubprogram(name: "__omp_outlined__2", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 227!48 = !DILocation(line: 6, column: 5, scope: !47) 228!49 = !DILocation(line: 7, column: 3, scope: !47) 229!50 = distinct !DISubprogram(linkageName: "__omp_outlined__2_wrapper", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagArtificial, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) 230!51 = !DILocation(line: 4, column: 1, scope: !50) 231!52 = !DILocation(line: 6, column: 5, scope: !47, inlinedAt: !53) 232!53 = distinct !DILocation(line: 4, column: 1, scope: !50) 233