xref: /llvm-project/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
2target triple = "nvptx64"
3
4; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback.
5; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:13:5: Call may contain unknown parallel regions. Use `[[omp::assume("omp_no_parallelism")]]` to override.
6; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:15:5: Call may contain unknown parallel regions. Use `[[omp::assume("omp_no_parallelism")]]` to override.
7; CHECK{LITERAL}: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:20:1: Rewriting generic-mode kernel with a customized state machine.
8
9
10;; void unknown(void);
11;; void known(void) {
12;;   #pragma omp parallel
13;;   {
14;;     unknown();
15;;   }
16;; }
17;;
18;; void test_fallback(void) {
19;;   #pragma omp target teams
20;;   {
21;;     unknown();
22;;     known();
23;;     unknown();
24;;   }
25;; }
26;;
27;; [[omp::assume("omp_no_openmp")]] void no_openmp(void);
28;; void test_no_fallback(void) {
29;;   #pragma omp target teams
30;;   {
31;;     known();
32;;     known();
33;;     known();
34;;     no_openmp();     // make it non-spmd
35;;   }
36;; }
37
38%struct.ident_t = type { i32, i32, i32, i32, ptr }
39%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
40%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
41
42@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
43@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
44@2 = private unnamed_addr constant [82 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_fallback;11;1;;\00", align 1
45@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @2 }, align 8
46@4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
47@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @4 }, align 8
48@6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
49@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @6 }, align 8
50@8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1
51@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @8 }, align 8
52@10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
53@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @10 }, align 8
54@12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1
55@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @12 }, align 8
56@G = external global i32
57@__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
58@__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
59
60
61; Function Attrs: convergent norecurse nounwind
62define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_fallback_l11(ptr %dyn) local_unnamed_addr #0 !dbg !15 {
63entry:
64  %captured_vars_addrs.i.i = alloca [0 x ptr], align 8
65  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_fallback_l11_kernel_environment, ptr %dyn) #3, !dbg !18
66  %exec_user_code = icmp eq i32 %0, -1, !dbg !18
67  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
68
69common.ret:                                       ; preds = %entry, %user_code.entry
70  ret void, !dbg !19
71
72user_code.entry:                                  ; preds = %entry
73  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @3) #3
74  call void @unknown() #6, !dbg !20
75  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3
76  %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
77  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i.i, i64 noundef 0) #3, !dbg !23
78  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i.i) #3, !dbg !26
79  call void @unknown() #6, !dbg !27
80  call void @__kmpc_target_deinit() #3, !dbg !28
81  br label %common.ret
82}
83
84; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
85define weak i32 @__kmpc_target_init(ptr, ptr) {
86  ret i32 0
87}
88
89; Function Attrs: convergent
90declare void @unknown() local_unnamed_addr #1
91
92; Function Attrs: nounwind
93define hidden void @known() local_unnamed_addr #2 !dbg !29 {
94entry:
95  %captured_vars_addrs = alloca [0 x ptr], align 8
96  %0 = call i32 @__kmpc_global_thread_num(ptr nonnull @13)
97  call void @__kmpc_parallel_51(ptr nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr nonnull %captured_vars_addrs, i64 0) #3, !dbg !30
98  ret void, !dbg !31
99}
100
101; Function Attrs: nounwind
102declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #3
103
104declare void @__kmpc_target_deinit() local_unnamed_addr
105
106; Function Attrs: norecurse nounwind
107define weak ptx_kernel void @__omp_offloading_2a_d80d3d_test_no_fallback_l20(ptr %dyn) local_unnamed_addr #4 !dbg !32 {
108entry:
109  %captured_vars_addrs.i2.i = alloca [0 x ptr], align 8
110  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2a_d80d3d_test_no_fallback_l20_kernel_environment, ptr %dyn) #3, !dbg !33
111  %exec_user_code = icmp eq i32 %0, -1, !dbg !33
112  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
113
114common.ret:                                       ; preds = %entry, %user_code.entry
115  ret void, !dbg !34
116
117user_code.entry:                                  ; preds = %entry
118  %1 = call i32 @__kmpc_global_thread_num(ptr nonnull @9) #3
119  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
120  %2 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
121  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %2, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !35
122  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !39
123  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
124  %3 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
125  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !40
126  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !42
127  call void @llvm.lifetime.start.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3
128  %4 = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @13) #3
129  call void @__kmpc_parallel_51(ptr noundef nonnull @13, i32 %4, i32 noundef 1, i32 noundef -1, i32 noundef -1, ptr noundef @__omp_outlined__2, ptr noundef @__omp_outlined__2_wrapper, ptr noundef nonnull %captured_vars_addrs.i2.i, i64 noundef 0) #3, !dbg !43
130  call void @llvm.lifetime.end.p0(i64 0, ptr nonnull %captured_vars_addrs.i2.i) #3, !dbg !45
131  call void @no_openmp()
132  call void @no_parallelism()
133  call void @__kmpc_target_deinit() #3, !dbg !46
134  br label %common.ret
135}
136
137; Function Attrs: convergent norecurse nounwind
138define internal void @__omp_outlined__2(ptr noalias nocapture nofree readnone %.global_tid., ptr noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
139entry:
140  call void @unknown() #6, !dbg !48
141  ret void, !dbg !49
142}
143
144; Function Attrs: convergent norecurse nounwind
145define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
146entry:
147  %global_args = alloca ptr, align 8
148  call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #3, !dbg !51
149  call void @unknown() #6, !dbg !52
150  ret void, !dbg !51
151}
152
153declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr
154
155declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr
156
157; Function Attrs: argmemonly nofree nosync nounwind willreturn
158declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #5
159
160; Function Attrs: argmemonly nofree nosync nounwind willreturn
161declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #5
162
163declare void @no_openmp() #7
164declare void @no_parallelism() #8
165
166attributes #0 = { convergent norecurse nounwind "kernel" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
167attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
168attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
169attributes #3 = { nounwind }
170attributes #4 = { norecurse nounwind "kernel" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
171attributes #5 = { argmemonly nofree nosync nounwind willreturn }
172attributes #6 = { convergent nounwind }
173attributes #7 = { "llvm.assume"="omp_no_openmp" }
174attributes #8 = { "llvm.assume"="omp_no_parallelism" }
175
176!llvm.dbg.cu = !{!0}
177!omp_offload.info = !{!3, !4}
178!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
179!llvm.ident = !{!14}
180
181!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: DebugDirectivesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
182!1 = !DIFile(filename: "custom_state_machines_remarks.c", directory: "/data/src/llvm-project")
183!2 = !{}
184!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
185!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
186!7 = !{i32 7, !"Dwarf Version", i32 2}
187!8 = !{i32 2, !"Debug Info Version", i32 3}
188!9 = !{i32 1, !"wchar_size", i32 4}
189!10 = !{i32 7, !"openmp", i32 50}
190!11 = !{i32 7, !"openmp-device", i32 50}
191!12 = !{i32 7, !"PIC Level", i32 2}
192!13 = !{i32 7, !"frame-pointer", i32 2}
193!14 = !{!"clang version 13.0.0"}
194!15 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_fallback_l11", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
195!16 = !DIFile(filename: "llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c", directory: "/data/src/llvm-project")
196!17 = !DISubroutineType(types: !2)
197!18 = !DILocation(line: 11, column: 1, scope: !15)
198!19 = !DILocation(line: 0, scope: !15)
199!20 = !DILocation(line: 13, column: 5, scope: !21, inlinedAt: !22)
200!21 = distinct !DISubprogram(name: "__omp_outlined__", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
201!22 = distinct !DILocation(line: 11, column: 1, scope: !15)
202!23 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !25)
203!24 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
204!25 = distinct !DILocation(line: 14, column: 5, scope: !21, inlinedAt: !22)
205!26 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !25)
206!27 = !DILocation(line: 15, column: 5, scope: !21, inlinedAt: !22)
207!28 = !DILocation(line: 11, column: 25, scope: !15)
208!29 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
209!30 = !DILocation(line: 4, column: 1, scope: !29)
210!31 = !DILocation(line: 8, column: 1, scope: !29)
211!32 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_no_fallback_l20", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
212!33 = !DILocation(line: 20, column: 1, scope: !32)
213!34 = !DILocation(line: 0, scope: !32)
214!35 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !36)
215!36 = distinct !DILocation(line: 22, column: 5, scope: !37, inlinedAt: !38)
216!37 = distinct !DISubprogram(name: "__omp_outlined__1", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
217!38 = distinct !DILocation(line: 20, column: 1, scope: !32)
218!39 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !36)
219!40 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !41)
220!41 = distinct !DILocation(line: 23, column: 5, scope: !37, inlinedAt: !38)
221!42 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !41)
222!43 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !44)
223!44 = distinct !DILocation(line: 24, column: 5, scope: !37, inlinedAt: !38)
224!45 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !44)
225!46 = !DILocation(line: 20, column: 25, scope: !32)
226!47 = distinct !DISubprogram(name: "__omp_outlined__2", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
227!48 = !DILocation(line: 6, column: 5, scope: !47)
228!49 = !DILocation(line: 7, column: 3, scope: !47)
229!50 = distinct !DISubprogram(linkageName: "__omp_outlined__2_wrapper", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagArtificial, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
230!51 = !DILocation(line: 4, column: 1, scope: !50)
231!52 = !DILocation(line: 6, column: 5, scope: !47, inlinedAt: !53)
232!53 = distinct !DILocation(line: 4, column: 1, scope: !50)
233