xref: /llvm-project/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; RUN: opt -S -passes=openmp-opt -openmp-ir-builder-optimistic-attributes -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s
2; RUN: opt -S -passes=openmp-opt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s
3
4; C input used for this test:
5
6; void bar(void) {
7;     #pragma omp parallel
8;     { }
9; }
10; void foo(void) {
11;   #pragma omp target teams
12;   {
13;     #pragma omp parallel
14;     {}
15;     bar();
16;     unknown();
17;     #pragma omp parallel
18;     {}
19;   }
20; }
21
22; Verify we replace the function pointer uses for the first and last outlined
23; region (1 and 3) but not for the middle one (2) because it could be called from
24; another kernel.
25
26; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef
27; CHECK-DAG: @__omp_outlined__2_wrapper.ID = private constant i8 undef
28
29; CHECK-DAG:   icmp eq ptr %worker.work_fn, @__omp_outlined__1_wrapper.ID
30; CHECK-DAG:   icmp eq ptr %worker.work_fn, @__omp_outlined__2_wrapper.ID
31
32
33; CHECK-DAG:   call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper.ID, ptr %{{.*}}, i64 0)
34; CHECK-DAG:   call void @__kmpc_parallel_51(ptr @1, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr %{{.*}}, i64 0)
35; CHECK-DAG:   call void @__kmpc_parallel_51(ptr @2, i32 %{{.*}}, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %{{.*}}, i64 0)
36
37
38%struct.ident_t = type { i32, i32, i32, i32, ptr }
39%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
40%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
41
42@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
43@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
44@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
45@__omp_offloading_10301_87b2c_foo_l7_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
46
47define weak ptx_kernel void @__omp_offloading_10301_87b2c_foo_l7() "kernel" {
48entry:
49  %.zero.addr = alloca i32, align 4
50  %.threadid_temp. = alloca i32, align 4
51  store i32 0, ptr %.zero.addr, align 4
52  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_10301_87b2c_foo_l7_kernel_environment, ptr null)
53  %exec_user_code = icmp eq i32 %0, -1
54  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
55
56user_code.entry:                                  ; preds = %entry
57  %1 = call i32 @__kmpc_global_thread_num(ptr @1)
58  store i32 %1, ptr %.threadid_temp., align 4
59  call void @__omp_outlined__(ptr %.threadid_temp., ptr %.zero.addr)
60  call void @__kmpc_target_deinit()
61  ret void
62
63worker.exit:                                      ; preds = %entry
64  ret void
65}
66
67define weak i32 @__kmpc_target_init(ptr %0, ptr) {
68  ret i32 0
69}
70
71declare void @unknown()
72
73define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
74entry:
75  %.global_tid..addr = alloca ptr, align 8
76  %.bound_tid..addr = alloca ptr, align 8
77  %captured_vars_addrs = alloca [0 x ptr], align 8
78  %captured_vars_addrs1 = alloca [0 x ptr], align 8
79  store ptr %.global_tid., ptr %.global_tid..addr, align 8
80  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
81  %0 = load ptr, ptr %.global_tid..addr, align 8
82  %1 = load i32, ptr %0, align 4
83  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr %captured_vars_addrs, i64 0)
84  call void @bar()
85  call void @unknown()
86  call void @__kmpc_parallel_51(ptr @1, i32 %1, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr %captured_vars_addrs1, i64 0)
87  ret void
88}
89
90define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
91entry:
92  %.global_tid..addr = alloca ptr, align 8
93  %.bound_tid..addr = alloca ptr, align 8
94  store ptr %.global_tid., ptr %.global_tid..addr, align 8
95  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
96  ret void
97}
98
99define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) {
100entry:
101  %.addr = alloca i16, align 2
102  %.addr1 = alloca i32, align 4
103  %.zero.addr = alloca i32, align 4
104  %global_args = alloca ptr, align 8
105  store i32 0, ptr %.zero.addr, align 4
106  store i16 %0, ptr %.addr, align 2
107  store i32 %1, ptr %.addr1, align 4
108  call void @__kmpc_get_shared_variables(ptr %global_args)
109  call void @__omp_outlined__1(ptr %.addr1, ptr %.zero.addr)
110  ret void
111}
112
113declare void @__kmpc_get_shared_variables(ptr)
114
115declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
116
117define hidden void @bar() {
118entry:
119  %captured_vars_addrs = alloca [0 x ptr], align 8
120  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
121  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr %captured_vars_addrs, i64 0)
122  ret void
123}
124
125define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
126entry:
127  %.global_tid..addr = alloca ptr, align 8
128  %.bound_tid..addr = alloca ptr, align 8
129  store ptr %.global_tid., ptr %.global_tid..addr, align 8
130  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
131  ret void
132}
133
134define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) {
135entry:
136  %.addr = alloca i16, align 2
137  %.addr1 = alloca i32, align 4
138  %.zero.addr = alloca i32, align 4
139  %global_args = alloca ptr, align 8
140  store i32 0, ptr %.zero.addr, align 4
141  store i16 %0, ptr %.addr, align 2
142  store i32 %1, ptr %.addr1, align 4
143  call void @__kmpc_get_shared_variables(ptr %global_args)
144  call void @__omp_outlined__2(ptr %.addr1, ptr %.zero.addr)
145  ret void
146}
147
148declare i32 @__kmpc_global_thread_num(ptr)
149
150declare void @__kmpc_target_deinit()
151
152define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
153entry:
154  %.global_tid..addr = alloca ptr, align 8
155  %.bound_tid..addr = alloca ptr, align 8
156  store ptr %.global_tid., ptr %.global_tid..addr, align 8
157  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
158  ret void
159}
160
161define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) {
162entry:
163  %.addr = alloca i16, align 2
164  %.addr1 = alloca i32, align 4
165  %.zero.addr = alloca i32, align 4
166  %global_args = alloca ptr, align 8
167  store i32 0, ptr %.zero.addr, align 4
168  store i16 %0, ptr %.addr, align 2
169  store i32 %1, ptr %.addr1, align 4
170  call void @__kmpc_get_shared_variables(ptr %global_args)
171  call void @__omp_outlined__3(ptr %.addr1, ptr %.zero.addr)
172  ret void
173}
174
175!omp_offload.info = !{!0}
176!llvm.module.flags = !{!2, !3}
177
178!0 = !{i32 0, i32 66305, i32 555956, !"foo", i32 7, i32 0}
179!2 = !{i32 7, !"openmp", i32 50}
180!3 = !{i32 7, !"openmp-device", i32 50}
181