xref: /llvm-project/llvm/test/Transforms/OpenMP/nested_parallelism.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
2; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
3
4; void foo1(int i) {
5;   #pragma omp parallel
6;     i++;
7; }
8
9; void foo(int i) {
10;   #pragma omp parallel
11;     foo1(i);
12; }
13
14; int main() {
15;   int i=0;
16;   #pragma omp target
17;     foo(i);
18
19;   #pragma omp target
20;     foo1(i);
21; }
22
23target triple = "nvptx64"
24
25%struct.ident_t = type { i32, i32, i32, i32, ptr }
26%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
27%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
28
29@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
30@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
31@i_shared = internal addrspace(3) global [4 x i8] undef, align 16
32@i.i_shared = internal addrspace(3) global [4 x i8] undef, align 16
33
34@__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
35@__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
36
37
38;.
39; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
40; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
41; CHECK: @i_shared = internal addrspace(3) global [4 x i8] undef, align 16
42; CHECK: @i.i_shared = internal addrspace(3) global [4 x i8] undef, align 16
43; CHECK: @__omp_offloading_10302_bd7e0_main_l13_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
44; CHECK: @__omp_offloading_10302_bd7e0_main_l16_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
45;.
46define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" {
47; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l13(
48; CHECK-NEXT:  entry:
49; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
50; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment, ptr [[DYN:%.*]])
51; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
52; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
53; CHECK:       common.ret:
54; CHECK-NEXT:    ret void
55; CHECK:       user_code.entry:
56; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
57; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2:[0-9]+]]
58; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #[[ATTR2]]
59; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
60; CHECK-NEXT:    br i1 [[TMP3]], label [[REGION_GUARDED_I:%.*]], label [[_Z3FOOI_INTERNALIZED_EXIT:%.*]]
61; CHECK:       region.guarded.i:
62; CHECK-NEXT:    [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
63; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i_shared, align 16
64; CHECK-NEXT:    br label [[_Z3FOOI_INTERNALIZED_EXIT]]
65; CHECK:       _Z3fooi.internalized.exit:
66; CHECK-NEXT:    tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
67; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
68; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
69; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
70; CHECK-NEXT:    call void @__kmpc_target_deinit()
71; CHECK-NEXT:    br label [[COMMON_RET]]
72;
73entry:
74  %captured_vars_addrs.i = alloca [1 x ptr], align 8
75  %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l13_kernel_environment, ptr %dyn) #6
76  %exec_user_code = icmp eq i32 %0, -1
77  br i1 %exec_user_code, label %user_code.entry, label %common.ret
78
79common.ret:                                       ; preds = %entry, %_Z3fooi.internalized.exit
80  ret void
81
82user_code.entry:                                  ; preds = %entry
83  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
84  %1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1) #6
85  %2 = tail call i32 @__kmpc_get_hardware_thread_id_in_block() #6
86  %3 = icmp eq i32 %2, 0
87  br i1 %3, label %region.guarded.i, label %_Z3fooi.internalized.exit
88
89region.guarded.i:                                 ; preds = %user_code.entry
90  %i.addr.sroa.0.0.extract.trunc = trunc i64 %i to i32
91  store i32 %i.addr.sroa.0.0.extract.trunc, ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), align 16
92  br label %_Z3fooi.internalized.exit
93
94_Z3fooi.internalized.exit:                        ; preds = %user_code.entry, %region.guarded.i
95  tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @1, i32 %2)
96  store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr %captured_vars_addrs.i, align 8
97  call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
98  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
99  call void @__kmpc_target_deinit() #6
100  br label %common.ret
101}
102
103declare i32 @__kmpc_target_init(ptr, ptr) local_unnamed_addr
104
105define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
106; CHECK-LABEL: @_Z3fooi(
107; CHECK-NEXT:  entry:
108; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
109; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
110; CHECK-NEXT:    [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
111; CHECK-NEXT:    store i32 [[I1:%.*]], ptr [[I]], align 16
112; CHECK-NEXT:    store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
113; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
114; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
115; CHECK-NEXT:    ret void
116;
117entry:
118  %captured_vars_addrs = alloca [1 x ptr], align 8
119  %0 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1) #6
120  %i = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4)
121  store i32 %i1, ptr %i, align 16
122  store ptr %i, ptr %captured_vars_addrs, align 8
123  call void @__kmpc_parallel_51(ptr nonnull @1, i32 %0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull %captured_vars_addrs, i64 1) #6
124  call void @__kmpc_free_shared(ptr %i, i64 4)
125  ret void
126}
127
128declare void @__kmpc_target_deinit(ptr, i8) local_unnamed_addr
129
130define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16(ptr %dyn, i64 noundef %i) local_unnamed_addr "kernel" {
131; CHECK-LABEL: @__omp_offloading_10302_bd7e0_main_l16(
132; CHECK-NEXT:  entry:
133; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
134; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment, ptr [[DYN:%.*]])
135; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
136; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
137; CHECK:       common.ret:
138; CHECK-NEXT:    ret void
139; CHECK:       user_code.entry:
140; CHECK-NEXT:    [[I_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I:%.*]] to i32
141; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
142; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
143; CHECK-NEXT:    store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
144; CHECK-NEXT:    store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
145; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
146; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
147; CHECK-NEXT:    call void @__kmpc_target_deinit()
148; CHECK-NEXT:    br label [[COMMON_RET]]
149;
150entry:
151  %captured_vars_addrs.i = alloca [1 x ptr], align 8
152  %0 = tail call i32 @__kmpc_target_init(ptr @__omp_offloading_10302_bd7e0_main_l16_kernel_environment, ptr %dyn) #6
153  %exec_user_code = icmp eq i32 %0, -1
154  br i1 %exec_user_code, label %user_code.entry, label %common.ret
155
156common.ret:                                       ; preds = %entry, %user_code.entry
157  ret void
158
159user_code.entry:                                  ; preds = %entry
160  %i.addr.sroa.0.0.extract.trunc = trunc i64 %i to i32
161  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
162  %1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1) #6
163  store i32 %i.addr.sroa.0.0.extract.trunc, ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), align 16
164  store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr %captured_vars_addrs.i, align 8
165  call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
166  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
167  call void @__kmpc_target_deinit() #6
168  br label %common.ret
169}
170
171define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 {
172; CHECK-LABEL: @_Z4foo1i(
173; CHECK-NEXT:  entry:
174; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
175; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
176; CHECK-NEXT:    [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
177; CHECK-NEXT:    store i32 [[I1:%.*]], ptr [[I]], align 16
178; CHECK-NEXT:    store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
179; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
180; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
181; CHECK-NEXT:    ret void
182;
183entry:
184  %captured_vars_addrs = alloca [1 x ptr], align 8
185  %0 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1) #6
186  %i = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4)
187  store i32 %i1, ptr %i, align 16
188  store ptr %i, ptr %captured_vars_addrs, align 8
189  call void @__kmpc_parallel_51(ptr nonnull @1, i32 %0, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs, i64 1) #6
190  call void @__kmpc_free_shared(ptr %i, i64 4)
191  ret void
192}
193
194declare ptr @__kmpc_alloc_shared(i64) local_unnamed_addr #3
195
196define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture noundef nonnull readonly align 4 dereferenceable(4) %i) #4 {
197; CHECK-LABEL: @__omp_outlined__(
198; CHECK-NEXT:  entry:
199; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS_I:%.*]] = alloca [1 x ptr], align 8
200; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4
201; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
202; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
203; CHECK-NEXT:    [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
204; CHECK-NEXT:    store i32 [[TMP0]], ptr [[I_I]], align 16
205; CHECK-NEXT:    store ptr [[I_I]], ptr [[CAPTURED_VARS_ADDRS_I]], align 8
206; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
207; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I_I]], i64 4) #[[ATTR2]]
208; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
209; CHECK-NEXT:    ret void
210;
211entry:
212  %captured_vars_addrs.i = alloca [1 x ptr], align 8
213  %0 = load i32, ptr %i, align 4
214  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
215  %1 = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1) #6
216  %i.i = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #6
217  store i32 %0, ptr %i.i, align 16
218  store ptr %i.i, ptr %captured_vars_addrs.i, align 8
219  call void @__kmpc_parallel_51(ptr nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs.i, i64 1) #6
220  call void @__kmpc_free_shared(ptr %i.i, i64 4) #6
221  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i)
222  ret void
223}
224
225define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 {
226; CHECK-LABEL: @__omp_outlined___wrapper(
227; CHECK-NEXT:  entry:
228; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS_I_I:%.*]] = alloca [1 x ptr], align 8
229; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
230; CHECK-NEXT:    call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
231; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
232; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
233; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
234; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
235; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
236; CHECK-NEXT:    [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
237; CHECK-NEXT:    store i32 [[TMP4]], ptr [[I_I_I]], align 16
238; CHECK-NEXT:    store ptr [[I_I_I]], ptr [[CAPTURED_VARS_ADDRS_I_I]], align 8
239; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
240; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[I_I_I]], i64 4) #[[ATTR2]]
241; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
242; CHECK-NEXT:    ret void
243;
244entry:
245  %captured_vars_addrs.i.i = alloca [1 x ptr], align 8
246  %global_args = alloca ptr, align 8
247  call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #6
248  %2 = load ptr, ptr %global_args, align 8
249  %3 = load ptr, ptr %2, align 8
250  %4 = load i32, ptr %3, align 4
251  call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %captured_vars_addrs.i.i)
252  %5 = call i32 @__kmpc_global_thread_num(ptr nonnull @1) #6
253  %i.i.i = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #6
254  store i32 %4, ptr %i.i.i, align 16
255  store ptr %i.i.i, ptr %captured_vars_addrs.i.i, align 8
256  call void @__kmpc_parallel_51(ptr nonnull @1, i32 %5, i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull %captured_vars_addrs.i.i, i64 1) #6
257  call void @__kmpc_free_shared(ptr %i.i.i, i64 4) #6
258  call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %captured_vars_addrs.i.i)
259  ret void
260}
261
262declare void @__kmpc_get_shared_variables(ptr) local_unnamed_addr
263
264declare i32 @__kmpc_global_thread_num(ptr) local_unnamed_addr #6
265
266declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) local_unnamed_addr #7
267
268declare void @__kmpc_free_shared(ptr allocptr nocapture, i64) local_unnamed_addr #8
269
270define internal void @__omp_outlined__1(ptr noalias nocapture readnone %.global_tid., ptr noalias nocapture readnone %.bound_tid., ptr nocapture noundef nonnull align 4 dereferenceable(4) %i) #9 {
271; CHECK-LABEL: @__omp_outlined__1(
272; CHECK-NEXT:  entry:
273; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4
274; CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP0]], 1
275; CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
276; CHECK-NEXT:    ret void
277;
278entry:
279  %0 = load i32, ptr %i, align 4
280  %inc = add nsw i32 %0, 1
281  store i32 %inc, ptr %i, align 4
282  ret void
283}
284
285define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #5 {
286; CHECK-LABEL: @__omp_outlined__1_wrapper(
287; CHECK-NEXT:  entry:
288; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
289; CHECK-NEXT:    call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
290; CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
291; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
292; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
293; CHECK-NEXT:    [[INC_I:%.*]] = add nsw i32 [[TMP4]], 1
294; CHECK-NEXT:    store i32 [[INC_I]], ptr [[TMP3]], align 4
295; CHECK-NEXT:    ret void
296;
297entry:
298  %global_args = alloca ptr, align 8
299  call void @__kmpc_get_shared_variables(ptr nonnull %global_args) #6
300  %2 = load ptr, ptr %global_args, align 8
301  %3 = load ptr, ptr %2, align 8
302  %4 = load i32, ptr %3, align 4
303  %inc.i = add nsw i32 %4, 1
304  store i32 %inc.i, ptr %3, align 4
305  ret void
306}
307
308declare i32 @__kmpc_get_hardware_thread_id_in_block() local_unnamed_addr
309
310declare void @__kmpc_barrier_simple_spmd(ptr, i32) local_unnamed_addr #10
311
312declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #11
313
314declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #11
315
316
317!omp_offload.info = !{!0, !1}
318!llvm.module.flags = !{!4, !5}
319
320!0 = !{i32 0, i32 66306, i32 776160, !"main", i32 13, i32 0, i32 0}
321!1 = !{i32 0, i32 66306, i32 776160, !"main", i32 16, i32 0, i32 1}
322
323!4 = !{i32 7, !"openmp", i32 50}
324!5 = !{i32 7, !"openmp-device", i32 50}
325;.
326; CHECK: attributes #[[ATTR0:[0-9]+]] = { "kernel" }
327; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind allocsize(0) }
328; CHECK: attributes #[[ATTR2]] = { nounwind }
329; CHECK: attributes #[[ATTR3:[0-9]+]] = { alwaysinline }
330; CHECK: attributes #[[ATTR4:[0-9]+]] = { nosync nounwind }
331; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nounwind }
332; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
333;.
334; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 13, i32 0, i32 0}
335; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 66306, i32 776160, !"main", i32 16, i32 0, i32 1}
336; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50}
337; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
338;.
339