xref: /llvm-project/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s --check-prefixes=CHECK
3; RUN: opt -S -passes=openmp-opt -openmp-opt-disable-spmdization < %s | FileCheck %s --check-prefixes=CHECK-DISABLE-SPMDIZATION
4;
5; __local int G;
6;
7; void leaf() {
8;   G = 42;
9; }
10;
11; void spmd_helper() {
12;   leaf();
13; #pragma omp parallel
14;   leaf();
15; }
16; void spmd() {
17; #pragma omp target
18;   spmd_helper();
19; }
20;
21; void generic_helper() {
22;   leaf();
23;   unknown();
24; }
25; void generic() {
26; #pragma omp target
27;   generic_helper();
28; }
29;
30target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
31target triple = "nvptx64"
32
33%struct.ident_t = type { i32, i32, i32, i32, ptr }
34%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
35%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
36
37@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
38@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
39@2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @0 }, align 8
40@G = external addrspace(5) global i32, align 4
41@__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
42@__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
43
44;.
45; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
46; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
47; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
48; CHECK: @G = external addrspace(5) global i32, align 4
49; CHECK: @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
50; CHECK: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
51;.
52; CHECK-DISABLE-SPMDIZATION: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
53; CHECK-DISABLE-SPMDIZATION: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
54; CHECK-DISABLE-SPMDIZATION: @[[GLOB2:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, ptr @[[GLOB0]] }, align 8
55; CHECK-DISABLE-SPMDIZATION: @G = external addrspace(5) global i32, align 4
56; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
57; CHECK-DISABLE-SPMDIZATION: @__omp_offloading_2b_10393b5_generic_l20_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
58; CHECK-DISABLE-SPMDIZATION: @__omp_outlined___wrapper.ID = private constant i8 undef
59;.
60define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 {
61; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
62; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
63; CHECK-NEXT:  entry:
64; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment, ptr [[DYN]])
65; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
66; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
67; CHECK:       user_code.entry:
68; CHECK-NEXT:    call void @spmd_helper() #[[ATTR7:[0-9]+]]
69; CHECK-NEXT:    call void @__kmpc_target_deinit()
70; CHECK-NEXT:    ret void
71; CHECK:       worker.exit:
72; CHECK-NEXT:    ret void
73;
74; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
75; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
76; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
77; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
78; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment, ptr [[DYN]])
79; CHECK-DISABLE-SPMDIZATION-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
80; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
81; CHECK-DISABLE-SPMDIZATION:       is_worker_check:
82; CHECK-DISABLE-SPMDIZATION-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
83; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
84; CHECK-DISABLE-SPMDIZATION-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
85; CHECK-DISABLE-SPMDIZATION-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
86; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
87; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.begin:
88; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
89; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
90; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
91; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
92; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
93; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.finished:
94; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
95; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.is_active.check:
96; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
97; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.parallel_region.check:
98; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
99; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.parallel_region.execute:
100; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__omp_outlined___wrapper(i16 0, i32 [[TMP0]])
101; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
102; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.parallel_region.check1:
103; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
104; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.parallel_region.end:
105; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_kernel_end_parallel()
106; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
107; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.done.barrier:
108; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
109; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
110; CHECK-DISABLE-SPMDIZATION:       thread.user_code.check:
111; CHECK-DISABLE-SPMDIZATION-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
112; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
113; CHECK-DISABLE-SPMDIZATION:       user_code.entry:
114; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @spmd_helper() #[[ATTR7:[0-9]+]]
115; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_target_deinit()
116; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
117; CHECK-DISABLE-SPMDIZATION:       worker.exit:
118; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
119;
120entry:
121  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment, ptr %dyn)
122  %exec_user_code = icmp eq i32 %0, -1
123  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
124
125user_code.entry:                                  ; preds = %entry
126  call void @spmd_helper() #5
127  call void @__kmpc_target_deinit()
128  ret void
129
130worker.exit:                                      ; preds = %entry
131  ret void
132}
133
134; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
135define weak i32 @__kmpc_target_init(ptr, ptr) {
136; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
137; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
138; CHECK-NEXT:    ret i32 0
139;
140; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__kmpc_target_init
141; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
142; CHECK-DISABLE-SPMDIZATION-NEXT:    ret i32 0
143;
144  ret i32 0
145}
146
147declare void @__kmpc_target_deinit()
148
149; Function Attrs: convergent noinline norecurse nounwind
150define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #0 {
151; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
152; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
153; CHECK-NEXT:  entry:
154; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
155; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr [[DYN]])
156; CHECK-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
157; CHECK-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
158; CHECK:       is_worker_check:
159; CHECK-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
160; CHECK-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
161; CHECK-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
162; CHECK-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
163; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
164; CHECK:       worker_state_machine.begin:
165; CHECK-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
166; CHECK-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
167; CHECK-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
168; CHECK-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
169; CHECK-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
170; CHECK:       worker_state_machine.finished:
171; CHECK-NEXT:    ret void
172; CHECK:       worker_state_machine.is_active.check:
173; CHECK-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
174; CHECK:       worker_state_machine.parallel_region.fallback.execute:
175; CHECK-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
176; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
177; CHECK:       worker_state_machine.parallel_region.end:
178; CHECK-NEXT:    call void @__kmpc_kernel_end_parallel()
179; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
180; CHECK:       worker_state_machine.done.barrier:
181; CHECK-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
182; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
183; CHECK:       thread.user_code.check:
184; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
185; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
186; CHECK:       user_code.entry:
187; CHECK-NEXT:    call void @generic_helper() #[[ATTR7]]
188; CHECK-NEXT:    call void @__kmpc_target_deinit()
189; CHECK-NEXT:    ret void
190; CHECK:       worker.exit:
191; CHECK-NEXT:    ret void
192;
193; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
194; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
195; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
196; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
197; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr [[DYN]])
198; CHECK-DISABLE-SPMDIZATION-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
199; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
200; CHECK-DISABLE-SPMDIZATION:       is_worker_check:
201; CHECK-DISABLE-SPMDIZATION-NEXT:    [[BLOCK_HW_SIZE:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
202; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WARP_SIZE:%.*]] = call i32 @__kmpc_get_warp_size()
203; CHECK-DISABLE-SPMDIZATION-NEXT:    [[BLOCK_SIZE:%.*]] = sub i32 [[BLOCK_HW_SIZE]], [[WARP_SIZE]]
204; CHECK-DISABLE-SPMDIZATION-NEXT:    [[THREAD_IS_MAIN_OR_WORKER:%.*]] = icmp slt i32 [[TMP0]], [[BLOCK_SIZE]]
205; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
206; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.begin:
207; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
208; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
209; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
210; CHECK-DISABLE-SPMDIZATION-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
211; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
212; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.finished:
213; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
214; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.is_active.check:
215; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
216; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.parallel_region.fallback.execute:
217; CHECK-DISABLE-SPMDIZATION-NEXT:    call void [[WORKER_WORK_FN]](i16 0, i32 [[TMP0]])
218; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
219; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.parallel_region.end:
220; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_kernel_end_parallel()
221; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
222; CHECK-DISABLE-SPMDIZATION:       worker_state_machine.done.barrier:
223; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
224; CHECK-DISABLE-SPMDIZATION-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
225; CHECK-DISABLE-SPMDIZATION:       thread.user_code.check:
226; CHECK-DISABLE-SPMDIZATION-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
227; CHECK-DISABLE-SPMDIZATION-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
228; CHECK-DISABLE-SPMDIZATION:       user_code.entry:
229; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @generic_helper() #[[ATTR7]]
230; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_target_deinit()
231; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
232; CHECK-DISABLE-SPMDIZATION:       worker.exit:
233; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
234;
235entry:
236  %0 = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr %dyn)
237  %exec_user_code = icmp eq i32 %0, -1
238  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
239
240user_code.entry:                                  ; preds = %entry
241  call void @generic_helper() #5
242  call void @__kmpc_target_deinit()
243  ret void
244
245worker.exit:                                      ; preds = %entry
246  ret void
247}
248
249; Function Attrs: convergent noinline nounwind
250define internal void @spmd_helper() #1 {
251; CHECK-LABEL: define {{[^@]+}}@spmd_helper
252; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
253; CHECK-NEXT:  entry:
254; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
255; CHECK-NEXT:    call void @leaf() #[[ATTR8:[0-9]+]]
256; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4:[0-9]+]]
257; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
258; CHECK-NEXT:    ret void
259;
260; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@spmd_helper
261; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1:[0-9]+]] {
262; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
263; CHECK-DISABLE-SPMDIZATION-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
264; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @leaf() #[[ATTR8:[0-9]+]]
265; CHECK-DISABLE-SPMDIZATION-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR4:[0-9]+]]
266; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
267; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
268;
269entry:
270  %captured_vars_addrs = alloca [0 x ptr], align 8
271  call void @leaf()
272  %0 = call i32 @__kmpc_global_thread_num(ptr @2)
273  call void @__kmpc_parallel_51(ptr @2, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
274  ret void
275}
276
277; Function Attrs: convergent noinline norecurse nounwind
278define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
279; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
280; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
281; CHECK-NEXT:  entry:
282; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
283; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
284; CHECK-NEXT:    call void @leaf() #[[ATTR8]]
285; CHECK-NEXT:    ret void
286;
287; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined__
288; CHECK-DISABLE-SPMDIZATION-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
289; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
290; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
291; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
292; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @leaf() #[[ATTR8]]
293; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
294;
295entry:
296  %.global_tid..addr = alloca ptr, align 8
297  %.bound_tid..addr = alloca ptr, align 8
298  store ptr %.global_tid., ptr %.global_tid..addr, align 8
299  store ptr %.bound_tid., ptr %.bound_tid..addr, align 8
300  call void @leaf()
301  ret void
302}
303
304; Function Attrs: convergent noinline norecurse nounwind
305define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 {
306; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
307; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
308; CHECK-NEXT:  entry:
309; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
310; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
311; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
312; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
313; CHECK-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
314; CHECK-NEXT:    call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR8]]
315; CHECK-NEXT:    ret void
316;
317; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
318; CHECK-DISABLE-SPMDIZATION-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
319; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
320; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
321; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
322; CHECK-DISABLE-SPMDIZATION-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
323; CHECK-DISABLE-SPMDIZATION-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
324; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
325; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR8]]
326; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
327;
328entry:
329  %.addr = alloca i16, align 2
330  %.addr1 = alloca i32, align 4
331  %.zero.addr = alloca i32, align 4
332  %global_args = alloca ptr, align 8
333  store i16 %0, ptr %.addr, align 2
334  store i32 %1, ptr %.addr1, align 4
335  store i32 0, ptr %.zero.addr, align 4
336  call void @__kmpc_get_shared_variables(ptr %global_args)
337  call void @__omp_outlined__(ptr %.addr1, ptr %.zero.addr) #3
338  ret void
339}
340
341declare void @__kmpc_get_shared_variables(ptr)
342
343; Function Attrs: nounwind
344declare i32 @__kmpc_global_thread_num(ptr) #3
345
346; Function Attrs: alwaysinline
347declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64) #4
348
349declare void @unknown()
350
351; Function Attrs: convergent noinline nounwind
352define internal void @leaf() #1 {
353; CHECK-LABEL: define {{[^@]+}}@leaf
354; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
355; CHECK-NEXT:  entry:
356; CHECK-NEXT:    store i32 42, ptr addrspace(5) @G, align 4
357; CHECK-NEXT:    ret void
358;
359; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@leaf
360; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR6:[0-9]+]] {
361; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
362; CHECK-DISABLE-SPMDIZATION-NEXT:    store i32 42, ptr addrspace(5) @G, align 4
363; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
364;
365entry:
366  store i32 42, ptr addrspace(5) @G, align 4
367  ret void
368}
369
370; Function Attrs: convergent noinline nounwind
371define internal void @generic_helper() #1 {
372; CHECK-LABEL: define {{[^@]+}}@generic_helper
373; CHECK-SAME: () #[[ATTR1]] {
374; CHECK-NEXT:  entry:
375; CHECK-NEXT:    call void @unknown()
376; CHECK-NEXT:    call void @leaf() #[[ATTR8]]
377; CHECK-NEXT:    ret void
378;
379; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@generic_helper
380; CHECK-DISABLE-SPMDIZATION-SAME: () #[[ATTR1]] {
381; CHECK-DISABLE-SPMDIZATION-NEXT:  entry:
382; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @unknown()
383; CHECK-DISABLE-SPMDIZATION-NEXT:    call void @leaf() #[[ATTR8]]
384; CHECK-DISABLE-SPMDIZATION-NEXT:    ret void
385;
386entry:
387  call void @unknown()
388  call void @leaf()
389  ret void
390}
391
392attributes #0 = { convergent noinline norecurse nounwind "kernel" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
393attributes #1 = { convergent noinline nounwind  "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
394attributes #2 = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
395attributes #3 = { nounwind }
396attributes #4 = { alwaysinline }
397attributes #5 = { convergent }
398
399!omp_offload.info = !{!0, !1}
400!llvm.module.flags = !{!4, !5, !6, !7, !8}
401!llvm.ident = !{!9}
402
403!0 = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
404!1 = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
405!4 = !{i32 1, !"wchar_size", i32 4}
406!5 = !{i32 7, !"openmp", i32 50}
407!6 = !{i32 7, !"openmp-device", i32 50}
408!7 = !{i32 8, !"PIC Level", i32 2}
409!8 = !{i32 7, !"frame-pointer", i32 2}
410!9 = !{!"clang version 14.0.0"}
411;.
412; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
413; CHECK: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
414; CHECK: attributes #[[ATTR2]] = { norecurse nosync memory(write) }
415; CHECK: attributes #[[ATTR3]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
416; CHECK: attributes #[[ATTR4]] = { nounwind }
417; CHECK: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
418; CHECK: attributes #[[ATTR6]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
419; CHECK: attributes #[[ATTR7]] = { convergent nounwind }
420; CHECK: attributes #[[ATTR8]] = { nosync nounwind memory(write) }
421;.
422; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
423; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
424; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { norecurse nosync memory(write) }
425; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
426; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { nounwind }
427; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5:[0-9]+]] = { alwaysinline }
428; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
429; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { convergent nounwind }
430; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR8]] = { nosync nounwind memory(write) }
431;.
432; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
433; CHECK: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
434; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
435; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
436; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
437; CHECK: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
438; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
439; CHECK: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
440;.
441; CHECK-DISABLE-SPMDIZATION: [[META0:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"spmd", i32 12, i32 0}
442; CHECK-DISABLE-SPMDIZATION: [[META1:![0-9]+]] = !{i32 0, i32 43, i32 17011637, !"generic", i32 20, i32 1}
443; CHECK-DISABLE-SPMDIZATION: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
444; CHECK-DISABLE-SPMDIZATION: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50}
445; CHECK-DISABLE-SPMDIZATION: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
446; CHECK-DISABLE-SPMDIZATION: [[META5:![0-9]+]] = !{i32 8, !"PIC Level", i32 2}
447; CHECK-DISABLE-SPMDIZATION: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
448; CHECK-DISABLE-SPMDIZATION: [[META7:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
449;.
450