xref: /llvm-project/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
; Target selection plus the types and globals shared by the three kernels below.
3target triple = "nvptx64"
4
; Layout of the per-kernel environment object that each kernel passes as the
; first argument of __kmpc_target_init.
5%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
6%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
7
; Destination of the stores performed by helper0 and helper2.
8@G = external global i32
9
; One environment constant per kernel; all three start out identical. The
; global assertions in this file expect the pass to change the third i8 field
; from 1 to 3 and to fill the fifth and seventh fields with 666 and 777.
; NOTE(review): presumably those fields are the execution mode and the
; max-threads / max-teams bounds -- confirm against the OpenMP device runtime.
10@kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
11@kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
12@kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr null, ptr null }
13
14;.
15; CHECK: @G = external global i32
16; CHECK: @kernel0_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 666, i32 0, i32 777, i32 0, i32 0 }, ptr null, ptr null }
17; CHECK: @kernel1_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 666, i32 0, i32 777, i32 0, i32 0 }, ptr null, ptr null }
18; CHECK: @kernel2_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 3, i32 0, i32 666, i32 0, i32 777, i32 0, i32 0 }, ptr null, ptr null }
19; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
20; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[GLOB0]] }, align 8
21;.
; kernel0: kernel entry point (attribute group #0) that calls all three helpers
; between __kmpc_target_init and __kmpc_target_deinit. The expected output
; inserts a hardware-thread-id test after the init call: threads whose id is
; non-zero branch to a plain return, and only thread 0 reaches the helper calls.
22define weak ptx_kernel void @kernel0(ptr %dyn) "kernel" #0 {
23; CHECK-LABEL: define {{[^@]+}}@kernel0
24; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
25; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment, ptr [[DYN]])
26; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
27; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
28; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
29; CHECK:       exit.threads:
30; CHECK-NEXT:    ret void
31; CHECK:       main.thread.user_code:
32; CHECK-NEXT:    call void @helper0() #[[ATTR1:[0-9]+]]
33; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
34; CHECK-NEXT:    call void @helper2() #[[ATTR1]]
35; CHECK-NEXT:    call void @__kmpc_target_deinit()
36; CHECK-NEXT:    ret void
37;
; Input IR: initialize with this kernel's environment object, run the helpers
; in order, then tear down. The result of the init call (%i) is not used here.
38  %i = call i32 @__kmpc_target_init(ptr @kernel0_kernel_environment, ptr %dyn)
39  call void @helper0()
40  call void @helper1()
41  call void @helper2()
42  call void @__kmpc_target_deinit()
43  ret void
44}
45
; kernel1: same shape as kernel0 but calls only helper1 between the init and
; deinit calls. The expected output adds the same thread-id test, so only
; hardware thread 0 reaches the helper1 call.
46define weak ptx_kernel void @kernel1(ptr %dyn) "kernel" #0 {
47; CHECK-LABEL: define {{[^@]+}}@kernel1
48; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
49; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment, ptr [[DYN]])
50; CHECK-NEXT:    [[THREAD_ID_IN_BLOCK:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
51; CHECK-NEXT:    [[THREAD_IS_MAIN:%.*]] = icmp ne i32 [[THREAD_ID_IN_BLOCK]], 0
52; CHECK-NEXT:    br i1 [[THREAD_IS_MAIN]], label [[EXIT_THREADS:%.*]], label [[MAIN_THREAD_USER_CODE:%.*]]
53; CHECK:       exit.threads:
54; CHECK-NEXT:    ret void
55; CHECK:       main.thread.user_code:
56; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
57; CHECK-NEXT:    call void @__kmpc_target_deinit()
58; CHECK-NEXT:    ret void
59;
; Input IR: init with kernel1's environment object, call helper1, deinit.
60  %i = call i32 @__kmpc_target_init(ptr @kernel1_kernel_environment, ptr %dyn)
61  call void @helper1()
62  call void @__kmpc_target_deinit()
63  ret void
64}
65
; kernel2: kernel that branches on the result of __kmpc_target_init and, on the
; user-code path, calls the helpers followed by __kmpc_parallel_51 with the
; outlined function and its wrapper. Unlike kernel0/kernel1, the expected
; output keeps the original control flow (no thread-id test is inserted); the
; only visible difference is attribute group ATTR1 on four of the calls.
66define weak ptx_kernel void @kernel2(ptr %dyn) "kernel" #0 {
67; CHECK-LABEL: define {{[^@]+}}@kernel2
68; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
69; CHECK-NEXT:  entry:
70; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
71; CHECK-NEXT:    [[I:%.*]] = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment, ptr [[DYN]])
72; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[I]], -1
73; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]]
74; CHECK:       common.ret:
75; CHECK-NEXT:    ret void
76; CHECK:       user_code.entry:
77; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr null) #[[ATTR1]]
78; CHECK-NEXT:    call void @helper0() #[[ATTR1]]
79; CHECK-NEXT:    call void @helper1() #[[ATTR1]]
80; CHECK-NEXT:    call void @helper2() #[[ATTR1]]
81; CHECK-NEXT:    call void @__kmpc_parallel_51(ptr null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
82; CHECK-NEXT:    call void @__kmpc_target_deinit()
83; CHECK-NEXT:    ret void
84;
85entry:
; Zero-length pointer array; its address is passed as the second-to-last
; argument of the __kmpc_parallel_51 call below.
86  %captured_vars_addrs = alloca [0 x ptr], align 8
87  %i = call i32 @__kmpc_target_init(ptr @kernel2_kernel_environment, ptr %dyn)
; Only an init result of -1 continues into the user code; anything else returns.
88  %exec_user_code = icmp eq i32 %i, -1
89  br i1 %exec_user_code, label %user_code.entry, label %common.ret
90
91common.ret:
92  ret void
93
94user_code.entry:
; The thread number obtained here is forwarded as the second argument of the
; __kmpc_parallel_51 call.
95  %0 = call i32 @__kmpc_global_thread_num(ptr null)
96  call void @helper0()
97  call void @helper1()
98  call void @helper2()
99  call void @__kmpc_parallel_51(ptr null, i32 %0, i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr %captured_vars_addrs, i64 0)
100  call void @__kmpc_target_deinit()
101  ret void
102}
103
; helper0: stores the result of __kmpc_get_hardware_num_threads_in_block to @G.
; It is called from kernel0, kernel2 and the %t block of helper1. The expected
; output no longer contains the query call: the stored value is the constant
; 666, matching the "omp_target_thread_limit" value in attribute group #0. The
; store is also placed in a region executed only when the hardware thread id
; is 0, followed by a __kmpc_barrier_simple_spmd call on the common path.
104define internal void @helper0() {
105; CHECK-LABEL: define {{[^@]+}}@helper0
106; CHECK-SAME: () #[[ATTR1]] {
107; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
108; CHECK:       region.check.tid:
109; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
110; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
111; CHECK-NEXT:    br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
112; CHECK:       region.guarded:
113; CHECK-NEXT:    store i32 666, ptr @G, align 4
114; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
115; CHECK:       region.guarded.end:
116; CHECK-NEXT:    br label [[REGION_BARRIER]]
117; CHECK:       region.barrier:
118; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]])
119; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
120; CHECK:       region.exit:
121; CHECK-NEXT:    ret void
122;
; Input IR: query the thread count and write it to the external global.
123  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
124  store i32 %threadLimit, ptr @G
125  ret void
126}
127
; helper1: compares the result of __kmpc_get_hardware_num_threads_in_block
; against 666 and branches on it. The expected output has an unconditional
; branch to %f and turns %t (which calls helper0 in the input) into an
; unreachable block, i.e. the comparison is expected to be decided statically.
128define internal void @helper1() {
129; CHECK-LABEL: define {{[^@]+}}@helper1
130; CHECK-SAME: () #[[ATTR1]] {
131; CHECK-NEXT:    br label [[F:%.*]]
132; CHECK:       t:
133; CHECK-NEXT:    unreachable
134; CHECK:       f:
135; CHECK-NEXT:    ret void
136;
137  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
; Note the label order: equality with 666 goes to %f, inequality goes to %t.
138  %c = icmp eq i32 %threadLimit, 666
139  br i1 %c, label %f, label %t
140t:
; Taken only when the thread count differs from 666.
141  call void @helper0()
142  ret void
143f:
144  ret void
145}
146
; helper2: same input body as helper0 (query the thread count, store it to @G)
; and the same expected output: a store of the constant 666 executed only by
; hardware thread 0, followed by a __kmpc_barrier_simple_spmd call. It is
; called from kernel0 and kernel2.
147define internal void @helper2() {
148; CHECK-LABEL: define {{[^@]+}}@helper2
149; CHECK-SAME: () #[[ATTR1]] {
150; CHECK-NEXT:    br label [[REGION_CHECK_TID:%.*]]
151; CHECK:       region.check.tid:
152; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
153; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
154; CHECK-NEXT:    br i1 [[TMP2]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
155; CHECK:       region.guarded:
156; CHECK-NEXT:    store i32 666, ptr @G, align 4
157; CHECK-NEXT:    br label [[REGION_GUARDED_END:%.*]]
158; CHECK:       region.guarded.end:
159; CHECK-NEXT:    br label [[REGION_BARRIER]]
160; CHECK:       region.barrier:
161; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB1]], i32 [[TMP1]])
162; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
163; CHECK:       region.exit:
164; CHECK-NEXT:    ret void
165;
; Input IR: query the thread count and write it to the external global.
166  %threadLimit = call i32 @__kmpc_get_hardware_num_threads_in_block()
167  store i32 %threadLimit, ptr @G
168  ret void
169}
170
; __omp_outlined__: empty function whose address is passed to
; __kmpc_parallel_51 in kernel2. Both pointer parameters are unused, and the
; expected output is identical to the input body.
171define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %.bound_tid.) {
172; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
173; CHECK-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
174; CHECK-NEXT:  entry:
175; CHECK-NEXT:    ret void
176;
177entry:
178  ret void
179}
180
; __omp_outlined___wrapper: empty function passed to __kmpc_parallel_51 in
; kernel2 right after @__omp_outlined__. Its two integer parameters are unused,
; and the expected output is identical to the input body.
181define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) {
182; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
183; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) {
184; CHECK-NEXT:  entry:
185; CHECK-NEXT:    ret void
186;
187entry:
188  ret void
189}
190
; Module-local definition of the function queried by helper0/helper1/helper2.
; It only forwards to the external *_dummy declaration. No assertions are
; attached to this definition.
; NOTE(review): presumably it is defined (rather than declared) so the test
; exercises folding when the runtime function has a body -- confirm.
191define internal i32 @__kmpc_get_hardware_num_threads_in_block() {
192  %ret = call i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
193  ret i32 %ret
194}
; External declarations for the functions called by the kernels and by the
; forwarding definition above.
195declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy()
; NOTE(review): the next two declarations reference attribute group #1, but no
; `attributes #1` definition is visible in this file -- confirm this is
; intentional.
196declare i32 @__kmpc_target_init(ptr, ptr) #1
197declare void @__kmpc_target_deinit() #1
198declare void @__kmpc_parallel_51(ptr, i32, i32, i32, i32, ptr, ptr, ptr, i64)
199declare i32 @__kmpc_global_thread_num(ptr)
200
201
; Module flags: both entries are defined at the bottom of this block.
202!llvm.module.flags = !{!0, !1}
203
; Attribute group attached to all three kernels. The values 666 and 777 are
; the same numbers the assertions expect in the stores to @G and in the
; rewritten kernel environment constants.
204attributes #0 = { "omp_target_thread_limit"="666" "omp_target_num_teams"="777"}
205
; Flags named "openmp" and "openmp-device", each with value 50.
206!0 = !{i32 7, !"openmp", i32 50}
207!1 = !{i32 7, !"openmp-device", i32 50}
208;.
209; CHECK: attributes #[[ATTR0]] = { "kernel" "omp_target_num_teams"="777" "omp_target_thread_limit"="666" }
210; CHECK: attributes #[[ATTR1]] = { nounwind }
211; CHECK: attributes #[[ATTR2:[0-9]+]] = { alwaysinline }
212; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind }
213;.
214; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
215; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
216;.
217