xref: /llvm-project/llvm/test/Transforms/OpenMP/replace_globalization.ll (revision 07ed8187acc31ac3f4779da452864a29d48799ac)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals --include-generated-funcs
2; RUN: opt -S -passes='openmp-opt' < %s | FileCheck %s
3; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
4; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output -openmp-opt-shared-limit=4 < %s 2>&1 | FileCheck %s -check-prefix=CHECK-LIMIT
5target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
6target triple = "nvptx64"
7
8; UTC_ARGS: --disable
9; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory
10; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory
11; CHECK-REMARKS-NOT: 6 bytes
12; CHECK-LIMIT: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory
13; CHECK-LIMIT: remark: replace_globalization.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization
14; UTC_ARGS: --enable
15
; Type layouts used by the globals below: an ident_t record (four i32 fields
; followed by a pointer) and a kernel environment made of a configuration
; record plus two pointers.
16%struct.ident_t = type { i32, i32, i32, i32, ptr }
17%struct.KernelEnvironmentTy = type { %struct.ConfigurationEnvironmentTy, ptr, ptr }
18%struct.ConfigurationEnvironmentTy = type { i8, i8, i8, i32, i32, i32, i32, i32, i32 }
19
; @S is the external pointer slot that @use stores its argument into.
20@S = external local_unnamed_addr global ptr
; @0 is a 113-byte source-location string and @1 is an ident_t whose pointer
; field refers to it.
; Review note: the string names custom_state_machines_remarks.c rather than
; this test; it looks carried over from another test -- confirm it is intended.
21@0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
22@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @0 }, align 8
; One kernel environment per kernel. They are identical except for the third
; i8 of the configuration record: 1 for foo and bar, 2 for baz.
; Presumably that field is the execution mode (with 2 meaning SPMD, as the
; @baz_spmd name suggests) -- confirm against the OpenMP device runtime.
23@foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
24@bar_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
25@baz_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @1, ptr null }
26
27
; Kernel whose 4-byte shared allocation is NOT guarded by a comparison of the
; __kmpc_target_init result against -1 (contrast with @bar). The expected
; output at the end of this file keeps the __kmpc_alloc_shared and
; __kmpc_free_shared calls in place for this kernel.
; Presumably the allocation is left alone because every thread may reach it --
; confirm against the pass implementation.
28define dso_local ptx_kernel void @foo(ptr %dyn) "kernel" {
29entry:
; Initialise the kernel with the environment whose third config byte is 1.
30  %c = call i32 @__kmpc_target_init(ptr @foo_kernel_environment, ptr %dyn)
; Unconditional 4-byte allocation; no debug location is attached to it.
31  %x = call align 4 ptr @__kmpc_alloc_shared(i64 4)
32  call void @unknown_no_openmp()
; @use stores the pointer into the global @S.
33  call void @use(ptr %x)
; Release with the same size that was allocated.
34  call void @__kmpc_free_shared(ptr %x, i64 4)
35  call void @__kmpc_target_deinit()
36  ret void
37}
38
; Kernel with two shared allocations (16 bytes, then 4 bytes), each placed in
; a block that is only entered when the __kmpc_target_init result equals -1.
; The expected output at the end of this file replaces both allocations with
; the internal addrspace(3) globals @x_shared ([16 x i8]) and @y_shared
; ([4 x i8]) and drops the matching __kmpc_free_shared calls.
; With -openmp-opt-shared-limit=4 (third command line at the top of the file)
; only the 4-byte allocation is expected to be reported as replaced; the
; 16-byte one is expected to produce the data-sharing remark instead.
39define ptx_kernel void @bar(ptr %dyn) "kernel" {
40  %c = call i32 @__kmpc_target_init(ptr @bar_kernel_environment, ptr %dyn)
41  call void @unknown_no_openmp()
; First guard: only continue to master1 when the init result is -1.
42  %cmp = icmp eq i32 %c, -1
43  br i1 %cmp, label %master1, label %exit
44master1:
; 16-byte allocation; debug location !11 is line 5, column 7.
45  %x = call align 4 ptr @__kmpc_alloc_shared(i64 16), !dbg !11
46  call void @use(ptr %x)
47  call void @__kmpc_free_shared(ptr %x, i64 16)
48  br label %next
49next:
50  call void @unknown_no_openmp()
; Second guard: repeats the same comparison on the same init result.
51  %b0 = icmp eq i32 %c, -1
52  br i1 %b0, label %master2, label %exit
53master2:
; 4-byte allocation; debug location !12 is line 5, column 14.
54  %y = call align 4 ptr @__kmpc_alloc_shared(i64 4), !dbg !12
55  call void @use(ptr %y)
56  call void @__kmpc_free_shared(ptr %y, i64 4)
57  br label %exit
58exit:
59  call void @__kmpc_target_deinit()
60  ret void
61}
62
; Kernel initialised with @baz_kernel_environment, the only environment whose
; third configuration byte is 2. It has the same guarded-allocation shape as
; @bar, but the expected output at the end of this file leaves the 24-byte
; __kmpc_alloc_shared call and its __kmpc_free_shared call untouched.
; Presumably this is because the kernel runs in SPMD mode -- confirm against
; the pass implementation.
63define ptx_kernel void @baz_spmd(ptr %dyn) "kernel" {
64  %c = call i32 @__kmpc_target_init(ptr @baz_kernel_environment, ptr %dyn)
65  call void @unknown_no_openmp()
; Guard: only enter master3 when the init result is -1.
66  %c0 = icmp eq i32 %c, -1
67  br i1 %c0, label %master3, label %exit
68master3:
; 24-byte allocation. Review note: it reuses debug location !12, whose scope
; is the subprogram named "bar" -- confirm that sharing the location is intended.
69  %z = call align 4 ptr @__kmpc_alloc_shared(i64 24), !dbg !12
70  call void @use(ptr %z)
71  call void @__kmpc_free_shared(ptr %z, i64 24)
72  br label %exit
73exit:
74  call void @__kmpc_target_deinit()
75  ret void
76}
77
; Stores its pointer argument into the external global @S and returns.
; Every kernel in this file passes its shared allocation to this function.
; The expected output at the end of this file contains both this function and
; a copy named @use.internalized that the kernels call instead.
78define void @use(ptr %x) {
79entry:
; Writing the pointer to a global makes it visible outside this function.
80  store ptr %x, ptr @S
81  ret void
82}
83
; State read by the test-local allocator below: an i32 offset and a 1024-byte
; buffer in addrspace(3). Both are initialised with undef.
84@offset =global i32 undef
85@stack = internal addrspace(3) global [1024 x i8] undef
; Test-local, private definition of the allocation entry point called by the
; kernels. The i64 argument is unnamed and never read. The returned pointer is
; the address of @stack, cast from addrspace(3) to the default address space,
; advanced by the i32 value loaded from @offset. Nothing here writes @offset.
86define private ptr @__kmpc_alloc_shared(i64) {
87  %ac = addrspacecast ptr addrspace(3) @stack to ptr
88  %l = load i32, ptr @offset
; Byte-granular offset into the buffer (the GEP element type is i8).
89  %gep = getelementptr i8, ptr %ac, i32 %l
90  ret ptr %gep
91}
92
; Deallocation counterpart of @__kmpc_alloc_shared; declared only, so its body
; is not available in this module. The kernels pass the pointer and the same
; size they allocated.
93declare void @__kmpc_free_shared(ptr, i64)
94
; NVVM special-register intrinsics. None of them is called by a function in
; the visible source; they are declared only.
95declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
96
97declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
98
99declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
100
; Test-local definition of the kernel initialisation entry point. Both pointer
; arguments are unnamed and unused, and the body always returns 0. The kernels
; compare the returned value against -1 to guard their allocations.
101; Make it a weak definition so we will apply custom state machine rewriting but can't use the body in the reasoning.
102define weak i32 @__kmpc_target_init(ptr, ptr) {
103  ret i32 0
104}
105
; Kernel teardown entry point; every kernel calls it immediately before
; returning. Declared only.
106declare void @__kmpc_target_deinit()
107
; Opaque external function called by the kernels. It is declared with the
; "llvm.assume"="omp_no_openmp" attribute.
108declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
109
; Debug-info and module-flag metadata. The compile unit !0 refers to the file
; replace_globalization.c (!1); that file name is the one the expected remarks
; at the top of this test are prefixed with.
110!llvm.dbg.cu = !{!0}
111!llvm.module.flags = !{!3, !4, !5, !6}
112
113!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
114!1 = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c")
115!2 = !{}
116!3 = !{i32 2, !"Debug Info Version", i32 3}
117!4 = !{i32 1, !"wchar_size", i32 4}
; Module flags "openmp" and "openmp-device", both with value 50.
118!5 = !{i32 7, !"openmp", i32 50}
119!6 = !{i32 7, !"openmp-device", i32 50}
; Metadata ids 7 and 8 are not defined in this file; numbering resumes at 9.
; !9 is the only subprogram and is named "bar"; it is the scope of both
; locations below.
120!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 1, type: !10, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
121!10 = !DISubroutineType(types: !2)
; !11 (5:7) is attached to the 16-byte allocation in @bar. !12 (5:14) is
; attached to the 4-byte allocation in @bar and the 24-byte one in @baz_spmd.
122!11 = !DILocation(line: 5, column: 7, scope: !9)
123!12 = !DILocation(line: 5, column: 14, scope: !9)
124;.
125; CHECK: @S = external local_unnamed_addr global ptr
126; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
127; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @[[GLOB0]] }, align 8
128; CHECK: @foo_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
129; CHECK: @bar_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
130; CHECK: @baz_kernel_environment = local_unnamed_addr constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 0, i8 0, i8 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0 }, ptr @[[GLOB1]], ptr null }
131; CHECK: @offset = global i32 undef
132; CHECK: @stack = internal addrspace(3) global [1024 x i8] undef
133; CHECK: @x_shared = internal addrspace(3) global [16 x i8] poison, align 4
134; CHECK: @y_shared = internal addrspace(3) global [4 x i8] poison, align 4
135;.
136; CHECK-LABEL: define {{[^@]+}}@foo
137; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
138; CHECK-NEXT:  entry:
139; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(ptr @foo_kernel_environment, ptr [[DYN]])
140; CHECK-NEXT:    [[X:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 4) #[[ATTR6:[0-9]+]]
141; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR5:[0-9]+]]
142; CHECK-NEXT:    call void @use.internalized(ptr nofree [[X]]) #[[ATTR7:[0-9]+]]
143; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[X]], i64 4) #[[ATTR8:[0-9]+]]
144; CHECK-NEXT:    call void @__kmpc_target_deinit()
145; CHECK-NEXT:    ret void
146;
147;
148; CHECK-LABEL: define {{[^@]+}}@bar
149; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
150; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(ptr @bar_kernel_environment, ptr [[DYN]])
151; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR5]]
152; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C]], -1
153; CHECK-NEXT:    br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
154; CHECK:       master1:
155; CHECK-NEXT:    call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @x_shared to ptr)) #[[ATTR7]]
156; CHECK-NEXT:    br label [[NEXT:%.*]]
157; CHECK:       next:
158; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR5]]
159; CHECK-NEXT:    [[B0:%.*]] = icmp eq i32 [[C]], -1
160; CHECK-NEXT:    br i1 [[B0]], label [[MASTER2:%.*]], label [[EXIT]]
161; CHECK:       master2:
162; CHECK-NEXT:    call void @use.internalized(ptr nofree addrspacecast (ptr addrspace(3) @y_shared to ptr)) #[[ATTR7]]
163; CHECK-NEXT:    br label [[EXIT]]
164; CHECK:       exit:
165; CHECK-NEXT:    call void @__kmpc_target_deinit()
166; CHECK-NEXT:    ret void
167;
168;
169; CHECK-LABEL: define {{[^@]+}}@baz_spmd
170; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
171; CHECK-NEXT:    [[C:%.*]] = call i32 @__kmpc_target_init(ptr @baz_kernel_environment, ptr [[DYN]])
172; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR5]]
173; CHECK-NEXT:    [[C0:%.*]] = icmp eq i32 [[C]], -1
174; CHECK-NEXT:    br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]]
175; CHECK:       master3:
176; CHECK-NEXT:    [[Z:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i64 24) #[[ATTR6]], !dbg [[DBG7:![0-9]+]]
177; CHECK-NEXT:    call void @use.internalized(ptr nofree [[Z]]) #[[ATTR7]]
178; CHECK-NEXT:    call void @__kmpc_free_shared(ptr [[Z]], i64 24) #[[ATTR8]]
179; CHECK-NEXT:    br label [[EXIT]]
180; CHECK:       exit:
181; CHECK-NEXT:    call void @__kmpc_target_deinit()
182; CHECK-NEXT:    ret void
183;
184;
185; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(write)
186; CHECK-LABEL: define {{[^@]+}}@use.internalized
187; CHECK-SAME: (ptr nofree [[X:%.*]]) #[[ATTR1:[0-9]+]] {
188; CHECK-NEXT:  entry:
189; CHECK-NEXT:    store ptr [[X]], ptr @S, align 8
190; CHECK-NEXT:    ret void
191;
192;
193; CHECK-LABEL: define {{[^@]+}}@use
194; CHECK-SAME: (ptr [[X:%.*]]) {
195; CHECK-NEXT:  entry:
196; CHECK-NEXT:    store ptr [[X]], ptr @S, align 8
197; CHECK-NEXT:    ret void
198;
199;
200; CHECK: Function Attrs: nosync nounwind allocsize(0) memory(read)
201; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared
202; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
203; CHECK-NEXT:    [[L:%.*]] = load i32, ptr @offset, align 4
204; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspacecast (ptr addrspace(3) @stack to ptr), i32 [[L]]
205; CHECK-NEXT:    ret ptr [[GEP]]
206;
207;
208; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
209; CHECK-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) {
210; CHECK-NEXT:    ret i32 0
211;
212;.
213; CHECK: attributes #[[ATTR0]] = { "kernel" }
214; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(write) }
215; CHECK: attributes #[[ATTR2]] = { nosync nounwind allocsize(0) memory(read) }
216; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind }
217; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
218; CHECK: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" }
219; CHECK: attributes #[[ATTR6]] = { nounwind memory(read) }
220; CHECK: attributes #[[ATTR7]] = { nosync nounwind memory(write) }
221; CHECK: attributes #[[ATTR8]] = { nounwind }
222;.
223; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
224; CHECK: [[META1]] = !DIFile(filename: "replace_globalization.c", directory: {{.*}})
225; CHECK: [[META2]] = !{}
226; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
227; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
228; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
229; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
230; CHECK: [[DBG7]] = !DILocation(line: 5, column: 14, scope: [[META8:![0-9]+]])
231; CHECK: [[META8]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 1, type: [[META9:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META2]])
232; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
233;.
234;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
235; CHECK-LIMIT: {{.*}}
236; CHECK-REMARKS: {{.*}}
237