; xref: /llvm-project/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-annotate-decl-cs  -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
;
;    void bar(int, float, double);
;
;    void foo(int N) {
;      float p = 3;
;      double q = 5;
;      N = 7;
;
;    #pragma omp parallel for firstprivate(q)
;      for (int i = 2; i < N; i++) {
;        bar(i, p, q);
;      }
;    }
;
; Verify the constant value of q is propagated into the outlined function.
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

%struct.ident_t = type { i32, i32, i32, i32, ptr }

@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 514, i32 0, i32 0, ptr @.str }, align 8
@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8

;.
; CHECK: @.str = private unnamed_addr constant [23 x i8] c"
; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 514, i32 0, i32 0, ptr @.str }, align 8
; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, ptr @.str }, align 8
;.
define dso_local void @foo(i32 %N) {
; TUNIT-LABEL: define {{[^@]+}}@foo
; TUNIT-SAME: (i32 [[N:%.*]]) {
; TUNIT-NEXT:  entry:
; TUNIT-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
; TUNIT-NEXT:    [[P:%.*]] = alloca float, align 4
; TUNIT-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, ptr noundef nonnull @.omp_outlined., ptr noalias nofree nonnull readnone align 4 captures(none) dereferenceable(4) undef, ptr noalias nofree nonnull readnone align 4 captures(none) dereferenceable(4) undef, i64 undef)
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@foo
; CGSCC-SAME: (i32 [[N:%.*]]) {
; CGSCC-NEXT:  entry:
; CGSCC-NEXT:    [[N_ADDR:%.*]] = alloca i32, align 4
; CGSCC-NEXT:    [[P:%.*]] = alloca float, align 4
; CGSCC-NEXT:    store i32 [[N]], ptr [[N_ADDR]], align 4
; CGSCC-NEXT:    store float 3.000000e+00, ptr [[P]], align 4
; CGSCC-NEXT:    store i32 7, ptr [[N_ADDR]], align 4
; CGSCC-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 3, ptr noundef nonnull @.omp_outlined., ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[N_ADDR]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[P]], i64 noundef 4617315517961601024)
; CGSCC-NEXT:    ret void
;
entry:
  %N.addr = alloca i32, align 4
  %p = alloca float, align 4
  store i32 %N, ptr %N.addr, align 4
  store float 3.000000e+00, ptr %p, align 4
  store i32 7, ptr %N.addr, align 4
  call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 3, ptr @.omp_outlined., ptr nonnull %N.addr, ptr nonnull %p, i64 4617315517961601024)
  ret void
}

define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bound_tid., ptr dereferenceable(4) %N, ptr dereferenceable(4) %p, i64 %q) {
; TUNIT-LABEL: define {{[^@]+}}@.omp_outlined.
; TUNIT-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr noalias nofree noundef nonnull readnone align 4 captures(none) dereferenceable(4) [[N:%.*]], ptr noalias nofree noundef nonnull readnone align 4 captures(none) dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
; TUNIT-NEXT:  entry:
; TUNIT-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
; TUNIT-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
; TUNIT-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
; TUNIT-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
; TUNIT-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
; TUNIT-NEXT:    store i64 4617315517961601024, ptr [[Q_ADDR]], align 8
; TUNIT-NEXT:    br label [[OMP_PRECOND_THEN:%.*]]
; TUNIT:       omp.precond.then:
; TUNIT-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
; TUNIT-NEXT:    store i32 4, ptr [[DOTOMP_UB]], align 4
; TUNIT-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
; TUNIT-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
; TUNIT-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; TUNIT-NEXT:    call void @__kmpc_for_static_init_4(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP5]], i32 noundef 34, ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 noundef 1, i32 noundef 1)
; TUNIT-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
; TUNIT-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], 4
; TUNIT-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
; TUNIT:       cond.true:
; TUNIT-NEXT:    br label [[COND_END:%.*]]
; TUNIT:       cond.false:
; TUNIT-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
; TUNIT-NEXT:    br label [[COND_END]]
; TUNIT:       cond.end:
; TUNIT-NEXT:    [[COND:%.*]] = phi i32 [ 4, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
; TUNIT-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
; TUNIT-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
; TUNIT-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
; TUNIT:       omp.inner.for.cond:
; TUNIT-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
; TUNIT-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
; TUNIT-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
; TUNIT-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
; TUNIT:       omp.inner.for.cond.cleanup:
; TUNIT-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
; TUNIT:       omp.inner.for.body:
; TUNIT-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
; TUNIT-NEXT:    [[TMP11:%.*]] = load double, ptr [[Q_ADDR]], align 8
; TUNIT-NEXT:    call void @bar(i32 [[ADD10]], float nofpclass(nan inf zero sub nnorm) 3.000000e+00, double [[TMP11]])
; TUNIT-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
; TUNIT:       omp.body.continue:
; TUNIT-NEXT:    br label [[OMP_INNER_FOR_INC]]
; TUNIT:       omp.inner.for.inc:
; TUNIT-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
; TUNIT-NEXT:    br label [[OMP_INNER_FOR_COND]]
; TUNIT:       omp.inner.for.end:
; TUNIT-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
; TUNIT:       omp.loop.exit:
; TUNIT-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; TUNIT-NEXT:    call void @__kmpc_for_static_fini(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP12]])
; TUNIT-NEXT:    br label [[OMP_PRECOND_END:%.*]]
; TUNIT:       omp.precond.end:
; TUNIT-NEXT:    ret void
;
; CGSCC-LABEL: define {{[^@]+}}@.omp_outlined.
; CGSCC-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[N:%.*]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]]) {
; CGSCC-NEXT:  entry:
; CGSCC-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
; CGSCC-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
; CGSCC-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
; CGSCC-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
; CGSCC-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
; CGSCC-NEXT:    store i64 4617315517961601024, ptr [[Q_ADDR]], align 8
; CGSCC-NEXT:    [[TMP:%.*]] = load i32, ptr [[N]], align 4
; CGSCC-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
; CGSCC-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
; CGSCC-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
; CGSCC:       omp.precond.then:
; CGSCC-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
; CGSCC-NEXT:    store i32 [[SUB3]], ptr [[DOTOMP_UB]], align 4
; CGSCC-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
; CGSCC-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
; CGSCC-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CGSCC-NEXT:    call void @__kmpc_for_static_init_4(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP5]], i32 noundef 34, ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 noundef 1, i32 noundef 1)
; CGSCC-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
; CGSCC-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
; CGSCC-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
; CGSCC:       cond.true:
; CGSCC-NEXT:    br label [[COND_END:%.*]]
; CGSCC:       cond.false:
; CGSCC-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
; CGSCC-NEXT:    br label [[COND_END]]
; CGSCC:       cond.end:
; CGSCC-NEXT:    [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
; CGSCC-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
; CGSCC-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
; CGSCC-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
; CGSCC:       omp.inner.for.cond:
; CGSCC-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
; CGSCC-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
; CGSCC-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
; CGSCC-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
; CGSCC:       omp.inner.for.cond.cleanup:
; CGSCC-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
; CGSCC:       omp.inner.for.body:
; CGSCC-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
; CGSCC-NEXT:    [[TMP10:%.*]] = load float, ptr [[P]], align 4
; CGSCC-NEXT:    [[TMP11:%.*]] = load double, ptr [[Q_ADDR]], align 8
; CGSCC-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
; CGSCC-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
; CGSCC:       omp.body.continue:
; CGSCC-NEXT:    br label [[OMP_INNER_FOR_INC]]
; CGSCC:       omp.inner.for.inc:
; CGSCC-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
; CGSCC-NEXT:    br label [[OMP_INNER_FOR_COND]]
; CGSCC:       omp.inner.for.end:
; CGSCC-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
; CGSCC:       omp.loop.exit:
; CGSCC-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
; CGSCC-NEXT:    call void @__kmpc_for_static_fini(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP12]])
; CGSCC-NEXT:    br label [[OMP_PRECOND_END]]
; CGSCC:       omp.precond.end:
; CGSCC-NEXT:    ret void
;
entry:
  %q.addr = alloca i64, align 8
  %.omp.lb = alloca i32, align 4
  %.omp.ub = alloca i32, align 4
  %.omp.stride = alloca i32, align 4
  %.omp.is_last = alloca i32, align 4
  store i64 %q, ptr %q.addr, align 8
  %tmp = load i32, ptr %N, align 4
  %sub3 = add nsw i32 %tmp, -3
  %cmp = icmp sgt i32 %tmp, 2
  br i1 %cmp, label %omp.precond.then, label %omp.precond.end

omp.precond.then:                                 ; preds = %entry
  store i32 0, ptr %.omp.lb, align 4
  store i32 %sub3, ptr %.omp.ub, align 4
  store i32 1, ptr %.omp.stride, align 4
  store i32 0, ptr %.omp.is_last, align 4
  %tmp5 = load i32, ptr %.global_tid., align 4
  call void @__kmpc_for_static_init_4(ptr nonnull @0, i32 %tmp5, i32 34, ptr nonnull %.omp.is_last, ptr nonnull %.omp.lb, ptr nonnull %.omp.ub, ptr nonnull %.omp.stride, i32 1, i32 1)
  %tmp6 = load i32, ptr %.omp.ub, align 4
  %cmp6 = icmp sgt i32 %tmp6, %sub3
  br i1 %cmp6, label %cond.true, label %cond.false

cond.true:                                        ; preds = %omp.precond.then
  br label %cond.end

cond.false:                                       ; preds = %omp.precond.then
  %tmp7 = load i32, ptr %.omp.ub, align 4
  br label %cond.end

cond.end:                                         ; preds = %cond.false, %cond.true
  %cond = phi i32 [ %sub3, %cond.true ], [ %tmp7, %cond.false ]
  store i32 %cond, ptr %.omp.ub, align 4
  %tmp8 = load i32, ptr %.omp.lb, align 4
  br label %omp.inner.for.cond

omp.inner.for.cond:                               ; preds = %omp.inner.for.inc, %cond.end
  %.omp.iv.0 = phi i32 [ %tmp8, %cond.end ], [ %add11, %omp.inner.for.inc ]
  %tmp9 = load i32, ptr %.omp.ub, align 4
  %cmp8 = icmp sgt i32 %.omp.iv.0, %tmp9
  br i1 %cmp8, label %omp.inner.for.cond.cleanup, label %omp.inner.for.body

omp.inner.for.cond.cleanup:                       ; preds = %omp.inner.for.cond
  br label %omp.inner.for.end

omp.inner.for.body:                               ; preds = %omp.inner.for.cond
  %add10 = add nsw i32 %.omp.iv.0, 2
  %tmp10 = load float, ptr %p, align 4
  %tmp11 = load double, ptr %q.addr, align 8
  call void @bar(i32 %add10, float %tmp10, double %tmp11)
  br label %omp.body.continue

omp.body.continue:                                ; preds = %omp.inner.for.body
  br label %omp.inner.for.inc

omp.inner.for.inc:                                ; preds = %omp.body.continue
  %add11 = add nsw i32 %.omp.iv.0, 1
  br label %omp.inner.for.cond

omp.inner.for.end:                                ; preds = %omp.inner.for.cond.cleanup
  br label %omp.loop.exit

omp.loop.exit:                                    ; preds = %omp.inner.for.end
  %tmp12 = load i32, ptr %.global_tid., align 4
  call void @__kmpc_for_static_fini(ptr nonnull @0, i32 %tmp12)
  br label %omp.precond.end

omp.precond.end:                                  ; preds = %omp.loop.exit, %entry
  ret void
}

declare dso_local void @__kmpc_for_static_init_4(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32)

declare dso_local void @bar(i32, float, double)

declare dso_local void @__kmpc_for_static_fini(ptr, i32)

declare !callback !0 dso_local void @__kmpc_fork_call(ptr, i32, ptr, ...)

!1 = !{i64 2, i64 -1, i64 -1, i1 true}
!0 = !{!1}
;.
; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
; TUNIT: [[META1]] = !{i64 2, i64 -1, i64 -1, i1 true}
;.
; CGSCC: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
; CGSCC: [[META1]] = !{i64 2, i64 -1, i64 -1, i1 true}
;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
