xref: /llvm-project/clang/test/OpenMP/taskloop_reduction_codegen.cpp (revision 2a2847823f0d13188c43ebdd0baf42a95df750c7)
1 // RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s
2 
3 // RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s
4 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
5 // expected-no-diagnostics
6 
7 // CHECK: [[RED_SIZE1:@reduction_size[.].+]] = common thread_local global i64 0
8 // CHECK: [[RED_SIZE2:@reduction_size[.].+]] = common thread_local global i64 0
9 
10 struct S { // Class-type reduction operand; main reduces an array of these through c[:n].
11   float a; // Only data member; the declared reduction that follows accumulates into it.
12   S() : a(0.0f) {} // Default constructor zero-initializes `a`.
13   ~S() {} // User-provided (empty) destructor.
14 };
15 
16 #pragma omp declare reduction(+:S:omp_out.a += omp_in.a) initializer(omp_priv = omp_orig) // User-defined '+' reduction for S: the combiner adds omp_in.a into omp_out.a; each private copy is initialized from omp_orig.
17 
18 float g; // Global that main binds the reference `d` to.
19 
20 int a; // File-scope `a`; main declares a local array with the same name that hides it.
21 #pragma omp threadprivate(a) // Makes the file-scope `a` threadprivate.
22 int main (int argc, char *argv[]) // Test driver: sets up the operands and runs one taskloop carrying four reduction items.
23 {
24 int   i, n; // Loop index and trip count.
25 float a[100], b[100], sum, e[argc + 100]; // Local `a` hides the threadprivate file-scope `a`; `e` is a variable-length array sized from argc at run time.
26 S c[100]; // Array of class type, reduced through the array section c[:n].
27 float &d = g; // Reference to the global `g`, so the reduction item `d` is a reference.
28 
29 /* Some initializations */
30 n = 100;
31 for (i=0; i < n; i++)
32   a[i] = b[i] = i * 1.0; // Both input arrays receive the element index as their value.
33 sum = 0.0;
34 // The reduction list covers four kinds of item: scalar `sum`, array section `c[:n]` (element type S), reference `d`, and the whole variable-length array `e`.
35 #pragma omp taskloop reduction(+:sum, c[:n], d, e)
36   for (i=0; i < n; i++) {
37     sum = sum + (a[i] * b[i]); // Accumulates the element-wise product of a and b.
38     c[i].a = i*i;
39     d += i*i;
40     e[i] = i;
41   }
42 
43 }
44 
45 // CHECK-LABEL: @main(
46 // CHECK:    [[RETVAL:%.*]] = alloca i32,
47 // CHECK:    [[ARGC_ADDR:%.*]] = alloca i32,
48 // CHECK:    [[ARGV_ADDR:%.*]] = alloca i8**,
49 // CHECK:    [[I:%.*]] = alloca i32,
50 // CHECK:    [[N:%.*]] = alloca i32,
51 // CHECK:    [[A:%.*]] = alloca [100 x float],
52 // CHECK:    [[B:%.*]] = alloca [100 x float],
53 // CHECK:    [[SUM:%.*]] = alloca float,
54 // CHECK:    [[SAVED_STACK:%.*]] = alloca i8*,
55 // CHECK:    [[C:%.*]] = alloca [100 x %struct.S],
56 // CHECK:    [[D:%.*]] = alloca float*,
57 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
58 // CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_taskred_input_t],
59 // CHECK:    alloca i32,
60 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
61 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
62 // CHECK:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
63 // CHECK:    store i32 0, i32* [[RETVAL]],
64 // CHECK:    store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]],
65 // CHECK:    store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]],
66 // CHECK:    [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]],
67 // CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 100
68 // CHECK:    [[TMP2:%.*]] = zext i32 [[ADD]] to i64
69 // CHECK:    [[VLA:%.+]] = alloca float, i64 %
70 
71 // CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
72 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
73 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
74 // CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
75 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
76 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
77 // CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
78 // CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
79 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
80 // CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
81 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
82 // CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
83 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
84 // CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
85 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
86 // CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
87 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
88 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
89 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0
90 // CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
91 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
92 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
93 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
94 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
95 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
96 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
97 // CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
98 // CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
99 // CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
100 // CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
101 // CHECK-DAG:    [[TMP35:%.*]] = sdiv exact i64 [[TMP34]], ptrtoint (%struct.S* getelementptr (%struct.S, %struct.S* null, i32 1) to i64)
102 // CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
103 // CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (%struct.S* getelementptr (%struct.S, %struct.S* null, i32 1) to i64)
104 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
105 // CHECK-DAG:    [[TMP38]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
106 // CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
107 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
108 // CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
109 // CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
110 // CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
111 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
112 // CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
113 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
114 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D]],
115 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
116 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
117 // CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
118 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
119 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
120 // CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
121 // CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
122 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
123 // CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
124 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
125 // CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
126 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
127 // CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
128 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
129 // CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
130 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
131 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
132 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
133 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
134 // CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
135 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
136 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
137 // CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
138 // CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4
139 // CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
140 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
141 // CHECK-DAG:    [[TMP56]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
142 // CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
143 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
144 // CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
145 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
146 // CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
147 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
148 // CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
149 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
150 // CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
151 // CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
152 // CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
153 // CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
154 // CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
155 // CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
156 // CHECK:    [[TMP63:%.*]] = load i32, i32* [[N]],
157 // CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
158 // CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
159 // CHECK:    [[SUB:%.*]] = sub nsw i32 [[TMP64]], 0
160 // CHECK:    [[DIV:%.*]] = sdiv i32 [[SUB]], 1
161 // CHECK:    [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1
162 // CHECK:    store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]],
163 // CHECK:    [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* {{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*))
164 // CHECK:    call void @__kmpc_taskloop(%struct.ident_t* {{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
165 // CHECK:    call void @__kmpc_end_taskgroup(%struct.ident_t*
166 
167 // CHECK:    ret i32
168 
169 // CHECK: define internal void @[[RED_INIT1]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
170 // CHECK: store float 0.000000e+00, float* %
171 // CHECK: ret void
172 
173 // CHECK: define internal void @[[RED_COMB1]](i8* %0, i8* %1)
174 // CHECK: fadd float %
175 // CHECK: store float %{{.+}}, float* %
176 // CHECK: ret void
177 
178 // CHECK: define internal void @[[RED_INIT2]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
179 // CHECK: call void [[OMP_INIT1:@.+]](%struct.S*
180 // CHECK: ret void
181 
182 // CHECK: define internal void [[OMP_COMB1:@.+]](%struct.S* noalias %0, %struct.S* noalias %1)
183 // CHECK: fadd float %
184 
185 // CHECK: define internal void [[OMP_INIT1]](%struct.S* noalias %0, %struct.S* noalias %1)
186 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
187 
188 // CHECK: define internal void @[[RED_FINI2]](i8* %0)
189 // CHECK: load i64, i64* [[RED_SIZE1]]
190 // CHECK: call void @
191 // CHECK: ret void
192 
193 // CHECK: define internal void @[[RED_COMB2]](i8* %0, i8* %1)
194 // CHECK: load i64, i64* [[RED_SIZE1]]
195 // CHECK: call void [[OMP_COMB1]](
196 // CHECK: ret void
197 
198 // CHECK: define internal void @[[RED_INIT3]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
199 // CHECK: store float 0.000000e+00, float* %
200 // CHECK: ret void
201 
202 // CHECK: define internal void @[[RED_COMB3]](i8* %0, i8* %1)
203 // CHECK: fadd float %
204 // CHECK: store float %{{.+}}, float* %
205 // CHECK: ret void
206 
207 // CHECK: define internal void @[[RED_INIT4]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
208 // CHECK: load i64, i64* [[RED_SIZE2]]
209 // CHECK: store float 0.000000e+00, float* %
210 // CHECK: ret void
211 
212 // CHECK: define internal void @[[RED_COMB4]](i8* %0, i8* %1)
213 // CHECK: load i64, i64* [[RED_SIZE2]]
214 // CHECK: fadd float %
215 // CHECK: store float %{{.+}}, float* %
216 // CHECK: ret void
217 
218 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
219 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
220 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
221 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
222 
223 // CHECK-DAG: distinct !DISubprogram(linkageName: "[[TASK]]", scope: !
224 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]"
225 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB1]]"
226 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT2]]"
227 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_FINI2]]"
228 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB2]]"
229 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT3]]"
230 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB3]]"
231 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT4]]"
232 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB4]]"
233