// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Only check the overall shape of the code and the presence of relevant
// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level.

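// Each test case first declares the reduction used by the checks below: an
// 'init' region yielding the neutral element, a 'combiner' region merging two
// partial values, and an optional 'atomic' region used for the atomic
// reduction path.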
omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// CHECK-LABEL: @simple_reduction
llvm.func @simple_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %2 = llvm.load %prv : !llvm.ptr -> f32
        %3 = llvm.fadd %1, %2 : f32
        llvm.store %3, %prv : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]]
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// When the same reduction declaration is used several times, its regions
// are translated several times, which shouldn't lead to value/block
// remapping assertions.
// CHECK-LABEL: @reuse_declaration
llvm.func @reuse_declaration(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv0, @add_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %3 = llvm.load %prv0 : !llvm.ptr -> f32
        %4 = llvm.fadd %3, %1 : f32
        llvm.store %4, %prv0 : f32, !llvm.ptr
        %5 = llvm.load %prv1 : !llvm.ptr -> f32
        %6 = llvm.fadd %5, %1 : f32
        llvm.store %6, %prv1 : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL2]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: %[[UPDATED2:.+]] = fadd float %[[PARTIAL2]], 2.000000e+00
// CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fadd float


// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// It's okay not to reference the reduction variable in the body.
// CHECK-LABEL: @missing_omp_reduction
llvm.func @missing_omp_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv0, @add_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %3 = llvm.load %prv0 : !llvm.ptr -> f32
        %4 = llvm.fadd %3, %1 : f32
        llvm.store %4, %prv0 : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL2]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
// CHECK-NOT: %{{.*}} = load float, ptr %[[PRIVATE2]]
// CHECK-NOT: %{{.*}} = fadd float %[[PARTIAL2]], 2.000000e+00

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fadd float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// It's okay to refer to the same reduction variable more than once in the
// body.
// CHECK-LABEL: @double_reference
llvm.func @double_reference(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %2 = llvm.load %prv : !llvm.ptr -> f32
        %3 = llvm.fadd %2, %1 : f32
        llvm.store %3, %prv : f32, !llvm.ptr
        %4 = llvm.load %prv : !llvm.ptr -> f32
        %5 = llvm.fadd %4, %1 : f32
        llvm.store %5, %prv : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

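// @mul_f32 below deliberately provides no atomic region, so the lowering of
// @no_atomic must take the non-atomic fallback path (the atomic branch
// becomes 'unreachable', as checked further down).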
omp.declare_reduction @mul_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(1.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fmul %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}

// CHECK-LABEL: @no_atomic
llvm.func @no_atomic(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  %2 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv0, @mul_f32 %2 -> %prv1 : !llvm.ptr, !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %1 = llvm.mlir.constant(2.0 : f32) : f32
        %3 = llvm.load %prv0 : !llvm.ptr -> f32
        %4 = llvm.fadd %3, %1 : f32
        llvm.store %4, %prv0 : f32, !llvm.ptr
        %5 = llvm.load %prv1 : !llvm.ptr -> f32
        %6 = llvm.fmul %5, %1 : f32
        llvm.store %6, %prv1 : f32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE1:.+]] = alloca float
// CHECK: %[[PRIVATE2:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE1]]
// CHECK: store float 1.000000e+00, ptr %[[PRIVATE2]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction not provided.
// CHECK: unreachable

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: fmul float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL1:.+]] = load float, ptr %[[PRIVATE1]]
// CHECK: %[[UPDATED1:.+]] = fadd float %[[PARTIAL1]], 2.000000e+00
// CHECK: store float %[[UPDATED1]], ptr %[[PRIVATE1]]
// CHECK: %[[PARTIAL2:.+]] = load float, ptr %[[PRIVATE2]]
// CHECK: %[[UPDATED2:.+]] = fmul float %[[PARTIAL2]], 2.000000e+00
// CHECK: store float %[[UPDATED2]], ptr %[[PRIVATE2]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
// CHECK: fmul float

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

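// Unlike the preceding tests, the reduction clause here is attached directly
// to omp.parallel rather than to a nested worksharing loop.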
// CHECK-LABEL: @simple_reduction_parallel
llvm.func @simple_reduction_parallel() {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel reduction(@add_f32 %0 -> %prv : !llvm.ptr) {
    %1 = llvm.mlir.constant(2.0 : f32) : f32
    %2 = llvm.load %prv : !llvm.ptr -> f32
    %3 = llvm.fadd %2, %1 : f32
    llvm.store %3, %prv : f32, !llvm.ptr
    omp.terminator
  }
  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Update of the private variable
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float %[[PARTIAL]], 2.000000e+00
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float

// -----

omp.declare_reduction @add_i32 : i32
init {
^bb0(%arg: i32):
  %0 = llvm.mlir.constant(0 : i32) : i32
  omp.yield (%0 : i32)
}
combiner {
^bb1(%arg0: i32, %arg1: i32):
  %1 = llvm.add %arg0, %arg1 : i32
  omp.yield (%1 : i32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> i32
  llvm.atomicrmw add %arg2, %2 monotonic : !llvm.ptr, i32
  omp.yield
}

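// The loop below accumulates the truncated induction variable itself into the
// reduction, rather than a constant as in the previous tests.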
// CHECK-LABEL: @parallel_nested_workshare_reduction
llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr

  %lb = llvm.mlir.constant(1 : i64) : i64
  %step = llvm.mlir.constant(1 : i64) : i64

  omp.parallel {
    omp.wsloop reduction(@add_i32 %0 -> %prv : !llvm.ptr) {
      omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
        %ival = llvm.trunc %iv : i64 to i32
        %lprv = llvm.load %prv : !llvm.ptr -> i32
        %add = llvm.add %lprv, %ival : i32
        llvm.store %add, %prv : i32, !llvm.ptr
        omp.yield
      }
    }
    omp.terminator
  }

  llvm.return
}

// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:[0-9]+]] = alloca i32
// CHECK: store i32 0, ptr %[[PRIVATE]]

// Loop exit:
// CHECK: call void @__kmpc_barrier

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction:
// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
// CHECK: atomicrmw add ptr %{{.*}}, i32 %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: add i32
// CHECK: call void @__kmpc_end_reduce

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load i32, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = add i32 %[[PARTIAL]], {{.*}}
// CHECK: store i32 %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: add i32

// -----

omp.declare_reduction @add_f32 : f32
init {
^bb0(%arg: f32):
  %0 = llvm.mlir.constant(0.0 : f32) : f32
  omp.yield (%0 : f32)
}
combiner {
^bb1(%arg0: f32, %arg1: f32):
  %1 = llvm.fadd %arg0, %arg1 : f32
  omp.yield (%1 : f32)
}
atomic {
^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
  %2 = llvm.load %arg3 : !llvm.ptr -> f32
  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
  omp.yield
}

// CHECK-LABEL: @wsloop_simd_reduction
llvm.func @wsloop_simd_reduction(%lb : i64, %ub : i64, %step : i64) {
  %c1 = llvm.mlir.constant(1 : i32) : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  omp.parallel {
    omp.wsloop reduction(@add_f32 %0 -> %prv1 : !llvm.ptr) {
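      // The inner simd reduction takes the wsloop's private copy (%prv1) as
      // its own reduction input; together the two ops form a composite
      // 'do/for simd' construct (see the omp.composite attributes below).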
      omp.simd reduction(@add_f32 %prv1 -> %prv2 : !llvm.ptr) {
        omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
          %1 = llvm.mlir.constant(2.0 : f32) : f32
          %2 = llvm.load %prv2 : !llvm.ptr -> f32
          %3 = llvm.fadd %1, %2 : f32
          llvm.store %3, %prv2 : f32, !llvm.ptr
          omp.yield
        }
      } {omp.composite}
    } {omp.composite}
    omp.terminator
  }
  llvm.return
}

// Same checks as for wsloop reduction, because currently omp.simd is ignored in
// a composite 'do/for simd' construct.
// Call to the outlined function.
// CHECK: call void {{.*}} @__kmpc_fork_call
// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]

// Outlined function.
// CHECK: define internal void @[[OUTLINED]]

// Private reduction variable and its initialization.
// CHECK: %[[PRIVATE:.+]] = alloca float
// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]

// Call to the reduction function.
// CHECK: call i32 @__kmpc_reduce
// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]

// Atomic reduction.
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]

// Non-atomic reduction:
// CHECK: fadd float
// CHECK: call void @__kmpc_end_reduce
// CHECK: br label %[[FINALIZE:.+]]

// CHECK: [[FINALIZE]]:
// CHECK: call void @__kmpc_barrier

// Update of the private variable using the reduction region
// (the body block currently comes after all the other blocks).
// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
// CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]]
// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]

// Reduction function.
// CHECK: define internal void @[[REDFUNC]]
// CHECK: fadd float
