xref: /llvm-project/openmp/runtime/test/tasking/kmp_task_modifier_simple_ws_new.cpp (revision 42016791101782a0f5e64e1ea9ea282e27a8132a)
1 // RUN: %libomp-cxx-compile-and-run
2 
3 #include <stdio.h>
4 #include <omp.h>
5 
// Number of threads requested via omp_set_num_threads(); also used as the
// trip count of the worksharing loop in main().
#define NT 4
// Initial value of the reduction variable `var` in main().
#define INIT 10
8 
9 /*
10 The test emulates code generation needed for reduction with task modifier on
11 parallel construct.
12 
13 Note: tasks could just use in_reduction clause, but compiler does not accept
14 this because of bug: it mistakenly requires reduction item to be shared, which
15 is only true for reduction on worksharing and wrong for task reductions.
16 */
17 
18 //------------------------------------------------
19 // OpenMP runtime library routines
// Prototypes are declared by hand with C linkage so the test can call the
// runtime entry points directly, the way compiler-generated code would.
#ifdef __cplusplus
extern "C" {
#endif
// Called from inside the tasks with the handle returned by
// __kmpc_taskred_modifier_init() and the address of the reduction item; the
// test accumulates through the returned pointer.
extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);
// Alternative entry point, left commented out:
// extern void* __kmpc_task_reduction_modifier_init(void *loc, int gtid, int
// flags, int num, void* data);
// is_ws: 1 - worksharing construct, 0 - parallel. num: number of reduction
// objects described by data. The result is used as the taskgroup handle.
extern void *__kmpc_taskred_modifier_init(void *loc, int gtid, int is_ws,
                                          int num, void *data);
// Counterpart of the init call; the test passes the same is_ws value to both.
extern void __kmpc_task_reduction_modifier_fini(void *loc, int gtid, int is_ws);
// Returns the global thread id (gtid) that the other entry points take; the
// test always passes NULL as the argument.
extern int __kmpc_global_thread_num(void *);
#ifdef __cplusplus
}
#endif
33 
34 //------------------------------------------------
35 // Compiler-generated code
36 
// Descriptor of one reduction item; main() passes its address as the `data`
// argument of __kmpc_taskred_modifier_init().
// NOTE(review): field order and types presumably have to match what the
// runtime expects for that argument - confirm against the runtime headers
// before changing anything here.
typedef struct red_input {
  void *reduce_shar; /**< shared between tasks item to reduce into */
  void *reduce_orig; /**< original reduction item used for initialization */
  size_t reduce_size; /**< size of data item in bytes */
  // three compiler-generated routines (init, fini are optional):
  void *reduce_init; /**< data initialization routine (single parameter) */
  void *reduce_fini; /**< data finalization routine */
  void *reduce_comb; /**< data combiner routine */
  unsigned flags; /**< flags for additional info from compiler */
} red_input_t;
47 
// Combiner routine for the int reduction: adds the int stored at `rhs` to the
// int stored at `lhs`, leaving the sum in `lhs`.
void i_comb(void *lhs, void *rhs) {
  int *acc = (int *)lhs;
  const int *addend = (const int *)rhs;
  *acc = *acc + *addend;
}
49 
main()50 int main() {
51   int var = INIT;
52   int *p_var_orig = &var;
53   int i;
54   omp_set_dynamic(0);
55   omp_set_num_threads(NT);
56 #pragma omp parallel private(i) shared(p_var_orig)
57 //  #pragma omp for reduction(task,+:var)
58 #pragma omp for reduction(+ : var)
59   for (i = 0; i < NT; ++i) // single iteration per thread
60   {
61     // generated code, which actually should be placed before
62     // loop iterations distribution, but placed here just to show the idea,
63     // and to keep correctness the loop count is equal to number of threads
64     int gtid = __kmpc_global_thread_num(NULL);
65     void *tg; // pointer to taskgroup (optional)
66     red_input_t r_var;
67     r_var.reduce_shar = &var;
68     r_var.reduce_orig =
69         p_var_orig; // not used in this test but illustrates codegen
70     r_var.reduce_size = sizeof(var);
71     r_var.reduce_init = NULL;
72     r_var.reduce_fini = NULL;
73     r_var.reduce_comb = (void *)&i_comb;
74     tg = __kmpc_taskred_modifier_init(
75         NULL, // ident_t loc;
76         gtid,
77         1, // 1 - worksharing construct, 0 - parallel
78         1, // number of reduction objects
79         &r_var // related data
80         );
81     // end of generated code
82     var++;
83 #pragma omp task /*in_reduction(+:var)*/ shared(var)
84     {
85       // emulate task reduction here because of compiler bug:
86       // it mistakenly declines to accept in_reduction because var is private
87       // outside.
88       int gtid = __kmpc_global_thread_num(NULL);
89       int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
90       *p_var += 1;
91     }
92     if (omp_get_thread_num() > 0) {
93 #pragma omp task /*in_reduction(+:var)*/ shared(var)
94       {
95         int gtid = __kmpc_global_thread_num(NULL);
96         int *p_var = (int *)__kmpc_task_reduction_get_th_data(gtid, tg, &var);
97         *p_var += 1;
98       }
99     }
100     // generated code, which actually should be placed after loop completion
101     // but before barrier and before loop reduction. It placed here just to show
102     // the idea,
103     // and to keep correctness the loop count is equal to number of threads
104     __kmpc_task_reduction_modifier_fini(NULL, gtid, 1);
105     // end of generated code
106   }
107   if (var == INIT + NT * 3 - 1) {
108     printf("passed\n");
109     return 0;
110   } else {
111     printf("failed: var = %d (!= %d)\n", var, INIT + NT * 3 - 1);
112     return 1;
113   }
114 }
115