xref: /llvm-project/openmp/runtime/test/worksharing/for/omp_for_collapse_LowerTriangularLessEqual.c (revision fcd2d483251605f1b6cdace0ce5baf5dfd31b880)
1 // RUN: %libomp-compile-and-run
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include "omp.h"
6 
7 #ifndef MAX_BOUND
8 #define MAX_BOUND 64
9 #endif
10 #ifndef _MSC_VER
11 #define NO_EFFICIENCY_CHECK
12 #endif
13 
14 /* To ensure Correctness, only valid iterations are executed and are executed
15    only once. Stores the number of times an iteration is executed. */
16 unsigned *execution_count = NULL;
17 /* Stores the number of iterations executed by each thread. */
18 unsigned *iterations_per_thread = NULL;
19 
/* Allocates an uninitialized bound1 x bound2 array of unsigned counters.
   The element count is computed in size_t so that large bounds cannot wrap
   in unsigned arithmetic and silently yield an undersized buffer.
   Returns NULL if the requested size is not representable in size_t or if
   malloc fails. The caller releases the result with Free(). */
unsigned *Alloc(unsigned bound1, unsigned bound2) {
  /* Largest element count whose byte size still fits in size_t. */
  const size_t max_elems = (size_t)-1 / sizeof(unsigned);

  if (bound2 != 0 && bound1 > max_elems / bound2)
    return NULL;
  return malloc((size_t)bound1 * bound2 * sizeof(unsigned));
}
23 
/* Sets the first bound1 * bound2 unsigned elements of p to zero.
   The element count is computed in size_t to avoid wrap-around in unsigned
   arithmetic. A NULL pointer or an empty range is a no-op, so memset is never
   called with a null destination. */
void ZeroOut(unsigned *p, unsigned bound1, unsigned bound2) {
  const size_t count = (size_t)bound1 * bound2;

  if (p == NULL || count == 0)
    return;
  memset(p, 0, count * sizeof(unsigned));
}
27 
/* Hands the buffer p back to the C allocator via free(). */
void Free(unsigned *p) {
  free(p);
}
29 
/* Returns the address of element (i, j) in the row-major array p whose rows
   are bound2 elements long. */
unsigned *Index(unsigned *p, unsigned i, unsigned j, unsigned bound2) {
  const unsigned offset = i * bound2 + j;
  return p + offset;
}
33 
test(int upper_bound)34 int test(int upper_bound) {
35 
36   unsigned total_iterations = upper_bound * (upper_bound + 1) / 2;
37   unsigned num_threads = omp_get_max_threads();
38   unsigned lower_per_chunk = total_iterations / num_threads;
39   unsigned upper_per_chunk =
40       lower_per_chunk + ((total_iterations % num_threads) ? 1 : 0);
41   int i, j;
42 
43   omp_set_num_threads(num_threads);
44 
45   ZeroOut(execution_count, upper_bound, upper_bound);
46   ZeroOut(iterations_per_thread, num_threads, 1);
47 
48 #ifdef VERBOSE
49   fprintf(stderr,
50           "INFO: Using %6d threads for %6d outer iterations with %6d [%6d:%6d] "
51           "chunks "
52           "loop type lower triangle <,<= - ",
53           num_threads, upper_bound, total_iterations, lower_per_chunk,
54           upper_per_chunk);
55 #endif
56 
57 #pragma omp parallel shared(iterations_per_thread, execution_count)
58   { /* begin of parallel */
59     /* Lower triangular execution_count matrix */
60 #pragma omp for schedule(static) collapse(2)
61     for (i = 0; i < upper_bound; i++) {
62       for (j = 0; j <= i; j++) {
63         (*Index(iterations_per_thread, omp_get_thread_num(), 0, 1))++;
64         (*Index(execution_count, i, j, upper_bound))++;
65       }
66     } /* end of for*/
67   } /* end of parallel */
68 
69   /* check the execution_count array */
70   for (i = 0; i < upper_bound; i++) {
71     for (j = 0; j <= i; j++) {
72       unsigned value = *Index(execution_count, i, j, upper_bound);
73       /* iteration with j<=i are valid, but should have been executed only once
74        */
75       if (value != 1) {
76         fprintf(stderr, "ERROR: valid iteration [%i,%i] executed %i times.\n",
77                 i, j, value);
78         return 0;
79       }
80     }
81     for (j = i + 1; j < upper_bound; j++) {
82       unsigned value = *Index(execution_count, i, j, upper_bound);
83       /* iteration with j>=i are invalid and should not have been executed
84        */
85       if (value > 0) {
86         fprintf(stderr, "ERROR: invalid iteration [%i,%i] executed %i times.\n",
87                 i, j, value);
88         return 0;
89       }
90     }
91   }
92 
93 #ifndef NO_EFFICIENCY_CHECK
94   /* Ensure the number of iterations executed by each thread is within bounds */
95   for (i = 0; i < num_threads; i++) {
96     unsigned value = *Index(iterations_per_thread, i, 0, 1);
97     if (value < lower_per_chunk || value > upper_per_chunk) {
98       fprintf(stderr,
99               "ERROR: Inefficient Collapse thread %d of %d assigned %i "
100               "iterations; must be between %d and %d\n",
101               i, num_threads, value, lower_per_chunk, upper_per_chunk);
102       return 0;
103     }
104   }
105 #endif
106 #ifdef VERBOSE
107   fprintf(stderr, "PASSED\r\n");
108 #endif
109   return 1;
110 }
111 
main()112 int main() {
113 
114   execution_count = Alloc(MAX_BOUND, MAX_BOUND);
115   iterations_per_thread = Alloc(omp_get_max_threads(), 1);
116 
117   for (unsigned j = 0; j < MAX_BOUND; j++) {
118     if (!test(j))
119       return 1;
120   }
121   Free(execution_count);
122   Free(iterations_per_thread);
123   return 0;
124 }
125