xref: /freebsd-src/contrib/llvm-project/openmp/runtime/src/kmp_barrier.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1349cc55cSDimitry Andric /*
2349cc55cSDimitry Andric  * kmp_barrier.h
3349cc55cSDimitry Andric  */
4349cc55cSDimitry Andric 
5349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
9349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10349cc55cSDimitry Andric //
11349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
12349cc55cSDimitry Andric 
13349cc55cSDimitry Andric #ifndef KMP_BARRIER_H
14349cc55cSDimitry Andric #define KMP_BARRIER_H
15349cc55cSDimitry Andric 
16349cc55cSDimitry Andric #include "kmp.h"
17349cc55cSDimitry Andric #include "kmp_i18n.h"
18349cc55cSDimitry Andric 
// Aligned-allocation backend selection. Exactly one branch of this #if chain
// is compiled; each one provides KMP_ALIGNED_ALLOCATE(size, alignment) and a
// matching KMP_ALIGNED_FREE(ptr). Memory obtained from KMP_ALIGNED_ALLOCATE
// must be released with KMP_ALIGNED_FREE, since the backends pair different
// allocate/free routines.
19349cc55cSDimitry Andric #if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
20349cc55cSDimitry Andric #include <xmmintrin.h>
21349cc55cSDimitry Andric #define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
22349cc55cSDimitry Andric #define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
23349cc55cSDimitry Andric #elif KMP_HAVE_ALIGNED_ALLOC
// Rounds `val` up to the next multiple of `alignment`. `alignment` is
// expanded three times, so arguments with side effects must be avoided.
// NOTE(review): the name is spelled "ALGIN" (sic); it is left unchanged here
// because code outside this header may reference it.
24*5f757f3fSDimitry Andric #define KMP_ALGIN_UP(val, alignment)                                           \
25*5f757f3fSDimitry Andric   (((val) + (alignment)-1) / (alignment) * (alignment))
// aligned_alloc takes (alignment, size) -- the reverse of this macro's
// parameter order -- and the requested size is first rounded up to a multiple
// of the alignment, as C11 specifies for aligned_alloc.
26*5f757f3fSDimitry Andric #define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
27*5f757f3fSDimitry Andric   aligned_alloc(alignment, KMP_ALGIN_UP(size, alignment))
28349cc55cSDimitry Andric #define KMP_ALIGNED_FREE(ptr) free(ptr)
29349cc55cSDimitry Andric #elif KMP_HAVE_POSIX_MEMALIGN
// posix_memalign-backed variant of KMP_ALIGNED_ALLOCATE.
//
// size:      number of bytes to allocate.
// alignment: requested alignment; posix_memalign requires a power of two
//            that is also a multiple of sizeof(void *).
//
// Returns the aligned allocation, or NULL when posix_memalign reports an
// error (invalid alignment or out of memory). Release the result with
// KMP_ALIGNED_FREE.
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // Start from NULL: on failure posix_memalign is not required to store
  // anything through its first argument, so the pointer must never be read
  // (let alone freed) from an uninitialized state.
  void *ptr = NULL;
  if (posix_memalign(&ptr, alignment, size) != 0)
    return NULL; // nothing was allocated, so there is nothing to release
  return ptr;
}
// Pairs with the posix_memalign-based KMP_ALIGNED_ALLOCATE of this branch.
40349cc55cSDimitry Andric #define KMP_ALIGNED_FREE(ptr) free(ptr)
41349cc55cSDimitry Andric #elif KMP_HAVE__ALIGNED_MALLOC
42349cc55cSDimitry Andric #include <malloc.h>
// _aligned_malloc memory has to be released with _aligned_free, not free.
43349cc55cSDimitry Andric #define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
44349cc55cSDimitry Andric #define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
45349cc55cSDimitry Andric #else
// Fallback when none of the aligned allocators above is available: the
// `alignment` argument is dropped and only `size` reaches
// KMP_INTERNAL_MALLOC.
// NOTE(review): in this configuration the 4 * CACHE_LINE alignment requested
// by distributedBarrier::allocate is not enforced by this macro -- confirm
// whether KMP_INTERNAL_MALLOC gives sufficient alignment on such targets.
46349cc55cSDimitry Andric #define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
47349cc55cSDimitry Andric #define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
48349cc55cSDimitry Andric #endif
49349cc55cSDimitry Andric 
50349cc55cSDimitry Andric // Use four cache lines: MLC tends to prefetch the next or previous cache line
51349cc55cSDimitry Andric // creating a possible fake conflict between cores, so this is the only way to
52349cc55cSDimitry Andric // guarantee that no such prefetch can happen.
// The #ifndef guard lets a definition made before this point take precedence
// over the default of KMP_ALIGN(4 * CACHE_LINE).
53349cc55cSDimitry Andric #ifndef KMP_FOURLINE_ALIGN_CACHE
54349cc55cSDimitry Andric #define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
55349cc55cSDimitry Andric #endif
56349cc55cSDimitry Andric 
// Compile-time switch, set to 0 here. It is not referenced anywhere else in
// this header; presumably it is consumed by the barrier implementation --
// TODO confirm against the users of this macro.
57349cc55cSDimitry Andric #define KMP_OPTIMIZE_FOR_REDUCTIONS 0
58349cc55cSDimitry Andric 
59349cc55cSDimitry Andric class distributedBarrier {
60349cc55cSDimitry Andric   struct flags_s {
61349cc55cSDimitry Andric     kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
62349cc55cSDimitry Andric   };
63349cc55cSDimitry Andric 
64349cc55cSDimitry Andric   struct go_s {
65349cc55cSDimitry Andric     std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
66349cc55cSDimitry Andric   };
67349cc55cSDimitry Andric 
68349cc55cSDimitry Andric   struct iter_s {
69349cc55cSDimitry Andric     kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
70349cc55cSDimitry Andric   };
71349cc55cSDimitry Andric 
72349cc55cSDimitry Andric   struct sleep_s {
73349cc55cSDimitry Andric     std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
74349cc55cSDimitry Andric   };
75349cc55cSDimitry Andric 
76349cc55cSDimitry Andric   void init(size_t nthr);
77349cc55cSDimitry Andric   void resize(size_t nthr);
78349cc55cSDimitry Andric   void computeGo(size_t n);
79349cc55cSDimitry Andric   void computeVarsForN(size_t n);
80349cc55cSDimitry Andric 
81349cc55cSDimitry Andric public:
82349cc55cSDimitry Andric   enum {
83349cc55cSDimitry Andric     MAX_ITERS = 3,
84349cc55cSDimitry Andric     MAX_GOS = 8,
85349cc55cSDimitry Andric     IDEAL_GOS = 4,
86349cc55cSDimitry Andric     IDEAL_CONTENTION = 16,
87349cc55cSDimitry Andric   };
88349cc55cSDimitry Andric 
89349cc55cSDimitry Andric   flags_s *flags[MAX_ITERS];
90349cc55cSDimitry Andric   go_s *go;
91349cc55cSDimitry Andric   iter_s *iter;
92349cc55cSDimitry Andric   sleep_s *sleep;
93349cc55cSDimitry Andric 
94349cc55cSDimitry Andric   size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
95349cc55cSDimitry Andric   size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
96349cc55cSDimitry Andric   // number of go signals each requiring one write per iteration
97349cc55cSDimitry Andric   size_t KMP_ALIGN_CACHE num_gos;
98349cc55cSDimitry Andric   // number of groups of gos
99349cc55cSDimitry Andric   size_t KMP_ALIGN_CACHE num_groups;
100349cc55cSDimitry Andric   // threads per go signal
101349cc55cSDimitry Andric   size_t KMP_ALIGN_CACHE threads_per_go;
102349cc55cSDimitry Andric   bool KMP_ALIGN_CACHE fix_threads_per_go;
103349cc55cSDimitry Andric   // threads per group
104349cc55cSDimitry Andric   size_t KMP_ALIGN_CACHE threads_per_group;
105349cc55cSDimitry Andric   // number of go signals in a group
106349cc55cSDimitry Andric   size_t KMP_ALIGN_CACHE gos_per_group;
107349cc55cSDimitry Andric   void *team_icvs;
108349cc55cSDimitry Andric 
109349cc55cSDimitry Andric   distributedBarrier() = delete;
110349cc55cSDimitry Andric   ~distributedBarrier() = delete;
111349cc55cSDimitry Andric 
112349cc55cSDimitry Andric   // Used instead of constructor to create aligned data
allocate(int nThreads)113349cc55cSDimitry Andric   static distributedBarrier *allocate(int nThreads) {
114349cc55cSDimitry Andric     distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
115349cc55cSDimitry Andric         sizeof(distributedBarrier), 4 * CACHE_LINE);
116349cc55cSDimitry Andric     if (!d) {
117349cc55cSDimitry Andric       KMP_FATAL(MemoryAllocFailed);
118349cc55cSDimitry Andric     }
119349cc55cSDimitry Andric     d->num_threads = 0;
120349cc55cSDimitry Andric     d->max_threads = 0;
121349cc55cSDimitry Andric     for (int i = 0; i < MAX_ITERS; ++i)
122349cc55cSDimitry Andric       d->flags[i] = NULL;
123349cc55cSDimitry Andric     d->go = NULL;
124349cc55cSDimitry Andric     d->iter = NULL;
125349cc55cSDimitry Andric     d->sleep = NULL;
126349cc55cSDimitry Andric     d->team_icvs = NULL;
127349cc55cSDimitry Andric     d->fix_threads_per_go = false;
128349cc55cSDimitry Andric     // calculate gos and groups ONCE on base size
129349cc55cSDimitry Andric     d->computeGo(nThreads);
130349cc55cSDimitry Andric     d->init(nThreads);
131349cc55cSDimitry Andric     return d;
132349cc55cSDimitry Andric   }
133349cc55cSDimitry Andric 
deallocate(distributedBarrier * db)134349cc55cSDimitry Andric   static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
135349cc55cSDimitry Andric 
update_num_threads(size_t nthr)136349cc55cSDimitry Andric   void update_num_threads(size_t nthr) { init(nthr); }
137349cc55cSDimitry Andric 
need_resize(size_t new_nthr)138349cc55cSDimitry Andric   bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
get_num_threads()139349cc55cSDimitry Andric   size_t get_num_threads() { return num_threads; }
140349cc55cSDimitry Andric   kmp_uint64 go_release();
141349cc55cSDimitry Andric   void go_reset();
142349cc55cSDimitry Andric };
143349cc55cSDimitry Andric 
144349cc55cSDimitry Andric #endif // KMP_BARRIER_H
145