//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares helper functions for running LLVM in a multi-threaded
// environment.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_THREADING_H
#define LLVM_SUPPORT_THREADING_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/Support/Compiler.h"
#include <optional>

#if defined(_MSC_VER)
// MSVC's call_once implementation has worked since VS 2015, which is the
// minimum supported version as of this writing.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(_LIBCPP_VERSION) ||                                               \
     !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__powerpc__)))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(__powerpc__) && defined(__LITTLE_ENDIAN__))
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#else
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#endif

#if LLVM_THREADING_USE_STD_CALL_ONCE
#include <mutex>
#else
#include "llvm/Support/Atomic.h"
#endif

namespace llvm {
class Twine;

/// Returns true if LLVM is compiled with support for multi-threading, and
/// false otherwise.
constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }

#if LLVM_THREADING_USE_STD_CALL_ONCE

  typedef std::once_flag once_flag;

#else

  enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };

  /// The llvm::once_flag structure
  ///
  /// This type is modeled after std::once_flag to use with llvm::call_once.
  /// This structure must be used as an opaque object. It is a struct to force
  /// autoinitialization and behave like std::once_flag.
  struct once_flag {
    volatile sys::cas_flag status = Uninitialized;
  };

#endif

  /// Execute the function specified as a parameter once.
  ///
  /// Typical usage:
  /// \code
  ///   void foo() {...};
  ///   ...
  ///   static once_flag flag;
  ///   call_once(flag, foo);
  /// \endcode
  ///
  /// \param flag Flag used for tracking whether or not this has run.
  /// \param F Function to call once.
  template <typename Function, typename... Args>
  void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
#if LLVM_THREADING_USE_STD_CALL_ONCE
    std::call_once(flag, std::forward<Function>(F),
                   std::forward<Args>(ArgList)...);
#else
    // For other platforms we use a generic (if brittle) version based on our
    // atomics.
    sys::cas_flag old_val =
        sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
    if (old_val == Uninitialized) {
      std::forward<Function>(F)(std::forward<Args>(ArgList)...);
      sys::MemoryFence();
      TsanIgnoreWritesBegin();
      TsanHappensBefore(&flag.status);
      flag.status = Done;
      TsanIgnoreWritesEnd();
    } else {
      // Wait until any thread doing the call has finished.
      sys::cas_flag tmp = flag.status;
      sys::MemoryFence();
      while (tmp != Done) {
        tmp = flag.status;
        sys::MemoryFence();
      }
    }
    TsanHappensAfter(&flag.status);
#endif
  }

  /// Describes how a thread pool will be used.
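  ///
  /// A minimal illustration of configuring a strategy by hand and querying
  /// the resulting thread count (the helpers below, such as
  /// hardware_concurrency(), are the usual way to obtain one):
  /// \code
  ///   ThreadPoolStrategy S;
  ///   S.ThreadsRequested = 8;    // suggested upper bound, not a guarantee
  ///   S.UseHyperThreads = false; // one thread per physical core
  ///   unsigned ThreadCount = S.compute_thread_count();
  /// \endcode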
  class ThreadPoolStrategy {
  public:
    // The default value (0) means all available threads should be used,
    // taking the affinity mask into account. If set, this value only
    // represents a suggested upper bound; the runtime might choose a lower
    // value (but never a higher one).
    unsigned ThreadsRequested = 0;

    // If true and SMT is active, use hyper-threads. If false, only one
    // std::thread is created per physical core.
    bool UseHyperThreads = true;

    // If set, constrains 'ThreadsRequested' to the number of hardware
    // threads or hardware cores.
    bool Limit = false;

    /// Retrieves the maximum number of threads available for the current
    /// strategy. This accounts for affinity masks and takes advantage of all
    /// CPU sockets.
    unsigned compute_thread_count() const;

    /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
    /// multi-socket system, this ensures threads are assigned to all CPU
    /// sockets. \p ThreadPoolNum represents a number bounded by [0,
    /// compute_thread_count()).
    void apply_thread_strategy(unsigned ThreadPoolNum) const;

    /// Finds the CPU socket where a thread should go. Returns 'std::nullopt'
    /// if the thread shall remain on the current CPU socket.
    std::optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
  };

  /// Build a strategy from a number of threads as a string provided in \p Num.
  /// When Num is above the max number of threads specified by the \p Default
  /// strategy, we attempt to equally allocate the threads on all CPU sockets.
  /// "0" or an empty string will return the \p Default strategy.
  /// "all" uses all hardware threads.
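  ///
  /// For example (illustrative; \c use() stands for any consumer of the
  /// resulting strategy):
  /// \code
  ///   std::optional<ThreadPoolStrategy> S = get_threadpool_strategy("all");
  ///   if (S)
  ///     use(*S);  // "all" maps to every hardware thread
  ///   // A string that does not parse yields std::nullopt.
  /// \endcode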
  std::optional<ThreadPoolStrategy>
  get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});

  /// Returns a thread strategy for tasks requiring significant memory or other
  /// resources. Intended for workloads where hardware_concurrency() proves to
  /// be less efficient. Avoid this strategy if doing lots of I/O. Currently
  /// based on physical cores, if available for the host system, otherwise it
  /// falls back to hardware_concurrency(). The computed thread count is 1 when
  /// LLVM is configured with LLVM_ENABLE_THREADS=OFF.
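  ///
  /// A sketch of typical use (assuming llvm::ThreadPool from
  /// llvm/Support/ThreadPool.h, constructed from a ThreadPoolStrategy;
  /// \c Files and \c process() are hypothetical):
  /// \code
  ///   ThreadPool Pool(heavyweight_hardware_concurrency());
  ///   for (auto &File : Files)
  ///     Pool.async([&] { process(File); });
  ///   Pool.wait();
  /// \endcode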
  inline ThreadPoolStrategy
  heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
    ThreadPoolStrategy S;
    S.UseHyperThreads = false;
    S.ThreadsRequested = ThreadCount;
    return S;
  }

  /// Like heavyweight_hardware_concurrency() above, but builds a strategy
  /// based on the rules described for get_threadpool_strategy().
  /// If \p Num is invalid, returns a default strategy where one thread per
  /// hardware core is used.
  inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
    std::optional<ThreadPoolStrategy> S =
        get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
    if (S)
      return *S;
    return heavyweight_hardware_concurrency();
  }

  /// Returns a default thread strategy that uses all available hardware
  /// resources, except those initially excluded by an affinity mask.
  /// The computed thread count is 1 when LLVM is configured with
  /// LLVM_ENABLE_THREADS=OFF.
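  ///
  /// For example (illustrative):
  /// \code
  ///   // Request at most four threads, hyper-threads included.
  ///   ThreadPoolStrategy S = hardware_concurrency(4);
  ///   unsigned ThreadCount = S.compute_thread_count();
  /// \endcode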
  inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
    ThreadPoolStrategy S;
    S.ThreadsRequested = ThreadCount;
    return S;
  }

  /// Like hardware_concurrency() above, but builds a strategy
  /// based on the rules described for get_threadpool_strategy().
  /// If \p Num is invalid, returns a default strategy where one thread per
  /// hardware core is used.
  inline ThreadPoolStrategy hardware_concurrency(StringRef Num) {
    std::optional<ThreadPoolStrategy> S =
        get_threadpool_strategy(Num, hardware_concurrency());
    if (S)
      return *S;
    return hardware_concurrency();
  }

  /// Returns an optimal thread strategy to execute a specified number of
  /// tasks. This strategy should prevent us from creating too many threads if
  /// we occasionally have an unexpectedly small number of tasks.
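  ///
  /// For example (illustrative):
  /// \code
  ///   // With only two tasks, at most two threads are requested even on a
  ///   // many-core machine, because Limit is set on the returned strategy.
  ///   ThreadPoolStrategy S = optimal_concurrency(2);
  /// \endcode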
  inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
    ThreadPoolStrategy S;
    S.Limit = true;
    S.ThreadsRequested = TaskCount;
    return S;
  }

  /// Return the current thread id, as used in various OS system calls.
  /// Note that not all platforms guarantee that the value returned will be
  /// unique across the entire system, so portable code should not assume
  /// this.
  uint64_t get_threadid();

  /// Get the maximum length of a thread name on this platform.
  /// A value of 0 means there is no limit.
  uint32_t get_max_thread_name_length();

  /// Set the name of the current thread.  Setting a thread's name can
  /// be helpful for enabling useful diagnostics under a debugger or when
  /// logging.  The level of support for setting a thread's name varies
  /// wildly across operating systems, and we only make a best effort to
  /// perform the operation on supported platforms.  No indication of success
  /// or failure is returned.
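  ///
  /// For example (illustrative; the name and \c WorkerIndex are arbitrary):
  /// \code
  ///   set_thread_name("llvm-worker-" + Twine(WorkerIndex));
  /// \endcode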
  void set_thread_name(const Twine &Name);

  /// Get the name of the current thread.  The level of support for
  /// getting a thread's name varies wildly across operating systems, and it
  /// is not even guaranteed that if you can successfully set a thread's name
  /// that you can later get it back.  This function is intended for diagnostic
  /// purposes, and as with setting a thread's name no indication of whether
  /// the operation succeeded or failed is returned.
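  ///
  /// For example (illustrative; SmallString is from llvm/ADT/SmallString.h):
  /// \code
  ///   SmallString<32> Name;
  ///   get_thread_name(Name);
  ///   // Name may remain empty if the platform cannot report thread names.
  /// \endcode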
  void get_thread_name(SmallVectorImpl<char> &Name);

  /// Returns a mask describing on which hardware threads, cores, CPUs, or
  /// NUMA groups the calling thread can execute. On Windows, threads cannot
  /// cross CPU socket boundaries.
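  ///
  /// For example (illustrative), counting the hardware threads this thread
  /// may run on:
  /// \code
  ///   llvm::BitVector Mask = get_thread_affinity_mask();
  ///   unsigned UsableThreads = Mask.count();
  /// \endcode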
  llvm::BitVector get_thread_affinity_mask();

  /// Returns how many physical CPUs or NUMA groups the system has.
  unsigned get_cpus();

  /// Returns the number of physical cores (as opposed to logical cores
  /// returned from thread::hardware_concurrency(), which includes
  /// hyper-threads). Returns -1 if unknown for the current host system.
  int get_physical_cores();

  enum class ThreadPriority {
    /// Lower the current thread's priority as much as possible. Can be used
    /// for long-running tasks that are not time critical; more energy-
    /// efficient than Low.
    Background = 0,

    /// Lower the current thread's priority such that it does not affect
    /// foreground tasks significantly. This is a good default for long-
    /// running, latency-insensitive tasks, to make sure the CPU is not hogged
    /// by this task.
    Low = 1,

    /// Restore the current thread's priority to default scheduling priority.
    Default = 2,
  };
  enum class SetThreadPriorityResult { FAILURE, SUCCESS };
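  /// Apply \p Priority to the current thread. Support varies across operating
  /// systems; the result reports whether the request took effect.
  ///
  /// For example (illustrative), before starting background indexing work:
  /// \code
  ///   SetThreadPriorityResult Res =
  ///       set_thread_priority(ThreadPriority::Background);
  ///   // Res is FAILURE if the platform cannot lower the priority.
  /// \endcode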
  SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
} // namespace llvm

#endif