/*
 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
#include <inttypes.h>
#include <iostream>
#include <list>
#include <mutex>
#include <sstream>
#include <string>
#include <sys/resource.h>
#include <unistd.h>
#include <unordered_map>
#include <vector>

#include "omp-tools.h"

// Define an attribute that indicates that the fall-through from the previous
// case label is intentional and should not be diagnosed by a compiler.
//   Code from libcxx/include/__config
// Use a function-like macro to imply that it must be followed by a semicolon.
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
// icc cannot properly tell that this attribute is absent, so force it off
#elif defined(__INTEL_COMPILER)
#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
#define KMP_FALLTHROUGH() ((void)0)
#endif
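
// Usage sketch (illustrative): the macro stands in for a bare fall-through
// between case labels and requires a trailing semicolon, e.g.
//   switch (phase) {
//   case 0:
//     begin();
//     KMP_FALLTHROUGH();
//   case 1:
//     end();
//     break;
//   }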

static int hasReductionCallback;

namespace {
class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int report_data_leak{0};
  int ignore_serial{0};
  std::atomic<int> all_memory{0};

  ArcherFlags(const char *env) {
    if (env) {
      std::vector<std::string> tokens;
      std::string token;
      std::string str(env);
      std::istringstream iss(str);
      int tmp_int;
      while (std::getline(iss, token, ' '))
        tokens.push_back(token);

      for (std::vector<std::string>::iterator it = tokens.begin();
           it != tokens.end(); ++it) {
#if (LLVM_VERSION) >= 40
        if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
          continue;
#endif
        if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
          continue;
        if (sscanf(it->c_str(), "verbose=%d", &verbose))
          continue;
        if (sscanf(it->c_str(), "report_data_leak=%d", &report_data_leak))
          continue;
        if (sscanf(it->c_str(), "enable=%d", &enabled))
          continue;
        if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
          continue;
        if (sscanf(it->c_str(), "all_memory=%d", &tmp_int)) {
          all_memory = tmp_int;
          continue;
        }
        std::cerr << "Illegal value for ARCHER_OPTIONS variable: " << token
                  << std::endl;
      }
    }
  }
};
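
// Example (hypothetical values): options are parsed as space-separated
// key=value tokens, e.g.
//   ARCHER_OPTIONS="verbose=1 report_data_leak=1 all_memory=1"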

class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (env) {
      std::vector<std::string> tokens;
      std::string str(env);
      auto end = str.end();
      auto it = str.begin();
      auto is_sep = [](char c) {
        return c == ' ' || c == ',' || c == ':' || c == '\n' || c == '\t' ||
               c == '\r';
      };
      while (it != end) {
        auto next_it = std::find_if(it, end, is_sep);
        tokens.emplace_back(it, next_it);
        it = next_it;
        if (it != end) {
          ++it;
        }
      }

      for (const auto &token : tokens) {
        // We are only interested in ignore_noninstrumented_modules, so that
        // we can print a warning if it is not set.
        if (sscanf(token.c_str(), "ignore_noninstrumented_modules=%d",
                   &ignore_noninstrumented_modules))
          continue;
      }
    }
  }
};
} // namespace
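
// Example (illustrative): TSan options may be separated by spaces, commas,
// colons, or other whitespace, e.g.
//   TSAN_OPTIONS="ignore_noninstrumented_modules=1"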

#if (LLVM_VERSION) >= 40
extern "C" {
int __attribute__((weak)) __archer_get_omp_status();
void __attribute__((weak)) __tsan_flush_memory() {}
}
#endif
static ArcherFlags *archer_flags;

#ifndef TsanHappensBefore

template <typename... Args> static void __ompt_tsan_func(Args...) {}

#define DECLARE_TSAN_FUNCTION(name, ...)                                       \
  static void (*name)(__VA_ARGS__) = __ompt_tsan_func<__VA_ARGS__>;

// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// TSan detects these exact functions by name.
extern "C" {
DECLARE_TSAN_FUNCTION(AnnotateHappensAfter, const char *, int,
                      const volatile void *)
DECLARE_TSAN_FUNCTION(AnnotateHappensBefore, const char *, int,
                      const volatile void *)
DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesBegin, const char *, int)
DECLARE_TSAN_FUNCTION(AnnotateIgnoreWritesEnd, const char *, int)
DECLARE_TSAN_FUNCTION(AnnotateNewMemory, const char *, int,
                      const volatile void *, size_t)
DECLARE_TSAN_FUNCTION(__tsan_func_entry, const void *)
DECLARE_TSAN_FUNCTION(__tsan_func_exit)

// RunningOnValgrind is used to detect the absence of TSan and must
// intentionally be a nullptr.
static int (*RunningOnValgrind)(void);
}

// This marker is used to define a happens-before arc. The race detector will
// infer an arc from the begin to the end when they share the same pointer
// argument.
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)

// This marker defines the destination of a happens-before arc.
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)

// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)

// Resume checking for racy writes.
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)

// We don't really delete the clock for now
#define TsanDeleteClock(cv)

// newMemory
#define TsanNewMemory(addr, size)                                              \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#define TsanFreeMemory(addr, size)                                             \
  AnnotateNewMemory(__FILE__, __LINE__, addr, size)
#endif

// Function entry/exit
#define TsanFuncEntry(pc) __tsan_func_entry(pc)
#define TsanFuncExit() __tsan_func_exit()
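
// Pairing sketch (illustrative): annotations on the same address create an
// ordering edge that TSan honors between two threads:
//   TsanHappensBefore(&sync); // thread A, before releasing work
//   ...
//   TsanHappensAfter(&sync);  // thread B, before touching that work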

/// Required OMPT inquiry functions.
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_thread_data_t ompt_get_thread_data;

typedef char ompt_tsan_clockid;

static uint64_t my_next_id() {
  static uint64_t ID = 0;
  uint64_t ret = __sync_fetch_and_add(&ID, 1);
  return ret;
}

static int pagesize{0};

// Data structure to provide a threadsafe pool of reusable objects.
// DataPool<Type of objects>
namespace {
template <typename T> struct DataPool final {
  static __thread DataPool<T> *ThreadDataPool;
  std::mutex DPMutex{};

  // store unused objects
  std::vector<T *> DataPointer{};
  std::vector<T *> RemoteDataPointer{};

  // store all allocated memory to finally release
  std::list<void *> memory;

  // count remotely returned data (RemoteDataPointer.size())
  std::atomic<int> remote{0};

  // total number of data objects allocated in the pool
  int total{0};
#ifdef DEBUG_DATA
  int remoteReturn{0};
  int localReturn{0};

  int getRemote() { return remoteReturn + remote; }
  int getLocal() { return localReturn; }
#endif
  int getTotal() { return total; }
  int getMissing() {
    return total - DataPointer.size() - RemoteDataPointer.size();
  }

  // fill the pool by allocating a page of memory
  void newDatas() {
    if (remote > 0) {
      const std::lock_guard<std::mutex> lock(DPMutex);
      // DataPointer is empty, so just swap the vectors
      DataPointer.swap(RemoteDataPointer);
      remote = 0;
      return;
    }
    // calculate the size of an object including padding to cacheline size
    size_t elemSize = sizeof(T);
    size_t paddedSize = (((elemSize - 1) / 64) + 1) * 64;
    // number of padded elements to allocate
    int ndatas = pagesize / paddedSize;
    char *datas = (char *)malloc(ndatas * paddedSize);
    memory.push_back(datas);
    for (int i = 0; i < ndatas; i++) {
      DataPointer.push_back(new (datas + i * paddedSize) T(this));
    }
    total += ndatas;
  }

  // get data from the pool
  T *getData() {
    T *ret;
    if (DataPointer.empty())
      newDatas();
    ret = DataPointer.back();
    DataPointer.pop_back();
    return ret;
  }

  // accesses to the thread-local datapool don't need locks
  void returnOwnData(T *data) {
    DataPointer.emplace_back(data);
#ifdef DEBUG_DATA
    localReturn++;
#endif
  }

  // returning to a remote datapool using a lock
  void returnData(T *data) {
    const std::lock_guard<std::mutex> lock(DPMutex);
    RemoteDataPointer.emplace_back(data);
    remote++;
#ifdef DEBUG_DATA
    remoteReturn++;
#endif
  }

  ~DataPool() {
    // we assume all memory is returned when the thread finishes / the
    // destructor is called
    if (archer_flags->report_data_leak && getMissing() != 0) {
      printf("ERROR: While freeing DataPool (%s) we are missing %i data "
             "objects.\n",
             __PRETTY_FUNCTION__, getMissing());
      exit(-3);
    }
    for (auto i : DataPointer)
      if (i)
        i->~T();
    for (auto i : RemoteDataPointer)
      if (i)
        i->~T();
    for (auto i : memory)
      if (i)
        free(i);
  }
};

template <typename T> struct DataPoolEntry {
  DataPool<T> *owner;

  static T *New() { return DataPool<T>::ThreadDataPool->getData(); }

  void Delete() {
    static_cast<T *>(this)->Reset();
    if (owner == DataPool<T>::ThreadDataPool)
      owner->returnOwnData(static_cast<T *>(this));
    else
      owner->returnData(static_cast<T *>(this));
  }

  DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
};
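
// Lifecycle sketch (illustrative): concrete entry types acquire objects from
// the current thread's pool, and Delete() routes them back to the owning
// pool, lock-free when the owner is the calling thread's own pool:
//   TaskData *td = TaskData::New(parent, type); // placement-new'ed storage
//   ...
//   td->Delete(); // Reset() + returnOwnData() or locked returnData()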

struct DependencyData;
typedef DataPool<DependencyData> DependencyDataPool;
template <>
__thread DependencyDataPool *DependencyDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for task dependencies.
struct DependencyData final : DataPoolEntry<DependencyData> {
  ompt_tsan_clockid in;
  ompt_tsan_clockid out;
  ompt_tsan_clockid inoutset;
  void *GetInPtr() { return &in; }
  void *GetOutPtr() { return &out; }
  void *GetInoutsetPtr() { return &inoutset; }

  void Reset() {}

  static DependencyData *New() { return DataPoolEntry<DependencyData>::New(); }

  DependencyData(DataPool<DependencyData> *dp)
      : DataPoolEntry<DependencyData>(dp) {}
};

struct TaskDependency {
  void *inPtr;
  void *outPtr;
  void *inoutsetPtr;
  ompt_dependence_type_t type;
  TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
      : inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
        inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
  void AnnotateBegin() {
    if (type == ompt_dependence_type_out ||
        type == ompt_dependence_type_inout ||
        type == ompt_dependence_type_mutexinoutset) {
      TsanHappensAfter(inPtr);
      TsanHappensAfter(outPtr);
      TsanHappensAfter(inoutsetPtr);
    } else if (type == ompt_dependence_type_in) {
      TsanHappensAfter(outPtr);
      TsanHappensAfter(inoutsetPtr);
    } else if (type == ompt_dependence_type_inoutset) {
      TsanHappensAfter(inPtr);
      TsanHappensAfter(outPtr);
    }
  }
  void AnnotateEnd() {
    if (type == ompt_dependence_type_out ||
        type == ompt_dependence_type_inout ||
        type == ompt_dependence_type_mutexinoutset) {
      TsanHappensBefore(outPtr);
    } else if (type == ompt_dependence_type_in) {
      TsanHappensBefore(inPtr);
    } else if (type == ompt_dependence_type_inoutset) {
      TsanHappensBefore(inoutsetPtr);
    }
  }
};
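
// Ordering sketch (illustrative): for sibling tasks
//   #pragma omp task depend(out: x)  // T1
//   #pragma omp task depend(in: x)   // T2
// T1 ends with TsanHappensBefore(outPtr) and T2 begins with
// TsanHappensAfter(outPtr) on the same DependencyData for x, so TSan
// observes the edge T1 -> T2.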

struct ParallelData;
typedef DataPool<ParallelData> ParallelDataPool;
template <>
__thread ParallelDataPool *ParallelDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for parallel regions.
struct ParallelData final : DataPoolEntry<ParallelData> {

  // Parallel fork is just another barrier, use Barrier[1]

  /// Two addresses for relationships with barriers.
  ompt_tsan_clockid Barrier[2];

  const void *codePtr;

  void *GetParallelPtr() { return &(Barrier[1]); }

  void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }

  ParallelData *Init(const void *codeptr) {
    codePtr = codeptr;
    return this;
  }

  void Reset() {}

  static ParallelData *New(const void *codeptr) {
    return DataPoolEntry<ParallelData>::New()->Init(codeptr);
  }

  ParallelData(DataPool<ParallelData> *dp) : DataPoolEntry<ParallelData>(dp) {}
};

static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
  return reinterpret_cast<ParallelData *>(parallel_data->ptr);
}

struct Taskgroup;
typedef DataPool<Taskgroup> TaskgroupPool;
template <> __thread TaskgroupPool *TaskgroupPool::ThreadDataPool = nullptr;

/// Data structure to support stacking of taskgroups and allow synchronization.
struct Taskgroup final : DataPoolEntry<Taskgroup> {
  /// Its address is used for relationships of the taskgroup's task set.
  ompt_tsan_clockid Ptr;

  /// Reference to the parent taskgroup.
  Taskgroup *Parent;

  void *GetPtr() { return &Ptr; }

  Taskgroup *Init(Taskgroup *parent) {
    Parent = parent;
    return this;
  }

  void Reset() {}

  static Taskgroup *New(Taskgroup *Parent) {
    return DataPoolEntry<Taskgroup>::New()->Init(Parent);
  }

  Taskgroup(DataPool<Taskgroup> *dp) : DataPoolEntry<Taskgroup>(dp) {}
};

enum ArcherTaskFlag { ArcherTaskFulfilled = 0x00010000 };

struct TaskData;
typedef DataPool<TaskData> TaskDataPool;
template <> __thread TaskDataPool *TaskDataPool::ThreadDataPool = nullptr;

/// Data structure to store additional information for tasks.
struct TaskData final : DataPoolEntry<TaskData> {
  /// Its address is used for relationships of this task.
  ompt_tsan_clockid Task{0};

  /// Child tasks use its address to declare a relationship to a taskwait in
  /// this task.
  ompt_tsan_clockid Taskwait{0};

  /// Child tasks use its address to model omp_all_memory dependencies
  ompt_tsan_clockid AllMemory[2]{0};

  /// Index of which barrier to use next.
  char BarrierIndex{0};

  /// Whether this task is currently executing a barrier.
  bool InBarrier{false};

  /// Type flags of this task (ompt_task_* plus ArcherTaskFlag bits).
  int TaskType{0};

  /// Count of execution phases.
  int execution{0};

  /// Count how often this structure has been put into child tasks + 1.
  std::atomic_int RefCount{1};

  /// Reference to the parent that created this task.
  TaskData *Parent{nullptr};

  /// Reference to the team of this task.
  ParallelData *Team{nullptr};

  /// Reference to the current taskgroup that this task either belongs to or
  /// that it just created.
  Taskgroup *TaskGroup{nullptr};

  /// Dependency information for this task.
  TaskDependency *Dependencies{nullptr};

  /// Number of dependency entries.
  unsigned DependencyCount{0};

  // The dependency-map stores DependencyData objects representing
  // the dependency variables used on the sibling tasks created from
  // this task.
  // We expect a rare need for the dependency-map, so alloc on demand.
  std::unordered_map<void *, DependencyData *> *DependencyMap{nullptr};

#ifdef DEBUG
  int freed{0};
#endif

  bool isIncluded() { return TaskType & ompt_task_undeferred; }
  bool isUntied() { return TaskType & ompt_task_untied; }
  bool isFinal() { return TaskType & ompt_task_final; }
  bool isMergable() { return TaskType & ompt_task_mergeable; }
  bool isMerged() { return TaskType & ompt_task_merged; }

  bool isExplicit() { return TaskType & ompt_task_explicit; }
  bool isImplicit() { return TaskType & ompt_task_implicit; }
  bool isInitial() { return TaskType & ompt_task_initial; }
  bool isTarget() { return TaskType & ompt_task_target; }

  bool isFulfilled() { return TaskType & ArcherTaskFulfilled; }
  void setFulfilled() { TaskType |= ArcherTaskFulfilled; }

  void setAllMemoryDep() { AllMemory[0] = 1; }
  bool hasAllMemoryDep() { return AllMemory[0]; }

  void *GetTaskPtr() { return &Task; }

  void *GetTaskwaitPtr() { return &Taskwait; }

  void *GetLastAllMemoryPtr() { return AllMemory; }
  void *GetNextAllMemoryPtr() { return AllMemory + 1; }

  TaskData *Init(TaskData *parent, int taskType) {
    TaskType = taskType;
    Parent = parent;
    if (Parent != nullptr) {
      Team = Parent->Team;
      BarrierIndex = Parent->BarrierIndex;
      Parent->RefCount++;
      // Copy over the pointer to the taskgroup. This task may set up its own
      // stack but for now belongs to its parent's taskgroup.
      TaskGroup = Parent->TaskGroup;
    }
    return this;
  }

  TaskData *Init(ParallelData *team, int taskType) {
    TaskType = taskType;
    execution = 1;
    Team = team;
    return this;
  }

  void Reset() {
    InBarrier = false;
    TaskType = 0;
    execution = 0;
    BarrierIndex = 0;
    RefCount = 1;
    Parent = nullptr;
    Team = nullptr;
    TaskGroup = nullptr;
    if (DependencyMap) {
      for (auto i : *DependencyMap)
        i.second->Delete();
      delete DependencyMap;
    }
    DependencyMap = nullptr;
    if (Dependencies)
      free(Dependencies);
    Dependencies = nullptr;
    DependencyCount = 0;
#ifdef DEBUG
    freed = 0;
#endif
  }

  static TaskData *New(TaskData *parent, int taskType) {
    return DataPoolEntry<TaskData>::New()->Init(parent, taskType);
  }

  static TaskData *New(ParallelData *team, int taskType) {
    return DataPoolEntry<TaskData>::New()->Init(team, taskType);
  }

  TaskData(DataPool<TaskData> *dp) : DataPoolEntry<TaskData>(dp) {}
};
} // namespace

static inline TaskData *ToTaskData(ompt_data_t *task_data) {
  if (task_data)
    return reinterpret_cast<TaskData *>(task_data->ptr);
  return nullptr;
}

/// Store a mutex for each wait_id to resolve race condition with callbacks.
static std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
static std::mutex LocksMutex;

static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
                                   ompt_data_t *thread_data) {
  ParallelDataPool::ThreadDataPool = new ParallelDataPool;
  TsanNewMemory(ParallelDataPool::ThreadDataPool,
                sizeof(ParallelDataPool::ThreadDataPool));
  TaskgroupPool::ThreadDataPool = new TaskgroupPool;
  TsanNewMemory(TaskgroupPool::ThreadDataPool,
                sizeof(TaskgroupPool::ThreadDataPool));
  TaskDataPool::ThreadDataPool = new TaskDataPool;
  TsanNewMemory(TaskDataPool::ThreadDataPool,
                sizeof(TaskDataPool::ThreadDataPool));
  DependencyDataPool::ThreadDataPool = new DependencyDataPool;
  TsanNewMemory(DependencyDataPool::ThreadDataPool,
                sizeof(DependencyDataPool::ThreadDataPool));
  thread_data->value = my_next_id();
}

static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
  TsanIgnoreWritesBegin();
  delete ParallelDataPool::ThreadDataPool;
  delete TaskgroupPool::ThreadDataPool;
  delete TaskDataPool::ThreadDataPool;
  delete DependencyDataPool::ThreadDataPool;
  TsanIgnoreWritesEnd();
}

/// OMPT event callbacks for handling parallel regions.

static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
                                     const ompt_frame_t *parent_task_frame,
                                     ompt_data_t *parallel_data,
                                     uint32_t requested_team_size, int flag,
                                     const void *codeptr_ra) {
  ParallelData *Data = ParallelData::New(codeptr_ra);
  parallel_data->ptr = Data;

  TsanHappensBefore(Data->GetParallelPtr());
  if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
    TsanIgnoreWritesEnd();
}

static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
                                   ompt_data_t *task_data, int flag,
                                   const void *codeptr_ra) {
  if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
    TsanIgnoreWritesBegin();
  ParallelData *Data = ToParallelData(parallel_data);
  TsanHappensAfter(Data->GetBarrierPtr(0));
  TsanHappensAfter(Data->GetBarrierPtr(1));

  Data->Delete();

#if (LLVM_VERSION >= 40)
  if (&__archer_get_omp_status) {
    if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
      __tsan_flush_memory();
  }
#endif
}

static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    unsigned int team_size,
                                    unsigned int thread_num, int type) {
  switch (endpoint) {
  case ompt_scope_begin:
    if (type & ompt_task_initial) {
      parallel_data->ptr = ParallelData::New(nullptr);
    }
    task_data->ptr = TaskData::New(ToParallelData(parallel_data), type);
    TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
    TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
    break;
  case ompt_scope_end: {
    TaskData *Data = ToTaskData(task_data);
#ifdef DEBUG
    assert(Data->freed == 0 && "Implicit task end should only be called once!");
    Data->freed = 1;
#endif
    assert(Data->RefCount == 1 &&
           "All tasks should have finished at the implicit barrier!");
    if (type & ompt_task_initial) {
      Data->Team->Delete();
    }
    Data->Delete();
    TsanFuncExit();
    break;
  }
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1
    // Tested in OMPT tests
    break;
  }
}

static void ompt_tsan_sync_region(ompt_sync_region_t kind,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data,
                                  const void *codeptr_ra) {
  TaskData *Data = ToTaskData(task_data);
  switch (endpoint) {
  case ompt_scope_begin:
  case ompt_scope_beginend:
    TsanFuncEntry(codeptr_ra);
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      char BarrierIndex = Data->BarrierIndex;
      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));

      if (hasReductionCallback < ompt_set_always) {
        // We ignore writes inside the barrier. These would either occur during
        // 1. reductions performed by the runtime, which are guaranteed to be
        //    race-free.
        // 2. execution of another task.
        // For the latter case we will re-enable tracking in task_switch.
        Data->InBarrier = true;
        TsanIgnoreWritesBegin();
      }

      break;
    }

    case ompt_sync_region_taskwait:
      break;

    case ompt_sync_region_taskgroup:
      Data->TaskGroup = Taskgroup::New(Data->TaskGroup);
      break;

    case ompt_sync_region_reduction:
      // should never be reached
      break;
    }
    if (endpoint == ompt_scope_begin)
      break;
    KMP_FALLTHROUGH();
  case ompt_scope_end:
    TsanFuncExit();
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      if (hasReductionCallback < ompt_set_always) {
        // We want to track writes after the barrier again.
        Data->InBarrier = false;
        TsanIgnoreWritesEnd();
      }

      char BarrierIndex = Data->BarrierIndex;
      // The barrier will end after it has been entered by all threads.
      if (parallel_data)
        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));

      // It is not guaranteed that all threads have exited this barrier before
      // we enter the next one. So we will use a different address.
      // We are however guaranteed that this current barrier is finished
      // by the time we exit the next one. So we can then reuse the first
      // address.
      Data->BarrierIndex = (BarrierIndex + 1) % 2;
      break;
    }

    case ompt_sync_region_taskwait: {
      if (Data->execution > 1)
        TsanHappensAfter(Data->GetTaskwaitPtr());
      break;
    }

    case ompt_sync_region_taskgroup: {
      assert(Data->TaskGroup != nullptr &&
             "Should have at least one taskgroup!");

      TsanHappensAfter(Data->TaskGroup->GetPtr());

      // Delete this allocated taskgroup; all descendant tasks are finished by
      // now.
      Taskgroup *Parent = Data->TaskGroup->Parent;
      Data->TaskGroup->Delete();
      Data->TaskGroup = Parent;
      break;
    }

    case ompt_sync_region_reduction:
      // Should not occur according to OpenMP 5.1
      // Tested in OMPT tests
      break;
    }
    break;
  }
}
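
// Alternation sketch (illustrative): with two clock addresses, consecutive
// barriers alternate,
//   barrier n   -> Barrier[0]
//   barrier n+1 -> Barrier[1]
//   barrier n+2 -> Barrier[0]  // barrier n is guaranteed finished, reuse safe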

static void ompt_tsan_reduction(ompt_sync_region_t kind,
                                ompt_scope_endpoint_t endpoint,
                                ompt_data_t *parallel_data,
                                ompt_data_t *task_data,
                                const void *codeptr_ra) {
  switch (endpoint) {
  case ompt_scope_begin:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesBegin();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_end:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesEnd();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1
    // Tested in OMPT tests
    // Would have no implications for DR detection
    break;
  }
}

/// OMPT event callbacks for handling tasks.

static void ompt_tsan_task_create(
    ompt_data_t *parent_task_data,    /* id of parent task            */
    const ompt_frame_t *parent_frame, /* frame data for parent task   */
    ompt_data_t *new_task_data,       /* id of created task           */
    int type, int has_dependences,
    const void *codeptr_ra) /* pointer to outlined function */
{
  TaskData *Data;
  assert(new_task_data->ptr == NULL &&
         "Task data should be initialized to NULL");
  if (type & ompt_task_initial) {
    ompt_data_t *parallel_data;
    int team_size = 1;
    ompt_get_parallel_info(0, &parallel_data, &team_size);
    ParallelData *PData = ParallelData::New(nullptr);
    parallel_data->ptr = PData;

    Data = TaskData::New(PData, type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_undeferred) {
    Data = TaskData::New(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_explicit || type & ompt_task_target) {
    Data = TaskData::New(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;

    // Use the newly created address. We cannot use a single address from the
    // parent because that would declare wrong relationships with other
    // sibling tasks that may be created before this task is started!
    TsanHappensBefore(Data->GetTaskPtr());
    ToTaskData(parent_task_data)->execution++;
  }
}

static void freeTask(TaskData *task) {
  while (task != nullptr && --task->RefCount == 0) {
    TaskData *Parent = task->Parent;
    task->Delete();
    task = Parent;
  }
}

// LastAllMemoryPtr marks the beginning of an all_memory epoch.
// NextAllMemoryPtr marks the end of an all_memory epoch.
// All tasks with dependences begin execution after LastAllMemoryPtr
// and end before NextAllMemoryPtr.
static void releaseDependencies(TaskData *task) {
  if (archer_flags->all_memory) {
    if (task->hasAllMemoryDep()) {
      TsanHappensBefore(task->Parent->GetLastAllMemoryPtr());
      TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
    } else if (task->DependencyCount)
      TsanHappensBefore(task->Parent->GetNextAllMemoryPtr());
  }
  for (unsigned i = 0; i < task->DependencyCount; i++) {
    task->Dependencies[i].AnnotateEnd();
  }
}

static void acquireDependencies(TaskData *task) {
  if (archer_flags->all_memory) {
    if (task->hasAllMemoryDep())
      TsanHappensAfter(task->Parent->GetNextAllMemoryPtr());
    else if (task->DependencyCount)
      TsanHappensAfter(task->Parent->GetLastAllMemoryPtr());
  }
  for (unsigned i = 0; i < task->DependencyCount; i++) {
    task->Dependencies[i].AnnotateBegin();
  }
}
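
// Epoch sketch (illustrative): relative to the parent's AllMemory clocks,
//   task with regular dependences: begins HappensAfter(Last...),
//                                  ends   HappensBefore(Next...)
//   task with omp_all_memory dep:  begins HappensAfter(Next...),
//                                  ends   HappensBefore(Last...) and (Next...)
// so sibling tasks with regular dependences are ordered between surrounding
// omp_all_memory tasks.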

static void completeTask(TaskData *FromTask) {
  if (!FromTask)
    return;
  // Task-end happens after a possible omp_fulfill_event call
  if (FromTask->isFulfilled())
    TsanHappensAfter(FromTask->GetTaskPtr());
  // Included tasks are executed sequentially, no need to track
  // synchronization
  if (!FromTask->isIncluded()) {
    // Task will finish before a barrier in the surrounding parallel region
    // ...
    ParallelData *PData = FromTask->Team;
    TsanHappensBefore(PData->GetBarrierPtr(FromTask->BarrierIndex));

    // ... and before an eventual taskwait by the parent thread.
    TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());

    if (FromTask->TaskGroup != nullptr) {
      // This task is part of a taskgroup, so it will finish before the
      // corresponding taskgroup_end.
      TsanHappensBefore(FromTask->TaskGroup->GetPtr());
    }
  }
  // release dependencies
  releaseDependencies(FromTask);
}

static void suspendTask(TaskData *FromTask) {
  if (!FromTask)
    return;
  // Task may be resumed at a later point in time.
  TsanHappensBefore(FromTask->GetTaskPtr());
}

static void switchTasks(TaskData *FromTask, TaskData *ToTask) {
  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always) {
    if (FromTask && FromTask->InBarrier) {
      // We want to ignore writes in the runtime code during barriers,
      // but not when executing tasks with user code!
      TsanIgnoreWritesEnd();
    }
    if (ToTask && ToTask->InBarrier) {
      // We want to ignore writes in the runtime code during barriers,
      // but not when executing tasks with user code!
      TsanIgnoreWritesBegin();
    }
  }
  //// Not yet used
  //  if (FromTask)
  //    FromTask->deactivate();
  //  if (ToTask)
  //    ToTask->activate();
}

static void endTask(TaskData *FromTask) {
  if (!FromTask)
    return;
}

static void startTask(TaskData *ToTask) {
  if (!ToTask)
    return;
  // Handle dependencies on first execution of the task
  if (ToTask->execution == 0) {
    ToTask->execution++;
    acquireDependencies(ToTask);
  }
  // 1. Task will begin execution after it has been created.
  // 2. Task will resume after it has been switched away.
  TsanHappensAfter(ToTask->GetTaskPtr());
}

static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
                                    ompt_task_status_t prior_task_status,
                                    ompt_data_t *second_task_data) {

  //
  //  The necessary action depends on prior_task_status:
  //
  //    ompt_task_early_fulfill = 5,
  //     -> ignored
  //
  //    ompt_task_late_fulfill  = 6,
  //     -> first completed, first freed, second ignored
  //
  //    ompt_task_complete      = 1,
  //    ompt_task_cancel        = 3,
  //     -> first completed, first freed, second starts
  //
  //    ompt_taskwait_complete  = 8,
  //     -> first starts, first completes, first freed, second ignored
  //
  //    ompt_task_detach        = 4,
  //    ompt_task_yield         = 2,
  //    ompt_task_switch        = 7
  //     -> first suspended, second starts
  //

  TaskData *FromTask = ToTaskData(first_task_data);
  TaskData *ToTask = ToTaskData(second_task_data);

  switch (prior_task_status) {
  case ompt_task_early_fulfill:
    TsanHappensBefore(FromTask->GetTaskPtr());
    FromTask->setFulfilled();
    return;
  case ompt_task_late_fulfill:
    TsanHappensAfter(FromTask->GetTaskPtr());
    completeTask(FromTask);
    freeTask(FromTask);
    return;
  case ompt_taskwait_complete:
    acquireDependencies(FromTask);
    freeTask(FromTask);
    return;
  case ompt_task_complete:
    completeTask(FromTask);
    endTask(FromTask);
    switchTasks(FromTask, ToTask);
    freeTask(FromTask);
    return;
  case ompt_task_cancel:
    completeTask(FromTask);
    endTask(FromTask);
    switchTasks(FromTask, ToTask);
    freeTask(FromTask);
    startTask(ToTask);
    return;
  case ompt_task_detach:
    endTask(FromTask);
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  case ompt_task_yield:
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  case ompt_task_switch:
    suspendTask(FromTask);
    switchTasks(FromTask, ToTask);
    startTask(ToTask);
    return;
  }
}

static void ompt_tsan_dependences(ompt_data_t *task_data,
                                  const ompt_dependence_t *deps, int ndeps) {
  if (ndeps > 0) {
    // Copy the data to use it in task_switch and task_end.
    TaskData *Data = ToTaskData(task_data);
    if (!Data->Parent) {
      // Return since doacross dependences are not supported yet.
      return;
    }
    if (!Data->Parent->DependencyMap)
      Data->Parent->DependencyMap =
          new std::unordered_map<void *, DependencyData *>();
    Data->Dependencies =
        (TaskDependency *)malloc(sizeof(TaskDependency) * ndeps);
    Data->DependencyCount = ndeps;
    for (int i = 0, d = 0; i < ndeps; i++, d++) {
      if (deps[i].dependence_type == ompt_dependence_type_out_all_memory ||
          deps[i].dependence_type == ompt_dependence_type_inout_all_memory) {
        Data->setAllMemoryDep();
        Data->DependencyCount--;
        if (!archer_flags->all_memory) {
          printf("The application uses omp_all_memory, but Archer was\n"
                 "started without considering omp_all_memory. This can lead\n"
                 "to false data race alerts.\n"
                 "Include all_memory=1 in ARCHER_OPTIONS to consider\n"
                 "omp_all_memory from the beginning.\n");
          archer_flags->all_memory = 1;
        }
        d--;
        continue;
      }
      auto ret = Data->Parent->DependencyMap->insert(
          std::make_pair(deps[i].variable.ptr, nullptr));
      if (ret.second) {
        ret.first->second = DependencyData::New();
      }
      new ((void *)(Data->Dependencies + d))
          TaskDependency(ret.first->second, deps[i].dependence_type);
    }

    // This callback is executed before this task is first started.
    TsanHappensBefore(Data->GetTaskPtr());
  }
}

/// OMPT event callbacks for handling locking.
static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {

  // Acquire our own lock to make sure that
  // 1. the previous release has finished.
  // 2. the next acquire doesn't start before we have finished our release.
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();

  Lock.lock();
  TsanHappensAfter(&Lock);
}

static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();
  TsanHappensBefore(&Lock);

  Lock.unlock();
}
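
// Ordering sketch (illustrative): for an omp_unset_lock followed by the next
// omp_set_lock on the same lock, the released callback annotates
// TsanHappensBefore(&Lock) and the subsequent acquired callback annotates
// TsanHappensAfter(&Lock), ordering the two critical sections for TSan.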

// callback, signature, variable to store result, required support level
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                    \
  do {                                                                         \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;                \
    result = ompt_set_callback(ompt_callback_##event,                          \
                               (ompt_callback_t)tsan_##event);                 \
    if (result < level)                                                        \
      printf("Registered callback '" #event "' is not supported at " #level    \
             " (%i)\n",                                                        \
             result);                                                          \
  } while (0)

#define SET_CALLBACK_T(event, type)                                            \
  do {                                                                         \
    int res;                                                                   \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);                \
  } while (0)

#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)

#define findTsanFunction(f, fSig)                                              \
  do {                                                                         \
    void *fp = dlsym(RTLD_DEFAULT, #f);                                        \
    if (fp)                                                                    \
      f = fSig fp;                                                             \
    else                                                                       \
      printf("Unable to find TSan function " #f ".\n");                        \
  } while (0)

#define findTsanFunctionSilent(f, fSig) f = fSig dlsym(RTLD_DEFAULT, #f)
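
// Expansion sketch (illustrative): findTsanFunction(__tsan_func_exit,
// (void (*)(void))) looks the symbol up via dlsym(RTLD_DEFAULT, ...); if it
// is missing, the pointer keeps its no-op __ompt_tsan_func default and a
// message is printed. SET_CALLBACK(task_create) registers
// ompt_tsan_task_create for ompt_callback_task_create and warns when runtime
// support is below ompt_set_always.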

static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
                                ompt_data_t *tool_data) {
  const char *options = getenv("TSAN_OPTIONS");
  TsanFlags tsan_flags(options);

  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  if (ompt_set_callback == NULL) {
    std::cerr << "Could not set callback, exiting..." << std::endl;
    std::exit(1);
  }
  ompt_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
  ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");

  if (ompt_get_parallel_info == NULL) {
    fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
                    "exiting...\n");
    exit(1);
  }

  findTsanFunction(AnnotateHappensAfter,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateHappensBefore,
                   (void (*)(const char *, int, const volatile void *)));
  findTsanFunction(AnnotateIgnoreWritesBegin, (void (*)(const char *, int)));
  findTsanFunction(AnnotateIgnoreWritesEnd, (void (*)(const char *, int)));
  findTsanFunction(
      AnnotateNewMemory,
      (void (*)(const char *, int, const volatile void *, size_t)));
  findTsanFunction(__tsan_func_entry, (void (*)(const void *)));
  findTsanFunction(__tsan_func_exit, (void (*)(void)));

  SET_CALLBACK(thread_begin);
  SET_CALLBACK(thread_end);
  SET_CALLBACK(parallel_begin);
  SET_CALLBACK(implicit_task);
  SET_CALLBACK(sync_region);
  SET_CALLBACK(parallel_end);

  SET_CALLBACK(task_create);
  SET_CALLBACK(task_schedule);
  SET_CALLBACK(dependences);

  SET_CALLBACK_T(mutex_acquired, mutex);
  SET_CALLBACK_T(mutex_released, mutex);
  SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
                          ompt_set_never);

  if (!tsan_flags.ignore_noninstrumented_modules)
    fprintf(stderr,
            "Warning: please export "
            "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
            "to avoid false positive reports from the OpenMP runtime!\n");
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesBegin();

  return 1; // success
}

static void ompt_tsan_finalize(ompt_data_t *tool_data) {
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesEnd();
  if (archer_flags->print_max_rss) {
    struct rusage end;
    getrusage(RUSAGE_SELF, &end);
    printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
  }

  if (archer_flags)
    delete archer_flags;
}

extern "C" ompt_start_tool_result_t *
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
  const char *options = getenv("ARCHER_OPTIONS");
  archer_flags = new ArcherFlags(options);
  if (!archer_flags->enabled) {
    if (archer_flags->verbose)
      std::cout << "Archer disabled, stopping operation" << std::endl;
    delete archer_flags;
    return NULL;
  }

  pagesize = getpagesize();

  static ompt_start_tool_result_t ompt_start_tool_result = {
      &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};

  // The OMPT start-up code uses dlopen with RTLD_LAZY. Therefore, we cannot
  // rely on dlopen to fail if TSan is missing, but would get a runtime error
  // for the first TSan call. We use RunningOnValgrind to detect whether
  // an implementation of the Annotation interface is available in the
  // execution or disable the tool (by returning NULL).

  findTsanFunctionSilent(RunningOnValgrind, (int (*)(void)));
  if (!RunningOnValgrind) // if we are not running on TSan, give a different
                          // tool the chance to be loaded
  {
    if (archer_flags->verbose)
      std::cout << "Archer detected OpenMP application without TSan; "
                   "stopping operation"
                << std::endl;
    delete archer_flags;
    return NULL;
  }

  if (archer_flags->verbose)
    std::cout << "Archer detected OpenMP application with TSan, supplying "
                 "OpenMP synchronization semantics"
              << std::endl;
  return &ompt_start_tool_result;
}
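
// Usage sketch (illustrative; assumes clang with TSan and an OMPT-enabled
// OpenMP runtime that loads this tool):
//   clang -fsanitize=thread -fopenmp app.c -o app
//   ARCHER_OPTIONS="verbose=1" \
//   TSAN_OPTIONS="ignore_noninstrumented_modules=1" ./app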