//===-- tsan_vector_clock.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
12*810390e3Srobert #include "tsan_vector_clock.h"
13*810390e3Srobert
14*810390e3Srobert #include "sanitizer_common/sanitizer_placement_new.h"
15*810390e3Srobert #include "tsan_mman.h"
16*810390e3Srobert
17*810390e3Srobert namespace __tsan {
18*810390e3Srobert
#if TSAN_VECTORIZE
// The vectorized loops in this file walk the clock storage in m128-sized
// steps. The division below truncates, so make sure the storage is an exact
// multiple of sizeof(m128); otherwise the trailing slots would be silently
// skipped by every vectorized operation.
static_assert(kThreadSlotCount * sizeof(Epoch) % sizeof(m128) == 0,
              "vector clock storage must be a whole number of m128 lanes");
// Number of m128 lanes needed to cover kThreadSlotCount Epoch slots.
const uptr kVectorClockSize = kThreadSlotCount * sizeof(Epoch) / sizeof(m128);
#endif
22*810390e3Srobert
VectorClock()23*810390e3Srobert VectorClock::VectorClock() { Reset(); }
24*810390e3Srobert
Reset()25*810390e3Srobert void VectorClock::Reset() {
26*810390e3Srobert #if !TSAN_VECTORIZE
27*810390e3Srobert for (uptr i = 0; i < kThreadSlotCount; i++)
28*810390e3Srobert clk_[i] = kEpochZero;
29*810390e3Srobert #else
30*810390e3Srobert m128 z = _mm_setzero_si128();
31*810390e3Srobert m128* vclk = reinterpret_cast<m128*>(clk_);
32*810390e3Srobert for (uptr i = 0; i < kVectorClockSize; i++) _mm_store_si128(&vclk[i], z);
33*810390e3Srobert #endif
34*810390e3Srobert }
35*810390e3Srobert
Acquire(const VectorClock * src)36*810390e3Srobert void VectorClock::Acquire(const VectorClock* src) {
37*810390e3Srobert if (!src)
38*810390e3Srobert return;
39*810390e3Srobert #if !TSAN_VECTORIZE
40*810390e3Srobert for (uptr i = 0; i < kThreadSlotCount; i++)
41*810390e3Srobert clk_[i] = max(clk_[i], src->clk_[i]);
42*810390e3Srobert #else
43*810390e3Srobert m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
44*810390e3Srobert m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(src->clk_);
45*810390e3Srobert for (uptr i = 0; i < kVectorClockSize; i++) {
46*810390e3Srobert m128 s = _mm_load_si128(&vsrc[i]);
47*810390e3Srobert m128 d = _mm_load_si128(&vdst[i]);
48*810390e3Srobert m128 m = _mm_max_epu16(s, d);
49*810390e3Srobert _mm_store_si128(&vdst[i], m);
50*810390e3Srobert }
51*810390e3Srobert #endif
52*810390e3Srobert }
53*810390e3Srobert
AllocClock(VectorClock ** dstp)54*810390e3Srobert static VectorClock* AllocClock(VectorClock** dstp) {
55*810390e3Srobert if (UNLIKELY(!*dstp))
56*810390e3Srobert *dstp = New<VectorClock>();
57*810390e3Srobert return *dstp;
58*810390e3Srobert }
59*810390e3Srobert
Release(VectorClock ** dstp) const60*810390e3Srobert void VectorClock::Release(VectorClock** dstp) const {
61*810390e3Srobert VectorClock* dst = AllocClock(dstp);
62*810390e3Srobert dst->Acquire(this);
63*810390e3Srobert }
64*810390e3Srobert
ReleaseStore(VectorClock ** dstp) const65*810390e3Srobert void VectorClock::ReleaseStore(VectorClock** dstp) const {
66*810390e3Srobert VectorClock* dst = AllocClock(dstp);
67*810390e3Srobert *dst = *this;
68*810390e3Srobert }
69*810390e3Srobert
// Copies every slot of `other` into this clock and returns *this.
VectorClock& VectorClock::operator=(const VectorClock& other) {
#if TSAN_VECTORIZE
  // Copy the storage one m128 lane at a time.
  const m128* __restrict from = reinterpret_cast<const m128*>(other.clk_);
  m128* __restrict into = reinterpret_cast<m128*>(clk_);
  for (uptr lane = 0; lane < kVectorClockSize; ++lane) {
    const m128 value = _mm_load_si128(from + lane);
    _mm_store_si128(into + lane, value);
  }
#else
  for (uptr slot = 0; slot < kThreadSlotCount; ++slot) {
    clk_[slot] = other.clk_[slot];
  }
#endif
  return *this;
}
84*810390e3Srobert
ReleaseStoreAcquire(VectorClock ** dstp)85*810390e3Srobert void VectorClock::ReleaseStoreAcquire(VectorClock** dstp) {
86*810390e3Srobert VectorClock* dst = AllocClock(dstp);
87*810390e3Srobert #if !TSAN_VECTORIZE
88*810390e3Srobert for (uptr i = 0; i < kThreadSlotCount; i++) {
89*810390e3Srobert Epoch tmp = dst->clk_[i];
90*810390e3Srobert dst->clk_[i] = clk_[i];
91*810390e3Srobert clk_[i] = max(clk_[i], tmp);
92*810390e3Srobert }
93*810390e3Srobert #else
94*810390e3Srobert m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
95*810390e3Srobert m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
96*810390e3Srobert for (uptr i = 0; i < kVectorClockSize; i++) {
97*810390e3Srobert m128 t = _mm_load_si128(&vdst[i]);
98*810390e3Srobert m128 c = _mm_load_si128(&vclk[i]);
99*810390e3Srobert m128 m = _mm_max_epu16(c, t);
100*810390e3Srobert _mm_store_si128(&vdst[i], c);
101*810390e3Srobert _mm_store_si128(&vclk[i], m);
102*810390e3Srobert }
103*810390e3Srobert #endif
104*810390e3Srobert }
105*810390e3Srobert
ReleaseAcquire(VectorClock ** dstp)106*810390e3Srobert void VectorClock::ReleaseAcquire(VectorClock** dstp) {
107*810390e3Srobert VectorClock* dst = AllocClock(dstp);
108*810390e3Srobert #if !TSAN_VECTORIZE
109*810390e3Srobert for (uptr i = 0; i < kThreadSlotCount; i++) {
110*810390e3Srobert dst->clk_[i] = max(dst->clk_[i], clk_[i]);
111*810390e3Srobert clk_[i] = dst->clk_[i];
112*810390e3Srobert }
113*810390e3Srobert #else
114*810390e3Srobert m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
115*810390e3Srobert m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
116*810390e3Srobert for (uptr i = 0; i < kVectorClockSize; i++) {
117*810390e3Srobert m128 c = _mm_load_si128(&vclk[i]);
118*810390e3Srobert m128 d = _mm_load_si128(&vdst[i]);
119*810390e3Srobert m128 m = _mm_max_epu16(c, d);
120*810390e3Srobert _mm_store_si128(&vdst[i], m);
121*810390e3Srobert _mm_store_si128(&vclk[i], m);
122*810390e3Srobert }
123*810390e3Srobert #endif
124*810390e3Srobert }
125*810390e3Srobert
126*810390e3Srobert } // namespace __tsan
127