//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}
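
// Worked example (illustrative; assumes EventAccess::kPCBits == 14, which may
// differ between versions): with trace_prev_pc == 0x1000 and pc == 0x1010,
//   pc_delta = 0x1010 - 0x1000 + (1 << 13) = 0x2010 < (1 << 14),
// so the compact EventAccess form is emitted. A pc further than roughly +/-8K
// from the previous traced pc overflows the biased delta and falls back to
// the wider EventAccessExt form, which stores the absolute pc.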

ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}
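
// StackID packing note (inferred from the fields above, not extra logic):
// EventLock has no single field wide enough for a full StackID, so the value
// is split into the low bits in stack_lo and the remaining high bits in
// stack_hi; a decoder can reassemble it as
//   stk == stack_lo | (stack_hi << EventLock::kStackIDLoBits).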

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}
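
// Illustrative note (a sketch of the matching rule, not extra runtime logic):
// for a read, OR-ing Shadow::kRodata into the stored shadow sets the read bit
// before comparison, so a read matches a previous read *or* write by the same
// thread with the same access mask and epoch. For example, if slot i holds a
// write with sid=S, epoch=E, access=0x0f, a subsequent read with the same
// sid/epoch/access compares equal after masking and is treated as redundant.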

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}
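
// Eviction example (illustrative): trace_pos advances by sizeof(Event) per
// traced event, so trace_pos / sizeof(Event) % kShadowCnt cycles through the
// slots 0..kShadowCnt-1 as the thread executes. E.g. with kShadowCnt == 4 and
// trace_pos / sizeof(Event) == 1029, slot 1029 % 4 == 1 is evicted. This is a
// cheap pseudo-random choice that needs no extra RNG state.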

#  define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8-byte writes)
  // and now do smaller reads/writes; these could also be considered the
  // "same access". However, it would make the check more expensive, so it's
  // unclear if it's worth it. But it would conserve trace space, so it's
  // useful beyond the potential speedup.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to force the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles,
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // also serves as the rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for a free memory access,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}
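
// Note on the return value (illustrative): _mm_movemask_epi8 gathers the top
// bit of each of the 16 bytes, so each 32-bit shadow lane contributes 4 mask
// bits. A nonzero result means at least one of the 4 slots compared equal,
// e.g. a match in slot 2 alone yields mask 0x0f00.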

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: the _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}
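
// Index arithmetic check (illustrative): race_mask is a byte-granular
// movemask, so a racing slot k sets bits [4k, 4k+3]. __builtin_ffs returns a
// 1-based bit position: for slot 0 it returns 1 and 1/4 == 0; for slot 3 it
// returns 13 and 13/4 == 3. Hence ffs(race_mask) / 4 recovers the lane index.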

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace a slot with a different sid if the access is the
  // same, the rw bits are weaker or equal, and it happens-before. However,
  // just checking the access below is not enough, because we would also need
  // to check that !both_read_or_atomic (reads from different sids can be
  // concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
#  define LOAD_EPOCH(idx)                                                     \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                               \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                         \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));    \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
#  undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}
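
// Shadow word layout, as implied by the masks above (a reading aid for the
// SIMD code, not a definition; tsan_shadow.h is authoritative):
//   bits  0-7   access mask (one bit per byte of the 8-byte shadow cell)
//   bits  8-15  sid (thread slot id)
//   bits 16-29  epoch
//   bits 30-31  is-read / is-atomic
// A race thus requires: overlapping access bytes, different sids, not both
// reads or both atomics, and an old epoch newer than what the current thread
// has acquired from that sid.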

#  define LOAD_CURRENT_SHADOW(cur, shadow_mem)                         \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// The TryTrace* and TraceRestart* functions make it possible to turn the
// memory access and function entry/exit callbacks into leaf functions, with
// all the associated performance benefits. These hottest callbacks make only
// two slow-path calls: race reporting and trace part switching. Race
// reporting is easy to turn into a tail call: we simply always return from
// the runtime after reporting a race. Trace part switching is harder because
// it happens in the middle of the callbacks. To turn it into a tail call, we
// return immediately after calling a TraceRestart* function, and the
// TraceRestart* function itself recurses into the callback after switching
// the trace part. As a result the hottest callbacks contain only tail calls,
// which effectively makes them leaf functions (they can use all registers,
// need no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
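
// Fast-path order above (an observation, not extra logic): the same-access
// check runs first because repeated accesses to the same location by the same
// thread are the common case and need neither tracing nor race checking; the
// ignore bit is consulted only after that filter misses, and tracing happens
// before CheckRaces so that a reported race always has a trace entry for the
// current access.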

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
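
// Split arithmetic example (illustrative; assumes kShadowCell == 8): for an
// 8-byte access at addr == 0x1006,
//   size1 = Min(8, RoundUp(0x1007, 8) - 0x1006) = Min(8, 2) = 2,
// so the first shadow cell covers the 2 bytes at [0x1006, 0x1008) and the
// second cell covers the remaining size2 == 6 bytes at [0x1008, 0x100e),
// starting at offset 0 within that cell.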

void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}
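
// Pattern written per cell (illustrative): both branches store v into slot 0
// and Shadow::kEmpty into slots 1..kShadowCnt-1, so each 4-slot shadow cell
// becomes {v, 0, 0, 0}. _mm_setr_epi32 takes its arguments in memory order
// (lowest lane first), which is why v is the first argument.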

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // We don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big: reset only the beginning and the end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2, up to a page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset the middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}
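
// Worked example (illustrative; assumes 4K pages and a
// clear_shadow_mmap_threshold below the range size): clearing a 10MB range
// writes only [begin, mid1) and [mid2, end) with ShadowSet, where mid1 is the
// first page boundary at least kPageSize/2 past begin and mid2 is the last
// page boundary before end; the pages in between are remapped with
// MmapFixedSuperNoReserve, which yields zeroed (Shadow::kEmpty) pages without
// dirtying them.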

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result, a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. This is not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range), and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}
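
// Freed-marker layout note (cross-reference): each freed cell is written as
// {FreedMarker, FreedInfo(sid, epoch), kEmpty, kEmpty}. FreedMarker carries
// kFreeSid, which is what DoReportRace keys on when it swaps in the second
// slot to report the freeing thread; see the kFreeSid check there.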

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem start: %p\n", (void*)addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
    DCHECK(IsShadowMem(shadow_mem));
  }

  RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
      reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
  if (!IsShadowMem(shadow_mem_end)) {
    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end,
           (void*)(addr + size - 1));
    Printf(
        "Shadow start addr (ok): %p (%p); size: 0x%zx; kShadowMultiplier: "
        "%zx\n",
        shadow_mem, (void*)addr, size, kShadowMultiplier);
    DCHECK(IsShadowMem(shadow_mem_end));
  }
#endif

  // Access to the .rodata section: no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check once here to avoid checking every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle the unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle the middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle the ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#  include "tsan_interface.inc"
#endif