//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

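// Attempts to append a compact EventAccess record for a plain 1/2/4/8-byte
// access to the thread's trace. Returns false if the current trace part has
// no space left; callers then switch to a new trace part and retry.
// Falls back to the larger EventAccessExt record when the PC delta does not
// fit into EventAccess::kPCBits.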
ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

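// Same as TryTraceMemoryAccess, but records an access range of arbitrary size
// as a single EventAccessRange record.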
ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

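// Records a mutex lock (kLock) or reader lock (kRLock) event together with
// the acquisition stack id, which is stored split into the lo/hi parts of the
// EventLock record.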
void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

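// Records the current slot id and epoch from thr->fast_state so that
// subsequent trace events can be attributed to the right synchronization
// state when the trace is replayed for a report.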
void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

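// Race reporting slow path: called once CheckRaces has found a conflicting
// shadow value. Overwrites the shadow cell so that we do not keep trapping
// on the same address, then hands off to ReportRace.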
NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one instead.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessSlotLocked)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessSlotLocked)
    SlotLock(thr);
}

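// Two implementations of the shadow check follow: a scalar fallback and a
// vectorized one (under TSAN_VECTORIZE) that processes all kShadowCnt shadow
// slots of a cell with 128-bit SSE operations.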
#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

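// Scans the shadow slots of one cell for a conflicting access. Returns true
// if a race was found and reported, false otherwise; as a side effect the
// current access is normally stored into one of the slots (an empty one, a
// same-sid one, or a randomly evicted one).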
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and had already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

#  define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

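// Vectorized variant of ContainsSameAccess: compares the current access
// against all shadow slots with a single 128-bit comparison.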
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8-byte writes)
  // and now do smaller reads/writes; these could also be considered the "same
  // access". However, it would make the check more expensive, so it's unclear
  // if it's worth it. But it would conserve trace space, so it's useful
  // beyond the potential speedup.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles
  // since we already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in shadow normally
  // (thread epochs start from 1). So the same read bit mask
  // serves as a rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

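// Vectorized helper: extracts the racing shadow element indicated by
// race_mask from the shadow vector and forwards it to DoReportRace.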
NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (which contains kFreeSid)
  // triggers the race, but the second element contains info about the freeing
  // thread, so take that one instead.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

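// Vectorized CheckRaces: evaluates the access-intersection, same-sid and
// both-read-or-atomic predicates for all shadow slots in parallel, then
// either stores the current access (STORE) or consults the thread's clock
// for the conflicting slots (SHARED) and reports a race if one is concurrent.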
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace slots with different sids if the access is the
  // same, RW-weaker, and happens-before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
#  define LOAD_EPOCH(idx)                                                     \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                               \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                         \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));    \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
#  undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

#  define LOAD_CURRENT_SHADOW(cur, shadow_mem)                         \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

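// Formats a raw shadow value for the debug output (DPrintf2) in MemoryAccess.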
char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// TryTrace* and TraceRestart* functions make it possible to turn memory access
// and func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks make only 2 slow-path calls:
// race reporting and trace part switching. Race reporting is easy to turn into
// a tail call: we just always return from the runtime after reporting a race.
// But trace part switching is harder because it needs to happen in the middle
// of callbacks. To turn it into a tail call we immediately return after the
// TraceRestart* functions, and the TraceRestart* functions themselves recurse
// into the callback after switching the trace part. As a result the hottest
// callbacks contain only tail calls, which effectively makes them leaf
// functions (can use all registers, no frame setup, etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

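// 16-byte accesses are processed as two adjacent 8-byte shadow cells and
// traced as a single range event.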
ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

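// Accesses of up to 8 bytes that may straddle a shadow cell boundary are
// split into at most two parts, one per cell, and traced as a range event.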
ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

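// Fills the shadow for a range of cells: the first slot of each cell is set
// to v and the remaining slots are cleared to Shadow::kEmpty.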
void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LT(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end - 1));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only the beginning and the end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2, up to a page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  // begin must be < mid1.
  ShadowSet(begin, mid1, val);
  // Reset the middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  if (mid2 < end)
    ShadowSet(mid2, end, val);
  else
    DCHECK_EQ(mid2, end);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. This is not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range) and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
                         kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

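// Range access instrumentation: processes an optional unaligned head, a run
// of full shadow cells and an optional tail, checking each cell with
// MemoryAccessRangeOne.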
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);
  DCHECK_NE(size, 0);
#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem start: %p\n", (void*)addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
    DCHECK(IsShadowMem(shadow_mem));
  }

  uptr rounded_size =
      (RoundUpTo(addr + size, kShadowCell) - RoundDownTo(addr, kShadowCell));
  RawShadow* shadow_mem_end =
      shadow_mem + rounded_size / kShadowCell * kShadowCnt;
  if (!IsShadowMem(shadow_mem_end - 1)) {
    Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end - 1,
           (void*)(addr + size - 1));
    Printf(
        "Shadow start addr (ok): %p (%p); size: 0x%zx; rounded_size: 0x%zx; "
        "kShadowMultiplier: %zx\n",
        shadow_mem, (void*)addr, size, rounded_size, kShadowMultiplier);
    DCHECK(IsShadowMem(shadow_mem_end - 1));
  }
#endif

  // Access to the .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check once here so we don't check every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#  include "tsan_interface.inc"
#endif