
Commit e7698f4

Implement weak memory emulation
1 parent 16315b1 commit e7698f4

File tree

5 files changed: +476 −27 lines changed

src/data_race.rs

Lines changed: 144 additions & 26 deletions
@@ -12,7 +12,7 @@
 //! The implementation also models races with memory allocation and deallocation via treating allocation and
 //! deallocation as a type of write internally for detecting data-races.
 //!
-//! This does not explore weak memory orders and so can still miss data-races
+//! Weak memory orders are explored but not all weak behaviours are exhibited, so it can still miss data-races
 //! but should not report false-positives
 //!
 //! Data-race definition from(<https://en.cppreference.com/w/cpp/language/memory_model#Threads_and_data_races>):
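As a concrete illustration of the "weak behaviours" the new doc comment refers to, consider the classic store-buffering litmus test. This is a stand-alone sketch, not part of the commit: with store-buffer emulation, repeated runs of this program may observe both loads returning 0, an outcome that no interleaving of a sequentially consistent execution can produce.

use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
use std::thread;

static X: AtomicUsize = AtomicUsize::new(0);
static Y: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let t1 = thread::spawn(|| {
        X.store(1, Relaxed);
        Y.load(Relaxed)
    });
    let t2 = thread::spawn(|| {
        Y.store(1, Relaxed);
        X.load(Relaxed)
    });
    let (a, b) = (t1.join().unwrap(), t2.join().unwrap());
    // Under sequential consistency at least one of `a` and `b` is 1;
    // with weak memory emulation the outcome (0, 0) is also possible.
    println!("a = {}, b = {}", a, b);
}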
@@ -29,22 +29,6 @@
 //! This means that the thread-index can be safely re-used, starting on the next timestamp for the newly created
 //! thread.
 //!
-//! The sequentially consistent ordering corresponds to the ordering that the threads
-//! are currently scheduled, this means that the data-race detector has no additional
-//! logic for sequentially consistent accesses at the moment since they are indistinguishable
-//! from acquire/release operations. If weak memory orderings are explored then this
-//! may need to change or be updated accordingly.
-//!
-//! Per the C++ spec for the memory model a sequentially consistent operation:
-//! "A load operation with this memory order performs an acquire operation,
-//! a store performs a release operation, and read-modify-write performs
-//! both an acquire operation and a release operation, plus a single total
-//! order exists in which all threads observe all modifications in the same
-//! order (see Sequentially-consistent ordering below) "
-//! So in the absence of weak memory effects a seq-cst load & a seq-cst store is identical
-//! to an acquire load and a release store given the global sequentially consistent order
-//! of the schedule.
-//!
 //! The timestamps used in the data-race detector assign each sequence of non-atomic operations
 //! followed by a single atomic or concurrent operation a single timestamp.
 //! Write, Read, Write, ThreadJoin will be represented by a single timestamp value on a thread.
@@ -67,6 +51,7 @@ use std::{
     mem,
 };
 
+use rustc_const_eval::interpret::alloc_range;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 use rustc_index::vec::{Idx, IndexVec};
 use rustc_middle::{mir, ty::layout::TyAndLayout};
@@ -115,10 +100,10 @@ pub enum AtomicFenceOp {
 /// of a thread, contains the happens-before clock and
 /// additional metadata to model atomic fence operations.
 #[derive(Clone, Default, Debug)]
-struct ThreadClockSet {
+pub struct ThreadClockSet {
     /// The increasing clock representing timestamps
     /// that happen-before this thread.
-    clock: VClock,
+    pub clock: VClock,
 
     /// The set of timestamps that will happen-before this
     /// thread once it performs an acquire fence.
@@ -127,6 +112,12 @@ struct ThreadClockSet {
     /// The last timestamp of happens-before relations that
     /// have been released by this thread by a fence.
     fence_release: VClock,
+
+    pub fence_seqcst: VClock,
+
+    pub write_seqcst: VClock,
+
+    pub read_seqcst: VClock,
 }
 
 impl ThreadClockSet {
@@ -169,7 +160,7 @@ pub struct DataRace;
 /// common case where no atomic operations
 /// exists on the memory cell.
 #[derive(Clone, PartialEq, Eq, Default, Debug)]
-struct AtomicMemoryCellClocks {
+pub struct AtomicMemoryCellClocks {
     /// The clock-vector of the timestamp of the last atomic
     /// read operation performed by each thread.
     /// This detects potential data-races between atomic read
@@ -514,7 +505,32 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
         atomic: AtomicReadOp,
     ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
         let this = self.eval_context_ref();
+        // This will read from the last store in the modification order of this location. In case
+        // weak memory emulation is enabled, this may not be the store we will pick to actually read from and return.
+        // This is fine with StackedBorrow and race checks because they don't concern metadata on
+        // the *value* (including the associated provenance if this is an AtomicPtr) at this location.
+        // Only metadata on the location itself is used.
         let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?;
+
+        if let Some(global) = &this.machine.data_race {
+            let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?;
+            if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
+                if atomic == AtomicReadOp::SeqCst {
+                    global.sc_read();
+                }
+                let mut rng = this.machine.rng.borrow_mut();
+                let loaded = alloc_buffers.buffered_read(
+                    alloc_range(base_offset, place.layout.size),
+                    global,
+                    atomic == AtomicReadOp::SeqCst,
+                    &mut *rng,
+                    || this.validate_atomic_load(place, atomic),
+                )?;
+
+                return Ok(loaded.unwrap_or(scalar));
+            }
+        }
+
         this.validate_atomic_load(place, atomic)?;
         Ok(scalar)
     }
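The comment above captures the core of the read path: each atomic location keeps a buffer of past stores, and a load may be serviced by any store that is still observable to the loading thread, picked at random. Below is a deliberately simplified, self-contained sketch of that idea; the names (StoreElement, StoreBuffer) and the single-integer viability check are stand-ins, not this commit's weak_memory module, and the rand crate is used only to mirror the `&mut *rng` argument seen above.

use rand::Rng;

struct StoreElement {
    /// Value written by the store.
    val: u64,
    /// Position of the store in modification order.
    timestamp: u64,
}

struct StoreBuffer {
    buffer: Vec<StoreElement>,
}

impl StoreBuffer {
    /// Record a new store as the latest element in modification order.
    fn buffered_write(&mut self, val: u64, timestamp: u64) {
        self.buffer.push(StoreElement { val, timestamp });
    }

    /// Return the value of some store the reader may still observe; the real
    /// implementation decides viability with vector clocks rather than a
    /// single lower bound.
    fn buffered_read(&self, earliest_visible: u64, rng: &mut impl Rng) -> Option<u64> {
        let candidates: Vec<&StoreElement> =
            self.buffer.iter().filter(|s| s.timestamp >= earliest_visible).collect();
        if candidates.is_empty() {
            return None;
        }
        Some(candidates[rng.gen_range(0..candidates.len())].val)
    }
}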
@@ -528,7 +544,27 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?;
-        this.validate_atomic_store(dest, atomic)
+
+        this.validate_atomic_store(dest, atomic)?;
+        let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr)?;
+        if let (
+            crate::AllocExtra { weak_memory: Some(alloc_buffers), .. },
+            crate::Evaluator { data_race: Some(global), .. },
+        ) = this.get_alloc_extra_mut(alloc_id)?
+        {
+            if atomic == AtomicWriteOp::SeqCst {
+                global.sc_write();
+            }
+            let size = dest.layout.size;
+            alloc_buffers.buffered_write(
+                val,
+                alloc_range(base_offset, size),
+                global,
+                atomic == AtomicWriteOp::SeqCst,
+            )?;
+        }
+
+        Ok(())
     }
 
     /// Perform an atomic operation on a memory location.
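The write path mirrors the read path: the newly written value is appended to the location's store buffer, and a SeqCst write additionally records itself via sc_write. One property the buffers must preserve is ordinary release/acquire synchronization; the following user-level example (not part of the commit) shows behaviour that has to keep working: once the Acquire load observes the Release store, the consumer may no longer be handed an older buffered value of DATA.

use std::sync::atomic::{AtomicUsize, Ordering::{Acquire, Relaxed, Release}};
use std::thread;

static DATA: AtomicUsize = AtomicUsize::new(0);
static FLAG: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let producer = thread::spawn(|| {
        DATA.store(42, Relaxed);
        FLAG.store(1, Release); // releases the earlier write to DATA
    });
    let consumer = thread::spawn(|| {
        if FLAG.load(Acquire) == 1 {
            // The Release/Acquire pair establishes happens-before,
            // so this load must observe 42.
            assert_eq!(DATA.load(Relaxed), 42);
        }
    });
    producer.join().unwrap();
    consumer.join().unwrap();
}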
@@ -550,6 +586,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
         this.allow_data_races_mut(|this| this.write_immediate(*val, &(*place).into()))?;
 
         this.validate_atomic_rmw(place, atomic)?;
+
+        this.buffered_atomic_rmw(val.to_scalar_or_uninit(), place, atomic)?;
         Ok(old)
     }
 
@@ -565,7 +603,10 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
 
         let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?;
         this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?;
+
         this.validate_atomic_rmw(place, atomic)?;
+
+        this.buffered_atomic_rmw(new, place, atomic)?;
         Ok(old)
     }
 
@@ -584,15 +625,25 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
         let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?;
 
         let new_val = if min {
-            if lt { &old } else { &rhs }
+            if lt {
+                &old
+            } else {
+                &rhs
+            }
         } else {
-            if lt { &rhs } else { &old }
+            if lt {
+                &rhs
+            } else {
+                &old
+            }
         };
 
         this.allow_data_races_mut(|this| this.write_immediate(**new_val, &(*place).into()))?;
 
         this.validate_atomic_rmw(place, atomic)?;
 
+        this.buffered_atomic_rmw(new_val.to_scalar_or_uninit(), place, atomic)?;
+
         // Return the old value.
         Ok(old)
     }
@@ -642,14 +693,56 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
         if cmpxchg_success {
             this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?;
             this.validate_atomic_rmw(place, success)?;
+            this.buffered_atomic_rmw(new, place, success)?;
         } else {
             this.validate_atomic_load(place, fail)?;
+            // A failed compare exchange is equivalent to a load, reading from the latest store
+            // in the modification order.
+            // Since `old` is only a value and not the store element, we need to separately
+            // find it in our store buffer and perform load_impl on it.
+            if let Some(global) = &this.machine.data_race {
+                if fail == AtomicReadOp::SeqCst {
+                    global.sc_read();
+                }
+                let size = place.layout.size;
+                let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?;
+                if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
+                    if global.multi_threaded.get() {
+                        alloc_buffers.read_from_last_store(alloc_range(base_offset, size), global);
+                    }
+                }
+            }
         }
 
         // Return the old value.
         Ok(res)
     }
 
+    fn buffered_atomic_rmw(
+        &mut self,
+        new_val: ScalarMaybeUninit<Tag>,
+        place: &MPlaceTy<'tcx, Tag>,
+        atomic: AtomicRwOp,
+    ) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?;
+        if let (
+            crate::AllocExtra { weak_memory: Some(alloc_buffers), .. },
+            crate::Evaluator { data_race: Some(global), .. },
+        ) = this.get_alloc_extra_mut(alloc_id)?
+        {
+            if atomic == AtomicRwOp::SeqCst {
+                global.sc_read();
+                global.sc_write();
+            }
+            let size = place.layout.size;
+            let range = alloc_range(base_offset, size);
+            alloc_buffers.read_from_last_store(range, global);
+            alloc_buffers.buffered_write(new_val, range, global, atomic == AtomicRwOp::SeqCst)?;
+        }
+        Ok(())
+    }
+
     /// Update the data-race detector for an atomic read occurring at the
     /// associated memory-place and on the current thread.
     fn validate_atomic_load(
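A consequence of the RMW path above (read_from_last_store followed by buffered_write) is that read-modify-write operations always act on the latest value in modification order, whatever their ordering. A user-level illustration, not part of the commit: concurrent relaxed fetch_add calls can never observe a stale value and lose an increment.

use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
use std::thread;

static COUNTER: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let handles: Vec<_> = (0..4)
        .map(|_| {
            thread::spawn(|| {
                for _ in 0..5 {
                    COUNTER.fetch_add(1, Relaxed); // atomic read-modify-write
                }
            })
        })
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
    // Every increment reads the newest value, so none are lost.
    assert_eq!(COUNTER.load(Relaxed), 20);
}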
@@ -723,7 +816,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
     fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         if let Some(data_race) = &mut this.machine.data_race {
-            data_race.maybe_perform_sync_operation(move |index, mut clocks| {
+            data_race.maybe_perform_sync_operation(|index, mut clocks| {
                 log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic);
 
                 // Apply data-race detection for the current fences
@@ -737,6 +830,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
                     // Either Release | AcqRel | SeqCst
                     clocks.apply_release_fence();
                 }
+                if atomic == AtomicFenceOp::SeqCst {
+                    data_race.last_sc_fence.borrow_mut().set_at_index(&clocks.clock, index);
+                    clocks.fence_seqcst.join(&data_race.last_sc_fence.borrow());
+                    clocks.write_seqcst.join(&data_race.last_sc_write.borrow());
+                }
 
                 // Increment timestamp in case of release semantics.
                 Ok(atomic != AtomicFenceOp::Acquire)
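The SC-fence bookkeeping above is what lets fences restore sequential consistency between otherwise relaxed accesses. A hedged user-level example, not part of the commit: adding SeqCst fences to the store-buffering test from the module comment rules out the weak (0, 0) outcome, because the fences are totally ordered and the load after the later fence must observe the store before the earlier one.

use std::sync::atomic::{fence, AtomicUsize, Ordering::{Relaxed, SeqCst}};
use std::thread;

static X: AtomicUsize = AtomicUsize::new(0);
static Y: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let t1 = thread::spawn(|| {
        X.store(1, Relaxed);
        fence(SeqCst);
        Y.load(Relaxed)
    });
    let t2 = thread::spawn(|| {
        Y.store(1, Relaxed);
        fence(SeqCst);
        X.load(Relaxed)
    });
    let (a, b) = (t1.join().unwrap(), t2.join().unwrap());
    // With the SC fences in place, at least one load observes 1.
    assert!(a == 1 || b == 1);
}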
@@ -1116,6 +1214,12 @@ pub struct GlobalState {
     /// The associated vector index will be moved into re-use candidates
     /// after the join operation occurs.
    terminated_threads: RefCell<FxHashMap<ThreadId, VectorIdx>>,
+
+    /// The timestamp of last SC fence performed by each thread
+    last_sc_fence: RefCell<VClock>,
+
+    /// The timestamp of last SC write performed by each thread
+    last_sc_write: RefCell<VClock>,
 }
 
 impl GlobalState {
@@ -1131,6 +1235,8 @@
             active_thread_count: Cell::new(1),
             reuse_candidates: RefCell::new(FxHashSet::default()),
             terminated_threads: RefCell::new(FxHashMap::default()),
+            last_sc_fence: RefCell::new(VClock::default()),
+            last_sc_write: RefCell::new(VClock::default()),
         };
 
         // Setup the main-thread since it is not explicitly created:
@@ -1445,7 +1551,7 @@
     /// Load the current vector clock in use and the current set of thread clocks
     /// in use for the vector.
     #[inline]
-    fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) {
+    pub fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) {
         let index = self.current_index();
         let ref_vector = self.vector_clocks.borrow();
         let clocks = Ref::map(ref_vector, |vec| &vec[index]);
@@ -1455,7 +1561,7 @@
     /// Load the current vector clock in use and the current set of thread clocks
     /// in use for the vector mutably for modification.
     #[inline]
-    fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) {
+    pub fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) {
         let index = self.current_index();
         let ref_vector = self.vector_clocks.borrow_mut();
         let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]);
@@ -1468,4 +1574,16 @@
     fn current_index(&self) -> VectorIdx {
         self.current_index.get()
     }
+
+    // SC ATOMIC STORE rule in the paper.
+    fn sc_write(&self) {
+        let (index, clocks) = self.current_thread_state();
+        self.last_sc_write.borrow_mut().set_at_index(&clocks.clock, index);
+    }
+
+    // SC ATOMIC READ rule in the paper.
+    fn sc_read(&self) {
+        let (.., mut clocks) = self.current_thread_state_mut();
+        clocks.read_seqcst.join(&self.last_sc_fence.borrow());
+    }
 }

src/lib.rs

Lines changed: 1 addition & 0 deletions
@@ -45,6 +45,7 @@ mod stacked_borrows;
 mod sync;
 mod thread;
 mod vector_clock;
+mod weak_memory;
 
 // Establish a "crate-wide prelude": we often import `crate::*`.
 
src/machine.rs

Lines changed: 11 additions & 1 deletion
@@ -190,6 +190,9 @@ pub struct AllocExtra {
     /// Data race detection via the use of a vector-clock,
     /// this is only added if it is enabled.
     pub data_race: Option<data_race::AllocExtra>,
+    /// Weak memory emulation via the use of store buffers,
+    /// this is only added if it is enabled.
+    pub weak_memory: Option<weak_memory::AllocExtra>,
 }
 
 /// Precomputed layouts of primitive types
@@ -630,9 +633,16 @@
         } else {
             None
         };
+        let buffer_alloc = if ecx.machine.weak_memory {
+            // FIXME: if this is an atomic object, we want to supply its initial value
+            // while allocating the store buffer here.
+            Some(weak_memory::AllocExtra::new_allocation(alloc.size()))
+        } else {
+            None
+        };
         let alloc: Allocation<Tag, Self::AllocExtra> = alloc.convert_tag_add_extra(
             &ecx.tcx,
-            AllocExtra { stacked_borrows: stacks, data_race: race_alloc },
+            AllocExtra { stacked_borrows: stacks, data_race: race_alloc, weak_memory: buffer_alloc },
             |ptr| Evaluator::tag_alloc_base_pointer(ecx, ptr),
         );
         Cow::Owned(alloc)
