Segfaults ain't all bad
mmap(), munmap()
map or unmap files or devices into memory
#include <sys/mman.h>
void *mmap(void addr[.length], size_t length, int prot,
int flags, int fd, off_t offset);
int munmap(void addr[.length], size_t length);
Use of a mapped region can result in these signals:
SIGSEGV
Attempted to write to a region mapped as read-only.
SIGBUS
Attempted access to a page of the buffer that lies beyond the end of the mapped file.
Use of a mapped region can result in these signals:
SIGSEGV
Attempted to write to a region mapped as read-only.
SIGBUS
Attempted access to a page of the buffer that lies beyond the end of the mapped file.
sigaction()
examine and change a signal action
#include <signal.h>
int sigaction(
int signum,
const struct sigaction *_Nullable restrict act,
struct sigaction *_Nullable restrict oldact
);
mprotect()
set protection on a region of memory
#include <sys/mman.h>
int mprotect(void addr[.len], size_t len, int prot);
/// An anonymous, page-aligned memory mapping holding `len` elements of `T`.
struct Mmap<T> {
// Base address returned by `mmap`; page-aligned.
ptr: *mut T,
// Number of `T` elements exposed through `as_ref`/`as_mut`.
len: usize,
// Actual mapping size in bytes, rounded up to a whole number of pages;
// this (not `len * size_of::<T>()`) is what must be passed to `munmap`.
n_bytes: usize,
}
impl<T> Mmap<T> {
    /// Creates an anonymous, private mapping large enough for `len`
    /// elements of `T`, rounded up to a whole number of pages.
    ///
    /// Pages are mapped `PROT_READ` when `read_only` is true, otherwise
    /// `PROT_READ | PROT_WRITE`.
    ///
    /// # Errors
    /// Returns `InvalidInput` if `len * size_of::<T>()` overflows
    /// `usize`, or the OS error if `mmap` itself fails.
    fn new(len: usize, read_only: bool) -> io::Result<Self> {
        // Fix: the original `len * mem::size_of::<T>()` could overflow
        // (wrapping in release builds) and map a too-small region; fail
        // explicitly instead.
        let mut n_bytes = len
            .checked_mul(mem::size_of::<T>())
            .ok_or_else(|| io::Error::from(io::ErrorKind::InvalidInput))?;
        let page_size = system_page_size();
        // Pad n_bytes to be a multiple of the page size; the kernel
        // maps whole pages regardless, and the dirty-bit bookkeeping
        // relies on `n_bytes` being page-exact.
        let n_bytes_rem = n_bytes % page_size;
        if n_bytes_rem != 0 {
            n_bytes += page_size - n_bytes_rem;
        }
        let prot = if read_only {
            PROT_READ
        } else {
            PROT_READ | PROT_WRITE
        };
        // SAFETY: we request a fresh anonymous mapping (fd = -1, offset
        // 0), so no existing memory is aliased; failure is reported via
        // MAP_FAILED and turned into an `io::Error`.
        unsafe {
            let ptr = libc::mmap(
                ptr::null_mut(),
                n_bytes,
                prot,
                MAP_PRIVATE | MAP_ANONYMOUS,
                -1,
                0,
            );
            if ptr == MAP_FAILED {
                return Err(io::Error::last_os_error());
            }
            Ok(Mmap {
                ptr: ptr as _,
                len,
                n_bytes,
            })
        }
    }
}
impl<T> Drop for Mmap<T> {
    /// Releases the mapping. The padded byte count handed to `munmap`
    /// must be `n_bytes` (what `mmap` actually mapped), not `len`.
    fn drop(&mut self) {
        // SAFETY: `ptr`/`n_bytes` describe exactly the region created in
        // `Mmap::new`, and drop runs at most once, so there is no double
        // unmap.
        unsafe {
            libc::munmap(self.ptr.cast(), self.n_bytes);
        }
    }
}
impl<T> AsRef<[T]> for Mmap<T> {
    /// Views the mapping as a shared slice of `len` elements.
    fn as_ref(&self) -> &[T] {
        let (data, count) = (self.ptr, self.len);
        // SAFETY: `ptr` points to a live mapping holding at least `len`
        // elements, and the `&self` borrow rules out a simultaneous
        // `&mut` view from safe code.
        unsafe { slice::from_raw_parts(data, count) }
    }
}
impl<T> AsMut<[T]> for Mmap<T> {
    /// Views the mapping as a mutable slice of `len` elements.
    fn as_mut(&mut self) -> &mut [T] {
        let (data, count) = (self.ptr, self.len);
        // SAFETY: same bounds argument as `as_ref`; `&mut self` makes
        // this the unique safe view of the region.
        unsafe { slice::from_raw_parts_mut(data, count) }
    }
}
/// Backing storage for `Memory`: the byte buffer plus one atomic
/// "dirty" flag per page, flipped from the SIGSEGV handler.
struct MemoryInner {
// Page-aligned data buffer; mapped read-only so first writes fault.
bytes: Mmap<u8>,
// One flag per page of `bytes` (`bytes.n_bytes / page_size` entries);
// `AtomicBool` so the signal handler can set flags without data races.
dirty_page_bits: Mmap<AtomicBool>,
}
impl MemoryInner {
    /// Allocates a read-only byte buffer of `len` bytes together with a
    /// writable dirty-bit array holding one `AtomicBool` per page.
    fn new(len: usize) -> io::Result<Self> {
        // Read-only mapping: the first write to each page raises
        // SIGSEGV, which is how pages get recorded as dirty.
        let bytes = Mmap::new(len, true)?;
        // `n_bytes` is already page-rounded, so this division is exact.
        let page_count = bytes.n_bytes / system_page_size();
        let dirty_page_bits = Mmap::new(page_count, false)?;
        Ok(MemoryInner {
            bytes,
            dirty_page_bits,
        })
    }
}
impl MemoryInner {
/// Handles a SIGSEGV whose faulting address `si_addr` lies inside
/// `bytes`: marks the containing page dirty and, on the first fault
/// for that page, remaps it read-write so the write can proceed.
///
/// # Safety
/// `si_addr` must fall within this instance's `bytes` mapping,
/// otherwise the index arithmetic below is out of bounds. Runs in
/// signal-handler context, so it must stay async-signal-safe
/// (`mprotect` is; allocating would not be).
unsafe fn process_segv(&mut self, si_addr: usize) -> io::Result<()> {
let start_addr = self.bytes.ptr as usize;
let page_size = system_page_size();
// Which page of the mapping faulted, and that page's base address.
let page_index = (si_addr - start_addr) / page_size;
let page_addr = start_addr + page_index * page_size;
// Atomically record the page as dirty; `fetch_or` returns the prior
// value, so only the first fault per page pays for the mprotect.
let was_dirty = self.dirty_page_bits.as_mut()[page_index]
.fetch_or(true, Ordering::SeqCst);
if !was_dirty
&& libc::mprotect(page_addr as _, page_size, PROT_READ | PROT_WRITE)
!= 0
{
return Err(io::Error::last_os_error());
}
Ok(())
}
}
// Maps each registered address range (start..end of a buffer) to a raw
// `*mut MemoryInner`, stored as `usize` so the map itself is Send/Sync.
type MemoryMap = RangeMap<usize, usize>;
/// Lazily-initialized global registry the SIGSEGV handler consults to
/// find which `MemoryInner` owns a faulting address.
// NOTE(review): the handler acquires this RwLock from signal-handler
// context; locking is not async-signal-safe and can deadlock if the
// fault occurs while the write lock is held — confirm this trade-off
// is acceptable for this demo.
fn global_memory_map() -> &'static RwLock<MemoryMap> {
static MEMORY_MAP: OnceLock<RwLock<MemoryMap>> = OnceLock::new();
MEMORY_MAP.get_or_init(|| RwLock::new(MemoryMap::new()))
}
/// Runs `f` with shared access to the global memory map, holding the
/// read lock only for the duration of the call.
fn with_map<T, F>(f: F) -> T
where
    F: FnOnce(&MemoryMap) -> T,
{
    let guard = global_memory_map().read().unwrap();
    f(&guard)
}
/// Runs `f` with exclusive access to the global memory map, holding the
/// write lock only for the duration of the call.
fn with_map_mut<T, F>(f: F) -> T
where
    F: FnOnce(&mut MemoryMap) -> T,
{
    let mut guard = global_memory_map().write().unwrap();
    f(&mut guard)
}
/// Global SIGSEGV handler: if the faulting address belongs to a
/// registered `Memory`, mark its page dirty and return (the faulting
/// write is retried by the CPU); otherwise — or if `mprotect` fails —
/// delegate to the previously-installed action.
///
/// # Safety
/// Must be installed via `sigaction` with `SA_SIGINFO`; `info` must be
/// the `siginfo_t` supplied by the kernel.
// NOTE(review): this takes the map's RwLock and conjures a `&mut
// MemoryInner` from a shared raw pointer inside a signal handler;
// neither is formally async-signal-safe / alias-safe — fine for a
// demo, worth flagging for production use.
unsafe fn segfault_handler(
sig: c_int,
info: *mut siginfo_t,
ctx: *mut ucontext_t,
) {
with_map(move |map| {
// Address the faulting access touched.
let si_addr = (*info).si_addr() as usize;
if let Some(inner_ptr) = map.get(&si_addr) {
let inner = &mut *(*inner_ptr as *mut MemoryInner);
// Fall back to the old action if we cannot make the page writable.
if inner.process_segv(si_addr).is_err() {
call_old_action(sig, info, ctx);
}
return;
}
// Fault was not in any registered region: a genuine segfault.
call_old_action(sig, info, ctx);
});
}
/// Installs `segfault_handler` for SIGSEGV exactly once and returns the
/// previously-installed action (kept so faults outside our regions can
/// be forwarded to it).
///
/// Exits the process if `sigaction` itself fails — there is no caller
/// that could meaningfully recover at that point.
fn setup_action() -> sigaction {
unsafe {
// OnceLock guarantees single installation even when several
// `Memory` values are created.
static OLD_ACTION: OnceLock<sigaction> = OnceLock::new();
*OLD_ACTION.get_or_init(|| {
// Empty signal mask: block no additional signals while the
// handler runs.
let mut sa_mask = MaybeUninit::<sigset_t>::uninit();
sigemptyset(sa_mask.as_mut_ptr());
let act = sigaction {
sa_sigaction: segfault_handler as _,
// assumes sigemptyset fully initialized the mask — true per POSIX.
sa_mask: sa_mask.assume_init(),
// SA_SIGINFO selects the three-argument handler form so the
// fault address is delivered via `siginfo_t`.
sa_flags: SA_SIGINFO,
#[cfg(target_os = "linux")]
sa_restorer: None,
};
let mut old_act = MaybeUninit::<sigaction>::uninit();
if libc::sigaction(SIGSEGV, &act, old_act.as_mut_ptr()) != 0 {
process::exit(1);
}
// `sigaction` returned 0, so the kernel wrote `old_act`.
old_act.assume_init()
})
}
}
/// Public handle to a dirty-page-tracked buffer. Holds a leaked
/// (`'static`) `MemoryInner`, reclaimed in `Drop for Memory`.
pub struct Memory(&'static mut MemoryInner);
impl Memory {
/// Allocates `len` bytes of read-only-mapped memory, installs the
/// SIGSEGV handler (first call only), and registers the buffer's
/// address range in the global map.
///
/// The whole sequence runs under the map's write lock so the signal
/// handler can never observe a half-registered region.
pub fn new(len: usize) -> io::Result<Self> {
with_map_mut(|map| {
setup_action();
let inner = MemoryInner::new(len)?;
// Compute the [begin, end) address range the handler will use to
// look up the owning `MemoryInner` from a fault address.
let inner_slice = inner.bytes.as_ref();
let inner_begin = inner_slice.as_ptr() as usize;
let inner_end = inner_begin + inner_slice.len();
let inner_range = inner_begin..inner_end;
// Leak the inner so the handler can hold a raw 'static pointer
// to it; `Drop for Memory` reconstructs and frees the Box.
let leaked_inner = Box::leak(Box::new(inner));
map.insert(inner_range, leaked_inner as *const MemoryInner as _);
Ok(Memory(leaked_inner))
})
}
}
impl Drop for Memory {
/// Deregisters the buffer from the global map, then frees the leaked
/// `MemoryInner` (which unmaps both mappings via `Mmap::drop`).
fn drop(&mut self) {
with_map_mut(|map| {
let inner_slice = self.0.bytes.as_ref();
let inner_begin = inner_slice.as_ptr() as usize;
let inner_end = inner_begin + inner_slice.len();
let inner_range = inner_begin..inner_end;
// Remove first, under the write lock, so the handler cannot find
// the region after its memory is freed below.
map.remove(inner_range);
// SAFETY: `self.0` came from `Box::leak` in `Memory::new`, is
// freed exactly once, and is not used after this point.
unsafe {
let _ = Box::from_raw(self.0);
}
});
}
}
impl AsRef<[u8]> for Memory {
    /// Read-only view of the whole buffer.
    fn as_ref(&self) -> &[u8] {
        self.0.bytes.as_ref()
    }
}
impl AsMut<[u8]> for Memory {
    /// Mutable view of the whole buffer; a write to a still-clean page
    /// faults and is recorded by the SIGSEGV handler.
    fn as_mut(&mut self) -> &mut [u8] {
        self.0.bytes.as_mut()
    }
}
impl Deref for Memory {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.as_ref()
}
}
impl DerefMut for Memory {
fn deref_mut(&mut self) -> &mut Self::Target {
self.as_mut()
}
}
/// End-to-end check: a write lands in the buffer and flips the dirty
/// bit of the page it touched.
#[test]
fn read_write() -> io::Result<()> {
    let mut memory = Memory::new(1000)?;
    // This write faults the read-only first page; the handler marks it
    // dirty and remaps it writable before the write is retried.
    memory[42] = 42;
    assert_eq!(memory[42], 42, "Memory will be written to correctly");
    let first_page_dirty =
        memory.0.dirty_page_bits.as_ref()[0].load(Ordering::SeqCst);
    assert!(first_page_dirty, "The first page will be marked as dirty");
    Ok(())
}
Send and Sync
/// This does not compile if `Send` isn't implemented for `Memory`
#[test]
fn send() -> io::Result<()> {
use std::thread;
let mut memory = Memory::new(1000)?;
// error[E0277]
// Moving `memory` into the spawned closure requires `Memory: Send`;
// the write also exercises the SIGSEGV path from a non-main thread.
thread::spawn(move || {
memory[42] = 42;
})
.join()
.unwrap();
Ok(())
}
Send and Sync
Rules for references
// SAFETY(review): `Memory` exclusively owns its mappings and the dirty
// bits are atomics, so moving it to another thread looks sound —
// confirm nothing here is tied to the creating thread.
unsafe impl Send for Memory {}
// SAFETY(review): `&Memory` exposes only shared reads of the buffer and
// atomic loads of the dirty bits — presumably sound; verify the signal
// handler's `&mut MemoryInner` access cannot race concurrent readers.
unsafe impl Sync for Memory {}
/// This is available on slices, but (maybe fortunately) it is
/// unstable. Luckily, we used atomics to track dirty pages.
pub fn split_at_mut(&mut self, mid: usize) -> (&mut [T], &mut [T]);