From c762d6b18f23630052f33bc984a05f732a11f3ce Mon Sep 17 00:00:00 2001 From: soruh Date: Mon, 24 Jul 2023 23:31:16 +0200 Subject: [PATCH] refactor --- src/allocator.rs | 64 +++++++++++ src/atomic_arc.rs | 1 + src/lib.rs | 287 ++++++++++++++++++++++++++-------------------- 3 files changed, 227 insertions(+), 125 deletions(-) create mode 100644 src/allocator.rs diff --git a/src/allocator.rs b/src/allocator.rs new file mode 100644 index 0000000..0c82848 --- /dev/null +++ b/src/allocator.rs @@ -0,0 +1,64 @@ +use std::mem::size_of; + +use zerocopy::{AsBytes, FromBytes, Unaligned}; + +use crate::{Db, FilePointer, Header, PAGE_SIZE, U32}; + +#[derive(Clone, Copy, FromBytes, AsBytes, Unaligned)] +#[repr(transparent)] +pub struct FreeList { + head: FilePointer, +} + +impl FreeList { + pub fn empty() -> Self { + Self { + head: FilePointer::null(), + } + } +} + +#[derive(Clone, Copy, FromBytes, AsBytes, Unaligned)] +#[repr(C)] +pub struct AllocatorState { + pub general: FreeList, + pub slabs: FilePointer, +} + +#[derive(Clone, Copy, FromBytes, AsBytes, Unaligned)] +#[repr(C)] +pub struct PoolListHeader { + next: FilePointer, + size: U32, + len: U32, +} + +impl PoolListHeader { + fn capacity(&self) -> u32 { + (self.size.get() - size_of::() as u32) / size_of::() as u32 + } +} + +#[derive(Clone, Copy, FromBytes, AsBytes, Unaligned)] +#[repr(C)] +pub struct SizedFreeList { + element_size: U32, + head: FreeList, +} + +impl AllocatorState { + pub fn init(&self, db: &mut Db, size: U32) { + db.write( + self.slabs, + PoolListHeader { + next: FilePointer::null(), + size, + len: 0.into(), + }, + ); + } + + fn slabs_mut<'db>(&self, db: &'db mut Db) -> &'db mut PoolListHeader { + db.modify(self.slabs) + } +} diff --git a/src/atomic_arc.rs b/src/atomic_arc.rs index 44d98c7..471c7bf 100644 --- a/src/atomic_arc.rs +++ b/src/atomic_arc.rs @@ -37,6 +37,7 @@ impl AtomicArc { } } + #[must_use] pub fn swap(&self, new: Arc) -> Arc { unsafe { let old = self diff --git a/src/lib.rs b/src/lib.rs index e716bcb..65ac996 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,24 +1,46 @@ -use std::{collections::HashMap, fs::File, mem::size_of, ops::Range, sync::Arc}; +use std::{borrow::BorrowMut, collections::HashMap, fs::File, mem::size_of, ops::Range, sync::Arc}; +mod allocator; mod atomic_arc; +use allocator::{AllocatorState, FreeList}; use atomic_arc::AtomicArc; use memmap::{Mmap, MmapMut}; use zerocopy::{AsBytes, FromBytes, LayoutVerified, Unaligned, LE}; +const PAGE_SIZE: u64 = 4096; + type U64 = zerocopy::byteorder::U64; +type U32 = zerocopy::byteorder::U32; #[derive(Clone, Copy, FromBytes, AsBytes, Unaligned, Debug, Hash, PartialEq, Eq)] #[repr(transparent)] -struct FilePointer(U64); +pub struct FilePointer(U64); -#[derive(Clone, Copy, FromBytes, AsBytes, Unaligned, Debug)] +impl FilePointer { + fn page(n: u64) -> Self { + Self((n * PAGE_SIZE).into()) + } + fn null() -> Self { + Self(U64::ZERO) + } +} + +#[derive(Clone, Copy, FromBytes, AsBytes, Unaligned, Debug, PartialEq, Eq)] #[repr(C)] -struct FileRange { +pub struct FileRange { start: FilePointer, len: U64, } +impl std::ops::Add for FilePointer { + type Output = Self; + + fn add(self, rhs: u64) -> Self::Output { + Self((self.0.get() + rhs).into()) + } +} + impl FilePointer { pub fn range(&self, len: u64) -> FileRange { FileRange { @@ -43,13 +65,18 @@ impl FileRange { struct Header { magic: [u8; 16], root: FilePointer, + allocator_state: AllocatorState, } impl Default for Header { fn default() -> Self { Self { - magic: [0; 16], - root: FilePointer(0.into()), + magic: *b"cool db format 1", + root: FilePointer::null(), + allocator_state: AllocatorState { + general: FreeList::empty(), + slabs: FilePointer::page(0) + size_of::
() as u64, + }, } } } @@ -63,7 +90,7 @@ pub struct Reader { state: Arc>, } -struct Db { +pub struct Db { file: File, map: MmapMut, header: Header, @@ -71,111 +98,76 @@ struct Db { } #[derive(Clone, Copy)] -struct Modification { - old_range: Option, - new_range: FileRange, +struct Replaced { + from: FileRange, + to: Option, } -struct TransactionHandle<'t> { +pub struct TransactionHandle<'t> { db: &'t mut Db, - modifications: HashMap, - allocator: Allocator, -} - -struct Allocator { - head: FilePointer, -} - -impl Allocator { - unsafe fn allocate_range(&mut self, size: u64) -> FileRange { - todo!() - } - - unsafe fn free_range(&mut self, range: FileRange) { - todo!() - } - - unsafe fn allocate_modification( - &mut self, - old_range: Option, - size: u64, - ) -> Modification { - let new_range = self.allocate_range(size); - - Modification { - old_range, - new_range, - } - } + replaced: HashMap, + new: HashMap, } impl<'t> TransactionHandle<'t> { - fn get_data<'a>(&'a mut self, modification: Modification) -> (FileRange, &'a mut [u8]) { - ( - modification.new_range, - &mut self.db.map[modification.new_range.as_range()], - ) - } - - pub unsafe fn free(&mut self, range: FileRange) { - self.allocator.free_range(range) - } - - pub unsafe fn allocate(&mut self, new_size: u64) -> (FileRange, &mut [u8]) { - let modification = self.allocator.allocate_modification(None, new_size); - - assert!(self - .modifications - .insert(modification.new_range.start, modification) - .is_none()); - - let modification = *self - .modifications - .get(&modification.new_range.start) - .unwrap(); - - self.get_data(modification) + unsafe fn get_data(&mut self, range: FileRange) -> &mut [u8] { + &mut self.db.map[range.as_range()] } pub unsafe fn modify(&mut self, range: FileRange) -> (FileRange, &mut [u8]) { - self.resize(range, range.len()) + let new = if let Some(&replaced) = self.replaced.get(&range.start) { + assert_eq!(replaced.from, range); + + if let Some(to) = replaced.to { + assert_eq!(to, range); + to + } else { + let (to, _) = self.allocate(range.len()); + + self.replaced.get_mut(&range.start).unwrap().to = Some(to); + + to + } + } else if let Some(&new) = self.new.get(&range.start) { + assert_eq!(new, range); + new + } else { + let (new, _) = self.allocate(range.len()); + + let res = self.replaced.insert( + new.start, + Replaced { + from: range, + to: Some(new), + }, + ); + debug_assert!(res.is_none()); + + new + }; + + (new, self.get_data(new)) } - fn resize(&mut self, range: FileRange, new_size: u64) -> (FileRange, &mut [u8]) { - let modification = *self - .modifications - .entry(range.start) - .or_insert_with(|| unsafe { - self.allocator.allocate_modification(Some(range), new_size) - }); + pub fn allocate(&mut self, length: u64) -> (FileRange, &mut [u8]) { + unsafe { + let new = self.allocate_range(length); - assert_eq!( - modification.new_range.len(), - new_size, - "tried to resize a region twice." - ); + let res = self.new.insert(new.start, new); + debug_assert!(res.is_none()); - let n = usize::try_from(range.len().min(new_size)).unwrap(); - - { - let old_range = range.as_range(); - let new_range = modification.new_range.as_range(); - - assert!(!old_range.contains(&new_range.start)); - assert!(!old_range.contains(&new_range.end)); - - assert!(!new_range.contains(&old_range.start)); - assert!(!new_range.contains(&old_range.end)); + (new, self.get_data(new)) } + } - // this is fine, because we just allocated the space we copy into, so it can't overlap with the source - let old_data: &'static [u8] = unsafe { &*(&self.db.map[range.as_range()] as *const _) }; + pub fn free(&mut self, range: FileRange) { + // + } - let data = self.get_data(modification); + fn allocate_range(&mut self, length: u64) -> FileRange { + let range: FileRange = todo!(); - data.1[..n].copy_from_slice(&old_data[..n]); - - data + range } } @@ -199,6 +191,9 @@ impl Db { } fn update_root(&mut self, new_root: FilePointer) -> Arc { + // TODO: we could write some here + flush here for better consistency + // e.g. a copy of the new root pointer + // flush all data in file self.map.flush().unwrap(); @@ -208,8 +203,6 @@ impl Db { .flush_range(Self::root_ptr().0.get() as usize, size_of::()) .unwrap(); - // TODO: we could do a second write + flush here for better consistency - // update data that readers see self.state.swap(Arc::new(Snapshot { root: new_root, @@ -237,6 +230,26 @@ impl Db { .write(data) } + fn modify(&mut self, at: FilePointer) -> &mut T { + self.modify_range(at.range(size_of::() as u64)) + } + + fn modify_range(&mut self, range: FileRange) -> &mut T { + LayoutVerified::<_, T>::new(&mut self.map[range.as_range()]) + .unwrap() + .into_mut() + } + + fn reference(&self, at: FilePointer) -> &T { + self.reference_range(at.range(size_of::() as u64)) + } + + fn reference_range(&self, range: FileRange) -> &T { + LayoutVerified::<_, T>::new(&self.map[range.as_range()]) + .unwrap() + .into_ref() + } + fn remap(&mut self) { let map = unsafe { MmapMut::map_mut(&self.file) }.unwrap(); self.map = map; @@ -246,56 +259,80 @@ impl Db { unsafe { Mmap::map(&self.file) }.unwrap() } - fn resize(&mut self, len: u64) { - self.file.set_len(len).unwrap(); + fn add_pages(&mut self, n: u64) { + self.file + .set_len(self.file.metadata().unwrap().len() + PAGE_SIZE * n) + .unwrap(); + self.remap(); } pub fn new(file: File) -> Self { let len = file.metadata().unwrap().len(); if len == 0 { - file.set_len(size_of::
() as u64).unwrap(); + file.set_len(PAGE_SIZE).unwrap(); } - dbg!(&len); + let map = unsafe { MmapMut::map_mut(&file) }.unwrap(); - let mut map = unsafe { MmapMut::map_mut(&file) }.unwrap(); - - // TODO use the crate Db object and call Db::write(Db::header_ptr()) - let mut header_bytes = - LayoutVerified::<_, Header>::new(&mut map[..size_of::
()]).unwrap(); - - let header = if len == 0 { - let header = Header::default(); - header_bytes.write(header); - header - } else { - header_bytes.read() - }; - - let state = Arc::new(AtomicArc::new(Arc::new(Snapshot { - root: header.root, - map: todo!(), - }))); - - Self { + let mut db = Self { + state: Arc::new(AtomicArc::new(Arc::new(Snapshot { + root: FilePointer::null(), + map: unsafe { Mmap::map(&file).unwrap() }, + }))), file, map, - header, - state, + header: Header::default(), + }; + + if len == 0 { + db.init_allocator(); + db.write(Self::header_ptr(), db.header); + } else { + db.header = db.read(Self::header_ptr()); } + + let _ = db.state.swap(Arc::new(Snapshot { + root: db.header.root, + map: unsafe { Mmap::map(&db.file).unwrap() }, + })); + + db + } + + fn init_allocator(&mut self) { + let allocator_state = self.header.allocator_state; + allocator_state.init( + self, + (PAGE_SIZE - size_of::
() as u64).try_into().unwrap(), + ); } } #[cfg(test)] mod tests { + use std::io::Write; + use std::process::Stdio; use super::*; #[test] fn it_works() { - Db::root_ptr(); - let db = Db::new(tempfile::tempfile().unwrap()); + + let mut child = std::process::Command::new("hexdump") + .arg("-C") + .stdin(Stdio::piped()) + .stdout(Stdio::inherit()) + .spawn() + .unwrap(); + + let mut stdin = child.stdin.take().expect("failed to get stdin"); + + stdin.write_all(db.map.as_bytes()).unwrap(); + + std::mem::drop(stdin); + + child.wait().unwrap(); } }