// cow_file/src/allocator.rs

use std::mem::size_of;
use zerocopy::{AsBytes, FromBytes, FromZeroes, Unaligned};
use crate::{Db, FilePointer, FileRange, PagePointer, RawFilePointer, PAGE_SIZE, U16, U32, U64};
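
/// How a slab of a given element size manages its free elements: single
/// bytes get their own scheme, elements smaller than a file pointer chain
/// through page-relative `U16` offsets, and anything up to half a page uses
/// a free list of absolute pointers.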
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum SlabKind {
    SingleBytes,
    RelativeFreeList,
    AbsoluteFreeList,
}

impl SlabKind {
    fn for_size(size: u32) -> Self {
        if size == 1 {
            Self::SingleBytes
        } else if size < size_of::<RawFilePointer>() as u32 {
            Self::RelativeFreeList
        } else if (size as u64) <= PAGE_SIZE / 2 {
            // TODO: slabs of really big objects are very inefficient.
            // Find a better way, e.g. allocate more pages at once, or at
            // least enough for 10 elements?
            Self::AbsoluteFreeList
        } else {
            panic!("invalid size")
        }
    }
}
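
/// The allocator's persistent root state: the head of the general-purpose
/// free list and the list of fixed-size slabs.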
#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned)]
#[repr(C)]
pub struct AllocatorState {
    pub general: RawFilePointer,
    pub slabs: SlabListPointer,
}
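
/// A first-fit free-list allocator for variable-sized allocations. The free
/// list is kept sorted by file offset so adjacent blocks can be coalesced on
/// `free`; when no block fits, the file is grown by whole pages.
///
/// A minimal usage sketch (assuming a `Db` handle and allocator state have
/// already been set up elsewhere):
///
/// ```ignore
/// let range = allocator.allocate(&mut db, 24); // reserve 24 bytes
/// // ... store data in `range` ...
/// allocator.free(&mut db, range); // return it, merging with neighbours
/// ```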
#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned)]
#[repr(transparent)]
pub struct GeneralPurposeAllocator {
    pub head_ptr: FilePointer<FreeListBlock>,
}
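
/// Header of a block on the general-purpose free list. The stored size is
/// relative to `MIN_ALLOCATION_SIZE` and uses a variable-length encoding:
/// if the high bit (`SIZE_MASK`) of the byte after `next` is clear, the
/// remaining 7 bits are the size; otherwise the size is a full `U64` stored
/// immediately after the header.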
#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned)]
#[repr(C)]
pub struct FreeListBlock {
    next: FilePointer<FreeListBlock>,
    size: u8,
}

impl FilePointer<FreeListBlock> {
    pub fn next_ptr(self) -> FilePointer<FilePointer<FreeListBlock>> {
        FilePointer::new(self.into_raw())
    }

    pub fn size_start_ptr(self) -> FilePointer<u8> {
        FilePointer::new(self.into_raw() + size_of::<FilePointer<FreeListBlock>>() as u64)
    }

    pub fn size_end_ptr(self) -> FilePointer<U64> {
        FilePointer::new(self.into_raw() + size_of::<FreeListBlock>() as u64)
    }
}

impl GeneralPurposeAllocator {
    const SIZE_MASK: u8 = 0b1000_0000;
    const MIN_ALLOCATION_SIZE: u64 = size_of::<FreeListBlock>() as u64;

    pub fn size<R>(db: &Db<R>, head: FilePointer<FreeListBlock>) -> u64 {
        // println!("get size({head:?})");
        let first_byte: u8 = unsafe { db.read(head.size_start_ptr()) };

        let size = if first_byte & Self::SIZE_MASK == 0 {
            // small size (fits in 7 bits)
            first_byte as u64
        } else {
            // large size
            unsafe { db.read::<U64>(head.size_end_ptr()) }.get()
        };

        Self::MIN_ALLOCATION_SIZE + size
    }

    fn set_size<R>(db: &mut Db<R>, head: FilePointer<FreeListBlock>, size: u64) {
        assert!(size >= Self::MIN_ALLOCATION_SIZE);
        let size = size - Self::MIN_ALLOCATION_SIZE;

        if size <= (u8::MAX & !Self::SIZE_MASK) as u64 {
            // small size (fits in 7 bits)
            debug_assert_eq!(size as u8 & Self::SIZE_MASK, 0);
            unsafe { db.write(head.size_start_ptr(), size as u8) };
        } else {
            unsafe {
                db.write(head.size_start_ptr(), Self::SIZE_MASK);
                db.write::<U64>(head.size_end_ptr(), size.into());
            }
        }
    }

    fn clear<R>(db: &mut Db<R>, ptr: FilePointer<FreeListBlock>) -> RawFilePointer {
        unsafe {
            db.write(ptr.next_ptr(), FilePointer::null());
            let first_byte: u8 = db.read(ptr.size_start_ptr());
            // clear the first size byte
            db.write(ptr.size_start_ptr(), 0);
            if first_byte & Self::SIZE_MASK != 0 {
                // larger block: clear the full size field
                db.write(ptr.size_end_ptr(), 0.into());
            }
        }
        ptr.into_raw()
    }

    fn can_allocate_into(needed_size: u64, actual_size: u64) -> bool {
        use std::cmp::Ordering::*;
        match actual_size.cmp(&needed_size) {
            Less => false,
            Equal => true,
            // leave space to insert the remaining space into the free list
            Greater => actual_size >= needed_size + Self::MIN_ALLOCATION_SIZE,
        }
    }

    fn needed_pages(size: u64) -> u64 {
        let mut n_pages = div_round_up(size, PAGE_SIZE);
        let extra_space = n_pages * PAGE_SIZE - size;
        if extra_space != 0 && extra_space < Self::MIN_ALLOCATION_SIZE {
            // the extra space in the allocated pages is too small to be
            // inserted into the free list; allocate an additional page.
            n_pages += 1;
        }
        n_pages
    }
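
    /// Allocates `expected_size` bytes using the first free block that fits.
    /// If no block fits, the file is grown, merging a trailing free block
    /// with the fresh pages when possible. Any split-off remainder is linked
    /// back into the free list.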
    pub fn allocate<R>(self, db: &mut Db<R>, expected_size: u64) -> FileRange {
        // we need space to store the free list entry
        let needed_size = expected_size.max(Self::MIN_ALLOCATION_SIZE);

        let head = self.head_ptr;

        // if the first element is replaced, update the head pointer
        let mut prevprev = FilePointer::<FreeListBlock>::null();
        let mut prev = head;
        let mut next: FilePointer<FreeListBlock> = unsafe { db.read(head.next_ptr()) };

        let empty_list = next.is_null();

        while !next.is_null() && !Self::can_allocate_into(needed_size, Self::size(db, next)) {
            prevprev = prev;
            prev = next;
            next = unsafe { db.read(next.next_ptr()) };
        }

        // dbg!(next, Self::size(db, next));

        let start = if next.is_null() {
            // no free block fits: grow the file
            let (prev, start, prev_free) = if !empty_list {
                let prevlen = Self::size(db, prev);
                if prev.into_raw() + prevlen == db.end_of_file() {
                    // println!("free block at end of file {prev:?}");
                    // the last free block touches the end of the file:
                    // extend it with the newly added pages
                    Self::clear(db, prev);
                    (prevprev, prev, prevlen)
                } else {
                    (prev, FilePointer::new(db.end_of_file()), 0)
                }
            } else {
                (prev, FilePointer::new(db.end_of_file()), 0)
            };

            // dbg!(prev, start, prev_free);

            let still_needed = if prev_free > needed_size {
                assert!(needed_size + Self::MIN_ALLOCATION_SIZE > prev_free);
                needed_size + Self::MIN_ALLOCATION_SIZE - prev_free
            } else {
                needed_size - prev_free
            };

            let n_pages = Self::needed_pages(still_needed);
            assert_ne!(n_pages, 0);

            let page_start = db.add_pages(n_pages).start();
            // dbg!(n_pages, page_start);
            if prev_free == 0 {
                assert_eq!(page_start, start.into_raw());
            }

            let free_space = prev_free + PAGE_SIZE * n_pages;
            let extra_space = free_space - needed_size;
            if extra_space != 0 {
                let remainder = FilePointer::<FreeListBlock>::new(start.into_raw() + needed_size);
                Self::set_size(db, remainder, extra_space);
                // prev must be the current tail of the free list, and the newly
                // allocated space, being at the end of the file, must be the
                // last element of the free list to keep it sorted.
                unsafe { db.write(prev.next_ptr(), remainder) };
            } else {
                unsafe { db.write(prev.next_ptr(), FilePointer::<FreeListBlock>::null()) };
            }

            start
        } else {
            // found a block we can allocate into: unlink it and split off any remainder
            let start = next;
            let nextnext = unsafe { db.read(start.next_ptr()) };

            let extra_space = Self::size(db, start) - needed_size;
            // dbg!(prev, nextnext, extra_space);
            if extra_space != 0 {
                let remainder = FilePointer::<FreeListBlock>::new(start.into_raw() + needed_size);
                // dbg!(remainder);
                Self::set_size(db, remainder, extra_space);
                unsafe {
                    db.write(prev.next_ptr(), remainder);
                    db.write(remainder.next_ptr(), nextnext);
                }
                // println!("{:x?}", unsafe { db.read::<[u8; 9 + 8]>(remainder) });
            } else {
                unsafe { db.write(prev.next_ptr(), nextnext) };
            }

            start
        };

        let start = Self::clear(db, start);

        start.range(expected_size)
    }
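
    /// Returns `range` to the free list, keeping the list sorted by address
    /// and coalescing with the previous and next blocks when they are
    /// directly adjacent.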
    pub fn free<R>(self, db: &mut Db<R>, range: FileRange) {
        // println!("free({range:?})");
        let mut size = range.len().max(Self::MIN_ALLOCATION_SIZE);
        let mut start = FilePointer::<FreeListBlock>::new(range.start);

        let head = self.head_ptr;

        let mut prevprev = FilePointer::null();
        let mut prev = head;
        let mut next = unsafe { db.read(head.next_ptr()) };

        // find the insertion point; the list is sorted by address
        while !next.is_null() && next < start {
            prevprev = prev;
            prev = next;
            next = unsafe { db.read(next.next_ptr()) };
        }

        if start.into_raw() + size == next.into_raw() {
            // we can merge with the next range
            let nextlen = Self::size(db, next);
            let nextnext = unsafe { db.read(next.next_ptr()) };
            // println!("merging with next range {:?}", next.range(nextlen));
            Self::clear(db, next);
            next = nextnext;
            size += nextlen;
        }

        // we can't merge with the head pointer
        if prev != head && prev.into_raw() + Self::size(db, prev) == start.into_raw() {
            // we can merge with the previous range
            let prevlen = Self::size(db, prev);
            // println!("merging with previous range {:?}", prev.range(prevlen));
            Self::clear(db, prev);
            start = prev;
            prev = prevprev;
            size += prevlen;
        }

        unsafe {
            db.write(prev.next_ptr(), start);
            db.write(start.next_ptr(), next);
        }
        Self::set_size(db, start, size);
    }
}

fn div_round_up(a: u64, b: u64) -> u64 {
    (a + b - 1) / b
}
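
/// Header of one node of the on-disk slab list. Each node occupies `size`
/// bytes and stores up to `capacity()` `Slab` descriptors inline after the
/// header; full nodes chain to the next node via `next`.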
#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned, Debug)]
#[repr(C)]
pub struct SlabListHeader {
    next: SlabListPointer,
    len: U32,
    size: U32,
}

#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned, Debug)]
#[repr(transparent)]
pub struct SlabListPointer(pub FilePointer<SlabListHeader>);

pub struct SlabListIterator<'db, R> {
    position: u32,
    db: &'db Db<R>,
    ptr: SlabListPointer,
}

impl<'db, R> Iterator for SlabListIterator<'db, R> {
    type Item = SlabPointer;

    fn next(&mut self) -> Option<Self::Item> {
        if let Some(res) = self.ptr.get(self.db, self.position) {
            self.position += 1;
            Some(res)
        } else if let Some(next) = self.ptr.next(self.db) {
            // this node is exhausted: continue at the start of the next node
            self.ptr = next;
            self.position = 0;
            self.next()
        } else {
            None
        }
    }
}

impl SlabListHeader {
    pub fn capacity(&self) -> u32 {
        (self.size.get() - size_of::<SlabListHeader>() as u32) / size_of::<Slab>() as u32
    }
}

impl SlabListPointer {
    pub fn next<R>(self, db: &Db<R>) -> Option<SlabListPointer> {
        let ptr: SlabListPointer = self.read_header(db).next;
        (!ptr.0.is_null()).then_some(ptr)
    }

    fn read_header<R>(self, db: &Db<R>) -> SlabListHeader {
        unsafe { db.read(self.0) }
    }

    fn modify_header<R>(self, db: &mut Db<R>) -> &mut SlabListHeader {
        unsafe { db.modify(self.0) }
    }

    pub fn set_next<R>(self, db: &mut Db<R>, next: SlabListPointer) {
        self.modify_header(db).next = next;
    }

    pub fn set_len<R>(self, db: &mut Db<R>, len: u32) {
        self.modify_header(db).len = U32::from(len);
    }

    pub fn init<R>(self, db: &mut Db<R>, size: u32) {
        *self.modify_header(db) = SlabListHeader {
            next: SlabListPointer(FilePointer::null()),
            size: size.into(),
            len: 0.into(),
        };
    }

    pub fn element_ptr<R>(self, db: &Db<R>, i: u32) -> Option<FilePointer<Slab>> {
        let this = self.read_header(db);
        (i < this.len.get()).then(|| {
            FilePointer::new(
                self.0.into_raw()
                    + size_of::<SlabListHeader>() as u64
                    + i as u64 * size_of::<Slab>() as u64,
            )
        })
    }

    pub fn write_element<R>(self, db: &mut Db<R>, i: u32, value: Slab) {
        let ptr = self.element_ptr(db, i).unwrap();
        unsafe { db.write(ptr, value) };
    }

    pub fn get<R>(self, db: &Db<R>, i: u32) -> Option<SlabPointer> {
        self.element_ptr(db, i).map(SlabPointer)
    }

    pub fn iter<R>(self, db: &Db<R>) -> SlabListIterator<R> {
        SlabListIterator {
            position: 0,
            db,
            ptr: self,
        }
    }

    pub fn add_slab<R>(self, db: &mut Db<R>, slab_size: u32) -> SlabPointer {
        // println!("add_slab({slab_size})");
        let this = self.read_header(db);
        // dbg!(&this);

        let capacity = this.capacity();
        let SlabListHeader { mut next, len, .. } = this;

        if len.get() >= capacity {
            // this node is full: delegate to the next one, creating it if needed
            if next.0.is_null() {
                next = SlabListPointer(FilePointer::new(db.add_pages(1).start()));
                next.init(db, PAGE_SIZE as u32);
                self.set_next(db, next);
            }
            return next.add_slab(db, slab_size);
        }

        let len = len.get();
        self.set_len(db, len + 1);
        self.write_element(
            db,
            len,
            Slab {
                head: RawFilePointer::null(),
                size: slab_size.into(),
            },
        );

        SlabPointer(self.element_ptr(db, len).unwrap())
    }
}
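
/// Descriptor of a slab handing out fixed-size allocations of `size` bytes.
/// `head` points at the first free element, or at the first page with free
/// elements for relative free lists; it is null while the slab has no free
/// space.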
#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned)]
#[repr(C)]
pub struct Slab {
    head: RawFilePointer,
    size: U32,
}
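
/// Per-page header for slabs of elements smaller than a pointer. Free
/// elements within the page form a singly linked list of page-relative
/// `U16` offsets starting at `first`; an offset of 0 (the header itself)
/// terminates the list, so `first == 0` means the page is full. Pages with
/// free elements are chained through `next_page`.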
#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned)]
#[repr(C)]
struct RelativeFreeListHeader {
    next_page: PagePointer,
    first: U16,
}

impl RelativeFreeListHeader {
    const DATA_SIZE: u32 = PAGE_SIZE as u32 - size_of::<Self>() as u32;

    fn capacity(size: u32) -> u32 {
        debug_assert_eq!(SlabKind::for_size(size), SlabKind::RelativeFreeList);
        Self::DATA_SIZE / size
    }
}
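
/// Handle to a `Slab` descriptor stored in the slab list.
///
/// A minimal usage sketch (assuming `slabs` is the `SlabListPointer` from
/// `AllocatorState`):
///
/// ```ignore
/// let slab = slabs.add_slab(&mut db, 16); // 16-byte size class
/// let range = slab.alloc(&mut db);
/// slab.free(&mut db, range);
/// ```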
#[derive(Clone, Copy, FromBytes, FromZeroes, AsBytes, Unaligned)]
#[repr(transparent)]
pub struct SlabPointer(FilePointer<Slab>);

impl SlabPointer {
    fn read<R>(&self, db: &Db<R>) -> Slab {
        unsafe { db.read(self.0) }
    }

    fn modify<'db, R>(&self, db: &'db mut Db<R>) -> &'db mut Slab {
        unsafe { db.modify(self.0) }
    }

    pub fn size<R>(&self, db: &Db<R>) -> u32 {
        self.read(db).size.get()
    }

    pub fn alloc<R>(&self, db: &mut Db<R>) -> FileRange {
        let Slab { mut head, size } = self.read(db);

        if head.is_null() {
            head = self.allocate_page(db);
        }

        let size = size.get();
        match SlabKind::for_size(size) {
            SlabKind::SingleBytes => todo!("single byte slabs"),
            SlabKind::RelativeFreeList => {
                let (page, offset) = head.page_offset();
                let start = FilePointer::<RelativeFreeListHeader>::new(page.start());
                assert_eq!(offset, 0);

                let RelativeFreeListHeader { first, .. } = unsafe { db.read(start) };
                // the page should never be full if it's in the free list
                assert_ne!(first.get(), 0);

                let ptr = FilePointer::<U16>::new(RawFilePointer::from_page_and_offset(
                    page,
                    first.get(),
                ));
                let next: U16 = unsafe { db.read(ptr) };

                let header = unsafe { db.modify::<RelativeFreeListHeader>(start) };
                header.first = next;

                if next.get() == 0 {
                    // page is full: unlink it from the slab's page list
                    let next_page = header.next_page;
                    header.next_page = PagePointer::null();
                    self.modify(db).head = next_page.start();
                }

                ptr.into_raw()
            }
            SlabKind::AbsoluteFreeList => {
                // pop the first element off the free list
                let next = unsafe { db.read(FilePointer::<RawFilePointer>::new(head)) };
                self.set_head(db, next);
                head
            }
        }
        .range(size as u64)
    }

    pub fn free<R>(&self, db: &mut Db<R>, range: FileRange) {
        let Slab { head, size } = self.read(db);
        assert_eq!(range.len(), size.get() as u64);

        let size = size.get();
        match SlabKind::for_size(size) {
            SlabKind::SingleBytes => todo!("single byte slabs"),
            SlabKind::RelativeFreeList => {
                let (page, offset) = range.start.page_offset();
                let start = FilePointer::<RelativeFreeListHeader>::new(page.start());

                let RelativeFreeListHeader { first, .. } = unsafe { db.read(start) };

                // update the next pointer of the new element in the free list
                unsafe { db.write(FilePointer::<U16>::new(range.start), first) };

                let header = unsafe { db.modify::<RelativeFreeListHeader>(start) };
                // point to the new element
                header.first = offset.into();

                if first.get() == 0 {
                    // page was full: link it back into the slab's page list
                    let (head_page, offset) = head.page_offset();
                    assert_eq!(offset, 0);
                    header.next_page = head_page;
                    self.modify(db).head = page.start();
                }
            }
            SlabKind::AbsoluteFreeList => {
                // push the element onto the front of the free list
                unsafe { db.write(FilePointer::<RawFilePointer>::new(range.start), head) };
                self.set_head(db, range.start);
            }
        }
    }

    pub fn set_head<R>(&self, db: &mut Db<R>, next: RawFilePointer) {
        self.modify(db).head = next;
    }

    pub fn allocate_page<R>(&self, db: &mut Db<R>) -> RawFilePointer {
        let Slab { head, size } = self.read(db);
        // println!("allocate_slab_page({size})");
        let size = size.get();
        match SlabKind::for_size(size) {
            SlabKind::SingleBytes => todo!("single byte slabs"),
            SlabKind::RelativeFreeList => {
                let page = db.add_pages(1);

                let (next_page, offset) = head.page_offset();
                assert_eq!(offset, 0);

                let capacity = RelativeFreeListHeader::capacity(size);
                // the elements start immediately after the page header
                let data_offset = size_of::<RelativeFreeListHeader>() as u16;

                unsafe {
                    db.write(
                        FilePointer::new(page.start()),
                        RelativeFreeListHeader {
                            next_page,
                            first: data_offset.into(),
                        },
                    )
                };

                // chain every element of the fresh page into the intra-page
                // free list; offset 0 terminates the list
                let mut offset = 0;
                for i in (0..capacity).rev() {
                    let next = data_offset + (i * size) as u16;
                    unsafe {
                        db.write(
                            FilePointer::new(RawFilePointer::from_page_and_offset(page, next)),
                            U16::from(offset),
                        )
                    };
                    offset = next;
                }

                self.set_head(db, page.start());
                page.start()
            }
            SlabKind::AbsoluteFreeList => {
                let n = PAGE_SIZE / size as u64;
                let page = db.add_pages(1);

                // chain every element of the fresh page onto the free list
                let mut next = head;
                for i in (0..n).rev() {
                    let current = page.start() + i * size as u64;
                    unsafe { db.write(FilePointer::<RawFilePointer>::new(current), next) };
                    next = current;
                }

                self.set_head(db, next);
                next
            }
        }
    }
}