From b736068ee7b82e05c2ede8bc48ace7ffa4709e29 Mon Sep 17 00:00:00 2001 From: Chris Xiong Date: Wed, 24 Jul 2024 23:40:11 -0400 Subject: Initial commit. --- backend/src/monolith.rs | 238 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 backend/src/monolith.rs (limited to 'backend/src/monolith.rs') diff --git a/backend/src/monolith.rs b/backend/src/monolith.rs new file mode 100644 index 0000000..a471138 --- /dev/null +++ b/backend/src/monolith.rs @@ -0,0 +1,238 @@ +use std::fs::File; +use std::vec::Vec; +use std::io::Read; +use memmap::Mmap; + +fn consume_str(buf: &[u8]) -> (String, usize) { + let nulp = buf.iter().position(|x| *x == 0u8).unwrap(); + let s = String::from_utf8_lossy(&buf[..nulp]); + (s.to_string(), nulp) +} + +#[derive(Debug)] +pub enum MediaInstance { + Image {thmb: String, orig: String}, + Video +} + +impl MediaInstance { + fn consume(buf: &[u8]) -> (MediaInstance, usize) { + match buf[0] as char { + 'I' => { + let b = &buf[1..]; + let (thmb, p1) = consume_str(b); + let b = &b[p1 + 1..]; + let (orig, p2) = consume_str(b); + (MediaInstance::Image{thmb, orig}, p1 + p2 + 3) + }, + 'V' => { + (MediaInstance::Video, 1) + } + _ => panic!("Invalid media type") + } + } +} + +#[derive(Debug)] +pub struct Post { + pub content: String, + pub date: i64, + pub media: Vec, + pub tags: Vec +} + +pub fn test_print_post(p: &Post) { + println!("{}", p.content); + println!("{}", p.date); + for m in &p.media { + match m { + MediaInstance::Image{thmb, orig} => println!("Image {} {}", thmb, orig), + MediaInstance::Video => println!("Video") + } + } + for t in &p.tags { + println!("{}", t); + } +} + +impl Post { + fn consume(buf: &[u8]) -> (Post, usize) { + let (content, p) = consume_str(buf); + let mut l = p + 1; + let b = &buf[p + 1..]; + let date = i64::from_le_bytes(b[..8].try_into().unwrap()); + l += 8; + let b = &b[8..]; + let mut media = vec![]; + let mut tags = vec![]; + let nmedia = b[0]; + let mut b = &b[1..]; + l += 1; + for _ in 0..nmedia { + let (m, ml) = MediaInstance::consume(b); + media.push(m); + b = &b[ml..]; + l += ml; + } + let ntags = b[0]; + let mut b = &b[1..]; + l += 1; + for _ in 0..ntags { + let (t, p) = consume_str(b); + tags.push(t); + b = &b[p + 1..]; + l+= p + 1; + } + (Post{content, date, media, tags}, l) + } +} + +pub struct Monolith { + filen: String, + idxfn: String, + pdxfn: String, + post_ranges: Vec<(i64, usize, usize)>, +} + +impl Monolith { + pub fn new(filen: String) -> Monolith { + let idxfn = filen.clone() + ".idx"; + let pdxfn = filen.clone() + ".pdx"; + Monolith { + filen, + idxfn, + pdxfn, + post_ranges: vec![] + } + } + + pub fn load_index(&mut self) { + let mut f = File::open(&self.idxfn).unwrap(); + let mut last_r: u64 = 0; + self.post_ranges.clear(); + loop { + let mut buf: [u8; 16] = [0; 16]; + match f.read_exact(&mut buf) { + Ok(_) => (), + Err(e) => match e.kind() { + std::io::ErrorKind::UnexpectedEof => break, + _ => panic!("unexpected error {}", e) + } + } + let t = i64::from_le_bytes(buf[..8].try_into().unwrap()); + let l = last_r; + let r = u64::from_le_bytes(buf[8..].try_into().unwrap()); + self.post_ranges.push((t, l as usize, r as usize)); + last_r = r; + } + } + + fn find_post(&self, date: i64) -> Option<(usize, usize)> { + if let Ok(p) = self.post_ranges.binary_search_by(|p| p.0.cmp(&date)) { + let (_, l, r) = self.post_ranges[p]; + Some((l, r)) + } else { None } + } + + fn find_post_2(&self, date: i64) -> Option<(usize, usize)> { + let f = File::open(&self.idxfn).unwrap(); + let map = unsafe { Mmap::map(&f) }.unwrap(); + let len = map.len(); + let nposts = len / 16; + let mut l = 0; + let mut r = nposts; + let mut s = r; + let mut postidx = None; + while l < r { + let m = l + s / 2; + let b = &map[m * 16..]; + let cdate = i64::from_le_bytes(b[..8].try_into().unwrap()); + l = if cdate < date { m + 1 } else { l }; + r = if cdate > date { m } else { r }; + s = r - l; + if cdate == date { + postidx = Some(m); + break; + } + } + if let Some(postidx) = postidx { + let b = &map[postidx * 16 + 8..]; + let r = u64::from_le_bytes(b[..8].try_into().unwrap()); + let l = if postidx == 0 { + 0 + } else { + let b = &map[postidx * 16 - 8..]; + u64::from_le_bytes(b[..8].try_into().unwrap()) + }; + Some((l as usize, r as usize)) + } else { None } + } + + fn get_page_range(&self, page: usize) -> Option<(usize, usize)> { + let f = File::open(&self.pdxfn).unwrap(); + let map = unsafe { Mmap::map(&f) }.unwrap(); + let len = map.len(); + let npages = len / 8; + if page >= npages { + return None; + } + let b = &map[page * 8..]; + let r = u64::from_le_bytes(b[..8].try_into().unwrap()); + let l = if page == npages - 1 { + 0 + } else { + let b = &map[(page + 1) * 8..]; + u64::from_le_bytes(b[..8].try_into().unwrap()) + }; + Some((l as usize, r as usize)) + } + + fn get_posts_in_range(&self, l: usize, r: usize) -> Vec { + let f = File::open(&self.filen).unwrap(); + let map = unsafe { Mmap::map(&f) }.unwrap(); + let mut remaining = &map[l..r]; + let mut ret = vec![]; + loop { + if remaining.len() == 0 { + break; + } + let (post, len) = Post::consume(remaining); + ret.push(post); + remaining = &remaining[len + 1..]; + } + ret + } + + pub fn get_all_dates(&self) -> Vec { + self.post_ranges.iter().map(|x| x.0).collect() + } + + pub fn get_post(&self, date: i64) -> Option { + if let Some((l, r)) = self.find_post(date) { + let f = File::open(&self.filen).unwrap(); + let map = unsafe { Mmap::map(&f) }.unwrap(); + Some(Post::consume(&map[l..r]).0) + } else { None } + } + + pub fn get_post_2(&self, date: i64) -> Option { + if let Some((l, r)) = self.find_post_2(date) { + let f = File::open(&self.filen).unwrap(); + let map = unsafe { Mmap::map(&f) }.unwrap(); + Some(Post::consume(&map[l..r]).0) + } else { None } + } + + pub fn get_page_posts(&self, page: usize) -> Option> { + if let Some((l, r)) = self.get_page_range(page) { + Some(self.get_posts_in_range(l, r)) + } else { + None + } + } + + pub fn get_page_count(&self) -> usize { + let m = std::fs::metadata(&self.pdxfn).unwrap(); + (m.len() / 8) as usize + } +} -- cgit v1.2.3