use std::fs::File; use std::vec::Vec; use std::io::Read; use memmap::Mmap; fn consume_str(buf: &[u8]) -> (String, usize) { let nulp = buf.iter().position(|x| *x == 0u8).unwrap(); let s = String::from_utf8_lossy(&buf[..nulp]); (s.to_string(), nulp) } #[derive(Debug)] pub enum MediaInstance { Image {thmb: String, orig: String}, Video } impl MediaInstance { fn consume(buf: &[u8]) -> (MediaInstance, usize) { match buf[0] as char { 'I' => { let b = &buf[1..]; let (thmb, p1) = consume_str(b); let b = &b[p1 + 1..]; let (orig, p2) = consume_str(b); (MediaInstance::Image{thmb, orig}, p1 + p2 + 3) }, 'V' => { (MediaInstance::Video, 1) } _ => panic!("Invalid media type") } } } #[derive(Debug)] pub struct Post { pub content: String, pub date: i64, pub media: Vec, pub tags: Vec } pub fn test_print_post(p: &Post) { println!("{}", p.content); println!("{}", p.date); for m in &p.media { match m { MediaInstance::Image{thmb, orig} => println!("Image {} {}", thmb, orig), MediaInstance::Video => println!("Video") } } for t in &p.tags { println!("{}", t); } } impl Post { fn consume(buf: &[u8]) -> (Post, usize) { let (content, p) = consume_str(buf); let mut l = p + 1; let b = &buf[p + 1..]; let date = i64::from_le_bytes(b[..8].try_into().unwrap()); l += 8; let b = &b[8..]; let mut media = vec![]; let mut tags = vec![]; let nmedia = b[0]; let mut b = &b[1..]; l += 1; for _ in 0..nmedia { let (m, ml) = MediaInstance::consume(b); media.push(m); b = &b[ml..]; l += ml; } let ntags = b[0]; let mut b = &b[1..]; l += 1; for _ in 0..ntags { let (t, p) = consume_str(b); tags.push(t); b = &b[p + 1..]; l+= p + 1; } (Post{content, date, media, tags}, l) } } pub struct Monolith { filen: String, idxfn: String, pdxfn: String, post_ranges: Vec<(i64, usize, usize)>, } impl Monolith { pub fn new(filen: String) -> Monolith { let idxfn = filen.clone() + ".idx"; let pdxfn = filen.clone() + ".pdx"; Monolith { filen, idxfn, pdxfn, post_ranges: vec![] } } pub fn load_index(&mut self) { let mut f = File::open(&self.idxfn).unwrap(); let mut last_r: u64 = 0; self.post_ranges.clear(); loop { let mut buf: [u8; 16] = [0; 16]; match f.read_exact(&mut buf) { Ok(_) => (), Err(e) => match e.kind() { std::io::ErrorKind::UnexpectedEof => break, _ => panic!("unexpected error {}", e) } } let t = i64::from_le_bytes(buf[..8].try_into().unwrap()); let l = last_r; let r = u64::from_le_bytes(buf[8..].try_into().unwrap()); self.post_ranges.push((t, l as usize, r as usize)); last_r = r; } } fn find_post(&self, date: i64) -> Option<(usize, usize)> { if let Ok(p) = self.post_ranges.binary_search_by(|p| p.0.cmp(&date)) { let (_, l, r) = self.post_ranges[p]; Some((l, r)) } else { None } } fn find_post_2(&self, date: i64) -> Option<(usize, usize)> { let f = File::open(&self.idxfn).unwrap(); let map = unsafe { Mmap::map(&f) }.unwrap(); let len = map.len(); let nposts = len / 16; let mut l = 0; let mut r = nposts; let mut s = r; let mut postidx = None; while l < r { let m = l + s / 2; let b = &map[m * 16..]; let cdate = i64::from_le_bytes(b[..8].try_into().unwrap()); l = if cdate < date { m + 1 } else { l }; r = if cdate > date { m } else { r }; s = r - l; if cdate == date { postidx = Some(m); break; } } if let Some(postidx) = postidx { let b = &map[postidx * 16 + 8..]; let r = u64::from_le_bytes(b[..8].try_into().unwrap()); let l = if postidx == 0 { 0 } else { let b = &map[postidx * 16 - 8..]; u64::from_le_bytes(b[..8].try_into().unwrap()) }; Some((l as usize, r as usize)) } else { None } } fn get_page_range(&self, page: usize) -> Option<(usize, usize)> { let f = File::open(&self.pdxfn).unwrap(); let map = unsafe { Mmap::map(&f) }.unwrap(); let len = map.len(); let npages = len / 8; if page >= npages { return None; } let b = &map[page * 8..]; let r = u64::from_le_bytes(b[..8].try_into().unwrap()); let l = if page == npages - 1 { 0 } else { let b = &map[(page + 1) * 8..]; u64::from_le_bytes(b[..8].try_into().unwrap()) }; Some((l as usize, r as usize)) } fn get_posts_in_range(&self, l: usize, r: usize) -> Vec { let f = File::open(&self.filen).unwrap(); let map = unsafe { Mmap::map(&f) }.unwrap(); let mut remaining = &map[l..r]; let mut ret = vec![]; loop { if remaining.len() == 0 { break; } let (post, len) = Post::consume(remaining); ret.push(post); remaining = &remaining[len + 1..]; } ret } pub fn get_all_dates(&self) -> Vec { self.post_ranges.iter().map(|x| x.0).collect() } pub fn get_post(&self, date: i64) -> Option { if let Some((l, r)) = self.find_post(date) { let f = File::open(&self.filen).unwrap(); let map = unsafe { Mmap::map(&f) }.unwrap(); Some(Post::consume(&map[l..r]).0) } else { None } } pub fn get_post_2(&self, date: i64) -> Option { if let Some((l, r)) = self.find_post_2(date) { let f = File::open(&self.filen).unwrap(); let map = unsafe { Mmap::map(&f) }.unwrap(); Some(Post::consume(&map[l..r]).0) } else { None } } pub fn get_page_posts(&self, page: usize) -> Option> { if let Some((l, r)) = self.get_page_range(page) { Some(self.get_posts_in_range(l, r)) } else { None } } pub fn get_page_count(&self) -> usize { let m = std::fs::metadata(&self.pdxfn).unwrap(); (m.len() / 8) as usize } }