use std::fs::File;
use std::vec::Vec;
use std::io::Read;
use memmap::Mmap;
/// Reads a NUL-terminated string from the front of `buf`.
///
/// Returns the decoded text together with the index of the terminating
/// NUL byte (which equals the byte length of the text). The terminator
/// itself is not counted, so callers skip `len + 1` bytes to move past it.
/// Bytes that are not valid UTF-8 are replaced with U+FFFD.
///
/// # Panics
///
/// Panics if `buf` contains no NUL byte.
fn consume_str(buf: &[u8]) -> (String, usize) {
    let terminator = buf.iter().position(|&byte| byte == 0).unwrap();
    let text = String::from_utf8_lossy(&buf[..terminator]).into_owned();
    (text, terminator)
}
/// One media attachment of a post.
#[derive(Debug)]
pub enum MediaInstance {
    /// An image, stored as two strings.
    Image {
        /// NOTE(review): presumably the thumbnail location — confirm with the writer side.
        thmb: String,
        /// NOTE(review): presumably the full-size original location — confirm.
        orig: String,
    },
    /// A video; carries no payload.
    Video,
}
impl MediaInstance {
    /// Decodes one media record from the front of `buf`.
    ///
    /// The first byte selects the variant:
    /// * `I` — an image, followed by two NUL-terminated strings
    ///   (`thmb`, then `orig`);
    /// * `V` — a video, with no further bytes.
    ///
    /// Returns the decoded value and the total number of bytes it occupied,
    /// tag byte and NUL terminators included.
    ///
    /// # Panics
    ///
    /// Panics if `buf` is empty, if the tag byte is neither `I` nor `V`, or
    /// if an image string is missing its NUL terminator.
    fn consume(buf: &[u8]) -> (MediaInstance, usize) {
        match buf[0] {
            b'I' => {
                let (thmb, thmb_len) = consume_str(&buf[1..]);
                // Tag byte + first string + its terminator.
                let orig_start = 1 + thmb_len + 1;
                let (orig, orig_len) = consume_str(&buf[orig_start..]);
                let consumed = orig_start + orig_len + 1;
                (MediaInstance::Image { thmb, orig }, consumed)
            }
            b'V' => (MediaInstance::Video, 1),
            _ => panic!("Invalid media type"),
        }
    }
}
/// A single decoded post.
#[derive(Debug)]
pub struct Post {
    /// Text body of the post.
    pub content: String,
    /// Date of the post as a signed 64-bit integer; it is also the key the
    /// index is searched by. NOTE(review): unit/epoch not visible here — confirm.
    pub date: i64,
    /// Media attachments, in stored order.
    pub media: Vec<MediaInstance>,
    /// Tags, in stored order.
    pub tags: Vec<String>,
}
/// Dumps a post to standard output, one item per line.
///
/// Prints the content, then the date, then each media entry
/// (`Image <thmb> <orig>` or `Video`), then each tag.
pub fn test_print_post(p: &Post) {
    println!("{}", p.content);
    println!("{}", p.date);
    p.media.iter().for_each(|item| {
        if let MediaInstance::Image { thmb, orig } = item {
            println!("Image {} {}", thmb, orig);
        } else {
            println!("Video");
        }
    });
    p.tags.iter().for_each(|tag| println!("{}", tag));
}
impl Post {
    /// Decodes one post from the front of `buf`.
    ///
    /// Fields are read in this order:
    /// 1. content — NUL-terminated string;
    /// 2. date — 8 bytes, little-endian `i64`;
    /// 3. media count — one byte, followed by that many media records;
    /// 4. tag count — one byte, followed by that many NUL-terminated strings.
    ///
    /// Returns the post and the exact number of bytes it occupied.
    ///
    /// # Panics
    ///
    /// Panics if `buf` ends before the post is complete, if a string lacks
    /// its NUL terminator, or if a media record has an unknown type byte.
    fn consume(buf: &[u8]) -> (Post, usize) {
        let (content, content_len) = consume_str(buf);
        // `pos` is the absolute offset in `buf` of the next unread byte.
        let mut pos = content_len + 1;

        let date_bytes: [u8; 8] = buf[pos..][..8].try_into().unwrap();
        let date = i64::from_le_bytes(date_bytes);
        pos += 8;

        let media_count = buf[pos];
        pos += 1;
        let mut media = Vec::new();
        for _ in 0..media_count {
            let (item, used) = MediaInstance::consume(&buf[pos..]);
            media.push(item);
            pos += used;
        }

        let tag_count = buf[pos];
        pos += 1;
        let mut tags = Vec::new();
        for _ in 0..tag_count {
            let (tag, tag_len) = consume_str(&buf[pos..]);
            tags.push(tag);
            // Step over the tag text and its terminator.
            pos += tag_len + 1;
        }

        (
            Post {
                content,
                date,
                media,
                tags,
            },
            pos,
        )
    }
}
/// Reader over a post data file and its two companion index files.
pub struct Monolith {
    /// Path of the data file holding the serialized posts.
    filen: String,
    /// Path of the post index: `filen` with `.idx` appended.
    idxfn: String,
    /// Path of the page index: `filen` with `.pdx` appended.
    pdxfn: String,
    /// In-memory copy of the post index as `(date, start, end)` byte
    /// ranges into the data file; empty until `load_index` is called.
    post_ranges: Vec<(i64, usize, usize)>,
}
impl Monolith {
pub fn new(filen: String) -> Monolith {
let idxfn = filen.clone() + ".idx";
let pdxfn = filen.clone() + ".pdx";
Monolith {
filen,
idxfn,
pdxfn,
post_ranges: vec![]
}
}
pub fn load_index(&mut self) {
let mut f = File::open(&self.idxfn).unwrap();
let mut last_r: u64 = 0;
self.post_ranges.clear();
loop {
let mut buf: [u8; 16] = [0; 16];
match f.read_exact(&mut buf) {
Ok(_) => (),
Err(e) => match e.kind() {
std::io::ErrorKind::UnexpectedEof => break,
_ => panic!("unexpected error {}", e)
}
}
let t = i64::from_le_bytes(buf[..8].try_into().unwrap());
let l = last_r;
let r = u64::from_le_bytes(buf[8..].try_into().unwrap());
self.post_ranges.push((t, l as usize, r as usize));
last_r = r;
}
}
fn find_post(&self, date: i64) -> Option<(usize, usize)> {
if let Ok(p) = self.post_ranges.binary_search_by(|p| p.0.cmp(&date)) {
let (_, l, r) = self.post_ranges[p];
Some((l, r))
} else { None }
}
fn find_post_2(&self, date: i64) -> Option<(usize, usize)> {
let f = File::open(&self.idxfn).unwrap();
let map = unsafe { Mmap::map(&f) }.unwrap();
let len = map.len();
let nposts = len / 16;
let mut l = 0;
let mut r = nposts;
let mut s = r;
let mut postidx = None;
while l < r {
let m = l + s / 2;
let b = &map[m * 16..];
let cdate = i64::from_le_bytes(b[..8].try_into().unwrap());
l = if cdate < date { m + 1 } else { l };
r = if cdate > date { m } else { r };
s = r - l;
if cdate == date {
postidx = Some(m);
break;
}
}
if let Some(postidx) = postidx {
let b = &map[postidx * 16 + 8..];
let r = u64::from_le_bytes(b[..8].try_into().unwrap());
let l = if postidx == 0 {
0
} else {
let b = &map[postidx * 16 - 8..];
u64::from_le_bytes(b[..8].try_into().unwrap())
};
Some((l as usize, r as usize))
} else { None }
}
fn get_page_range(&self, page: usize) -> Option<(usize, usize)> {
let f = File::open(&self.pdxfn).unwrap();
let map = unsafe { Mmap::map(&f) }.unwrap();
let len = map.len();
let npages = len / 8;
if page >= npages {
return None;
}
let b = &map[page * 8..];
let r = u64::from_le_bytes(b[..8].try_into().unwrap());
let l = if page == npages - 1 {
0
} else {
let b = &map[(page + 1) * 8..];
u64::from_le_bytes(b[..8].try_into().unwrap())
};
Some((l as usize, r as usize))
}
fn get_posts_in_range(&self, l: usize, r: usize) -> Vec<Post> {
let f = File::open(&self.filen).unwrap();
let map = unsafe { Mmap::map(&f) }.unwrap();
let mut remaining = &map[l..r];
let mut ret = vec![];
loop {
if remaining.len() == 0 {
break;
}
let (post, len) = Post::consume(remaining);
ret.push(post);
remaining = &remaining[len + 1..];
}
ret
}
pub fn get_all_dates(&self) -> Vec<i64> {
self.post_ranges.iter().map(|x| x.0).collect()
}
pub fn get_post(&self, date: i64) -> Option<Post> {
if let Some((l, r)) = self.find_post(date) {
let f = File::open(&self.filen).unwrap();
let map = unsafe { Mmap::map(&f) }.unwrap();
Some(Post::consume(&map[l..r]).0)
} else { None }
}
pub fn get_post_2(&self, date: i64) -> Option<Post> {
if let Some((l, r)) = self.find_post_2(date) {
let f = File::open(&self.filen).unwrap();
let map = unsafe { Mmap::map(&f) }.unwrap();
Some(Post::consume(&map[l..r]).0)
} else { None }
}
pub fn get_page_posts(&self, page: usize) -> Option<Vec<Post>> {
if let Some((l, r)) = self.get_page_range(page) {
Some(self.get_posts_in_range(l, r))
} else {
None
}
}
pub fn get_page_count(&self) -> usize {
let m = std::fs::metadata(&self.pdxfn).unwrap();
(m.len() / 8) as usize
}
}