aboutsummaryrefslogblamecommitdiff
path: root/backend/src/monolith.rs
blob: a471138c1d8e767d2f86939a3276d93c8e711fc3 (plain) (tree)













































































































































































































































                                                                                    
use std::fs::File;
use std::vec::Vec;
use std::io::Read;
use memmap::Mmap;

/// Reads a NUL-terminated string from the front of `buf`.
///
/// Returns the decoded text together with the index of the terminating NUL
/// byte, i.e. the length of the string in bytes *excluding* the terminator.
/// Byte sequences that are not valid UTF-8 are replaced with U+FFFD.
///
/// # Panics
///
/// Panics if `buf` contains no NUL byte.
fn consume_str(buf: &[u8]) -> (String, usize) {
    let terminator = buf.iter().position(|&byte| byte == 0).unwrap();
    let text = String::from_utf8_lossy(&buf[..terminator]).into_owned();
    (text, terminator)
}

/// A media attachment carried by a post.
#[derive(Debug)]
pub enum MediaInstance {
    /// An image described by two strings.
    ///
    /// NOTE(review): `thmb` and `orig` presumably name the thumbnail and the
    /// original image; confirm against whatever writes the data file.
    Image { thmb: String, orig: String },
    /// A video; carries no payload.
    Video,
}

impl MediaInstance {
    /// Decodes one media record from the front of `buf`.
    ///
    /// The first byte selects the variant: `I` is followed by two
    /// NUL-terminated strings (`thmb`, then `orig`), while `V` has no payload.
    /// Returns the decoded record and the number of bytes it occupied.
    ///
    /// # Panics
    ///
    /// Panics if `buf` is empty, if the tag byte is neither `I` nor `V`, or if
    /// an image record is missing one of its NUL terminators.
    fn consume(buf: &[u8]) -> (MediaInstance, usize) {
        match buf[0] {
            b'I' => {
                let payload = &buf[1..];
                let (thmb, thmb_len) = consume_str(payload);
                let (orig, orig_len) = consume_str(&payload[thmb_len + 1..]);
                // Tag byte, then each string followed by its NUL terminator.
                let consumed = 1 + (thmb_len + 1) + (orig_len + 1);
                (MediaInstance::Image { thmb, orig }, consumed)
            }
            b'V' => (MediaInstance::Video, 1),
            _ => panic!("Invalid media type"),
        }
    }
}

/// A single post as stored in the data file.
#[derive(Debug)]
pub struct Post {
    /// Text body of the post.
    pub content: String,
    /// 64-bit signed value stored with the post and used as its lookup key.
    ///
    /// NOTE(review): presumably a Unix timestamp; the unit is not visible here.
    pub date: i64,
    /// Media attachments, in the order they were stored.
    pub media: Vec<MediaInstance>,
    /// Tags, in the order they were stored.
    pub tags: Vec<String>,
}

/// Debug helper: dumps a post to stdout, one item per line.
///
/// Prints the content, then the date, then one line per media attachment
/// (`Image <thmb> <orig>` or `Video`), then one line per tag.
pub fn test_print_post(p: &Post) {
    println!("{}", p.content);
    println!("{}", p.date);
    for item in p.media.iter() {
        let line = match item {
            MediaInstance::Image { thmb, orig } => format!("Image {} {}", thmb, orig),
            MediaInstance::Video => String::from("Video"),
        };
        println!("{}", line);
    }
    for tag in p.tags.iter() {
        println!("{}", tag);
    }
}

impl Post {
    /// Decodes one post from the front of `buf`.
    ///
    /// Layout, in order:
    /// 1. content: NUL-terminated string
    /// 2. date: 8 bytes, little-endian `i64`
    /// 3. media count: 1 byte, followed by that many media records
    /// 4. tag count: 1 byte, followed by that many NUL-terminated strings
    ///
    /// Returns the decoded post and the number of bytes it occupied.
    ///
    /// # Panics
    ///
    /// Panics if `buf` ends before the post is complete or contains an
    /// unknown media tag.
    fn consume(buf: &[u8]) -> (Post, usize) {
        let (content, content_len) = consume_str(buf);
        // Step over the content and its NUL terminator.
        let rest = &buf[content_len + 1..];

        let date = i64::from_le_bytes(rest[..8].try_into().unwrap());
        let rest = &rest[8..];

        let media_count = rest[0];
        let mut rest = &rest[1..];
        let mut media = vec![];
        for _ in 0..media_count {
            let (item, item_len) = MediaInstance::consume(rest);
            media.push(item);
            rest = &rest[item_len..];
        }

        let tag_count = rest[0];
        rest = &rest[1..];
        let mut tags = vec![];
        for _ in 0..tag_count {
            let (tag, tag_len) = consume_str(rest);
            tags.push(tag);
            rest = &rest[tag_len + 1..];
        }

        // `rest` is always a suffix of `buf`, so the difference in lengths is
        // exactly the number of bytes decoded above.
        let consumed = buf.len() - rest.len();
        (Post { content, date, media, tags }, consumed)
    }
}

/// Handle to a post data file and its two companion index files.
pub struct Monolith {
    /// Path of the data file that posts are decoded from.
    filen: String,
    /// Path of the post index: `filen` with `.idx` appended.
    idxfn: String,
    /// Path of the page index: `filen` with `.pdx` appended.
    pdxfn: String,
    /// In-memory copy of the post index as `(date, start, end)` triples, where
    /// `start..end` is the post's byte range in the data file. Empty until
    /// `load_index` has been called.
    post_ranges: Vec<(i64, usize, usize)>,
}

impl Monolith {
    /// Size in bytes of one `.idx` record: an `i64` date followed by a `u64`
    /// end offset, both little-endian.
    const IDX_RECORD_LEN: usize = 16;
    /// Size in bytes of one `.pdx` record: a single little-endian `u64` offset.
    const PDX_RECORD_LEN: usize = 8;

    /// Creates a handle for the data file `filen`.
    ///
    /// The index paths are derived by appending `.idx` and `.pdx`. No file is
    /// opened here, and the in-memory index stays empty until
    /// [`Monolith::load_index`] is called.
    pub fn new(filen: String) -> Monolith {
        let idxfn = format!("{}.idx", filen);
        let pdxfn = format!("{}.pdx", filen);
        Monolith {
            filen,
            idxfn,
            pdxfn,
            post_ranges: Vec::new(),
        }
    }

    /// Reads a little-endian `u64` located at `offset` in `bytes`.
    fn u64_at(bytes: &[u8], offset: usize) -> u64 {
        u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap())
    }

    /// Reads a little-endian `i64` located at `offset` in `bytes`.
    fn i64_at(bytes: &[u8], offset: usize) -> i64 {
        i64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap())
    }

    /// Replaces the in-memory post index with the contents of the `.idx` file.
    ///
    /// Each record holds a date and the end offset of that post in the data
    /// file; a post's start offset is the previous record's end offset (0 for
    /// the first record). A trailing partial record is ignored.
    ///
    /// # Panics
    ///
    /// Panics if the index file cannot be opened or a read fails for any
    /// reason other than reaching end of file.
    pub fn load_index(&mut self) {
        // Buffered so that reading N records does not cost N read syscalls.
        let mut reader = std::io::BufReader::new(File::open(&self.idxfn).unwrap());
        let mut prev_end: u64 = 0;
        self.post_ranges.clear();
        let mut record = [0u8; Self::IDX_RECORD_LEN];
        loop {
            match reader.read_exact(&mut record) {
                Ok(()) => {}
                Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => break,
                Err(e) => panic!("unexpected error {}", e),
            }
            let date = Self::i64_at(&record, 0);
            let end = Self::u64_at(&record, 8);
            self.post_ranges.push((date, prev_end as usize, end as usize));
            prev_end = end;
        }
    }

    /// Looks up `date` in the in-memory index and returns the post's
    /// `(start, end)` byte range in the data file.
    ///
    /// Uses a binary search, so the loaded index must be sorted by date.
    fn find_post(&self, date: i64) -> Option<(usize, usize)> {
        self.post_ranges
            .binary_search_by_key(&date, |&(d, _, _)| d)
            .ok()
            .map(|i| {
                let (_, start, end) = self.post_ranges[i];
                (start, end)
            })
    }

    /// Same lookup as [`Monolith::find_post`], but binary-searches the `.idx`
    /// file directly through a memory map instead of the in-memory index.
    ///
    /// Returns `None` when the index file is empty or has no record for `date`.
    ///
    /// # Panics
    ///
    /// Panics if the index file cannot be opened, inspected or mapped.
    fn find_post_2(&self, date: i64) -> Option<(usize, usize)> {
        let f = File::open(&self.idxfn).unwrap();
        // A zero-length file cannot be memory-mapped; an empty index simply
        // contains no posts.
        if f.metadata().unwrap().len() == 0 {
            return None;
        }
        // SAFETY: relies on the index file not being truncated or rewritten
        // while the mapping is alive; nothing in this code can enforce that.
        let map = unsafe { Mmap::map(&f) }.unwrap();
        let nposts = map.len() / Self::IDX_RECORD_LEN;

        let (mut lo, mut hi) = (0, nposts);
        let mut found = None;
        while lo < hi {
            let mid = lo + (hi - lo) / 2;
            let cdate = Self::i64_at(&map, mid * Self::IDX_RECORD_LEN);
            match cdate.cmp(&date) {
                std::cmp::Ordering::Less => lo = mid + 1,
                std::cmp::Ordering::Greater => hi = mid,
                std::cmp::Ordering::Equal => {
                    found = Some(mid);
                    break;
                }
            }
        }

        let idx = found?;
        let end = Self::u64_at(&map, idx * Self::IDX_RECORD_LEN + 8);
        // A post starts where the previous record's post ended.
        let start = if idx == 0 {
            0
        } else {
            Self::u64_at(&map, (idx - 1) * Self::IDX_RECORD_LEN + 8)
        };
        Some((start as usize, end as usize))
    }

    /// Returns the `(start, end)` byte range of page `page` in the data file,
    /// or `None` if `page` is out of range or the page index is empty.
    ///
    /// Record `page` of the `.pdx` file gives the end of the range; the start
    /// is taken from record `page + 1`, or is 0 for the final record.
    ///
    /// # Panics
    ///
    /// Panics if the page index cannot be opened, inspected or mapped.
    fn get_page_range(&self, page: usize) -> Option<(usize, usize)> {
        let f = File::open(&self.pdxfn).unwrap();
        // A zero-length file cannot be memory-mapped; an empty page index
        // simply contains no pages.
        if f.metadata().unwrap().len() == 0 {
            return None;
        }
        // SAFETY: relies on the page index not being truncated or rewritten
        // while the mapping is alive; nothing in this code can enforce that.
        let map = unsafe { Mmap::map(&f) }.unwrap();
        let npages = map.len() / Self::PDX_RECORD_LEN;
        if page >= npages {
            return None;
        }
        let end = Self::u64_at(&map, page * Self::PDX_RECORD_LEN);
        let start = if page + 1 == npages {
            0
        } else {
            Self::u64_at(&map, (page + 1) * Self::PDX_RECORD_LEN)
        };
        Some((start as usize, end as usize))
    }

    /// Decodes every post stored in bytes `l..r` of the data file, in file
    /// order.
    ///
    /// # Panics
    ///
    /// Panics if the data file cannot be opened or mapped, if `l..r` is not a
    /// valid range within it, or if the bytes do not decode as posts.
    fn get_posts_in_range(&self, l: usize, r: usize) -> Vec<Post> {
        let f = File::open(&self.filen).unwrap();
        // SAFETY: relies on the data file not being truncated or rewritten
        // while the mapping is alive; nothing in this code can enforce that.
        let map = unsafe { Mmap::map(&f) }.unwrap();
        let mut remaining = &map[l..r];
        let mut posts = Vec::new();
        while !remaining.is_empty() {
            let (post, len) = Post::consume(remaining);
            posts.push(post);
            // One byte beyond what `Post::consume` reports is skipped after
            // every post. NOTE(review): presumably a per-post terminator
            // written by the producer of the data file; confirm.
            remaining = &remaining[len + 1..];
        }
        posts
    }

    /// Returns the dates of all posts in the in-memory index, in index order.
    pub fn get_all_dates(&self) -> Vec<i64> {
        self.post_ranges.iter().map(|&(date, _, _)| date).collect()
    }

    /// Maps the data file and decodes the single post stored in bytes `l..r`.
    fn read_post(&self, l: usize, r: usize) -> Post {
        let f = File::open(&self.filen).unwrap();
        // SAFETY: relies on the data file not being truncated or rewritten
        // while the mapping is alive; nothing in this code can enforce that.
        let map = unsafe { Mmap::map(&f) }.unwrap();
        Post::consume(&map[l..r]).0
    }

    /// Returns the post dated `date`, located via the in-memory index.
    ///
    /// Returns `None` if the loaded index has no such date.
    pub fn get_post(&self, date: i64) -> Option<Post> {
        self.find_post(date).map(|(l, r)| self.read_post(l, r))
    }

    /// Returns the post dated `date`, located by searching the `.idx` file
    /// directly (does not require [`Monolith::load_index`]).
    pub fn get_post_2(&self, date: i64) -> Option<Post> {
        self.find_post_2(date).map(|(l, r)| self.read_post(l, r))
    }

    /// Returns all posts on page `page`, or `None` if there is no such page.
    pub fn get_page_posts(&self, page: usize) -> Option<Vec<Post>> {
        self.get_page_range(page)
            .map(|(l, r)| self.get_posts_in_range(l, r))
    }

    /// Returns the number of pages, i.e. the number of whole records in the
    /// `.pdx` file.
    ///
    /// # Panics
    ///
    /// Panics if the page index's metadata cannot be read.
    pub fn get_page_count(&self) -> usize {
        let meta = std::fs::metadata(&self.pdxfn).unwrap();
        (meta.len() / Self::PDX_RECORD_LEN as u64) as usize
    }
}