aboutsummaryrefslogtreecommitdiff
path: root/backend/src/monolith.rs
diff options
context:
space:
mode:
authorGravatar Chris Xiong <chirs241097@gmail.com> 2024-07-24 23:40:11 -0400
committerGravatar Chris Xiong <chirs241097@gmail.com> 2024-07-24 23:40:11 -0400
commitb736068ee7b82e05c2ede8bc48ace7ffa4709e29 (patch)
treeeaa3cce9fdd9973043a7a55613584f90f6598a20 /backend/src/monolith.rs
downloadnotekins-b736068ee7b82e05c2ede8bc48ace7ffa4709e29.tar.xz
Initial commit.
Diffstat (limited to 'backend/src/monolith.rs')
-rw-r--r--backend/src/monolith.rs238
1 files changed, 238 insertions, 0 deletions
diff --git a/backend/src/monolith.rs b/backend/src/monolith.rs
new file mode 100644
index 0000000..a471138
--- /dev/null
+++ b/backend/src/monolith.rs
@@ -0,0 +1,238 @@
+use std::fs::File;
+use std::vec::Vec;
+use std::io::Read;
+use memmap::Mmap;
+
/// Reads a NUL-terminated string from the front of `buf`.
///
/// Returns the decoded text together with the number of bytes that precede
/// the terminating zero byte (the terminator itself is not counted, so callers
/// skip `len + 1` bytes to move past the string). Byte sequences that are not
/// valid UTF-8 are replaced with U+FFFD.
///
/// # Panics
///
/// Panics if `buf` contains no zero byte.
fn consume_str(buf: &[u8]) -> (String, usize) {
    let terminator = buf.iter().position(|&byte| byte == 0u8).unwrap();
    let text = String::from_utf8_lossy(&buf[..terminator]).into_owned();
    (text, terminator)
}
+
/// A single media attachment of a post.
#[derive(Debug)]
pub enum MediaInstance {
    /// An image, described by two strings: a thumbnail and the original.
    Image {
        /// Thumbnail string as stored in the monolith.
        thmb: String,
        /// Original-image string as stored in the monolith.
        orig: String,
    },
    /// A video; this variant carries no further data.
    Video,
}
+
+impl MediaInstance {
+ fn consume(buf: &[u8]) -> (MediaInstance, usize) {
+ match buf[0] as char {
+ 'I' => {
+ let b = &buf[1..];
+ let (thmb, p1) = consume_str(b);
+ let b = &b[p1 + 1..];
+ let (orig, p2) = consume_str(b);
+ (MediaInstance::Image{thmb, orig}, p1 + p2 + 3)
+ },
+ 'V' => {
+ (MediaInstance::Video, 1)
+ }
+ _ => panic!("Invalid media type")
+ }
+ }
+}
+
+#[derive(Debug)]
+pub struct Post {
+ pub content: String,
+ pub date: i64,
+ pub media: Vec<MediaInstance>,
+ pub tags: Vec<String>
+}
+
+pub fn test_print_post(p: &Post) {
+ println!("{}", p.content);
+ println!("{}", p.date);
+ for m in &p.media {
+ match m {
+ MediaInstance::Image{thmb, orig} => println!("Image {} {}", thmb, orig),
+ MediaInstance::Video => println!("Video")
+ }
+ }
+ for t in &p.tags {
+ println!("{}", t);
+ }
+}
+
+impl Post {
+ fn consume(buf: &[u8]) -> (Post, usize) {
+ let (content, p) = consume_str(buf);
+ let mut l = p + 1;
+ let b = &buf[p + 1..];
+ let date = i64::from_le_bytes(b[..8].try_into().unwrap());
+ l += 8;
+ let b = &b[8..];
+ let mut media = vec![];
+ let mut tags = vec![];
+ let nmedia = b[0];
+ let mut b = &b[1..];
+ l += 1;
+ for _ in 0..nmedia {
+ let (m, ml) = MediaInstance::consume(b);
+ media.push(m);
+ b = &b[ml..];
+ l += ml;
+ }
+ let ntags = b[0];
+ let mut b = &b[1..];
+ l += 1;
+ for _ in 0..ntags {
+ let (t, p) = consume_str(b);
+ tags.push(t);
+ b = &b[p + 1..];
+ l+= p + 1;
+ }
+ (Post{content, date, media, tags}, l)
+ }
+}
+
/// Read-only accessor for a post archive ("monolith") and its two side files.
pub struct Monolith {
    /// Path of the data file holding the serialized posts.
    filen: String,
    /// Path of the per-post index file (`filen` with `.idx` appended).
    idxfn: String,
    /// Path of the page index file (`filen` with `.pdx` appended).
    pdxfn: String,
    /// In-memory copy of the post index as `(date, start, end)` byte ranges
    /// into the data file; filled by `load_index`.
    post_ranges: Vec<(i64, usize, usize)>,
}
+
+impl Monolith {
+ pub fn new(filen: String) -> Monolith {
+ let idxfn = filen.clone() + ".idx";
+ let pdxfn = filen.clone() + ".pdx";
+ Monolith {
+ filen,
+ idxfn,
+ pdxfn,
+ post_ranges: vec![]
+ }
+ }
+
+ pub fn load_index(&mut self) {
+ let mut f = File::open(&self.idxfn).unwrap();
+ let mut last_r: u64 = 0;
+ self.post_ranges.clear();
+ loop {
+ let mut buf: [u8; 16] = [0; 16];
+ match f.read_exact(&mut buf) {
+ Ok(_) => (),
+ Err(e) => match e.kind() {
+ std::io::ErrorKind::UnexpectedEof => break,
+ _ => panic!("unexpected error {}", e)
+ }
+ }
+ let t = i64::from_le_bytes(buf[..8].try_into().unwrap());
+ let l = last_r;
+ let r = u64::from_le_bytes(buf[8..].try_into().unwrap());
+ self.post_ranges.push((t, l as usize, r as usize));
+ last_r = r;
+ }
+ }
+
+ fn find_post(&self, date: i64) -> Option<(usize, usize)> {
+ if let Ok(p) = self.post_ranges.binary_search_by(|p| p.0.cmp(&date)) {
+ let (_, l, r) = self.post_ranges[p];
+ Some((l, r))
+ } else { None }
+ }
+
+ fn find_post_2(&self, date: i64) -> Option<(usize, usize)> {
+ let f = File::open(&self.idxfn).unwrap();
+ let map = unsafe { Mmap::map(&f) }.unwrap();
+ let len = map.len();
+ let nposts = len / 16;
+ let mut l = 0;
+ let mut r = nposts;
+ let mut s = r;
+ let mut postidx = None;
+ while l < r {
+ let m = l + s / 2;
+ let b = &map[m * 16..];
+ let cdate = i64::from_le_bytes(b[..8].try_into().unwrap());
+ l = if cdate < date { m + 1 } else { l };
+ r = if cdate > date { m } else { r };
+ s = r - l;
+ if cdate == date {
+ postidx = Some(m);
+ break;
+ }
+ }
+ if let Some(postidx) = postidx {
+ let b = &map[postidx * 16 + 8..];
+ let r = u64::from_le_bytes(b[..8].try_into().unwrap());
+ let l = if postidx == 0 {
+ 0
+ } else {
+ let b = &map[postidx * 16 - 8..];
+ u64::from_le_bytes(b[..8].try_into().unwrap())
+ };
+ Some((l as usize, r as usize))
+ } else { None }
+ }
+
+ fn get_page_range(&self, page: usize) -> Option<(usize, usize)> {
+ let f = File::open(&self.pdxfn).unwrap();
+ let map = unsafe { Mmap::map(&f) }.unwrap();
+ let len = map.len();
+ let npages = len / 8;
+ if page >= npages {
+ return None;
+ }
+ let b = &map[page * 8..];
+ let r = u64::from_le_bytes(b[..8].try_into().unwrap());
+ let l = if page == npages - 1 {
+ 0
+ } else {
+ let b = &map[(page + 1) * 8..];
+ u64::from_le_bytes(b[..8].try_into().unwrap())
+ };
+ Some((l as usize, r as usize))
+ }
+
+ fn get_posts_in_range(&self, l: usize, r: usize) -> Vec<Post> {
+ let f = File::open(&self.filen).unwrap();
+ let map = unsafe { Mmap::map(&f) }.unwrap();
+ let mut remaining = &map[l..r];
+ let mut ret = vec![];
+ loop {
+ if remaining.len() == 0 {
+ break;
+ }
+ let (post, len) = Post::consume(remaining);
+ ret.push(post);
+ remaining = &remaining[len + 1..];
+ }
+ ret
+ }
+
+ pub fn get_all_dates(&self) -> Vec<i64> {
+ self.post_ranges.iter().map(|x| x.0).collect()
+ }
+
+ pub fn get_post(&self, date: i64) -> Option<Post> {
+ if let Some((l, r)) = self.find_post(date) {
+ let f = File::open(&self.filen).unwrap();
+ let map = unsafe { Mmap::map(&f) }.unwrap();
+ Some(Post::consume(&map[l..r]).0)
+ } else { None }
+ }
+
+ pub fn get_post_2(&self, date: i64) -> Option<Post> {
+ if let Some((l, r)) = self.find_post_2(date) {
+ let f = File::open(&self.filen).unwrap();
+ let map = unsafe { Mmap::map(&f) }.unwrap();
+ Some(Post::consume(&map[l..r]).0)
+ } else { None }
+ }
+
+ pub fn get_page_posts(&self, page: usize) -> Option<Vec<Post>> {
+ if let Some((l, r)) = self.get_page_range(page) {
+ Some(self.get_posts_in_range(l, r))
+ } else {
+ None
+ }
+ }
+
+ pub fn get_page_count(&self) -> usize {
+ let m = std::fs::metadata(&self.pdxfn).unwrap();
+ (m.len() / 8) as usize
+ }
+}