From b736068ee7b82e05c2ede8bc48ace7ffa4709e29 Mon Sep 17 00:00:00 2001
From: Chris Xiong
Date: Wed, 24 Jul 2024 23:40:11 -0400
Subject: Initial commit.

---
 .gitignore                        |   8 +
 README.rst                        |  52 ++++++
 backend/Cargo.toml                |  13 ++
 backend/src/config.rs             |  82 +++++++++
 backend/src/main.rs               | 160 ++++++++++++++++++
 backend/src/monolith.rs           | 238 ++++++++++++++++++++++++++
 backend/src/render.rs             | 346 ++++++++++++++++++++++++++++++++++++++
 template/error.template           |  11 ++
 template/media-container.template |   3 +
 template/media-image.template     |   1 +
 template/page.template            |  77 +++++++++
 template/pager.template           |   5 +
 template/post.template            |   7 +
 utils/__init__.py                 |   0
 utils/atomgen.py                  |  51 ++++++
 utils/config.py                   |  64 +++++++
 utils/mistune_emote.py            |  21 +++
 utils/monolith.py                 | 285 +++++++++++++++++++++++++++
 utils/monolith_test.py            |  96 +++++++++++
 utils/notectl.py                  | 178 ++++++++++++++++++++
 utils/postutil.py                 | 155 +++++++++++++++++
 utils/rust_monolith_test.py       |  92 ++++++++++
 22 files changed, 1945 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.rst
 create mode 100644 backend/Cargo.toml
 create mode 100644 backend/src/config.rs
 create mode 100644 backend/src/main.rs
 create mode 100644 backend/src/monolith.rs
 create mode 100644 backend/src/render.rs
 create mode 100644 template/error.template
 create mode 100644 template/media-container.template
 create mode 100644 template/media-image.template
 create mode 100644 template/page.template
 create mode 100644 template/pager.template
 create mode 100644 template/post.template
 create mode 100644 utils/__init__.py
 create mode 100644 utils/atomgen.py
 create mode 100644 utils/config.py
 create mode 100644 utils/mistune_emote.py
 create mode 100644 utils/monolith.py
 create mode 100644 utils/monolith_test.py
 create mode 100644 utils/notectl.py
 create mode 100644 utils/postutil.py
 create mode 100644 utils/rust_monolith_test.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d1772f0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+intdoc/
+testsite/
+utils/__pycache__/
+backend/target/
+backend/Cargo.lock
+
+.directory
+*.swp
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..97c420e
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,52 @@
+Notekins
+========
+
+note
+    -kin (Germanic diminutive suffix), a short note.
+
+*A minimal micro-blogging system without the social media aspect.*
+
+Features
+--------
+
+* Post stuff of unlimited length using Markdown.
+* Emote support. Bring any emotes you wish to use with Notekins.
+* Media shelf. You may attach media files to your posts (only images
+  for now). Image thumbnails are generated automatically to save
+  bandwidth.
+* Automatic Atom feed generation.
+* Import archived tweets from Twitter with one simple command (not yet
+  implemented).
+* Default front-end is completely JavaScript-free.
+
+Notekins isn't for you if any of the following applies (these were
+ruled out as anti-features during the project planning phase):
+
+* You crave interaction from other Internet-dwellers. Notekins is
+  designed to be a strictly single-user system without any support for
+  viewer interaction. This includes replying, "liking", and the
+  collection of visitor analytics.
+* You need ActivityPub integration. This follows naturally from the
+  previous point.
+* You are capable of making multiple posts in a single second. Notekins
+  uses timestamps as unique identifiers for posts (see the sketch
+  below).
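+
+A post's identifier is simply its creation time as a Unix timestamp in
+seconds, which is also how the bundled utility derives it. A minimal
+sketch of the idea (``time.time_ns()`` is what ``utils/notectl.py``
+actually uses)::
+
+    import time
+    post_id = time.time_ns() // 10 ** 9  # seconds since the Unix epoch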
+
+Technical Information
+---------------------
+
+Notekins has two major components: a local utility and a server-side
+CGI executable. The local utility is written in Python; the CGI
+executable is written in Rust. Your posts are saved exactly as you
+typed them. They are also consolidated into a "monolith" file and two
+accompanying index files, which are what the CGI program reads.
+
+The local utility is responsible for managing your posts. It can also
+help you set up new instances of Notekins, publish your posts to your
+server, and perform a variety of maintenance tasks on the monolith
+file.
+
+Deploying Notekins
+------------------
+
+As Notekins is still at an early stage, the instructions here may
+become outdated at any time.
+
+(TODO)
diff --git a/backend/Cargo.toml b/backend/Cargo.toml
new file mode 100644
index 0000000..0c01af7
--- /dev/null
+++ b/backend/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "notekins_cgi"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+chrono = "0.4.38"
+memmap = "0.7.0"
+regex = "1.10.5"
+tzfile = "0.1.3"
+
+[profile.release]
+lto = true
diff --git a/backend/src/config.rs b/backend/src/config.rs
new file mode 100644
index 0000000..a1085ba
--- /dev/null
+++ b/backend/src/config.rs
@@ -0,0 +1,82 @@
+use std::collections::hash_map::HashMap;
+use std::fs::File;
+use std::io::Read;
+use std::fmt::{Display, Formatter, Error};
+
+#[derive(Clone)]
+pub enum ConfigValue {
+    Str(String),
+    UInt(usize)
+}
+
+impl Display for ConfigValue {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
+        match self {
+            ConfigValue::Str(s) => write!(f, "{}", s),
+            ConfigValue::UInt(u) => write!(f, "{}", u)
+        }
+    }
+}
+
+struct ConfigItem {
+    default_value: ConfigValue,
+    parse_func: Box<dyn Fn(String) -> Option<ConfigValue>>,
+}
+
+pub struct Config {
+    v: HashMap<String, ConfigValue>,
+}
+
+fn stripped_str(s: String) -> Option<ConfigValue> {
+    Some(ConfigValue::Str(String::from(s.trim())))
+}
+
+fn parse_usize(s: String) -> Option<ConfigValue> {
+    if let Ok(r) = usize::from_str_radix(&s, 10) {
+        Some(ConfigValue::UInt(r))
+    } else { None }
+}
+
+pub fn split_at_first<'a, 'b>(s: &'a str, sp: &'b str) -> (&'a str, &'a str) {
+    if let Some(p) = s.find(sp) {
+        (&s[..p], &s[p + sp.len()..])
+    } else { (s, "") }
+}
+
+impl Config {
+    fn items() -> HashMap<&'static str, ConfigItem> {
+        HashMap::from([
+            ("SERVED_DATA_ROOT", ConfigItem{default_value: ConfigValue::Str(String::from("")), parse_func: Box::new(stripped_str)}),
+            ("POSTS_PER_PAGE", ConfigItem{default_value: ConfigValue::UInt(20), parse_func: Box::new(parse_usize)}),
+            ("DISPLAY_TIMEZONE", ConfigItem{default_value: ConfigValue::Str(String::from("UTC")), parse_func: Box::new(stripped_str)}),
+            ("VERSION_STRING", ConfigItem{default_value: ConfigValue::Str(String::from("1.0")), parse_func: Box::new(stripped_str)}),
+        ])
+    }
+    pub fn parse_config(filename: &str) -> Config {
+        let mut v = HashMap::from_iter(
+            Config::items().iter()
+                .map(|(k, v)| {(String::from(*k), v.default_value.clone())})
+        );
+        let mut f = File::open(filename).unwrap();
+        let mut s = String::new();
+        if let Ok(_) = f.read_to_string(&mut s) {
+            for l in s.split("\n") {
+                let (sname, svalue) = split_at_first(l, "=");
+                if let Some(ConfigItem{default_value: _, parse_func}) = Config::items().get(sname) {
+                    if let Some(value) = parse_func(String::from(svalue)) {
+                        v.insert(String::from(sname), value);
+                    }
+                }
+            }
+        }
+        Config { v }
+    }
+    pub fn get(&self, key: &str) -> ConfigValue {
+        self.v.get(&String::from(key)).unwrap().clone()
+    }
+    pub fn get_str(&self, key: &str) -> String {
+        if let ConfigValue::Str(s) = self.get(key) {
+            s
+        } else { String::from("") }
+    }
+}
diff --git a/backend/src/main.rs b/backend/src/main.rs
new file mode 100644
index 0000000..0474f89
--- /dev/null
+++ b/backend/src/main.rs
@@ -0,0 +1,160 @@
+mod config;
+mod monolith;
+mod render;
+
+use std::env;
+use std::collections::hash_map::{HashMap, RandomState};
+
+enum CGIStatusCode {
+    S200,
+    C400,
+    C404,
+    C405
+}
+
+type CGIHTTPHeaders = Vec<(String, String)>;
+
+struct CGIResponse {
+    status: CGIStatusCode,
+    headers: CGIHTTPHeaders,
+    body: String
+}
+
+fn cgi_handle_request(conf: &config::Config) -> Result<CGIResponse, CGIResponse> {
+    let headers = vec![
+        (String::from("Allow"), String::from("GET")),
+        (String::from("Content-type"), String::from("text/html"))
+    ];
+    let mkerr = |status| CGIResponse {
+        status,
+        headers: headers.clone(),
+        body: String::from("")
+    };
+    let request_method = env::var("REQUEST_METHOD").map_err(|_| mkerr(CGIStatusCode::C400))?;
+    if request_method != "GET" {
+        return Err(mkerr(CGIStatusCode::C405));
+    }
+    let query: HashMap<_, _, RandomState> =
+        if let Ok(query_string) = env::var("QUERY_STRING") {
+            if query_string.len() > 0 {
+                HashMap::from_iter(query_string.split('&').map(|qi| {
+                    let (k, v) = config::split_at_first(qi, "=");
+                    (String::from(k), String::from(v))
+                }))
+            } else { HashMap::from([(String::from("page"), String::from("1"))]) }
+        } else { HashMap::from([(String::from("page"), String::from("1"))]) };
+    let m = monolith::Monolith::new(String::from("posts.monolith"));
+    if let Some(ps) = query.get("page") {
+        let p = usize::from_str_radix(&ps, 10).map_err(|_| mkerr(CGIStatusCode::C400))?.checked_sub(1).ok_or(mkerr(CGIStatusCode::C404))?;
+        let ps = m.get_page_posts(p).ok_or(mkerr(CGIStatusCode::C404))?;
+        let r = render::Renderer::load("./template");
+        return Ok(CGIResponse {
+            status: CGIStatusCode::S200,
+            headers,
+            body: r.render_page(ps, p, m.get_page_count(), conf)
+        });
+    } else if let Some(ds) = query.get("post") {
+        let d = i64::from_str_radix(&ds, 10).map_err(|_| mkerr(CGIStatusCode::C400))?;
+        let p = m.get_post_2(d).ok_or(mkerr(CGIStatusCode::C404))?;
+        let r = render::Renderer::load("./template");
+        return
Ok(CGIResponse { + status: CGIStatusCode::S200, + headers, + body: r.render_single_post(p, conf) + }); + } + Err(mkerr(CGIStatusCode::C400)) +} + +fn cgimain(conf: config::Config) -> Result<(), &'static str> { + let r = match cgi_handle_request(&conf) { + Ok(r) => r, + Err(r) => r + }; + let (status, status_str) = match r.status { + CGIStatusCode::S200 => (200, "OK"), + CGIStatusCode::C400 => (400, "Bad Request"), + CGIStatusCode::C404 => (404, "Not Found"), + CGIStatusCode::C405 => (405, "Method Not Allowed") + }; + print!("Status: {} {}\r\n", status, status_str); + r.headers.iter().for_each(|(f, v)| print!("{}: {}\r\n", f, v)); + print!("\r\n"); + if status < 400 { + print!("{}", r.body); + } else { + let rdr = render::Renderer::load("./template"); + print!("{}", rdr.render_error(status, String::from(status_str), &conf)); + } + Ok(()) +} + +fn dbgmain(conf: config::Config) -> Result<(), &'static str> { + eprintln!("in debug mode"); + eprintln!("notekins version {}", conf.get("VERSION_STRING")); + let mut m = monolith::Monolith::new(String::from("posts.monolith")); + let mut args = env::args(); + args.next(); + if let Some(dbgop) = args.next() { + match dbgop.as_str() { + "get_post" => { + let tss = args.next().ok_or("missing timestamp")?; + let ts = i64::from_str_radix(&tss, 10).map_err(|_| "invalid timestamp")?; + m.load_index(); + let p = m.get_post(ts).ok_or("post not found")?; + monolith::test_print_post(&p); + Ok(()) + }, + "get_post2" => { + let tss = args.next().ok_or("missing timestamp")?; + let ts = i64::from_str_radix(&tss, 10).map_err(|_| "invalid timestamp")?; + let p = m.get_post_2(ts).ok_or("post not found")?; + monolith::test_print_post(&p); + Ok(()) + }, + "get_page" => { + let pgs = args.next().ok_or("missing page")?; + let pg = usize::from_str_radix(&pgs, 10).map_err(|_| "invalid page")?; + let ps = m.get_page_posts(pg).ok_or("page out of range")?; + for p in ps { + monolith::test_print_post(&p); + } + Ok(()) + }, + "render_page" => { + let pgs = args.next().ok_or("missing page")?; + let pg = usize::from_str_radix(&pgs, 10).map_err(|_| "invalid page")?; + let ps = m.get_page_posts(pg).ok_or("page out of range")?; + let r = render::Renderer::load("./template"); + println!("{}", r.render_page(ps, pg, m.get_page_count(), &conf)); + Ok(()) + }, + "render_post" => { + let tss = args.next().ok_or("missing timestamp")?; + let ts = i64::from_str_radix(&tss, 10).map_err(|_| "invalid timestamp")?; + let p = m.get_post_2(ts).ok_or("post not found")?; + let r = render::Renderer::load("./template"); + println!("{}", r.render_single_post(p, &conf)); + Ok(()) + }, + _ => Err("unsupported debug option") + } + } else { + m.load_index(); + let dates = m.get_all_dates(); + for d in dates { + let p = m.get_post(d); + println!("{:?}", p) + } + Ok(()) + } +} + +fn main() -> Result<(), &'static str> { + let conf = config::Config::parse_config("notekins.conf"); + if let Ok(_) = env::var("SERVER_SOFTWARE") { + cgimain(conf) + } else { + dbgmain(conf) + } +} diff --git a/backend/src/monolith.rs b/backend/src/monolith.rs new file mode 100644 index 0000000..a471138 --- /dev/null +++ b/backend/src/monolith.rs @@ -0,0 +1,238 @@ +use std::fs::File; +use std::vec::Vec; +use std::io::Read; +use memmap::Mmap; + +fn consume_str(buf: &[u8]) -> (String, usize) { + let nulp = buf.iter().position(|x| *x == 0u8).unwrap(); + let s = String::from_utf8_lossy(&buf[..nulp]); + (s.to_string(), nulp) +} + +#[derive(Debug)] +pub enum MediaInstance { + Image {thmb: String, orig: String}, + Video +} + +impl 
MediaInstance {
+    fn consume(buf: &[u8]) -> (MediaInstance, usize) {
+        match buf[0] as char {
+            'I' => {
+                let b = &buf[1..];
+                let (thmb, p1) = consume_str(b);
+                let b = &b[p1 + 1..];
+                let (orig, p2) = consume_str(b);
+                (MediaInstance::Image{thmb, orig}, p1 + p2 + 3)
+            },
+            'V' => {
+                (MediaInstance::Video, 1)
+            }
+            _ => panic!("Invalid media type")
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Post {
+    pub content: String,
+    pub date: i64,
+    pub media: Vec<MediaInstance>,
+    pub tags: Vec<String>
+}
+
+pub fn test_print_post(p: &Post) {
+    println!("{}", p.content);
+    println!("{}", p.date);
+    for m in &p.media {
+        match m {
+            MediaInstance::Image{thmb, orig} => println!("Image {} {}", thmb, orig),
+            MediaInstance::Video => println!("Video")
+        }
+    }
+    for t in &p.tags {
+        println!("{}", t);
+    }
+}
+
+impl Post {
+    fn consume(buf: &[u8]) -> (Post, usize) {
+        let (content, p) = consume_str(buf);
+        let mut l = p + 1;
+        let b = &buf[p + 1..];
+        let date = i64::from_le_bytes(b[..8].try_into().unwrap());
+        l += 8;
+        let b = &b[8..];
+        let mut media = vec![];
+        let mut tags = vec![];
+        let nmedia = b[0];
+        let mut b = &b[1..];
+        l += 1;
+        for _ in 0..nmedia {
+            let (m, ml) = MediaInstance::consume(b);
+            media.push(m);
+            b = &b[ml..];
+            l += ml;
+        }
+        let ntags = b[0];
+        let mut b = &b[1..];
+        l += 1;
+        for _ in 0..ntags {
+            let (t, p) = consume_str(b);
+            tags.push(t);
+            b = &b[p + 1..];
+            l += p + 1;
+        }
+        (Post{content, date, media, tags}, l)
+    }
+}
+
+pub struct Monolith {
+    filen: String,
+    idxfn: String,
+    pdxfn: String,
+    post_ranges: Vec<(i64, usize, usize)>,
+}
+
+impl Monolith {
+    pub fn new(filen: String) -> Monolith {
+        let idxfn = filen.clone() + ".idx";
+        let pdxfn = filen.clone() + ".pdx";
+        Monolith {
+            filen,
+            idxfn,
+            pdxfn,
+            post_ranges: vec![]
+        }
+    }
+
+    pub fn load_index(&mut self) {
+        let mut f = File::open(&self.idxfn).unwrap();
+        let mut last_r: u64 = 0;
+        self.post_ranges.clear();
+        loop {
+            let mut buf: [u8; 16] = [0; 16];
+            match f.read_exact(&mut buf) {
+                Ok(_) => (),
+                Err(e) => match e.kind() {
+                    std::io::ErrorKind::UnexpectedEof => break,
+                    _ => panic!("unexpected error {}", e)
+                }
+            }
+            let t = i64::from_le_bytes(buf[..8].try_into().unwrap());
+            let l = last_r;
+            let r = u64::from_le_bytes(buf[8..].try_into().unwrap());
+            self.post_ranges.push((t, l as usize, r as usize));
+            last_r = r;
+        }
+    }
+
+    fn find_post(&self, date: i64) -> Option<(usize, usize)> {
+        if let Ok(p) = self.post_ranges.binary_search_by(|p| p.0.cmp(&date)) {
+            let (_, l, r) = self.post_ranges[p];
+            Some((l, r))
+        } else { None }
+    }
+
+    fn find_post_2(&self, date: i64) -> Option<(usize, usize)> {
+        let f = File::open(&self.idxfn).unwrap();
+        let map = unsafe { Mmap::map(&f) }.unwrap();
+        let len = map.len();
+        let nposts = len / 16;
+        let mut l = 0;
+        let mut r = nposts;
+        let mut s = r;
+        let mut postidx = None;
+        while l < r {
+            let m = l + s / 2;
+            let b = &map[m * 16..];
+            let cdate = i64::from_le_bytes(b[..8].try_into().unwrap());
+            l = if cdate < date { m + 1 } else { l };
+            r = if cdate > date { m } else { r };
+            s = r - l;
+            if cdate == date {
+                postidx = Some(m);
+                break;
+            }
+        }
+        if let Some(postidx) = postidx {
+            let b = &map[postidx * 16 + 8..];
+            let r = u64::from_le_bytes(b[..8].try_into().unwrap());
+            let l = if postidx == 0 {
+                0
+            } else {
+                let b = &map[postidx * 16 - 8..];
+                u64::from_le_bytes(b[..8].try_into().unwrap())
+            };
+            Some((l as usize, r as usize))
+        } else { None }
+    }
+
+    fn get_page_range(&self, page: usize) -> Option<(usize, usize)> {
+        let f = File::open(&self.pdxfn).unwrap();
+        let map = unsafe { Mmap::map(&f) }.unwrap();
+        let len = map.len();
+        let npages = len / 8;
+        if page >= npages {
+            return None;
+        }
+        let b = &map[page * 8..];
+        let r = u64::from_le_bytes(b[..8].try_into().unwrap());
+        let l = if page == npages - 1 {
+            0
+        } else {
+            let b = &map[(page + 1) * 8..];
+            u64::from_le_bytes(b[..8].try_into().unwrap())
+        };
+        Some((l as usize, r as usize))
+    }
+
+    fn get_posts_in_range(&self, l: usize, r: usize) -> Vec<Post> {
+        let f = File::open(&self.filen).unwrap();
+        let map = unsafe { Mmap::map(&f) }.unwrap();
+        let mut remaining = &map[l..r];
+        let mut ret = vec![];
+        loop {
+            if remaining.len() == 0 {
+                break;
+            }
+            let (post, len) = Post::consume(remaining);
+            ret.push(post);
+            remaining = &remaining[len + 1..];
+        }
+        ret
+    }
+
+    pub fn get_all_dates(&self) -> Vec<i64> {
+        self.post_ranges.iter().map(|x| x.0).collect()
+    }
+
+    pub fn get_post(&self, date: i64) -> Option<Post> {
+        if let Some((l, r)) = self.find_post(date) {
+            let f = File::open(&self.filen).unwrap();
+            let map = unsafe { Mmap::map(&f) }.unwrap();
+            Some(Post::consume(&map[l..r]).0)
+        } else { None }
+    }
+
+    pub fn get_post_2(&self, date: i64) -> Option<Post> {
+        if let Some((l, r)) = self.find_post_2(date) {
+            let f = File::open(&self.filen).unwrap();
+            let map = unsafe { Mmap::map(&f) }.unwrap();
+            Some(Post::consume(&map[l..r]).0)
+        } else { None }
+    }
+
+    pub fn get_page_posts(&self, page: usize) -> Option<Vec<Post>> {
+        if let Some((l, r)) = self.get_page_range(page) {
+            Some(self.get_posts_in_range(l, r))
+        } else {
+            None
+        }
+    }
+
+    pub fn get_page_count(&self) -> usize {
+        let m = std::fs::metadata(&self.pdxfn).unwrap();
+        (m.len() / 8) as usize
+    }
+}
diff --git a/backend/src/render.rs b/backend/src/render.rs
new file mode 100644
index 0000000..76b5490
--- /dev/null
+++ b/backend/src/render.rs
@@ -0,0 +1,346 @@
+use regex::Regex;
+use chrono::DateTime;
+use tzfile::Tz;
+
+use std::path::Path;
+use std::fs::File;
+use std::io::Read;
+use std::borrow::Cow;
+
+use crate::monolith::MediaInstance;
+use crate::monolith::Post;
+use crate::config::Config;
+
+pub struct Renderer {
+    paget: String,
+    pagert: String,
+    postt: String,
+    media_contt: String,
+    media_imgt: String,
+    errort: String
+}
+
+#[derive(Clone)]
+struct SubstitutionContext<'p> {
+    ps: Option<&'p Vec<Post>>,
+    p: Option<&'p Post>,
+    curmedia: Option<usize>,
+    curpage: Option<usize>,
+    maxpage: Option<usize>,
+    single_post: bool,
+    error: Option<usize>,
+    error_str: Option<String>
+}
+
+impl Renderer {
+    pub fn load<P: AsRef<Path>>(template_dir: P) -> Renderer {
+        let template_dir = template_dir.as_ref();
+        let mut f = File::open(template_dir.join("page.template")).unwrap();
+        let mut paget = String::from("");
+        f.read_to_string(&mut paget).unwrap();
+        let mut f = File::open(template_dir.join("pager.template")).unwrap();
+        let mut pagert = String::from("");
+        f.read_to_string(&mut pagert).unwrap();
+        let mut f = File::open(template_dir.join("post.template")).unwrap();
+        let mut postt = String::from("");
+        f.read_to_string(&mut postt).unwrap();
+        let mut f = File::open(template_dir.join("media-container.template")).unwrap();
+        let mut media_contt = String::from("");
+        f.read_to_string(&mut media_contt).unwrap();
+        let mut f = File::open(template_dir.join("media-image.template")).unwrap();
+        let mut media_imgt = String::from("");
+        f.read_to_string(&mut media_imgt).unwrap();
+        let mut f = File::open(template_dir.join("error.template")).unwrap();
+        let mut errort = String::from("");
+        f.read_to_string(&mut errort).unwrap();
+        Renderer{paget, pagert, postt, media_contt, media_imgt, errort}
+    }
+    fn resolve_current_page(sub: &str, curpage: Option<usize>, maxpage: Option<usize>) -> String {
+        if curpage.is_none() || maxpage.is_none() {
+            return String::from("")
+        }
+        let curpage = curpage.unwrap() + 1;
+        let maxpage = maxpage.unwrap();
+        let p: Vec<&str> = sub.split('/').collect();
+        if p.len() < 2 {
+            curpage.to_string()
+        } else {
+            let d = isize::from_str_radix(p[1], 10).unwrap_or_default();
+            if let Some(v) = curpage.checked_add_signed(d) {
+                if (v == 0) || (v > maxpage) {
+                    String::from("")
+                } else {
+                    v.to_string()
+                }
+            } else { String::from("") }
+        }
+    }
+
+    fn resolve_current_page_url(sub: &str, curpage: Option<usize>, maxpage: Option<usize>) -> String {
+        if curpage.is_none() || maxpage.is_none() {
+            return String::from("");
+        }
+        let curpage = curpage.unwrap() + 1;
+        let maxpage = maxpage.unwrap();
+        let mut p = sub.split('/');
+        p.next();
+        let d = isize::from_str_radix(p.next().unwrap_or_default(), 10).unwrap_or_default();
+        let default = p.next().unwrap_or_default();
+        if let Some(v) = curpage.checked_add_signed(d) {
+            if (v == 0) || (v > maxpage) {
+                String::from(default)
+            } else {
+                String::from("?page=") + &v.to_string()
+            }
+        } else { String::from(default) }
+    }
+
+    fn resolve_post_content_plain(sub: &str, p: Option<&Post>) -> String {
+        if p.is_none() {
+            return String::from("");
+        }
+        let p = p.unwrap();
+        let mut params = sub.split('/');
+        params.next();
+        let c = usize::from_str_radix(params.next().unwrap_or_default(), 10).unwrap_or_default();
+        let re = Regex::new("<[^>]+>").unwrap();
+        let plain = re.replace_all(&p.content, "");
+        let taken: String = plain.chars().take(c).collect();
+        if taken.len() < plain.len() {
+            taken + " ..."
+        } else { taken }
+    }
+
+    fn resolve_img_thumb_url(p: Option<&Post>, curmedia: Option<usize>, conf: &Config) -> String {
+        if p.is_none() {
+            return String::from("");
+        }
+        let p = p.unwrap();
+        if let Some(curmedia) = curmedia {
+            if let MediaInstance::Image{thmb, orig: _} = &p.media[curmedia] {
+                return conf.get_str("SERVED_DATA_ROOT") + "/" + thmb;
+            }
+        }
+        String::from("")
+    }
+
+    fn resolve_img_orig_url(p: Option<&Post>, curmedia: Option<usize>, conf: &Config) -> String {
+        if p.is_none() {
+            return String::from("");
+        }
+        let p = p.unwrap();
+        if let Some(curmedia) = curmedia {
+            if let MediaInstance::Image{thmb: _, orig} = &p.media[curmedia] {
+                return conf.get_str("SERVED_DATA_ROOT") + "/" + orig;
+            }
+        }
+        String::from("")
+    }
+
+    fn resolve_max_page(maxpage: Option<usize>) -> String {
+        if let Some(maxpage) = maxpage {
+            maxpage.to_string()
+        } else {
+            String::from("")
+        }
+    }
+
+    fn resolve_post_content(p: Option<&Post>) -> String {
+        if let Some(p) = p {
+            p.content.clone()
+        } else { String::from("") }
+    }
+
+    fn resolve_post_date_formatted(p: Option<&Post>, conf: &Config) -> String {
+        if p.is_none() {
+            return String::from("");
+        }
+        let p = p.unwrap();
+        if let Some(dt) = DateTime::from_timestamp(p.date, 0) {
+            if let Ok(tz) = Tz::named(&conf.get_str("DISPLAY_TIMEZONE")) {
+                dt.with_timezone(&&tz).to_rfc3339_opts(chrono::SecondsFormat::Secs, true)
+            } else { dt.to_rfc3339_opts(chrono::SecondsFormat::Secs, true) }
+        } else { String::from("") }
+    }
+
+    fn resolve_post_date_timestamp(p: Option<&Post>) -> String {
+        if let Some(p) = p {
+            p.date.to_string()
+        } else { String::from("") }
+    }
+
+    fn resolve_post_tags(p: Option<&Post>) -> String {
+        if let Some(p) = p {
+            String::from(p.tags.iter().fold(String::from(""), |s, t| s + "#" + &t + " ").trim_end())
+        } else { String::from("") }
+    }
+    fn render_post(&self, sc: &SubstitutionContext, conf: &Config) -> String {
+        self.substitute(&self.postt, sc, conf)
+    }
+
+    fn render_posts(&self, sc: &SubstitutionContext, conf: &Config) -> String {
+        if let Some(ps) = sc.ps {
+            let s = ps.iter().rev().fold(String::from(""), |r, p| {
+                let psc = SubstitutionContext {
+                    p: Some(&p),
+                    .. sc.clone()
+                };
+                r + &self.render_post(&psc, conf)
+            });
+            return s;
+        }
+        String::from("")
+    }
+
+    fn render_pager(&self, sc: &SubstitutionContext, conf: &Config) -> String {
+        if sc.single_post {
+            String::from("")
+        } else {
+            self.substitute(&self.pagert, sc, conf)
+        }
+    }
+
+    fn render_media_instance(&self, sc: &SubstitutionContext, conf: &Config) -> String {
+        if let Some(curmedia) = sc.curmedia {
+            if let Some(p) = sc.p {
+                if curmedia < p.media.len() {
+                    if let MediaInstance::Image{thmb: _, orig: _} = p.media[curmedia] {
+                        return self.substitute(&self.media_imgt, sc, conf);
+                    }
+                }
+            }
+        }
+        String::from("")
+    }
+
+    fn render_media(&self, sc: &SubstitutionContext, conf: &Config) -> String {
+        if let Some(p) = sc.p {
+            let s = (0..p.media.len()).fold(String::from(""), |r, midx| {
+                let nsc = SubstitutionContext {
+                    curmedia: Some(midx),
+                    .. sc.clone()
+                };
+                r + &self.render_media_instance(&nsc, conf)
+            });
+            return s;
+        }
+        String::from("")
+    }
+
+    fn resolve_media_container(&self, sc: &SubstitutionContext, conf: &Config) -> String {
+        if let Some(p) = sc.p {
+            if p.media.len() > 0 {
+                return self.substitute(&self.media_contt, sc, conf);
+            }
+        }
+        String::from("")
+    }
+
+    fn resolve_error_status(sc: &SubstitutionContext) -> String {
+        if let Some(err) = sc.error {
+            err.to_string()
+        } else {
+            String::from("")
+        }
+    }
+
+    fn resolve_error_description(sc: &SubstitutionContext) -> String {
+        if let Some(errs) = &sc.error_str {
+            String::from(errs)
+        } else {
+            String::from("")
+        }
+    }
+
+    fn resolve_notekins_version(conf: &Config) -> String {
+        conf.get_str("VERSION_STRING")
+    }
+
+    fn resolve_substitution(&self, sub: &str, sc: &SubstitutionContext, conf: &Config) -> String {
+        if sub.starts_with("CURRENT_PAGE_URL") {
+            Self::resolve_current_page_url(sub, sc.curpage, sc.maxpage)
+        } else if sub.starts_with("CURRENT_PAGE") {
+            Self::resolve_current_page(sub, sc.curpage, sc.maxpage)
+        } else if sub.starts_with("POST_CONTENT_PLAIN") {
+            Self::resolve_post_content_plain(sub, sc.p)
+        } else {
+            match sub {
+                "IMG_THUMB_URL" => Self::resolve_img_thumb_url(sc.p, sc.curmedia, conf),
+                "IMG_ORIG_URL" => Self::resolve_img_orig_url(sc.p, sc.curmedia, conf),
+                "POSTS" => self.render_posts(sc, conf),
+                "MAX_PAGE" => Self::resolve_max_page(sc.maxpage),
+                "PAGER" => self.render_pager(sc, conf),
+                "MEDIA" => self.render_media(sc, conf),
+                "MEDIA_CONTAINER" => self.resolve_media_container(sc, conf),
+                "POST_CONTENT" => Self::resolve_post_content(sc.p),
+                "POST_DATE_FORMATTED" => Self::resolve_post_date_formatted(sc.p, conf),
+                "POST_DATE_TIMESTAMP" => Self::resolve_post_date_timestamp(sc.p),
+                "POST_TAGS" => Self::resolve_post_tags(sc.p),
+                "ERROR_STATUS" => Self::resolve_error_status(sc),
+                "ERROR_DESCRIPTION" => Self::resolve_error_description(sc),
+                "NOTEKINS_VERSION" => Self::resolve_notekins_version(conf),
+                _ => {
+                    eprintln!("unknown substitution string {}", sub);
+                    String::from("")
+                }
+            }
+        }
+    }
+
+    fn substitute(&self, template: &str, sc: &SubstitutionContext, conf: &Config) -> String {
+        let mut sp: Vec<Cow<str>> = template.split('@').map(|x| Cow::Borrowed(x)).collect();
+        for sub in sp.iter_mut().skip(1).step_by(2) {
+            let subbed = self.resolve_substitution(sub, sc, conf);
+            *sub = Cow::Owned(subbed);
+        }
+        sp.iter().fold(String::from(""), |r, s| r + &s)
+    }
+
+    fn render_page_internal(&self, sc: &SubstitutionContext, conf: &Config) -> String {
+        self.substitute(&self.paget, sc, conf)
+    }
+
+    pub fn render_page(&self, posts: Vec<Post>, curpage: usize, maxpage: usize, conf: &Config) -> String {
+        let sc = SubstitutionContext {
+            ps: Some(&posts),
+            p: None,
+            curmedia: None,
+            curpage: Some(curpage),
+            maxpage: Some(maxpage),
+            single_post: false,
+            error: None,
+            error_str: None
+        };
+        self.render_page_internal(&sc, conf)
+    }
+
+    pub fn render_single_post(&self, post: Post, conf: &Config) -> String {
+        let ps = vec![post];
+        let sc = SubstitutionContext {
+            ps: Some(&ps),
+            p: Some(&ps[0]),
+            curmedia: None,
+            curpage: None,
+            maxpage: None,
+            single_post: true,
+            error: None,
+            error_str: None
+        };
+        self.render_page_internal(&sc, conf)
+    }
+
+    pub fn render_error(&self, err: usize, errs: String, conf: &Config) -> String {
+        let sc = SubstitutionContext {
+            ps: None,
+            p: None,
+            curmedia: None,
+            curpage: None,
+            maxpage: None,
+            single_post: false,
+            error: Some(err),
+            error_str: Some(errs)
+        };
+        self.substitute(&self.errort, &sc, &conf)
+    }
+}
diff --git a/template/error.template b/template/error.template
new file mode 100644
index 0000000..804d439
--- /dev/null
+++ b/template/error.template
@@ -0,0 +1,11 @@
+ + + + + +This is fine. + + +

@ERROR_STATUS@. This is fine.

+ + diff --git a/template/media-container.template b/template/media-container.template new file mode 100644 index 0000000..ff22e4d --- /dev/null +++ b/template/media-container.template @@ -0,0 +1,3 @@ +
+ @MEDIA@ +
diff --git a/template/media-image.template b/template/media-image.template new file mode 100644 index 0000000..d22447c --- /dev/null +++ b/template/media-image.template @@ -0,0 +1 @@ +
diff --git a/template/page.template b/template/page.template new file mode 100644 index 0000000..e0f5e7c --- /dev/null +++ b/template/page.template @@ -0,0 +1,77 @@ + + + + + + +Notekins + + + + + +
+
+ @POSTS@ +
+
+ +
+ + diff --git a/template/pager.template b/template/pager.template new file mode 100644 index 0000000..3a35cfd --- /dev/null +++ b/template/pager.template @@ -0,0 +1,5 @@ +
+ << + @CURRENT_PAGE@/@MAX_PAGE@ + >> +
diff --git a/template/post.template b/template/post.template new file mode 100644 index 0000000..64a10f4 --- /dev/null +++ b/template/post.template @@ -0,0 +1,7 @@ +
+ @POST_CONTENT@ +
+ @MEDIA_CONTAINER@ +
@POST_TAGS@ + @POST_DATE_FORMATTED@ §
+
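The templates above are plain text interleaved with @TOKEN@ placeholders:
the backend splits each template on '@' and resolves every odd-numbered
segment, so delimiters must come in matched pairs. A minimal Python sketch
of the same scheme (the token table in the usage line is hypothetical)::

    def substitute(template, resolve):
        # Segments at odd indices sat between a pair of '@' delimiters.
        parts = template.split('@')
        for i in range(1, len(parts), 2):
            parts[i] = resolve(parts[i])
        return ''.join(parts)

    out = substitute("@CURRENT_PAGE@/@MAX_PAGE@",
                     lambda tok: {"CURRENT_PAGE": "1", "MAX_PAGE": "3"}.get(tok, ""))
    # out == "1/3"; unknown tokens become empty strings, as in render.rs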
diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/atomgen.py b/utils/atomgen.py new file mode 100644 index 0000000..453465d --- /dev/null +++ b/utils/atomgen.py @@ -0,0 +1,51 @@ +# Chris Xiong 2024 +# License: Expat (MIT) + +import os +import re +from datetime import datetime, timezone + +from monolith import Monolith +from config import conf + +tagrex = re.compile(r'<[^>]+>') + +def remove_tags(s): + return tagrex.sub('', s) + +def ellide(s, l): + return s if len(s) <= l else s[:l] + " ..." + +def gen_atom(): + xmlbuf = f'' + xmlbuf += f'' + xmlbuf += f'Notekins' + xmlbuf += f'{datetime.now(timezone.utc).replace(microsecond=0).isoformat()}' + xmlbuf += f'' + xmlbuf += f'' + xmlbuf += f'{conf.ATOM_ROOT}/atom.xml' + xmlbuf += f'{conf.ATOM_TITLE}' + xmlbuf += f'{conf.ATOM_SUBTITLE}' + xmlbuf += f'{conf.ATOM_ICON}' + xmlbuf += f'{conf.ATOM_ICON}' + m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith")) + m.load_index() + dates = list(reversed(m.get_all_dates()[-conf.ATOM_NPOSTS:])) + for d in dates: + p = m.get_post(d) + link = f"{conf.SERVED_DATA_ROOT}/?post={p.date}" + title = ellide(remove_tags(p.content), 32) + date = datetime.fromtimestamp(p.date, timezone.utc).replace(microsecond=0).isoformat() + xmlbuf += f'' + xmlbuf += f'{title}' + xmlbuf += f'' + xmlbuf += f'{date}' + xmlbuf += f'{date}' + xmlbuf += f'{link}' + xmlbuf += f'{conf.ATOM_AUTHOR}' + xmlbuf += f'' + xmlbuf += f'' + xmlbuf += f'' + atomfn = os.path.join(conf.LOCAL_DATA_ROOT, "atom.xml") + with open(atomfn, "w") as f: + f.write(xmlbuf) diff --git a/utils/config.py b/utils/config.py new file mode 100644 index 0000000..05c1f9d --- /dev/null +++ b/utils/config.py @@ -0,0 +1,64 @@ +# Chris Xiong 2024 +# License: Expat (MIT) + +import os +from os import path + +def stripped_str(x): return str(x).strip() + +def s2bool(x): return str(x).strip() in ["True", "true", "TRUE"] + +# "CONFIG_ITEM_NAME": (default value, parse function) +CONFIG_ITEMS = { + "LOCAL_DATA_ROOT": ("", stripped_str), + # e.g. https://chrisoft.org/notekins/ + "SERVED_DATA_ROOT": ("", stripped_str), + # e.g. 
chrisoft@10.24.1.1:/home/chrisoft/notedir/
+    "SYNC_TARGET": ("", stripped_str),
+    "POSTS_PER_PAGE": (20, int),
+    "THUMBNAIL_DIM": (1280, int),
+    "DISPLAY_TIMEZONE": ("UTC", stripped_str), # only used by backend
+    # atom generator stuff
+    "ATOM_ENABLED": (False, s2bool),
+    "VERSION_STRING": ("1.0", stripped_str),
+    "ATOM_TITLE": ("", stripped_str),
+    "ATOM_ROOT": ("", stripped_str),
+    "ATOM_SUBTITLE": ("", stripped_str),
+    "ATOM_ICON": ("", stripped_str),
+    "ATOM_AUTHOR": ("", stripped_str),
+    "ATOM_NPOSTS": (20, int)
+}
+
+class config:
+    def __init__(self):
+        self.d = dict([(name, prop[0]) for name, prop in CONFIG_ITEMS.items()])
+        p = os.getcwd()
+        self.f = None
+        try:
+            while not path.isfile(path.join(p, "notekins.conf")):
+                if p == path.dirname(p):
+                    raise FileNotFoundError("Cannot locate configuration file.")
+                p = path.dirname(p)
+            fn = path.join(p, "notekins.conf")
+            self.f = fn
+            print(f"Using configuration file {fn}")
+            with open(fn, "r") as f:
+                for l in f:
+                    try:
+                        n, v = l.split('=')
+                        if n not in CONFIG_ITEMS:
+                            continue
+                        self.d[n] = CONFIG_ITEMS[n][1](v)
+                    except ValueError:
+                        pass
+        except FileNotFoundError:
+            pass
+
+    def __getattr__(self, k):
+        return self.d[k]
+    def require(self):
+        if self.f is None:
+            print("This operation requires a configuration file, but none can be found.")
+            exit(1)
+
+conf = config()
diff --git a/utils/mistune_emote.py b/utils/mistune_emote.py
new file mode 100644
index 0000000..6c557a5
--- /dev/null
+++ b/utils/mistune_emote.py
@@ -0,0 +1,21 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+from config import conf
+
+EMOTE_PATTERN = r":(?!\s)(?P<emote_name>.+?)(?<!\s):"
+
+emote_root = f"{conf.SERVED_DATA_ROOT}/emotes"
+emote_extension = ".webp"
+
+def parse_inline_emote(inline, m, state):
+    ename = m.group("emote_name")
+    state.append_token({"type": "inline_emote", "raw": ename})
+    return m.end()
+
+def render_inline_emote(renderer, ename):
+    return f'<img src="{emote_root}/{ename}{emote_extension}" alt="{ename}">'
+
+def emote(md):
+    md.inline.register("inline_emote", EMOTE_PATTERN, parse_inline_emote, before="link")
+    if md.renderer and md.renderer.NAME == "html":
+        md.renderer.register("inline_emote", render_inline_emote)
diff --git a/utils/monolith.py b/utils/monolith.py
new file mode 100644
index 0000000..d9a90e6
--- /dev/null
+++ b/utils/monolith.py
@@ -0,0 +1,285 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+'''
+I/O facilities for the Monolith file used by the backend.
+
+Structure of the Monolith file:
+Element      Length in bytes  Notes
+<post #0>    (varies)
+<␋>          1                Treated as part of the post that precedes it
+<post #1>    (varies)
+<␋>          1
+...
+
+Post:
+Element      Length in bytes  Notes
+content      (varies)         utf8 string, null-terminated. HTML fragment.
+date         8                seconds since unix epoch
+media        (varies)         See below
+ntags        1
+tag[0]       (varies)         null-terminated utf8 string, excluding the hash prefix. HTML-escaped.
+tag[1]       ..               ..
+...
+tag[ntags-1] ..               ..
+
+Media:
+Element      Length in bytes  Notes
+nmedia       1
+<media instance #0>  (varies)
+<media instance #1>  (varies)
+...
+
+MediaInstance:
+Element      Length in bytes  Notes
+type         1                'I' = image
+-----------------type == 'I'-----------------
+thumbnail    (varies)         null-terminated utf8 string, relative path to storage url
+original     (varies)         ..
+
+Index file (.idx)
+00                08 09                     0F
+<date of post #0>    <end offset of post #0>
+<date of post #1>    <end offset of post #1>
+...
+
+Page index file (.pdx)
+00                       08
+<end offset of page #0>
+...
+
+Page #0 contains the latest posts. The final page always starts
+at byte #0 in the monolith.
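+
+Worked example (offsets made up for illustration): if the monolith holds
+three posts ending at byte offsets 120, 260 and 391, the .idx file stores
+the 16-byte records (date#0, 120), (date#1, 260) and (date#2, 391).
+Post #i therefore spans [end of post #(i-1), end of post #i), with
+post #0 starting at byte 0; only end offsets are stored, and each start
+is derived from the previous record when the index is loaded.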
+ +''' + +from enum import Enum +from mmap import mmap +from bisect import bisect_left +from config import conf +from datetime import datetime, timezone + +MediaType = Enum("MediaType", ["IMAGE", "VIDEO"]) + +def consume_str(buf): + nulp = buf.find(b'\0') + rets = buf[0 : nulp].decode("utf-8") + return rets, nulp + +def strbuf(s): + return s.encode("utf-8") + b'\0' + +class MediaInstance: + def __init__(self, type): + self.type = type + + def __str__(self): + match self.type: + case MediaType.IMAGE: + return f"Image: {self.thumbnail} {self.original}\n" + case _: + return f"Unknown media\n" + + def consume(buf): + match buf[0 : 1]: + case b'I': + l = 1 + b = buf[1:] + thmb, p = consume_str(b) + l += p + 1 + b = b[p + 1:] + orig, p = consume_str(b) + l += p + 1 + return MediaInstance.makeImage(thmb, orig), l + case _: + raise ValueError("Invalid media type") + + def to_buf(self): + match self.type: + case MediaType.IMAGE: + return b'I' + strbuf(self.thumbnail) + strbuf(self.original) + case _: + raise ValueError("Unsupported media type") + + def dump(self): + match self.type: + case MediaType.IMAGE: + print(f"({self.thumbnail}, {self.original})") + case _: + raise ValueError("Unsupported media type") + + def makeImage(thumb, orig): + r = MediaInstance(MediaType.IMAGE) + r.thumbnail = thumb + r.original = orig + return r + +class Post: + ''' + .content: utf-8 string + .date: int, secs since unix epoch + .media: list of MediaInstance + .tags: list of strings + ''' + def __init__(self, cont, date, media, tags): + self.content = cont + self.date = date + self.media = media + self.tags = tags + + def __str__(self): + medias = "\n".join([str(m) for m in self.media]) + tags = ",".join([f'"{t}"' for t in self.tags]) + return f"{self.content}\n{self.date}\n{medias}\n[{tags}]" + + def from_buf(buf): + content, p = consume_str(buf) + buf = buf[p + 1 :] + date = int.from_bytes(buf[: 8], "little") + buf = buf[8 :] + media = [] + nmedia = int.from_bytes(buf[: 1], "little") + buf = buf[1 :] + for i in range(0, nmedia): + m, l = MediaInstance.consume(buf) + media.append(m) + buf = buf[l :] + tags = [] + ntags = int.from_bytes(buf[: 1], "little") + buf = buf[1 :] + for i in range(0, ntags): + t, p = consume_str(buf) + tags.append(t) + buf = buf[p + 1:] + return Post(content, date, media, tags) + + def to_buf(self): + ret = strbuf(self.content) + ret += self.date.to_bytes(8, "little") + ret += len(self.media).to_bytes(1, "little") + for m in self.media: + ret += m.to_buf() + ret += len(self.tags).to_bytes(1, "little") + for t in self.tags: + ret += strbuf(t) + return ret + b'\x0b' + + def dump(self): + print('=' * 40) + print(self.content) + print(datetime.fromtimestamp(self.date, tz=timezone.utc).isoformat()) + for t in self.tags: + print(f"#{t} ", end='') + print("") + for m in self.media: + m.dump() + +class Monolith: + def __init__(self, fn): + self.filename = fn + self.idxfn = f"{fn}.idx" + self.pdxfn = f"{fn}.pdx" + self.postranges = [] + + def _append_idxf(self, t, r): + with open(self.idxfn, "ab") as f: + buf = t.to_bytes(8, "little") + \ + r.to_bytes(8, "little") + if f.write(buf) != len(buf): + raise RuntimeError("write failure") + + def clear(self): + with open(self.filename, "wb"): pass + with open(self.idxfn, "wb"): pass + with open(self.pdxfn, "wb"): pass + + def append(self, post): + with open(self.filename, "ab") as f: + postbuf = post.to_buf() + t = post.date + l = f.tell() + w = f.write(postbuf) + if w != len(postbuf): + raise RuntimeError("write failure") + r = l + w + 
self.postranges.append((t, l, r)) + self._append_idxf(t, r) + # self.generate_page_index() + + def load_index(self): + with open(self.idxfn, "rb") as f: + last_r = 0 + self.postranges = [] + while True: + bs = f.read(16) + if len(bs) == 0: break + t = int.from_bytes(bs[0 : 8], "little") + l = last_r + r = int.from_bytes(bs[8 :16], "little") + self.postranges.append((t, l, r)) + last_r = r + + def write_index(self): + with open(self.idxfn, "wb") as f: + for (t, _, r) in self.postranges: + f.write(t.to_bytes(8, "little") + \ + r.to_bytes(8, "little")) + + def find_post(self, date): + p = bisect_left(self.postranges, date, key=lambda p: p[0]) + if p != len(self.postranges) and self.postranges[p][0] == date: + return p + return None + + def find_nearby_posts(self, date, r=2): + p = bisect_left(self.postranges, date, key=lambda p: p[0]) + left = max(p - r, 0) + right = min(p + r + 1, len(self.postranges)) + return [t for (t, _, _) in self.postranges[left : right]] + + def get_all_dates(self): + return [t for (t, _, _) in self.postranges] + + def get_post(self, date): + p = self.find_post(date) + if p is None: return None + t, l, r = self.postranges[p] + with open(self.filename, "r+b") as f: + d = mmap(f.fileno(), 0) + post = Post.from_buf(d[l : r]) + return post + + def replace_post(self, date, post): + p = self.find_post(date) + if p is None: return None + t, l, r = self.postranges[p] + new_post_buf = post.to_buf() + dlen = len(new_post_buf) - (r - l) + with open(self.filename, "r+b") as f: + d = mmap(f.fileno(), 0) + mlength = len(d) + oldend = r + newend = l + len(new_post_buf) + if dlen > 0: + d.resize(mlength + dlen) + if dlen != 0: + d.move(newend, oldend, mlength - oldend) + if dlen < 0: + d.resize(mlength + dlen) + d[l : newend] = new_post_buf + self.postranges[p] = (t, l, r + dlen) + for i in range(p + 1, len(self.postranges)): + t, l, r = self.postranges[i] + self.postranges[i] = (t, l + dlen, r + dlen) + self.write_index() + + def generate_page_index(self): + posts_per_page = conf.POSTS_PER_PAGE + ranges = [] + for ub in range(len(self.postranges), 0, -posts_per_page): + pr = ub - 1 + _, _, r = self.postranges[pr] + ranges.append(r) + with open(self.pdxfn, "wb") as f: + for r in ranges: + f.write(r.to_bytes(8, "little")) diff --git a/utils/monolith_test.py b/utils/monolith_test.py new file mode 100644 index 0000000..b4e1b30 --- /dev/null +++ b/utils/monolith_test.py @@ -0,0 +1,96 @@ +# Chris Xiong 2024 +# License: Expat (MIT) +# +# Basic unit tests for the Python Monolith class + +import monolith +import unittest +import random +import os +from mmap import mmap + +def randstr(len): + return ''.join(random.choices(''.join([chr(i + ord('0')) for i in range(0, 75)]), k=len)) + +def randpost(last_time): + content = randstr(random.randint(10, 1024)) + date = random.randint(last_time + 1, last_time + 999999) + media = [] + tags = [] + for _ in range(0, random.randint(0, 9)): + media.append(monolith.MediaInstance.makeImage(randstr(20), randstr(20))) + for _ in range(0, random.randint(0, 4)): + tags.append(randstr(random.randint(1, 8))) + return monolith.Post(content, date, media, tags) + +def posteq(a, b): + if a is None or b is None: + return False + if len(a.media) != len(b.media) or len(a.tags) != len(b.tags): + return False + for x, y in zip(a.media, b.media): + if x.thumbnail != y.thumbnail or x.original != y.original: + return False + for x, y in zip(a.tags, b.tags): + if x != y: return False + return a.content == b. 
content and a.date == b.date
+
+class TestMonolith(unittest.TestCase):
+    def test_replace(self):
+        posts = []
+        filename = "rep.monolith"
+        m = monolith.Monolith(filename)
+        p1 = randpost(123)
+        p2 = randpost(p1.date)
+        p3 = randpost(0)
+        p3.date = p1.date
+        m.append(p1)
+        m.append(p2)
+        m.replace_post(p1.date, p3)
+        self.assertTrue(posteq(m.get_post(p3.date), p3))
+        self.assertTrue(posteq(m.get_post(p2.date), p2))
+
+    def test_combined(self):
+        posts = []
+        filename = "test.monolith"
+        if True:
+            m = monolith.Monolith(filename)
+            last_time = 0
+            for _ in range(0, 100):
+                op = 1 if random.random() < 0.2 else 0
+                if op == 1 and len(posts) == 0:
+                    op = 0
+                if op == 0:
+                    p = randpost(last_time)
+                    last_time = p.date
+                    posts.append(p)
+                    m.append(p)
+                elif op == 1:
+                    p = randpost(0)
+                    position = random.randint(0, len(posts) - 1)
+                    p.date = posts[position].date
+                    posts[position] = p
+                    m.replace_post(p.date, p)
+            m.write_index()
+            m.generate_page_index()
+            for p in posts:
+                pp = m.get_post(p.date)
+                self.assertTrue(posteq(p, pp))
+
+            with open(filename, "r+b") as f:
+                d = mmap(f.fileno(), 0)
+                for _, _, r in m.postranges:
+                    self.assertEqual(d[r - 1 : r], b'\v')
+        if True:
+            m = monolith.Monolith(filename)
+            m.load_index()
+            dates = m.get_all_dates()
+            self.assertEqual(len(dates), len(posts))
+            for t, p in zip(dates, posts):
+                self.assertEqual(t, p.date)
+            for t, p in zip(dates, posts):
+                self.assertTrue(posteq(p, m.get_post(t)))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/utils/notectl.py b/utils/notectl.py
new file mode 100644
index 0000000..a2e4a19
--- /dev/null
+++ b/utils/notectl.py
@@ -0,0 +1,178 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+import os
+import sys
+import time
+import shutil
+import tempfile
+import subprocess
+from datetime import datetime, timezone
+
+import postutil
+from atomgen import gen_atom
+from monolith import Monolith
+from config import conf
+
+'''
+Launches an editor (set by $EDITOR) to edit the given file.
+'''
+def edit_file(fn):
+    editor = os.environ["EDITOR"]
+    subprocess.run([editor, fn])
+
+'''
+Opens an editor to create a new post.
+The post will be formatted, and all media will be processed accordingly.
+'''
+def new_post():
+    conf.require()
+    with tempfile.TemporaryDirectory() as dir:
+        fn = os.path.join(dir, "note.txt")
+        with open(fn, "w"): pass
+        edit_file(fn)
+        ts = time.time_ns() // 10 ** 9
+        postpath = postutil.move_post(fn, ts)
+        p = postutil.process_post(postpath, False)
+        if len(p.content) == 0 and len(p.media) == 0:
+            print("No post was made.")
+            return
+        print(f"Post {os.path.basename(postpath)} made!")
+        m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+        m.append(p)
+        m.load_index()
+        m.generate_page_index()
+        if conf.ATOM_ENABLED:
+            gen_atom()
+
+'''
+Opens an editor to edit an existing post.
+Post time cannot be modified and will always stay the same.
+Media that can be found in the media_dropoff folder will be updated.
+
+If a media entry is modified, the file it refers to must either be
+present in the media_dropoff folder, or already in the corresponding
+folder inside media_orig.
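+
+Example invocation (the timestamp is illustrative; use the one from the
+post's filename):
+
+    python notectl.py edit 1721878811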
+''' +def edit_post(ts): + conf.require() + m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith")) + m.load_index() + if m.get_post(ts) is None: + print("No post was made at that time!") + # TODO: allow the user to select a post made near this time + return + d = datetime.fromtimestamp(ts, tz=timezone.utc) + pfn = f"{ts}-{d.isoformat()[:-6]}Z.post" + rp = os.path.join(os.path.join("posts", str(d.year)), pfn) + edit_file(os.path.join(conf.LOCAL_DATA_ROOT, rp)) + p = postutil.process_post(rp, True) + m.replace_post(ts, p) + m.generate_page_index() + if conf.ATOM_ENABLED: + gen_atom() + +''' +Regenerate the ENTIRE monolith file. +Horribly slow. +''' +def regen_monolith(): + conf.require() + print("Do you want to regenerate the ENTIRE monolith file? [y/n]") + if input() not in ['Y', 'y']: + return + m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith")) + m.clear() + postlist = [] + postsp = os.path.join(conf.LOCAL_DATA_ROOT, "posts") + for ye in os.scandir(postsp): + inty = None + try: + inty = int(ye.name) + except ValueError: + pass + if inty is None or not ye.is_dir(): + continue + yearp = os.path.join(postsp, ye.name) + postlist += [os.path.join(yearp, p) for p in filter(lambda x: x.endswith(".post"), os.listdir(yearp))] + def keyf(x): + b = os.path.basename(x) + return int(b[:b.index('-')]) + postlist = sorted(postlist, key=keyf) + for pfn in postlist: + p = postutil.process_post(pfn, True) + m.append(p) + m.load_index() + m.generate_page_index() + if conf.ATOM_ENABLED: + gen_atom() + +''' +Sync all local data to the configured remote host for serving. +''' +def sync_remote(): + conf.require() + subprocess.run(["rsync", "-azv", "--exclude=posts", "--exclude=media_dropoff", conf.LOCAL_DATA_ROOT + "/", conf.SYNC_TARGET]) + +''' +Create a new notekins instance with all files and directories that it expects. +''' +def init_instance(): + repop = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) + print(repop) + if len(sys.argv) < 3: + print("Missing path to the new instance.") + return + targetp = sys.argv[2].rstrip('/') + os.mkdir(targetp, mode=0o755) + os.mkdir(os.path.join(targetp, "posts"), mode=0o755) + os.mkdir(os.path.join(targetp, "emotes"), mode=0o755) + os.mkdir(os.path.join(targetp, "media_dropoff"), mode=0o755) + os.mkdir(os.path.join(targetp, "media_orig"), mode=0o755) + os.mkdir(os.path.join(targetp, "media_thmb"), mode=0o755) + shutil.copytree(os.path.join(repop, "template"), os.path.join(targetp, "template")) + with open(os.path.join(targetp, "notekins.conf"), "w") as f: + f.write(f"LOCAL_DATA_ROOT={targetp}") + m = Monolith(os.path.join(targetp, "posts.monolith")) + m.clear() + +''' +Clean up any media file that isn't used in the monolith file. +TODO. +''' +def media_cleanup(): + conf.require() + pass + +def main(): + if len(sys.argv) < 2: + print("Missing command. Available commands:") + print("new Create a new post.") + print("edit Edit an existing post. Requires a post timestamp.") + print("atom Generate atom feed.") + print("regen Regenerate the entire monolith file.") + print("sync Sync data to remote for hosting.") + print("init Initialize a new Notekins instance. 
Requires path to the instance.") + print("dump Dump the content of the monolith file.") + return + match sys.argv[1]: + case "new": + new_post() + case "edit": + edit_post(int(sys.argv[2])) + case "atom": + gen_atom() + case "regen": + regen_monolith() + case "sync": + sync_remote() + case "init": + init_instance() + case "dump": + m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith")) + m.load_index() + for d in m.get_all_dates(): + m.get_post(d).dump() + +if __name__ == "__main__": + main() diff --git a/utils/postutil.py b/utils/postutil.py new file mode 100644 index 0000000..c978d3e --- /dev/null +++ b/utils/postutil.py @@ -0,0 +1,155 @@ +# Chris Xiong 2024 +# License: Expat (MIT) + +''' +The anatomy of a post: + +--comment <= any line that starts with a double dash will be ignored +Post text (markdown) + +#tag #tag #tag <= a line that starts with # makes it a tag line. + only the first of such lines is used. + +[media] <= lines that are surrounded with [] are media lines. +[media] each contain a single file name inside the brackets. +[media] drop the files in LOCAL_DATA_ROOT/media_dropoff +''' + +from mistune.plugins.formatting import strikethrough, superscript, subscript +from mistune.plugins.url import url +from mistune.plugins.ruby import ruby +from mistune.plugins.spoiler import spoiler +import mistune + +from wand.image import Image + +import tempfile +import os +import shutil +import mimetypes +from datetime import datetime, timezone +from hashlib import file_digest + +from mistune_emote import emote +from monolith import MediaInstance, Post +from config import conf + +''' +Takes an abolute path to a static image, generate a thumbnail for it if needed +Returns path to the thumbnail, relative to conf.LOCAL_DATA_ROOT + +If a thumbnail isn't required, returns None +''' +def generate_thumbnail(file): + with tempfile.TemporaryDirectory() as dir: + outf = os.path.join(dir, "downsampled.webp") + dim = conf.THUMBNAIL_DIM + with Image(filename=file) as i: + if i.height <= dim and i.width <= dim and i.format.lower() != "png": + return None + s = dim / max(i.height, i.width) + i.resize(int(i.width * s), int(i.height * s), "lanczos2") + i.format = "webp" + i.save(filename=outf) + with open(outf, "rb") as f: + d = file_digest(f, "sha256") + shas = d.hexdigest() + destdirp = os.path.join(shas[0:2], shas[2:4]) + destdirp = os.path.join("media_thmb", destdirp) + destpath = os.path.join(destdirp, f"{shas}.webp") + destabsp = os.path.join(conf.LOCAL_DATA_ROOT, destpath) + os.makedirs(os.path.join(conf.LOCAL_DATA_ROOT, destdirp), 0o755, True) + if not os.path.isfile(destabsp): + shutil.move(outf, destabsp) + return destpath + +def should_generate_thumbnail(file): + thumbed_types = ["image/png", "image/jpeg", "image/webp"] + return mimetypes.guess_type(file)[0] in thumbed_types + +def process_body(text): + renderer = mistune.HTMLRenderer() + md = mistune.Markdown(renderer, plugins= + [strikethrough, url, superscript, subscript, ruby, spoiler, emote]) + return md(text) + +''' +move file at absolute path fn to conf.LOCAL_DATA_ROOT///destfilename +destfilename is the return value of dfnf(ts, datetime.fromtimestamp(ts, tz=timezone.utc), fn) + +returns path to the destination file relative to conf.LOCAL_DATA_ROOT +''' +def move_file(fn, ts, dirn, dfnf): + d = datetime.fromtimestamp(ts, tz=timezone.utc) + dfn = dfnf(ts, d, fn) + destydir = os.path.join(dirn, str(d.year)) + destpath = os.path.join(destydir, dfn) + os.makedirs(os.path.join(conf.LOCAL_DATA_ROOT, destydir), 0o755, True) + 
+    return destpath
+
+def move_post(fn, ts):
+    return move_file(fn, ts, "posts", lambda ts, d, fn: f"{ts}-{d.isoformat()[:-6]}Z.post")
+
+def move_media(fn, ts):
+    return move_file(fn, ts, "media_orig", lambda ts, d, fn: f"{ts}-{os.path.basename(fn)}")
+
+'''
+Reads and processes a post from fn.
+fn must be a path relative to conf.LOCAL_DATA_ROOT
+pointing to a raw post input (e.g. posts/2024/xxxxxx-yyyy-mm-ddThh:mm:ssZ.post).
+If is_updating is False, assumes all media is present in the media_dropoff folder.
+
+Returns a Post struct for that post.
+'''
+def process_post(fn, is_updating):
+    body = ""
+    media_str = []
+    media = []
+    tags = []
+    tagline = None
+    fbasen = os.path.basename(fn)
+    ts = int(fbasen[:fbasen.index('-')])
+    with open(fn, "r") as f:
+        for l in f:
+            line = l.strip()
+            if line.startswith("--"):
+                continue
+            if line.startswith('[') and line.endswith(']'):
+                media_str.append(line[1 : -1])
+            elif line.startswith('#'):
+                if tagline is None:
+                    tagline = line
+            elif len(media_str) == 0 and tagline is None:
+                body += l
+
+    rendered_body = process_body(body)
+
+    if tagline is not None:
+        tags = [s[1:] for s in filter(lambda t: t.startswith('#'), tagline.split(' '))]
+
+    for m in media_str:
+        destm = None
+        dropoff = os.path.join("media_dropoff", m)
+        dropoffa = os.path.join(conf.LOCAL_DATA_ROOT, dropoff)
+        e = os.path.isfile(dropoffa)
+        if not is_updating:
+            if not e:
+                raise FileNotFoundError(f"{dropoffa} does not exist.")
+            destm = move_media(dropoffa, ts)
+        elif e:
+            destm = move_media(dropoffa, ts)
+        if destm is None:
+            d = datetime.fromtimestamp(ts, tz=timezone.utc)
+            destm = os.path.join("media_orig", str(d.year))
+            # Original media files are named {ts}-{media file name} by
+            # move_media, so reconstruct the same name here.
+            destm = os.path.join(destm, f"{ts}-{m}")
+            if not os.path.isfile(os.path.join(conf.LOCAL_DATA_ROOT, destm)):
+                raise FileNotFoundError(f"Cannot find original media ({destm})")
+        thumbnail = None
+        if should_generate_thumbnail(destm):
+            thumbnail = generate_thumbnail(os.path.join(conf.LOCAL_DATA_ROOT, destm))
+        if thumbnail is None:
+            thumbnail = destm
+        media.append(MediaInstance.makeImage(thumbnail, destm))
+
+    return Post(rendered_body, ts, media, tags)
diff --git a/utils/rust_monolith_test.py b/utils/rust_monolith_test.py
new file mode 100644
index 0000000..a66eea4
--- /dev/null
+++ b/utils/rust_monolith_test.py
@@ -0,0 +1,92 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+#
+# shitty tests for the Rust Monolith representation
+
+import os
+import sys
+import random
+import tempfile
+import subprocess
+
+import monolith
+import monolith_test
+from config import conf
+
+def make_random_monolith(dir):
+    filename = "posts.monolith"
+    m = monolith.Monolith(os.path.join(dir, filename))
+    nposts = random.randint(10, 100)
+    last_time = 0
+    posts = []
+    for _ in range(0, nposts):
+        p = monolith_test.randpost(last_time)
+        last_time = p.date
+        posts.append(p)
+        m.append(p)
+    m.write_index()
+    m.generate_page_index()
+    with open(os.path.join(dir, "notekins.conf"), "w"): pass
+    return posts
+
+def run_rust_monolith_debug(dir, method, param):
+    p = subprocess.run([sys.argv[1], method, str(param)], capture_output=True, cwd=dir)
+    return p.stdout.decode("utf-8")
+
+def dbg_output(p):
+    pyout = p.content + '\n'
+    pyout += str(p.date) + '\n'
+    for m in p.media:
+        if m.type == monolith.MediaType.IMAGE:
+            pyout += f"Image {m.thumbnail} {m.original}\n"
+    for t in p.tags:
+        pyout += t + '\n'
+    return pyout
+
+def run_tests(dir):
+    posts = make_random_monolith(dir)
+    failed = False
+    for p in posts:
+        o = run_rust_monolith_debug(dir, "get_post", p.date)
+        e = dbg_output(p)
+        if o != e:
+            print(f"get_post failed, date: {p.date}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+        o = run_rust_monolith_debug(dir, "get_post2", p.date)
+        e = dbg_output(p)
+        if o != e:
+            print(f"get_post2 failed, date: {p.date}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+    posts_per_page = conf.POSTS_PER_PAGE
+    for page, ub in enumerate(range(len(posts), 0, -posts_per_page)):
+        pl = max(ub - posts_per_page, 0)
+        pr = ub - 1
+        if (pr - pl + 1 > posts_per_page):
+            failed = True
+            print(f"paging error ???")
+            input()
+        e = ""
+        for x in range(pl, pr + 1):
+            e += dbg_output(posts[x])
+        o = run_rust_monolith_debug(dir, "get_page", page)
+        if o != e:
+            print(f"get_page failed, page: {page}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+    if not failed:
+        print(f"test of monolith with {len(posts)} posts passed.")
+
+def test_rust_monolith():
+    if len(sys.argv) < 2:
+        print("missing path to executable")
+        return
+    for _ in range(0, 100):
+        with tempfile.TemporaryDirectory() as dir:
+            run_tests(dir)
+
+if __name__ == "__main__":
+    test_rust_monolith()
--
cgit v1.2.3