From b736068ee7b82e05c2ede8bc48ace7ffa4709e29 Mon Sep 17 00:00:00 2001
From: Chris Xiong
Date: Wed, 24 Jul 2024 23:40:11 -0400
Subject: Initial commit.

---
 utils/__init__.py           |   0
 utils/atomgen.py            |  51 ++++++++
 utils/config.py             |  64 ++++++++++
 utils/mistune_emote.py      |  21 ++++
 utils/monolith.py           | 285 ++++++++++++++++++++++++++++++++++++++++++++
 utils/monolith_test.py      |  96 +++++++++++++++
 utils/notectl.py            | 178 +++++++++++++++++++++
 utils/postutil.py           | 155 ++++++++++++++++++++
 utils/rust_monolith_test.py |  92 ++++++++++++++
 9 files changed, 942 insertions(+)
 create mode 100644 utils/__init__.py
 create mode 100644 utils/atomgen.py
 create mode 100644 utils/config.py
 create mode 100644 utils/mistune_emote.py
 create mode 100644 utils/monolith.py
 create mode 100644 utils/monolith_test.py
 create mode 100644 utils/notectl.py
 create mode 100644 utils/postutil.py
 create mode 100644 utils/rust_monolith_test.py

diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/utils/atomgen.py b/utils/atomgen.py
new file mode 100644
index 0000000..453465d
--- /dev/null
+++ b/utils/atomgen.py
@@ -0,0 +1,51 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+import os
+import re
+from datetime import datetime, timezone
+
+from monolith import Monolith
+from config import conf
+
+tagrex = re.compile(r'<[^>]+>')
+
+def remove_tags(s):
+    return tagrex.sub('', s)
+
+def ellide(s, l):
+    return s if len(s) <= l else s[:l] + " ..."
+
+def gen_atom():
+    xmlbuf = f'<?xml version="1.0" encoding="utf-8"?>'
+    xmlbuf += f'<feed xmlns="http://www.w3.org/2005/Atom">'
+    xmlbuf += f'<generator version="{conf.VERSION_STRING}">Notekins</generator>'
+    xmlbuf += f'<updated>{datetime.now(timezone.utc).replace(microsecond=0).isoformat()}</updated>'
+    xmlbuf += f'<link href="{conf.ATOM_ROOT}/atom.xml" rel="self" type="application/atom+xml"/>'
+    xmlbuf += f'<link href="{conf.ATOM_ROOT}" rel="alternate" type="text/html"/>'
+    xmlbuf += f'<id>{conf.ATOM_ROOT}/atom.xml</id>'
+    xmlbuf += f'<title>{conf.ATOM_TITLE}</title>'
+    xmlbuf += f'<subtitle>{conf.ATOM_SUBTITLE}</subtitle>'
+    xmlbuf += f'<icon>{conf.ATOM_ICON}</icon>'
+    xmlbuf += f'<logo>{conf.ATOM_ICON}</logo>'
+    m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+    m.load_index()
+    dates = list(reversed(m.get_all_dates()[-conf.ATOM_NPOSTS:]))
+    for d in dates:
+        p = m.get_post(d)
+        link = f"{conf.SERVED_DATA_ROOT}/?post={p.date}"
+        title = ellide(remove_tags(p.content), 32)
+        date = datetime.fromtimestamp(p.date, timezone.utc).replace(microsecond=0).isoformat()
+        xmlbuf += f'<entry>'
+        xmlbuf += f'<title>{title}</title>'
+        xmlbuf += f'<link href="{link}" rel="alternate" type="text/html"/>'
+        xmlbuf += f'<published>{date}</published>'
+        xmlbuf += f'<updated>{date}</updated>'
+        xmlbuf += f'<id>{link}</id>'
+        xmlbuf += f'<author><name>{conf.ATOM_AUTHOR}</name></author>'
+        xmlbuf += f'<content type="text/html" src="{link}"/>'
+        xmlbuf += f'</entry>'
+    xmlbuf += f'</feed>'
+    atomfn = os.path.join(conf.LOCAL_DATA_ROOT, "atom.xml")
+    with open(atomfn, "w") as f:
+        f.write(xmlbuf)
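For reference, here is roughly what one generated entry looks like once the f-string templates are filled in — a minimal sketch with made-up values standing in for the conf.* settings and a real Post record:

```python
from datetime import datetime, timezone

# Hypothetical values; in gen_atom() these come from conf.* and the Post record.
link = "https://example.org/notekins/?post=1721900411"
title = "Went hiking today. Great weathe ..."
date = datetime.fromtimestamp(1721900411, timezone.utc).replace(microsecond=0).isoformat()

entry = f'<entry>'
entry += f'<title>{title}</title>'
entry += f'<link href="{link}" rel="alternate" type="text/html"/>'
entry += f'<published>{date}</published>'
entry += f'<updated>{date}</updated>'
entry += f'<id>{link}</id>'
entry += f'<author><name>Chris Xiong</name></author>'
entry += f'</entry>'
print(entry)
```

Note that remove_tags() strips HTML from the post body before it is elided to 32 characters for the title, so markup never leaks into the feed.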
diff --git a/utils/config.py b/utils/config.py
new file mode 100644
index 0000000..05c1f9d
--- /dev/null
+++ b/utils/config.py
@@ -0,0 +1,64 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+import os
+from os import path
+
+def stripped_str(x): return str(x).strip()
+
+def s2bool(x): return str(x).strip() in ["True", "true", "TRUE"]
+
+# "CONFIG_ITEM_NAME": (default value, parse function)
+CONFIG_ITEMS = {
+    "LOCAL_DATA_ROOT": ("", stripped_str),
+    # e.g. https://chrisoft.org/notekins/
+    "SERVED_DATA_ROOT": ("", stripped_str),
+    # e.g. chrisoft@10.24.1.1:/home/chrisoft/notedir/
+    "SYNC_TARGET": ("", stripped_str),
+    "POSTS_PER_PAGE": (20, int),
+    "THUMBNAIL_DIM": (1280, int),
+    "DISPLAY_TIMEZONE": ("UTC", stripped_str), # only used by backend
+    # atom generator stuff
+    "ATOM_ENABLED": (False, s2bool),
+    "VERSION_STRING": ("1.0", stripped_str),
+    "ATOM_TITLE": ("", stripped_str),
+    "ATOM_ROOT": ("", stripped_str),
+    "ATOM_SUBTITLE": ("", stripped_str),
+    "ATOM_ICON": ("", stripped_str),
+    "ATOM_AUTHOR": ("", stripped_str),
+    "ATOM_NPOSTS": (20, int)
+}
+
+class config:
+    def __init__(self):
+        self.d = dict([(name, prop[0]) for name, prop in CONFIG_ITEMS.items()])
+        p = os.getcwd()
+        self.f = None
+        try:
+            while not path.isfile(path.join(p, "notekins.conf")):
+                if p == path.dirname(p):
+                    raise FileNotFoundError("Cannot locate configuration file.")
+                p = path.dirname(p)
+            fn = path.join(p, "notekins.conf")
+            self.f = fn
+            print(f"Using configuration file {fn}")
+            with open(fn, "r") as f:
+                for l in f:
+                    try:
+                        n, v = l.split('=', 1)
+                        if n not in CONFIG_ITEMS:
+                            continue
+                        self.d[n] = CONFIG_ITEMS[n][1](v)
+                    except ValueError:
+                        pass
+        except FileNotFoundError:
+            pass
+
+    def __getattr__(self, k):
+        return self.d[k]
+    def require(self):
+        if self.f is None:
+            print("This operation requires a configuration file, but none can be found.")
+            exit(1)
+
+conf = config()
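Configuration is plain KEY=VALUE lines; config walks up from the current working directory until it finds a notekins.conf, so the tools can be run from anywhere inside an instance. A hypothetical minimal file, reusing the examples from the comments above:

```
LOCAL_DATA_ROOT=/home/chrisoft/notedir
SERVED_DATA_ROOT=https://chrisoft.org/notekins
SYNC_TARGET=chrisoft@10.24.1.1:/home/chrisoft/notedir/
POSTS_PER_PAGE=20
ATOM_ENABLED=true
ATOM_TITLE=Notekins
ATOM_ROOT=https://chrisoft.org/notekins
ATOM_AUTHOR=Chris Xiong
```

Unknown keys are ignored, and malformed lines are skipped silently; anything not listed keeps its default from CONFIG_ITEMS.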
diff --git a/utils/mistune_emote.py b/utils/mistune_emote.py
new file mode 100644
index 0000000..6c557a5
--- /dev/null
+++ b/utils/mistune_emote.py
@@ -0,0 +1,21 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+from config import conf
+
+EMOTE_PATTERN = r":(?!\s)(?P<emote_name>.+?)(?<!\s):"
+
+emote_root = f"{conf.SERVED_DATA_ROOT}/emotes"
+emote_extension = ".webp"
+
+def parse_inline_emote(inline, m, state):
+    ename = m.group("emote_name")
+    state.append_token({"type": "inline_emote", "raw": ename})
+    return m.end()
+
+def render_inline_emote(renderer, ename):
+    return f'<img class="emote" src="{emote_root}/{ename}{emote_extension}" alt="{ename}">'
+
+def emote(md):
+    md.inline.register("inline_emote", EMOTE_PATTERN, parse_inline_emote, before="link")
+    if md.renderer and md.renderer.NAME == "html":
+        md.renderer.register("inline_emote", render_inline_emote)
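A quick sketch of the plugin in use, mirroring how postutil.py constructs its Markdown instance further down; it assumes it runs somewhere a notekins.conf can be found, since emote_root is derived from SERVED_DATA_ROOT:

```python
import mistune
from mistune_emote import emote

# Same construction as postutil.process_body(), minus the other plugins.
md = mistune.Markdown(mistune.HTMLRenderer(), plugins=[emote])
html = md("hello :wave: world")
# e.g. <p>hello <img class="emote" src="https://example.org/emotes/wave.webp" alt="wave"> world</p>
print(html)
```

The lookarounds in EMOTE_PATTERN reject emote names that start or end with whitespace, so a stray pair of colons in prose does not get eaten.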
diff --git a/utils/monolith.py b/utils/monolith.py
new file mode 100644
index 0000000..d9a90e6
--- /dev/null
+++ b/utils/monolith.py
@@ -0,0 +1,285 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+'''
+I/O facilities for the Monolith file used by the backend.
+
+Structure of the Monolith file:
+Element      Length in bytes  Notes
+<post>       (varies)
+<␋>          1                Treated as part of the post that precedes it
+<post>       (varies)
+<␋>          1
+...
+
+Post:
+Element      Length in bytes  Notes
+content      (varies)         utf8 string, null-terminated. HTML fragment.
+date         8                seconds since unix epoch
+media        (varies)         See below
+ntags        1
+tag[0]       (varies)         null-terminated utf8 string, excluding the hash prefix. HTML-escaped.
+tag[1]       ..               ..
+...
+tag[ntags-1] ..               ..
+
+Media:
+Element             Length in bytes  Notes
+nmedia              1
+<MediaInstance #0>  (varies)
+<MediaInstance #1>  (varies)
+...
+
+MediaInstance:
+Element      Length in bytes  Notes
+type         1                'I' = image
+-----------------type == 'I'-----------------
+thumbnail    (varies)         null-terminated utf8 string, relative path to storage url
+original     (varies)         ..
+
+Index file (.idx)
+00             08 09             0F
+<post #0 date>    <post #0 ending offset>
+<post #1 date>    <post #1 ending offset>
+...
+
+Page index file (.pdx)
+00             08 09             0F
+<page #0 ending offset>
+...
+
+Page #0 contains the latest posts. The final page always starts
+at byte #0 in the monolith.
+'''
+
+from enum import Enum
+from mmap import mmap
+from bisect import bisect_left
+from config import conf
+from datetime import datetime, timezone
+
+MediaType = Enum("MediaType", ["IMAGE", "VIDEO"])
+
+def consume_str(buf):
+    nulp = buf.find(b'\0')
+    rets = buf[0 : nulp].decode("utf-8")
+    return rets, nulp
+
+def strbuf(s):
+    return s.encode("utf-8") + b'\0'
+
+class MediaInstance:
+    def __init__(self, type):
+        self.type = type
+
+    def __str__(self):
+        match self.type:
+            case MediaType.IMAGE:
+                return f"Image: {self.thumbnail} {self.original}\n"
+            case _:
+                return f"Unknown media\n"
+
+    def consume(buf):
+        match buf[0 : 1]:
+            case b'I':
+                l = 1
+                b = buf[1:]
+                thmb, p = consume_str(b)
+                l += p + 1
+                b = b[p + 1:]
+                orig, p = consume_str(b)
+                l += p + 1
+                return MediaInstance.makeImage(thmb, orig), l
+            case _:
+                raise ValueError("Invalid media type")
+
+    def to_buf(self):
+        match self.type:
+            case MediaType.IMAGE:
+                return b'I' + strbuf(self.thumbnail) + strbuf(self.original)
+            case _:
+                raise ValueError("Unsupported media type")
+
+    def dump(self):
+        match self.type:
+            case MediaType.IMAGE:
+                print(f"({self.thumbnail}, {self.original})")
+            case _:
+                raise ValueError("Unsupported media type")
+
+    def makeImage(thumb, orig):
+        r = MediaInstance(MediaType.IMAGE)
+        r.thumbnail = thumb
+        r.original = orig
+        return r
+
+class Post:
+    '''
+    .content: utf-8 string
+    .date: int, secs since unix epoch
+    .media: list of MediaInstance
+    .tags: list of strings
+    '''
+    def __init__(self, cont, date, media, tags):
+        self.content = cont
+        self.date = date
+        self.media = media
+        self.tags = tags
+
+    def __str__(self):
+        medias = "\n".join([str(m) for m in self.media])
+        tags = ",".join([f'"{t}"' for t in self.tags])
+        return f"{self.content}\n{self.date}\n{medias}\n[{tags}]"
+
+    def from_buf(buf):
+        content, p = consume_str(buf)
+        buf = buf[p + 1 :]
+        date = int.from_bytes(buf[: 8], "little")
+        buf = buf[8 :]
+        media = []
+        nmedia = int.from_bytes(buf[: 1], "little")
+        buf = buf[1 :]
+        for i in range(0, nmedia):
+            m, l = MediaInstance.consume(buf)
+            media.append(m)
+            buf = buf[l :]
+        tags = []
+        ntags = int.from_bytes(buf[: 1], "little")
+        buf = buf[1 :]
+        for i in range(0, ntags):
+            t, p = consume_str(buf)
+            tags.append(t)
+            buf = buf[p + 1:]
+        return Post(content, date, media, tags)
+
+    def to_buf(self):
+        ret = strbuf(self.content)
+        ret += self.date.to_bytes(8, "little")
+        ret += len(self.media).to_bytes(1, "little")
+        for m in self.media:
+            ret += m.to_buf()
+        ret += len(self.tags).to_bytes(1, "little")
+        for t in self.tags:
+            ret += strbuf(t)
+        return ret + b'\x0b'
+
+    def dump(self):
+        print('=' * 40)
+        print(self.content)
+        print(datetime.fromtimestamp(self.date, tz=timezone.utc).isoformat())
+        for t in self.tags:
+            print(f"#{t} ", end='')
+        print("")
+        for m in self.media:
+            m.dump()
+
+class Monolith:
+    def __init__(self, fn):
+        self.filename = fn
+        self.idxfn = f"{fn}.idx"
+        self.pdxfn = f"{fn}.pdx"
+        self.postranges = []
+
+    def _append_idxf(self, t, r):
+        with open(self.idxfn, "ab") as f:
+            buf = t.to_bytes(8, "little") + \
+                  r.to_bytes(8, "little")
+            if f.write(buf) != len(buf):
+                raise RuntimeError("write failure")
+
+    def clear(self):
+        with open(self.filename, "wb"): pass
+        with open(self.idxfn, "wb"): pass
+        with open(self.pdxfn, "wb"): pass
+
+    def append(self, post):
+        with open(self.filename, "ab") as f:
+            postbuf = post.to_buf()
+            t = post.date
+            l = f.tell()
+            w = f.write(postbuf)
+            if w != len(postbuf):
+                raise RuntimeError("write failure")
+            r = l + w
+        self.postranges.append((t, l, r))
+        self._append_idxf(t, r)
+        # self.generate_page_index()
+
+    def load_index(self):
+        with open(self.idxfn, "rb") as f:
+            last_r = 0
+            self.postranges = []
+            while True:
+                bs = f.read(16)
+                if len(bs) == 0: break
+                t = int.from_bytes(bs[0 : 8], "little")
+                l = last_r
+                r = int.from_bytes(bs[8 :16], "little")
+                self.postranges.append((t, l, r))
+                last_r = r
+
+    def write_index(self):
+        with open(self.idxfn, "wb") as f:
+            for (t, _, r) in self.postranges:
+                f.write(t.to_bytes(8, "little") + \
+                        r.to_bytes(8, "little"))
+
+    def find_post(self, date):
+        p = bisect_left(self.postranges, date, key=lambda p: p[0])
+        if p != len(self.postranges) and self.postranges[p][0] == date:
+            return p
+        return None
+
+    def find_nearby_posts(self, date, r=2):
+        p = bisect_left(self.postranges, date, key=lambda p: p[0])
+        left = max(p - r, 0)
+        right = min(p + r + 1, len(self.postranges))
+        return [t for (t, _, _) in self.postranges[left : right]]
+
+    def get_all_dates(self):
+        return [t for (t, _, _) in self.postranges]
+
+    def get_post(self, date):
+        p = self.find_post(date)
+        if p is None: return None
+        t, l, r = self.postranges[p]
+        with open(self.filename, "r+b") as f:
+            d = mmap(f.fileno(), 0)
+            post = Post.from_buf(d[l : r])
+        return post
+
+    def replace_post(self, date, post):
+        p = self.find_post(date)
+        if p is None: return None
+        t, l, r = self.postranges[p]
+        new_post_buf = post.to_buf()
+        dlen = len(new_post_buf) - (r - l)
+        with open(self.filename, "r+b") as f:
+            d = mmap(f.fileno(), 0)
+            mlength = len(d)
+            oldend = r
+            newend = l + len(new_post_buf)
+            if dlen > 0:
+                d.resize(mlength + dlen)
+            if dlen != 0:
+                d.move(newend, oldend, mlength - oldend)
+            if dlen < 0:
+                d.resize(mlength + dlen)
+            d[l : newend] = new_post_buf
+        self.postranges[p] = (t, l, r + dlen)
+        for i in range(p + 1, len(self.postranges)):
+            t, l, r = self.postranges[i]
+            self.postranges[i] = (t, l + dlen, r + dlen)
+        self.write_index()
+
+    def generate_page_index(self):
+        posts_per_page = conf.POSTS_PER_PAGE
+        ranges = []
+        for ub in range(len(self.postranges), 0, -posts_per_page):
+            pr = ub - 1
+            _, _, r = self.postranges[pr]
+            ranges.append(r)
+        with open(self.pdxfn, "wb") as f:
+            for r in ranges:
+                f.write(r.to_bytes(8, "little"))
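The record layout documented in the header comment is easiest to see in a round trip — a minimal sketch using the classes above, with made-up paths and timestamp:

```python
from monolith import MediaInstance, Post

p = Post("<p>hi</p>", 1721900411,
         [MediaInstance.makeImage("media_thmb/ab/cd/abcd1234.webp",
                                  "media_orig/2024/1721900411-cat.jpg")],
         ["demo"])
buf = p.to_buf()
assert buf.endswith(b'\x0b')  # the trailing <␋> separator from the table above
q = Post.from_buf(buf)        # parsing stops after the tags, so the ␋ is ignored
assert (q.content, q.date, q.tags) == (p.content, p.date, p.tags)
```

Since every index entry stores only the post's ending offset, load_index() recovers each start offset from the previous entry's end; that is why replace_post() must shift every following (start, end) pair and rewrite the whole index when a post changes size.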
diff --git a/utils/monolith_test.py b/utils/monolith_test.py
new file mode 100644
index 0000000..b4e1b30
--- /dev/null
+++ b/utils/monolith_test.py
@@ -0,0 +1,96 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+#
+# Basic unit tests for the Python Monolith class
+
+import monolith
+import unittest
+import random
+import os
+from mmap import mmap
+
+def randstr(n):
+    return ''.join(random.choices(''.join([chr(i + ord('0')) for i in range(0, 75)]), k=n))
+
+def randpost(last_time):
+    content = randstr(random.randint(10, 1024))
+    date = random.randint(last_time + 1, last_time + 999999)
+    media = []
+    tags = []
+    for _ in range(0, random.randint(0, 9)):
+        media.append(monolith.MediaInstance.makeImage(randstr(20), randstr(20)))
+    for _ in range(0, random.randint(0, 4)):
+        tags.append(randstr(random.randint(1, 8)))
+    return monolith.Post(content, date, media, tags)
+
+def posteq(a, b):
+    if a is None or b is None:
+        return False
+    if len(a.media) != len(b.media) or len(a.tags) != len(b.tags):
+        return False
+    for x, y in zip(a.media, b.media):
+        if x.thumbnail != y.thumbnail or x.original != y.original:
+            return False
+    for x, y in zip(a.tags, b.tags):
+        if x != y: return False
+    return a.content == b.content and a.date == b.date
+
+class TestMonolith(unittest.TestCase):
+    def test_replace(self):
+        posts = []
+        filename = "rep.monolith"
+        m = monolith.Monolith(filename)
+        p1 = randpost(123)
+        p2 = randpost(p1.date)
+        p3 = randpost(0)
+        p3.date = p1.date
+        m.append(p1)
+        m.append(p2)
+        m.replace_post(p1.date, p3)
+        self.assertTrue(posteq(m.get_post(p3.date), p3))
+        self.assertTrue(posteq(m.get_post(p2.date), p2))
+
+    def test_combined(self):
+        posts = []
+        filename = "test.monolith"
+        if True:
+            m = monolith.Monolith(filename)
+            last_time = 0
+            for _ in range(0, 100):
+                op = 1 if random.random() < 0.2 else 0
+                if op == 1 and len(posts) == 0:
+                    op = 0
+                if op == 0:
+                    p = randpost(last_time)
+                    last_time = p.date
+                    posts.append(p)
+                    m.append(p)
+                elif op == 1:
+                    p = randpost(0)
+                    position = random.randint(0, len(posts) - 1)
+                    p.date = posts[position].date
+                    posts[position] = p
+                    m.replace_post(p.date, p)
+            m.write_index()
+            m.generate_page_index()
+            for p in posts:
+                pp = m.get_post(p.date)
+                self.assertTrue(posteq(p, pp))
+
+            with open(filename, "r+b") as f:
+                d = mmap(f.fileno(), 0)
+                for _, _, r in m.postranges:
+                    self.assertEqual(d[r - 1 : r], b'\v')
+        if True:
+            m = monolith.Monolith(filename)
+            m.load_index()
+            dates = m.get_all_dates()
+            self.assertEqual(len(dates), len(posts))
+            for t, p in zip(dates, posts):
+                self.assertEqual(t, p.date)
+            for t, p in zip(dates, posts):
+                self.assertTrue(posteq(p, m.get_post(t)))
+
+
+if __name__ == "__main__":
+    unittest.main()
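These tests exercise the append/replace round trip and check the ␋ terminator at the end of every record directly. They can be run stand-alone from the utils directory:

```
python -m unittest monolith_test
```

Note that they write rep.monolith and test.monolith (plus the .idx/.pdx side files) into the current working directory, and they work without a notekins.conf since config falls back to defaults when no file is found.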
diff --git a/utils/notectl.py b/utils/notectl.py
new file mode 100644
index 0000000..a2e4a19
--- /dev/null
+++ b/utils/notectl.py
@@ -0,0 +1,178 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+import os
+import sys
+import time
+import shutil
+import tempfile
+import subprocess
+from datetime import datetime, timezone
+
+import postutil
+from atomgen import gen_atom
+from monolith import Monolith
+from config import conf
+
+'''
+Launches an editor (set by $EDITOR) to edit the given file
+'''
+def edit_file(fn):
+    editor = os.environ["EDITOR"]
+    subprocess.run([editor, fn])
+
+'''
+Opens an editor to create a new post.
+The post will be formatted, and all media will be processed accordingly.
+'''
+def new_post():
+    conf.require()
+    with tempfile.TemporaryDirectory() as dir:
+        fn = os.path.join(dir, "note.txt")
+        with open(fn, "w"): pass
+        edit_file(fn)
+        ts = time.time_ns() // 10 ** 9
+        postpath = postutil.move_post(fn, ts)
+    p = postutil.process_post(postpath, False)
+    if len(p.content) == 0 and len(p.media) == 0:
+        print("No post was made.")
+        return
+    print(f"Post {os.path.basename(postpath)} made!")
+    m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+    m.append(p)
+    m.load_index()
+    m.generate_page_index()
+    if conf.ATOM_ENABLED:
+        gen_atom()
+
+'''
+Opens an editor to edit an existing post.
+Post time cannot be modified and will always stay the same.
+Media that can be found in the media_dropoff folder will be updated.
+
+If a media entry is modified, the file it refers to must either be
+present in the media_dropoff folder, or already in the corresponding
+folder inside media_orig.
+'''
+def edit_post(ts):
+    conf.require()
+    m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+    m.load_index()
+    if m.get_post(ts) is None:
+        print("No post was made at that time!")
+        # TODO: allow the user to select a post made near this time
+        return
+    d = datetime.fromtimestamp(ts, tz=timezone.utc)
+    pfn = f"{ts}-{d.isoformat()[:-6]}Z.post"
+    rp = os.path.join(os.path.join("posts", str(d.year)), pfn)
+    edit_file(os.path.join(conf.LOCAL_DATA_ROOT, rp))
+    p = postutil.process_post(rp, True)
+    m.replace_post(ts, p)
+    m.generate_page_index()
+    if conf.ATOM_ENABLED:
+        gen_atom()
+
+'''
+Regenerate the ENTIRE monolith file.
+Horribly slow.
+'''
+def regen_monolith():
+    conf.require()
+    print("Do you want to regenerate the ENTIRE monolith file? [y/n]")
+    if input() not in ['Y', 'y']:
+        return
+    m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+    m.clear()
+    postlist = []
+    postsp = os.path.join(conf.LOCAL_DATA_ROOT, "posts")
+    for ye in os.scandir(postsp):
+        inty = None
+        try:
+            inty = int(ye.name)
+        except ValueError:
+            pass
+        if inty is None or not ye.is_dir():
+            continue
+        yearp = os.path.join(postsp, ye.name)
+        postlist += [os.path.join(yearp, p) for p in filter(lambda x: x.endswith(".post"), os.listdir(yearp))]
+    def keyf(x):
+        b = os.path.basename(x)
+        return int(b[:b.index('-')])
+    postlist = sorted(postlist, key=keyf)
+    for pfn in postlist:
+        p = postutil.process_post(pfn, True)
+        m.append(p)
+    m.load_index()
+    m.generate_page_index()
+    if conf.ATOM_ENABLED:
+        gen_atom()
+
+'''
+Sync all local data to the configured remote host for serving.
+'''
+def sync_remote():
+    conf.require()
+    subprocess.run(["rsync", "-azv", "--exclude=posts", "--exclude=media_dropoff", conf.LOCAL_DATA_ROOT + "/", conf.SYNC_TARGET])
+
+'''
+Create a new notekins instance with all files and directories that it expects.
+'''
+def init_instance():
+    repop = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    print(repop)
+    if len(sys.argv) < 3:
+        print("Missing path to the new instance.")
+        return
+    targetp = sys.argv[2].rstrip('/')
+    os.mkdir(targetp, mode=0o755)
+    os.mkdir(os.path.join(targetp, "posts"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "emotes"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "media_dropoff"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "media_orig"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "media_thmb"), mode=0o755)
+    shutil.copytree(os.path.join(repop, "template"), os.path.join(targetp, "template"))
+    with open(os.path.join(targetp, "notekins.conf"), "w") as f:
+        f.write(f"LOCAL_DATA_ROOT={targetp}")
+    m = Monolith(os.path.join(targetp, "posts.monolith"))
+    m.clear()
+
+'''
+Clean up any media file that isn't used in the monolith file.
+TODO.
+'''
+def media_cleanup():
+    conf.require()
+    pass
+
+def main():
+    if len(sys.argv) < 2:
+        print("Missing command. Available commands:")
+        print("new    Create a new post.")
+        print("edit   Edit an existing post. Requires a post timestamp.")
+        print("atom   Generate atom feed.")
+        print("regen  Regenerate the entire monolith file.")
+        print("sync   Sync data to remote for hosting.")
+        print("init   Initialize a new Notekins instance. Requires path to the instance.")
+        print("dump   Dump the content of the monolith file.")
+        return
+    match sys.argv[1]:
+        case "new":
+            new_post()
+        case "edit":
+            edit_post(int(sys.argv[2]))
+        case "atom":
+            gen_atom()
+        case "regen":
+            regen_monolith()
+        case "sync":
+            sync_remote()
+        case "init":
+            init_instance()
+        case "dump":
+            m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+            m.load_index()
+            for d in m.get_all_dates():
+                m.get_post(d).dump()
+
+if __name__ == "__main__":
+    main()
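A typical session looks like the following, assuming $EDITOR is set and /path/to/notekins is a hypothetical checkout of this repository (the commands are the ones dispatched in main() above):

```
python /path/to/notekins/utils/notectl.py init ~/notedir
cd ~/notedir
python /path/to/notekins/utils/notectl.py new    # opens $EDITOR, appends to posts.monolith
python /path/to/notekins/utils/notectl.py atom   # regenerate atom.xml (if ATOM_ENABLED)
python /path/to/notekins/utils/notectl.py sync   # rsync everything to SYNC_TARGET
```

Because config searches upward for notekins.conf, the new/edit/sync commands work from anywhere inside the instance directory created by init.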
diff --git a/utils/postutil.py b/utils/postutil.py
new file mode 100644
index 0000000..c978d3e
--- /dev/null
+++ b/utils/postutil.py
@@ -0,0 +1,155 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+'''
+The anatomy of a post:
+
+--comment       <= any line that starts with a double dash will be ignored
+Post text (markdown)
+
+#tag #tag #tag  <= a line that starts with # makes it a tag line.
+                   only the first of such lines is used.
+
+[media]         <= lines that are surrounded with [] are media lines.
+[media]            each contain a single file name inside the brackets.
+[media]            drop the files in LOCAL_DATA_ROOT/media_dropoff
+'''
+
+from mistune.plugins.formatting import strikethrough, superscript, subscript
+from mistune.plugins.url import url
+from mistune.plugins.ruby import ruby
+from mistune.plugins.spoiler import spoiler
+import mistune
+
+from wand.image import Image
+
+import tempfile
+import os
+import shutil
+import mimetypes
+from datetime import datetime, timezone
+from hashlib import file_digest
+
+from mistune_emote import emote
+from monolith import MediaInstance, Post
+from config import conf
+
+'''
+Takes an absolute path to a static image, generate a thumbnail for it if needed
+Returns path to the thumbnail, relative to conf.LOCAL_DATA_ROOT
+
+If a thumbnail isn't required, returns None
+'''
+def generate_thumbnail(file):
+    with tempfile.TemporaryDirectory() as dir:
+        outf = os.path.join(dir, "downsampled.webp")
+        dim = conf.THUMBNAIL_DIM
+        with Image(filename=file) as i:
+            if i.height <= dim and i.width <= dim and i.format.lower() != "png":
+                return None
+            s = dim / max(i.height, i.width)
+            i.resize(int(i.width * s), int(i.height * s), "lanczos2")
+            i.format = "webp"
+            i.save(filename=outf)
+        with open(outf, "rb") as f:
+            d = file_digest(f, "sha256")
+        shas = d.hexdigest()
+        destdirp = os.path.join(shas[0:2], shas[2:4])
+        destdirp = os.path.join("media_thmb", destdirp)
+        destpath = os.path.join(destdirp, f"{shas}.webp")
+        destabsp = os.path.join(conf.LOCAL_DATA_ROOT, destpath)
+        os.makedirs(os.path.join(conf.LOCAL_DATA_ROOT, destdirp), 0o755, True)
+        if not os.path.isfile(destabsp):
+            shutil.move(outf, destabsp)
+        return destpath
+
+def should_generate_thumbnail(file):
+    thumbed_types = ["image/png", "image/jpeg", "image/webp"]
+    return mimetypes.guess_type(file)[0] in thumbed_types
+
+def process_body(text):
+    renderer = mistune.HTMLRenderer()
+    md = mistune.Markdown(renderer, plugins=
+        [strikethrough, url, superscript, subscript, ruby, spoiler, emote])
+    return md(text)
+
+'''
+move file at absolute path fn to conf.LOCAL_DATA_ROOT/<dirn>/<year>/destfilename
+destfilename is the return value of dfnf(ts, datetime.fromtimestamp(ts, tz=timezone.utc), fn)
+
+returns path to the destination file relative to conf.LOCAL_DATA_ROOT
+'''
+def move_file(fn, ts, dirn, dfnf):
+    d = datetime.fromtimestamp(ts, tz=timezone.utc)
+    dfn = dfnf(ts, d, fn)
+    destydir = os.path.join(dirn, str(d.year))
+    destpath = os.path.join(destydir, dfn)
+    os.makedirs(os.path.join(conf.LOCAL_DATA_ROOT, destydir), 0o755, True)
+    shutil.move(fn, os.path.join(conf.LOCAL_DATA_ROOT, destpath))
+    return destpath
+
+def move_post(fn, ts):
+    return move_file(fn, ts, "posts", lambda ts, d, fn: f"{ts}-{d.isoformat()[:-6]}Z.post")
+
+def move_media(fn, ts):
+    return move_file(fn, ts, "media_orig", lambda ts, d, fn: f"{ts}-{os.path.basename(fn)}")
+
+'''
+Reads and processes a post from fn.
+fn must be a path relative to conf.LOCAL_DATA_ROOT
+pointing to a raw post input (e.g. posts/2024/xxxxxx-yyyy-mm-ddThh:mm:ssZ.post)
+If is_updating is False, assumes all media is present in the media_dropoff folder.
+
+Returns a Post struct for that post.
+'''
+def process_post(fn, is_updating):
+    body = ""
+    media_str = []
+    media = []
+    tags = []
+    tagline = None
+    fbasen = os.path.basename(fn)
+    ts = int(fbasen[:fbasen.index('-')])
+    with open(fn, "r") as f:
+        for l in f:
+            line = l.strip()
+            if line.startswith("--"):
+                continue
+            if line.startswith('[') and line.endswith(']'):
+                media_str.append(line[1 : -1])
+            elif line.startswith('#'):
+                if tagline is None:
+                    tagline = line
+            elif len(media_str) == 0 and tagline is None:
+                body += l
+
+    rendered_body = process_body(body)
+
+    if tagline is not None:
+        tags = [s[1:] for s in filter(lambda t: t.startswith('#'), tagline.split(' '))]
+
+    for m in media_str:
+        destm = None
+        dropoff = os.path.join("media_dropoff", m)
+        dropoffa = os.path.join(conf.LOCAL_DATA_ROOT, dropoff)
+        e = os.path.isfile(dropoffa)
+        if not is_updating:
+            if not e:
+                raise FileNotFoundError(f"{dropoffa} does not exist.")
+            destm = move_media(dropoffa, ts)
+        elif e:
+            destm = move_media(dropoffa, ts)
+        if destm is None:
+            d = datetime.fromtimestamp(ts, tz=timezone.utc)
+            destm = os.path.join("media_orig", str(d.year))
+            destm = os.path.join(destm, f"{ts}-{os.path.basename(m)}")
+            if not os.path.isfile(os.path.join(conf.LOCAL_DATA_ROOT, destm)):
+                raise FileNotFoundError(f"Cannot find original media ({destm})")
+        thumbnail = None
+        if should_generate_thumbnail(destm):
+            thumbnail = generate_thumbnail(os.path.join(conf.LOCAL_DATA_ROOT, destm))
+        if thumbnail is None:
+            thumbnail = destm
+        media.append(MediaInstance.makeImage(thumbnail, destm))
+
+    return Post(rendered_body, ts, media, tags)
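A hypothetical raw post, in the shape the anatomy comment above describes (this is what notectl drops into posts/<year>/ before process_post() runs):

```
--weather log, day 3
Went hiking today. **Great** weather!

#hiking #journal

[IMG_8054.jpg]
```

The media line expects IMG_8054.jpg to be sitting in LOCAL_DATA_ROOT/media_dropoff; process_post() then moves it under media_orig/<year>/ with a timestamp prefix and, for large or PNG images, generates a content-addressed webp thumbnail under media_thmb/.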
diff --git a/utils/rust_monolith_test.py b/utils/rust_monolith_test.py
new file mode 100644
index 0000000..a66eea4
--- /dev/null
+++ b/utils/rust_monolith_test.py
@@ -0,0 +1,92 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+#
+# shitty tests for the Rust Monolith representation
+
+import os
+import sys
+import random
+import tempfile
+import subprocess
+
+import monolith
+import monolith_test
+from config import conf
+
+def make_random_monolith(dir):
+    filename = "posts.monolith"
+    m = monolith.Monolith(os.path.join(dir, filename))
+    nposts = random.randint(10, 100)
+    last_time = 0
+    posts = []
+    for _ in range(0, nposts):
+        p = monolith_test.randpost(last_time)
+        last_time = p.date
+        posts.append(p)
+        m.append(p)
+    m.write_index()
+    m.generate_page_index()
+    with open(os.path.join(dir, "notekins.conf"), "w"): pass
+    return posts
+
+def run_rust_monolith_debug(dir, method, param):
+    p = subprocess.run([sys.argv[1], method, str(param)], capture_output=True, cwd=dir)
+    return p.stdout.decode("utf-8")
+
+def dbg_output(p):
+    pyout = p.content + '\n'
+    pyout += str(p.date) + '\n'
+    for m in p.media:
+        if m.type == monolith.MediaType.IMAGE:
+            pyout += f"Image {m.thumbnail} {m.original}\n"
+    for t in p.tags:
+        pyout += t + '\n'
+    return pyout
+
+def run_tests(dir):
+    posts = make_random_monolith(dir)
+    failed = False
+    for p in posts:
+        o = run_rust_monolith_debug(dir, "get_post", p.date)
+        e = dbg_output(p)
+        if o != e:
+            print(f"get_post failed, date: {p.date}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+        o = run_rust_monolith_debug(dir, "get_post2", p.date)
+        e = dbg_output(p)
+        if o != e:
+            print(f"get_post2 failed, date: {p.date}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+    posts_per_page = conf.POSTS_PER_PAGE
+    for page, ub in enumerate(range(len(posts), 0, -posts_per_page)):
+        pl = max(ub - posts_per_page, 0)
+        pr = ub - 1
+        if (pr - pl + 1 > posts_per_page):
+            failed = True
+            print(f"paging error ???")
+            input()
+        e = ""
+        for x in range(pl, pr + 1):
+            e += dbg_output(posts[x])
+        o = run_rust_monolith_debug(dir, "get_page", page)
+        if o != e:
+            print(f"get_page failed, page: {page}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+    if not failed:
+        print(f"test of monolith with {len(posts)} posts passed.")
+
+def test_rust_monolith():
+    if len(sys.argv) < 2:
+        print("missing path to executable")
+        return
+    for _ in range(0, 100):
+        with tempfile.TemporaryDirectory() as dir:
+            run_tests(dir)
+
+if __name__ == "__main__":
+    test_rust_monolith()
--
cgit v1.2.3
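The harness drives an external Rust binary whose debug CLI is assumed (per run_rust_monolith_debug above) to accept get_post <date>, get_post2 <date> and get_page <n> and to print records in the same shape as dbg_output(). A hypothetical invocation:

```
python rust_monolith_test.py target/debug/monolith_debug
```

Each of the 100 iterations builds a fresh random monolith (plus an empty notekins.conf) in a temporary directory, runs the binary with its working directory set there, and diffs its output against the Python implementation, pausing on any mismatch.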