author    Chris Xiong <chirs241097@gmail.com> 2024-07-24 23:40:11 -0400
committer Chris Xiong <chirs241097@gmail.com> 2024-07-24 23:40:11 -0400
commit    b736068ee7b82e05c2ede8bc48ace7ffa4709e29 (patch)
tree      eaa3cce9fdd9973043a7a55613584f90f6598a20 /utils
download  notekins-b736068ee7b82e05c2ede8bc48ace7ffa4709e29.tar.xz
Initial commit.
Diffstat (limited to 'utils')
-rw-r--r--  utils/__init__.py             0
-rw-r--r--  utils/atomgen.py             51
-rw-r--r--  utils/config.py              64
-rw-r--r--  utils/mistune_emote.py       21
-rw-r--r--  utils/monolith.py           285
-rw-r--r--  utils/monolith_test.py       96
-rw-r--r--  utils/notectl.py            178
-rw-r--r--  utils/postutil.py           155
-rw-r--r--  utils/rust_monolith_test.py  92
9 files changed, 942 insertions, 0 deletions
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/utils/__init__.py
diff --git a/utils/atomgen.py b/utils/atomgen.py
new file mode 100644
index 0000000..453465d
--- /dev/null
+++ b/utils/atomgen.py
@@ -0,0 +1,51 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+import os
+import re
+from datetime import datetime, timezone
+
+from monolith import Monolith
+from config import conf
+
+tagrex = re.compile(r'<[^>]+>')
+
+def remove_tags(s):
+    return tagrex.sub('', s)
+
+def elide(s, l):
+    return s if len(s) <= l else s[:l] + " ..."
+
+def gen_atom():
+    xmlbuf = '<?xml version="1.0" encoding="utf-8"?>'
+    xmlbuf += '<feed xmlns="http://www.w3.org/2005/Atom">'
+    xmlbuf += f'<generator uri="https://cgit.chrisoft.org/notekins.git" version="{conf.VERSION_STRING}">Notekins</generator>'
+    xmlbuf += f'<updated>{datetime.now(timezone.utc).replace(microsecond=0).isoformat()}</updated>'
+    xmlbuf += f'<link href="{conf.ATOM_ROOT}/atom.xml" rel="self" type="application/atom+xml" title="Atom"/>'
+    xmlbuf += f'<link href="{conf.ATOM_ROOT}/" rel="alternate" type="text/html"/>'
+    xmlbuf += f'<id>{conf.ATOM_ROOT}/atom.xml</id>'
+    xmlbuf += f'<title>{conf.ATOM_TITLE}</title>'
+    xmlbuf += f'<subtitle>{conf.ATOM_SUBTITLE}</subtitle>'
+    xmlbuf += f'<icon>{conf.ATOM_ICON}</icon>'
+    xmlbuf += f'<logo>{conf.ATOM_ICON}</logo>'
+    m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+    m.load_index()
+    dates = list(reversed(m.get_all_dates()[-conf.ATOM_NPOSTS:]))
+    for d in dates:
+        p = m.get_post(d)
+        link = f"{conf.SERVED_DATA_ROOT}/?post={p.date}"
+        title = elide(remove_tags(p.content), 32)
+        date = datetime.fromtimestamp(p.date, timezone.utc).replace(microsecond=0).isoformat()
+        xmlbuf += '<entry>'
+        xmlbuf += f'<title>{title}</title>'
+        xmlbuf += f'<link href="{link}" title="{title}"/>'
+        xmlbuf += f'<published>{date}</published>'
+        xmlbuf += f'<updated>{date}</updated>'
+        xmlbuf += f'<id>{link}</id>'
+        xmlbuf += f'<author><name>{conf.ATOM_AUTHOR}</name></author>'
+        xmlbuf += f'<content type="html" xml:base="{link}"><![CDATA[{p.content}]]></content>'
+        xmlbuf += '</entry>'
+    xmlbuf += '</feed>'
+    atomfn = os.path.join(conf.LOCAL_DATA_ROOT, "atom.xml")
+    with open(atomfn, "w") as f:
+        f.write(xmlbuf)
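
A minimal smoke test for gen_atom(), assuming it is run from inside an instance directory that already has a populated posts.monolith and its .idx (the check itself is illustrative, not part of the commit):

    import os
    import xml.etree.ElementTree as ET
    from atomgen import gen_atom
    from config import conf

    gen_atom()
    # parse the feed we just wrote and sanity-check its shape
    root = ET.parse(os.path.join(conf.LOCAL_DATA_ROOT, "atom.xml")).getroot()
    ns = {"atom": "http://www.w3.org/2005/Atom"}
    print(root.findtext("atom:title", namespaces=ns))
    print(len(root.findall("atom:entry", namespaces=ns)), "entries")
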
diff --git a/utils/config.py b/utils/config.py
new file mode 100644
index 0000000..05c1f9d
--- /dev/null
+++ b/utils/config.py
@@ -0,0 +1,64 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+import os
+from os import path
+
+def stripped_str(x): return str(x).strip()
+
+def s2bool(x): return str(x).strip() in ["True", "true", "TRUE"]
+
+# "CONFIG_ITEM_NAME": (default value, parse function)
+CONFIG_ITEMS = {
+    "LOCAL_DATA_ROOT": ("", stripped_str),
+    # e.g. https://chrisoft.org/notekins/
+    "SERVED_DATA_ROOT": ("", stripped_str),
+    # e.g. chrisoft@10.24.1.1:/home/chrisoft/notedir/
+    "SYNC_TARGET": ("", stripped_str),
+    "POSTS_PER_PAGE": (20, int),
+    "THUMBNAIL_DIM": (1280, int),
+    "DISPLAY_TIMEZONE": ("UTC", stripped_str), # only used by backend
+    # atom generator stuff
+    "ATOM_ENABLED": (False, s2bool),
+    "VERSION_STRING": ("1.0", stripped_str),
+    "ATOM_TITLE": ("", stripped_str),
+    "ATOM_ROOT": ("", stripped_str),
+    "ATOM_SUBTITLE": ("", stripped_str),
+    "ATOM_ICON": ("", stripped_str),
+    "ATOM_AUTHOR": ("", stripped_str),
+    "ATOM_NPOSTS": (20, int)
+}
+
+class config:
+    def __init__(self):
+        self.d = dict([(name, prop[0]) for name, prop in CONFIG_ITEMS.items()])
+        p = os.getcwd()
+        self.f = None
+        try:
+            while not path.isfile(path.join(p, "notekins.conf")):
+                if p == path.dirname(p):
+                    raise FileNotFoundError("Cannot locate configuration file.")
+                p = path.dirname(p)
+            fn = path.join(p, "notekins.conf")
+            self.f = fn
+            print(f"Using configuration file {fn}")
+            with open(fn, "r") as f:
+                for l in f:
+                    try:
+                        # split on the first '=' only, so values may contain '='
+                        n, v = l.split('=', 1)
+                        if n not in CONFIG_ITEMS:
+                            continue
+                        self.d[n] = CONFIG_ITEMS[n][1](v)
+                    except ValueError:
+                        pass
+        except FileNotFoundError:
+            pass
+
+    def __getattr__(self, k):
+        return self.d[k]
+    def require(self):
+        if self.f is None:
+            print("This operation requires a configuration file, but none can be found.")
+            exit(1)
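
For reference, a hypothetical notekins.conf this parser accepts: one KEY=value pair per line, keys matching CONFIG_ITEMS, each value run through that item's parse function (all values below are illustrative):

    LOCAL_DATA_ROOT=/home/user/notes
    SERVED_DATA_ROOT=https://example.org/notekins
    SYNC_TARGET=user@example.org:/srv/notekins/
    POSTS_PER_PAGE=20
    ATOM_ENABLED=true
    ATOM_TITLE=Example Notes
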
diff --git a/utils/mistune_emote.py b/utils/mistune_emote.py
new file mode 100644
index 0000000..6c557a5
--- /dev/null
+++ b/utils/mistune_emote.py
@@ -0,0 +1,21 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+from config import conf
+
+# lookbehind keeps the emote name from ending in whitespace
+EMOTE_PATTERN = r":(?!\s)(?P<emote_name>.+?)(?<!\s):"
+
+emote_root = f"{conf.SERVED_DATA_ROOT}/emotes"
+emote_extension = ".webp"
+
+def parse_inline_emote(inline, m, state):
+    ename = m.group("emote_name")
+    state.append_token({"type": "inline_emote", "raw": ename})
+    return m.end()
+
+def render_inline_emote(renderer, ename):
+    return f'<img class="emote" src="{emote_root}/{ename}{emote_extension}" alt="{ename}" title="{ename}">'
+
+def emote(md):
+    md.inline.register("inline_emote", EMOTE_PATTERN, parse_inline_emote, before="link")
+    if md.renderer and md.renderer.NAME == "html":
+        md.renderer.register("inline_emote", render_inline_emote)
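
A sketch of how this plugin slots into mistune, mirroring process_body() in postutil.py below; the :wave: emote name is made up, and a notekins.conf must be locatable for SERVED_DATA_ROOT to be non-empty:

    import mistune
    from mistune_emote import emote

    md = mistune.Markdown(mistune.HTMLRenderer(), plugins=[emote])
    print(md("hello :wave:"))
    # expected shape: <p>hello <img class="emote" src=".../emotes/wave.webp" ...></p>
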
diff --git a/utils/monolith.py b/utils/monolith.py
new file mode 100644
index 0000000..d9a90e6
--- /dev/null
+++ b/utils/monolith.py
@@ -0,0 +1,285 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+'''
+I/O facilities for the Monolith file used by the backend.
+
+Structure of the Monolith file:
+Element      Length in bytes  Notes
+<Post>       (varies)
+<␋>          1                Treated as part of the post that precedes it
+<Post>       (varies)
+<␋>          1
+...
+
+Post:
+Element      Length in bytes  Notes
+content      (varies)         utf8 string, null-terminated. HTML fragment.
+date         8                seconds since unix epoch
+media        (varies)         See below
+ntags        1
+tag[0]       (varies)         null-terminated utf8 string, excluding the hash prefix. HTML-escaped.
+tag[1]       ..               ..
+...
+tag[ntags-1] ..               ..
+
+Media:
+Element      Length in bytes  Notes
+nmedia       1
+<MediaIns.>  (varies)
+<MediaIns.>  (varies)
+...
+
+MediaInstance:
+Element      Length in bytes  Notes
+type         1                'I' = image
+-----------------type == 'I'-----------------
+thumbnail    (varies)         null-terminated utf8 string, relative path to storage url
+original     (varies)         ..
+
+Index file (.idx)
+00             07 08             0F
+--<post#0 date>-- <post#0 end byte>
+--<post#1 date>-- <post#1 end byte>
+...
+
+Page index file (.pdx)
+00             07 08             0F
+<page#0 ends at#> <page#1 ends at#>
+...
+
+Page #0 contains the latest posts. The final page always starts
+at byte #0 in the monolith.
+
+'''
+
+from enum import Enum
+from mmap import mmap
+from bisect import bisect_left
+from config import conf
+from datetime import datetime, timezone
+
+MediaType = Enum("MediaType", ["IMAGE", "VIDEO"])
+
+def consume_str(buf):
+    nulp = buf.find(b'\0')
+    rets = buf[0 : nulp].decode("utf-8")
+    return rets, nulp
+
+def strbuf(s):
+    return s.encode("utf-8") + b'\0'
+
+class MediaInstance:
+    def __init__(self, type):
+        self.type = type
+
+    def __str__(self):
+        match self.type:
+            case MediaType.IMAGE:
+                return f"Image: {self.thumbnail} {self.original}\n"
+            case _:
+                return f"Unknown media\n"
+
+    def consume(buf):
+        match buf[0 : 1]:
+            case b'I':
+                l = 1
+                b = buf[1:]
+                thmb, p = consume_str(b)
+                l += p + 1
+                b = b[p + 1:]
+                orig, p = consume_str(b)
+                l += p + 1
+                return MediaInstance.makeImage(thmb, orig), l
+            case _:
+                raise ValueError("Invalid media type")
+
+    def to_buf(self):
+        match self.type:
+            case MediaType.IMAGE:
+                return b'I' + strbuf(self.thumbnail) + strbuf(self.original)
+            case _:
+                raise ValueError("Unsupported media type")
+
+    def dump(self):
+        match self.type:
+            case MediaType.IMAGE:
+                print(f"({self.thumbnail}, {self.original})")
+            case _:
+                raise ValueError("Unsupported media type")
+
+    def makeImage(thumb, orig):
+        r = MediaInstance(MediaType.IMAGE)
+        r.thumbnail = thumb
+        r.original = orig
+        return r
+
+class Post:
+    '''
+    .content: utf-8 string
+    .date: int, secs since unix epoch
+    .media: list of MediaInstance
+    .tags: list of strings
+    '''
+    def __init__(self, cont, date, media, tags):
+        self.content = cont
+        self.date = date
+        self.media = media
+        self.tags = tags
+
+    def __str__(self):
+        medias = "\n".join([str(m) for m in self.media])
+        tags = ",".join([f'"{t}"' for t in self.tags])
+        return f"{self.content}\n{self.date}\n{medias}\n[{tags}]"
+
+    def from_buf(buf):
+        content, p = consume_str(buf)
+        buf = buf[p + 1 :]
+        date = int.from_bytes(buf[: 8], "little")
+        buf = buf[8 :]
+        media = []
+        nmedia = int.from_bytes(buf[: 1], "little")
+        buf = buf[1 :]
+        for i in range(0, nmedia):
+            m, l = MediaInstance.consume(buf)
+            media.append(m)
+            buf = buf[l :]
+        tags = []
+        ntags = int.from_bytes(buf[: 1], "little")
+        buf = buf[1 :]
+        for i in range(0, ntags):
+            t, p = consume_str(buf)
+            tags.append(t)
+            buf = buf[p + 1:]
+        return Post(content, date, media, tags)
+
+    def to_buf(self):
+        ret = strbuf(self.content)
+        ret += self.date.to_bytes(8, "little")
+        ret += len(self.media).to_bytes(1, "little")
+        for m in self.media:
+            ret += m.to_buf()
+        ret += len(self.tags).to_bytes(1, "little")
+        for t in self.tags:
+            ret += strbuf(t)
+        return ret + b'\x0b'
+
+    def dump(self):
+        print('=' * 40)
+        print(self.content)
+        print(datetime.fromtimestamp(self.date, tz=timezone.utc).isoformat())
+        for t in self.tags:
+            print(f"#{t} ", end='')
+        print("")
+        for m in self.media:
+            m.dump()
+
+class Monolith:
+    def __init__(self, fn):
+        self.filename = fn
+        self.idxfn = f"{fn}.idx"
+        self.pdxfn = f"{fn}.pdx"
+        self.postranges = []
+
+    def _append_idxf(self, t, r):
+        with open(self.idxfn, "ab") as f:
+            buf = t.to_bytes(8, "little") + \
+                  r.to_bytes(8, "little")
+            if f.write(buf) != len(buf):
+                raise RuntimeError("write failure")
+
+    def clear(self):
+        with open(self.filename, "wb"): pass
+        with open(self.idxfn, "wb"): pass
+        with open(self.pdxfn, "wb"): pass
+
+    def append(self, post):
+        with open(self.filename, "ab") as f:
+            postbuf = post.to_buf()
+            t = post.date
+            l = f.tell()
+            w = f.write(postbuf)
+            if w != len(postbuf):
+                raise RuntimeError("write failure")
+            r = l + w
+            self.postranges.append((t, l, r))
+        self._append_idxf(t, r)
+        # self.generate_page_index()
+
+    def load_index(self):
+        with open(self.idxfn, "rb") as f:
+            last_r = 0
+            self.postranges = []
+            while True:
+                bs = f.read(16)
+                if len(bs) == 0: break
+                t = int.from_bytes(bs[0 : 8], "little")
+                l = last_r
+                r = int.from_bytes(bs[8 :16], "little")
+                self.postranges.append((t, l, r))
+                last_r = r
+
+    def write_index(self):
+        with open(self.idxfn, "wb") as f:
+            for (t, _, r) in self.postranges:
+                f.write(t.to_bytes(8, "little") + \
+                        r.to_bytes(8, "little"))
+
+    def find_post(self, date):
+        p = bisect_left(self.postranges, date, key=lambda p: p[0])
+        if p != len(self.postranges) and self.postranges[p][0] == date:
+            return p
+        return None
+
+    def find_nearby_posts(self, date, r=2):
+        p = bisect_left(self.postranges, date, key=lambda p: p[0])
+        left = max(p - r, 0)
+        right = min(p + r + 1, len(self.postranges))
+        return [t for (t, _, _) in self.postranges[left : right]]
+
+    def get_all_dates(self):
+        return [t for (t, _, _) in self.postranges]
+
+    def get_post(self, date):
+        p = self.find_post(date)
+        if p is None: return None
+        t, l, r = self.postranges[p]
+        with open(self.filename, "r+b") as f:
+            d = mmap(f.fileno(), 0)
+            post = Post.from_buf(d[l : r])
+        return post
+
+    def replace_post(self, date, post):
+        p = self.find_post(date)
+        if p is None: return None
+        t, l, r = self.postranges[p]
+        new_post_buf = post.to_buf()
+        dlen = len(new_post_buf) - (r - l)
+        with open(self.filename, "r+b") as f:
+            d = mmap(f.fileno(), 0)
+            mlength = len(d)
+            oldend = r
+            newend = l + len(new_post_buf)
+            if dlen > 0:
+                d.resize(mlength + dlen)
+            if dlen != 0:
+                d.move(newend, oldend, mlength - oldend)
+            if dlen < 0:
+                d.resize(mlength + dlen)
+            d[l : newend] = new_post_buf
+        self.postranges[p] = (t, l, r + dlen)
+        for i in range(p + 1, len(self.postranges)):
+            t, l, r = self.postranges[i]
+            self.postranges[i] = (t, l + dlen, r + dlen)
+        self.write_index()
+
+    def generate_page_index(self):
+        posts_per_page = conf.POSTS_PER_PAGE
+        ranges = []
+        for ub in range(len(self.postranges), 0, -posts_per_page):
+            pr = ub - 1
+            _, _, r = self.postranges[pr]
+            ranges.append(r)
+        with open(self.pdxfn, "wb") as f:
+            for r in ranges:
+                f.write(r.to_bytes(8, "little"))
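
A round-trip sketch of the byte layout documented above, using only the serializers in this file (values arbitrary; run with utils/ on sys.path):

    from monolith import MediaInstance, Post

    p = Post("<p>hi</p>", 1721878811,
             [MediaInstance.makeImage("thumb.webp", "orig.png")], ["demo"])
    buf = p.to_buf()
    assert buf[-1:] == b'\x0b'    # the trailing <␋> separator
    q = Post.from_buf(buf)
    assert (q.content, q.date, q.tags) == (p.content, p.date, p.tags)
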
diff --git a/utils/monolith_test.py b/utils/monolith_test.py
new file mode 100644
index 0000000..b4e1b30
--- /dev/null
+++ b/utils/monolith_test.py
@@ -0,0 +1,96 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+#
+# Basic unit tests for the Python Monolith class
+
+import monolith
+import unittest
+import random
+import os
+from mmap import mmap
+
+def randstr(len):
+    return ''.join(random.choices(''.join([chr(i + ord('0')) for i in range(0, 75)]), k=len))
+
+def randpost(last_time):
+    content = randstr(random.randint(10, 1024))
+    date = random.randint(last_time + 1, last_time + 999999)
+    media = []
+    tags = []
+    for _ in range(0, random.randint(0, 9)):
+        media.append(monolith.MediaInstance.makeImage(randstr(20), randstr(20)))
+    for _ in range(0, random.randint(0, 4)):
+        tags.append(randstr(random.randint(1, 8)))
+    return monolith.Post(content, date, media, tags)
+
+def posteq(a, b):
+    if a is None or b is None:
+        return False
+    if len(a.media) != len(b.media) or len(a.tags) != len(b.tags):
+        return False
+    for x, y in zip(a.media, b.media):
+        if x.thumbnail != y.thumbnail or x.original != y.original:
+            return False
+    for x, y in zip(a.tags, b.tags):
+        if x != y: return False
+    return a.content == b.content and a.date == b.date
+
+class TestMonolith(unittest.TestCase):
+    def test_replace(self):
+        posts = []
+        filename = "rep.monolith"
+        m = monolith.Monolith(filename)
+        p1 = randpost(123)
+        p2 = randpost(p1.date)
+        p3 = randpost(0)
+        p3.date = p1.date
+        m.append(p1)
+        m.append(p2)
+        m.replace_post(p1.date, p3)
+        self.assertTrue(posteq(m.get_post(p3.date), p3))
+        self.assertTrue(posteq(m.get_post(p2.date), p2))
+
+    def test_combined(self):
+        posts = []
+        filename = "test.monolith"
+        if True:
+            m = monolith.Monolith(filename)
+            last_time = 0
+            for _ in range(0, 100):
+                op = 1 if random.random() < 0.2 else 0
+                if op == 1 and len(posts) == 0:
+                    op = 0
+                if op == 0:
+                    p = randpost(last_time)
+                    last_time = p.date
+                    posts.append(p)
+                    m.append(p)
+                elif op == 1:
+                    p = randpost(0)
+                    position = random.randint(0, len(posts) - 1)
+                    p.date = posts[position].date
+                    posts[position] = p
+                    m.replace_post(p.date, p)
+            m.write_index()
+            m.generate_page_index()
+            for p in posts:
+                pp = m.get_post(p.date)
+                self.assertTrue(posteq(p, pp))
+
+            with open(filename, "r+b") as f:
+                d = mmap(f.fileno(), 0)
+                for _, _, r in m.postranges:
+                    self.assertEqual(d[r - 1 : r], b'\v')
+        if True:
+            m = monolith.Monolith(filename)
+            m.load_index()
+            dates = m.get_all_dates()
+            self.assertEqual(len(dates), len(posts))
+            for t, p in zip(dates, posts):
+                self.assertEqual(t, p.date)
+            for t, p in zip(dates, posts):
+                self.assertTrue(posteq(p, m.get_post(t)))
+
+
+if __name__ == "__main__":
+    unittest.main()
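
The suite runs under the standard unittest runner, e.g. `python -m unittest monolith_test` from utils/; note it writes rep.monolith, test.monolith, and their index files into the working directory.
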
diff --git a/utils/notectl.py b/utils/notectl.py
new file mode 100644
index 0000000..a2e4a19
--- /dev/null
+++ b/utils/notectl.py
@@ -0,0 +1,178 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+import os
+import sys
+import time
+import shutil
+import tempfile
+import subprocess
+from datetime import datetime, timezone
+
+import postutil
+from atomgen import gen_atom
+from monolith import Monolith
+from config import conf
+
+'''
+Launches an editor (set by $EDITOR) to edit the given file
+'''
+def edit_file(fn):
+    editor = os.environ.get("EDITOR", "vi")  # fall back to vi if $EDITOR is unset
+    subprocess.run([editor, fn])
+
+'''
+Opens an editor to create a new post.
+The post will be formatted, and all media will be processed accordingly.
+'''
+def new_post():
+    conf.require()
+    with tempfile.TemporaryDirectory() as dir:
+        fn = os.path.join(dir, "note.txt")
+        with open(fn, "w"): pass
+        edit_file(fn)
+        ts = time.time_ns() // 10 ** 9
+        postpath = postutil.move_post(fn, ts)
+        p = postutil.process_post(postpath, False)
+        if len(p.content) == 0 and len(p.media) == 0:
+            print("No post was made.")
+            return
+        print(f"Post {os.path.basename(postpath)} made!")
+        m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+        m.append(p)
+        m.load_index()
+        m.generate_page_index()
+        if conf.ATOM_ENABLED:
+            gen_atom()
+
+'''
+Opens an editor to edit an existing post.
+Post time cannot be modified and will always stay the same.
+Media that can be found in the media_dropoff folder will be updated.
+
+If a media entry is modified, the file it refers to must either be
+present in the media_dropoff folder, or already in the corresponding
+folder inside media_orig.
+'''
+def edit_post(ts):
+    conf.require()
+    m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+    m.load_index()
+    if m.get_post(ts) is None:
+        print("No post was made at that time!")
+        # TODO: allow the user to select a post made near this time
+        return
+    d = datetime.fromtimestamp(ts, tz=timezone.utc)
+    pfn = f"{ts}-{d.isoformat()[:-6]}Z.post"
+    rp = os.path.join(os.path.join("posts", str(d.year)), pfn)
+    edit_file(os.path.join(conf.LOCAL_DATA_ROOT, rp))
+    p = postutil.process_post(rp, True)
+    m.replace_post(ts, p)
+    m.generate_page_index()
+    if conf.ATOM_ENABLED:
+        gen_atom()
+
+'''
+Regenerate the ENTIRE monolith file.
+Horribly slow.
+'''
+def regen_monolith():
+    conf.require()
+    print("Do you want to regenerate the ENTIRE monolith file? [y/n]")
+    if input() not in ['Y', 'y']:
+        return
+    m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+    m.clear()
+    postlist = []
+    postsp = os.path.join(conf.LOCAL_DATA_ROOT, "posts")
+    for ye in os.scandir(postsp):
+        inty = None
+        try:
+            inty = int(ye.name)
+        except ValueError:
+            pass
+        if inty is None or not ye.is_dir():
+            continue
+        yearp = os.path.join(postsp, ye.name)
+        postlist += [os.path.join(yearp, p) for p in filter(lambda x: x.endswith(".post"), os.listdir(yearp))]
+    def keyf(x):
+        b = os.path.basename(x)
+        return int(b[:b.index('-')])
+    postlist = sorted(postlist, key=keyf)
+    for pfn in postlist:
+        p = postutil.process_post(pfn, True)
+        m.append(p)
+    m.load_index()
+    m.generate_page_index()
+    if conf.ATOM_ENABLED:
+        gen_atom()
+
+'''
+Sync all local data to the configured remote host for serving.
+'''
+def sync_remote():
+    conf.require()
+    subprocess.run(["rsync", "-azv", "--exclude=posts", "--exclude=media_dropoff", conf.LOCAL_DATA_ROOT + "/", conf.SYNC_TARGET])
+
+'''
+Create a new notekins instance with all files and directories that it expects.
+'''
+def init_instance():
+    repop = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+    print(repop)
+    if len(sys.argv) < 3:
+        print("Missing path to the new instance.")
+        return
+    targetp = sys.argv[2].rstrip('/')
+    os.mkdir(targetp, mode=0o755)
+    os.mkdir(os.path.join(targetp, "posts"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "emotes"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "media_dropoff"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "media_orig"), mode=0o755)
+    os.mkdir(os.path.join(targetp, "media_thmb"), mode=0o755)
+    shutil.copytree(os.path.join(repop, "template"), os.path.join(targetp, "template"))
+    with open(os.path.join(targetp, "notekins.conf"), "w") as f:
+        f.write(f"LOCAL_DATA_ROOT={targetp}\n")
+    m = Monolith(os.path.join(targetp, "posts.monolith"))
+    m.clear()
+
+'''
+Clean up any media file that isn't used in the monolith file.
+TODO.
+'''
+def media_cleanup():
+    conf.require()
+    pass
+
+def main():
+    if len(sys.argv) < 2:
+        print("Missing command. Available commands:")
+        print("new    Create a new post.")
+        print("edit   Edit an existing post. Requires a post timestamp.")
+        print("atom   Generate atom feed.")
+        print("regen  Regenerate the entire monolith file.")
+        print("sync   Sync data to remote for hosting.")
+        print("init   Initialize a new Notekins instance. Requires path to the instance.")
+        print("dump   Dump the content of the monolith file.")
+        return
+    match sys.argv[1]:
+        case "new":
+            new_post()
+        case "edit":
+            edit_post(int(sys.argv[2]))
+        case "atom":
+            gen_atom()
+        case "regen":
+            regen_monolith()
+        case "sync":
+            sync_remote()
+        case "init":
+            init_instance()
+        case "dump":
+            m = Monolith(os.path.join(conf.LOCAL_DATA_ROOT, "posts.monolith"))
+            m.load_index()
+            for d in m.get_all_dates():
+                m.get_post(d).dump()
+
+if __name__ == "__main__":
+    main()
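
A hypothetical first session with the tool, matching the commands dispatched in main() (paths illustrative):

    python notectl.py init /home/user/notes
    cd /home/user/notes
    python notectl.py new     # opens $EDITOR, then appends to posts.monolith
    python notectl.py sync    # rsync to SYNC_TARGET from notekins.conf
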
diff --git a/utils/postutil.py b/utils/postutil.py
new file mode 100644
index 0000000..c978d3e
--- /dev/null
+++ b/utils/postutil.py
@@ -0,0 +1,155 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+
+'''
+The anatomy of a post:
+
+--comment      <= any line that starts with a double dash will be ignored
+Post text (markdown)
+
+#tag #tag #tag <= a line that starts with # makes it a tag line.
+                  only the first such line is used.
+
+[media]        <= lines surrounded by [] are media lines.
+[media]           each contains a single file name inside the brackets.
+[media]           drop the files in LOCAL_DATA_ROOT/media_dropoff
+'''
+
+from mistune.plugins.formatting import strikethrough, superscript, subscript
+from mistune.plugins.url import url
+from mistune.plugins.ruby import ruby
+from mistune.plugins.spoiler import spoiler
+import mistune
+
+from wand.image import Image
+
+import tempfile
+import os
+import shutil
+import mimetypes
+from datetime import datetime, timezone
+from hashlib import file_digest
+
+from mistune_emote import emote
+from monolith import MediaInstance, Post
+from config import conf
+
+'''
+Takes an absolute path to a static image and generates a thumbnail for it if needed.
+Returns the path to the thumbnail, relative to conf.LOCAL_DATA_ROOT.
+
+If a thumbnail isn't required, returns None
+'''
+def generate_thumbnail(file):
+    with tempfile.TemporaryDirectory() as dir:
+        outf = os.path.join(dir, "downsampled.webp")
+        dim = conf.THUMBNAIL_DIM
+        with Image(filename=file) as i:
+            if i.height <= dim and i.width <= dim and i.format.lower() != "png":
+                return None
+            s = dim / max(i.height, i.width)
+            i.resize(int(i.width * s), int(i.height * s), "lanczos2")
+            i.format = "webp"
+            i.save(filename=outf)
+        with open(outf, "rb") as f:
+            d = file_digest(f, "sha256")
+        shas = d.hexdigest()
+        destdirp = os.path.join(shas[0:2], shas[2:4])
+        destdirp = os.path.join("media_thmb", destdirp)
+        destpath = os.path.join(destdirp, f"{shas}.webp")
+        destabsp = os.path.join(conf.LOCAL_DATA_ROOT, destpath)
+        os.makedirs(os.path.join(conf.LOCAL_DATA_ROOT, destdirp), 0o755, True)
+        if not os.path.isfile(destabsp):
+            shutil.move(outf, destabsp)
+        return destpath
+
+def should_generate_thumbnail(file):
+    thumbed_types = ["image/png", "image/jpeg", "image/webp"]
+    return mimetypes.guess_type(file)[0] in thumbed_types
+
+def process_body(text):
+    renderer = mistune.HTMLRenderer()
+    md = mistune.Markdown(renderer, plugins=
+        [strikethrough, url, superscript, subscript, ruby, spoiler, emote])
+    return md(text)
+
+'''
+move file at absolute path fn to conf.LOCAL_DATA_ROOT/<dirn>/<year>/destfilename
+destfilename is the return value of dfnf(ts, datetime.fromtimestamp(ts, tz=timezone.utc), fn)
+
+returns path to the destination file relative to conf.LOCAL_DATA_ROOT
+'''
+def move_file(fn, ts, dirn, dfnf):
+    d = datetime.fromtimestamp(ts, tz=timezone.utc)
+    dfn = dfnf(ts, d, fn)
+    destydir = os.path.join(dirn, str(d.year))
+    destpath = os.path.join(destydir, dfn)
+    os.makedirs(os.path.join(conf.LOCAL_DATA_ROOT, destydir), 0o755, True)
+    shutil.move(fn, os.path.join(conf.LOCAL_DATA_ROOT, destpath))
+    return destpath
+
+def move_post(fn, ts):
+    return move_file(fn, ts, "posts", lambda ts, d, fn: f"{ts}-{d.isoformat()[:-6]}Z.post")
+
+def move_media(fn, ts):
+    return move_file(fn, ts, "media_orig", lambda ts, d, fn: f"{ts}-{os.path.basename(fn)}")
+
+'''
+Reads and processes a post from fn.
+fn must be a path relative to conf.LOCAL_DATA_ROOT
+pointing to a raw post input (e.g. posts/2024/xxxxxx-yyyy-mm-ddThh:mm:ssZ.post)
+If is_updating is False, assumes all media is present in the media_dropoff folder.
+
+Returns a Post struct for that post.
+'''
+def process_post(fn, is_updating):
+    body = ""
+    media_str = []
+    media = []
+    tags = []
+    tagline = None
+    fbasen = os.path.basename(fn)
+    ts = int(fbasen[:fbasen.index('-')])
+    with open(fn, "r") as f:
+        for l in f:
+            line = l.strip()
+            if line.startswith("--"):
+                continue
+            if line.startswith('[') and line.endswith(']'):
+                media_str.append(line[1 : -1])
+            elif line.startswith('#'):
+                if tagline is None:
+                    tagline = line
+            elif len(media_str) == 0 and tagline is None:
+                body += l
+
+    rendered_body = process_body(body)
+
+    if tagline is not None:
+        tags = [s[1:] for s in filter(lambda t: t.startswith('#'), tagline.split(' '))]
+
+    for m in media_str:
+        destm = None
+        dropoff = os.path.join("media_dropoff", m)
+        dropoffa = os.path.join(conf.LOCAL_DATA_ROOT, dropoff)
+        e = os.path.isfile(dropoffa)
+        if not is_updating:
+            if not e:
+                raise FileNotFoundError(f"{dropoffa} does not exist.")
+            destm = move_media(dropoffa, ts)
+        elif e:
+            destm = move_media(dropoffa, ts)
+        if destm is None:
+            d = datetime.fromtimestamp(ts, tz=timezone.utc)
+            destm = os.path.join("media_orig", str(d.year))
+            # reconstruct the name move_media() would have produced for m
+            destm = os.path.join(destm, f"{ts}-{os.path.basename(m)}")
+            if not os.path.isfile(os.path.join(conf.LOCAL_DATA_ROOT, destm)):
+                raise FileNotFoundError(f"Cannot find original media ({destm})")
+        thumbnail = None
+        if should_generate_thumbnail(destm):
+            thumbnail = generate_thumbnail(os.path.join(conf.LOCAL_DATA_ROOT, destm))
+        if thumbnail is None:
+            thumbnail = destm
+        media.append(MediaInstance.makeImage(thumbnail, destm))
+
+    return Post(rendered_body, ts, media, tags)
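
A hypothetical raw .post file following the anatomy described at the top of this file (names made up; the media file must sit in media_dropoff when the post is first processed):

    -- a quick note about the weather
    Cloudy with a chance of *markdown* :sun:

    #weather #journal

    [IMG_2024.jpg]
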
diff --git a/utils/rust_monolith_test.py b/utils/rust_monolith_test.py
new file mode 100644
index 0000000..a66eea4
--- /dev/null
+++ b/utils/rust_monolith_test.py
@@ -0,0 +1,92 @@
+# Chris Xiong 2024
+# License: Expat (MIT)
+#
+# shitty tests for the Rust Monolith representation
+
+import os
+import sys
+import random
+import tempfile
+import subprocess
+
+import monolith
+import monolith_test
+from config import conf
+
+def make_random_monolith(dir):
+    filename = "posts.monolith"
+    m = monolith.Monolith(os.path.join(dir, filename))
+    nposts = random.randint(10, 100)
+    last_time = 0
+    posts = []
+    for _ in range(0, nposts):
+        p = monolith_test.randpost(last_time)
+        last_time = p.date
+        posts.append(p)
+        m.append(p)
+    m.write_index()
+    m.generate_page_index()
+    with open(os.path.join(dir, "notekins.conf"), "w"): pass
+    return posts
+
+def run_rust_monolith_debug(dir, method, param):
+    p = subprocess.run([sys.argv[1], method, str(param)], capture_output=True, cwd=dir)
+    return p.stdout.decode("utf-8")
+
+def dbg_output(p):
+    pyout = p.content + '\n'
+    pyout += str(p.date) + '\n'
+    for m in p.media:
+        if m.type == monolith.MediaType.IMAGE:
+            pyout += f"Image {m.thumbnail} {m.original}\n"
+    for t in p.tags:
+        pyout += t + '\n'
+    return pyout
+
+def run_tests(dir):
+    posts = make_random_monolith(dir)
+    failed = False
+    for p in posts:
+        o = run_rust_monolith_debug(dir, "get_post", p.date)
+        e = dbg_output(p)
+        if o != e:
+            print(f"get_post failed, date: {p.date}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+        o = run_rust_monolith_debug(dir, "get_post2", p.date)
+        e = dbg_output(p)
+        if o != e:
+            print(f"get_post2 failed, date: {p.date}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+    posts_per_page = conf.POSTS_PER_PAGE
+    for page, ub in enumerate(range(len(posts), 0, -posts_per_page)):
+        pl = max(ub - posts_per_page, 0)
+        pr = ub - 1
+        if pr - pl + 1 > posts_per_page:
+            failed = True
+            print("paging error ???")
+            input()
+        e = ""
+        for x in range(pl, pr + 1):
+            e += dbg_output(posts[x])
+        o = run_rust_monolith_debug(dir, "get_page", page)
+        if o != e:
+            print(f"get_page failed, page: {page}")
+            print(f"expected\n{e}\ngot\n{o}")
+            failed = True
+            input()
+    if not failed:
+        print(f"test of monolith with {len(posts)} posts passed.")
+
+def test_rust_monolith():
+    if len(sys.argv) < 2:
+        sys.exit("missing path to executable")
+    for _ in range(0, 100):
+        with tempfile.TemporaryDirectory() as dir:
+            run_tests(dir)
+
+if __name__ == "__main__":
+    test_rust_monolith()
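
The harness takes the Rust debug binary as its only argument, e.g. (binary path hypothetical):

    python rust_monolith_test.py ../target/debug/notekins-monolith-debug

Each of the 100 iterations builds a fresh random monolith in a temporary directory and diffs the binary's get_post/get_post2/get_page output against dbg_output() above.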