# Recovered from a git patch (cgit v1.2.3):
#   Commit:  de061b589dc4d8fc2f8a9866ea183f565b284cf9
#   From:    Chris Xiong
#   Date:    Sun, 4 Aug 2024 18:49:45 -0400
#   Subject: Mastodon import script (only good enough for myself).
#   File:    utils/mastodon_import.py (new file, index 0000000..11925c2)
"""Import posts from a Mastodon archive into the local post store.

Usage: ``mastodon_import.py <archive-dir>`` where ``<archive-dir>`` contains
``outbox.json``.  Each non-reply item becomes a ``.post`` file under
``<conf.LOCAL_DATA_ROOT>/posts/<year>/`` and its JPEG/PNG attachments are
hard-linked into ``<conf.LOCAL_DATA_ROOT>/media_orig/<year>/``.
"""
import html
import json
import os
import sys
from datetime import datetime

from config import conf


def htmlunescape(s):
    """Return *s* with HTML character references decoded.

    Delegates to the standard library so that every entity is decoded in a
    single pass: already-escaped ampersands are not decoded a second time,
    and numeric references are handled as well as named ones.  HTML tags in
    *s* are left untouched.
    """
    return html.unescape(s)


def proc_post(o, msdata):
    """Convert one outbox item into a post file plus linked media.

    Args:
        o: One element of ``orderedItems`` from ``outbox.json``.
        msdata: Root directory of the extracted archive; attachment URLs
            are resolved relative to it.

    Items with more than one ``cc`` entry are treated as replies and
    skipped.  Items whose ``object`` is not a mapping are skipped as well.

    Raises:
        OSError: If a directory, hard link or post file cannot be created
            (for example ``FileExistsError`` from ``os.link`` when the same
            archive is imported twice).
    """
    # The script's heuristic for a reply: more than one "cc" recipient.
    if len(o["cc"]) > 1:
        return

    obj = o["object"]
    # NOTE(review): boosts appear to carry a bare URL string here rather than
    # a full object, which has no content to import -- confirm against your
    # archive.
    if not isinstance(obj, dict):
        return

    date = datetime.fromisoformat(o["published"])
    ts = int(date.timestamp())
    text = htmlunescape(obj["content"])
    images = [
        a["url"]
        for a in obj.get("attachment") or []
        if a["mediaType"] in ("image/jpeg", "image/png")
    ]

    year = str(date.year)
    media_dir = os.path.join(conf.LOCAL_DATA_ROOT, "media_orig", year)
    post_dir = os.path.join(conf.LOCAL_DATA_ROOT, "posts", year)
    # Drops the last six characters of the ISO string and appends "Z".
    # NOTE(review): this assumes `published` parses to an offset-aware
    # datetime whose isoformat() ends in "+00:00" -- confirm.
    post_fn = f"{ts}-{date.isoformat()[:-6]}Z.post"
    os.makedirs(post_dir, mode=0o755, exist_ok=True)

    parts = [text, "\n\n#Toot\n"]
    if images:
        parts.append("\n")
        os.makedirs(media_dir, mode=0o755, exist_ok=True)
        for url in images:
            # Drop the first character so os.path.join() keeps `msdata` as
            # the prefix (presumably the URL starts with "/" -- verify).
            rel = url[1:]
            bn = os.path.basename(rel)
            os.link(os.path.join(msdata, rel),
                    os.path.join(media_dir, f"{ts}-{bn}"))
            parts.append(f"[{bn}]\n")

    with open(os.path.join(post_dir, post_fn), "w", encoding="utf-8") as f:
        f.write("".join(parts))


def main():
    """Read ``outbox.json`` from the archive named on the command line.

    Exits with status 1 when no archive directory is given.
    """
    if len(sys.argv) < 2:
        print("specify the data directory of your mastodon archive"
              " on the command line pls", file=sys.stderr)
        # sys.exit() rather than the bare exit() helper, which is only
        # installed by the `site` module.
        sys.exit(1)
    msdata = sys.argv[1]
    with open(os.path.join(msdata, "outbox.json"), encoding="utf-8") as f:
        outbox = json.load(f)
    for item in outbox["orderedItems"]:
        proc_post(item, msdata)


if __name__ == "__main__":
    main()