"""Import the posts of a Mastodon archive as ``.post`` files.

Reads ``outbox.json`` from the archive directory given on the command line
and, for every activity that is not a reply, writes a text file below
``conf.LOCAL_DATA_ROOT/posts/<year>/`` and hard-links its JPEG/PNG
attachments into ``conf.LOCAL_DATA_ROOT/media_orig/<year>/``.
"""

import html
import json
import os
import sys
from datetime import datetime, timezone

from config import conf

# Attachment media types that are linked and referenced from the post.
_IMAGE_TYPES = ("image/jpeg", "image/png")


def htmlunescape(s):
    """Return *s* with HTML character references decoded.

    Delegates to :func:`html.unescape`, which decodes each reference exactly
    once, so ``&amp;lt;`` becomes the literal text ``&lt;`` rather than ``<``.
    Markup tags in *s* are left untouched.
    """
    return html.unescape(s)


def _parse_published(stamp):
    """Parse an ISO-8601 timestamp string into an aware UTC ``datetime``."""
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
    # on; spell the offset out so older interpreters parse it as well.
    if stamp.endswith("Z"):
        stamp = stamp[:-1] + "+00:00"
    parsed = datetime.fromisoformat(stamp)
    if parsed.tzinfo is None:
        # NOTE(review): a timestamp without an offset is assumed to be UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed.astimezone(timezone.utc)


def proc_post(o, msdata):
    """Write one outbox activity as a ``.post`` file and link its images.

    Args:
        o: One element of ``orderedItems`` from the archive's
            ``outbox.json``.
        msdata: Path of the unpacked archive; attachment URLs are resolved
            relative to this directory.

    Returns:
        None. Nothing is written when the activity is a reply (more than one
        ``cc`` entry) or when its ``object`` is not an embedded dict.

    Raises:
        KeyError: If a required key (``cc``, ``published``, ``content``,
            ``url``) is missing.
        OSError: If a directory, link or file cannot be created.
    """
    obj = o["object"]
    if not isinstance(obj, dict):
        # NOTE(review): presumably a boost, whose "object" is only a URL
        # string and therefore has no content or attachments to import.
        return

    # More than one "cc" recipient is this script's marker for a reply.
    if len(o["cc"]) > 1:
        return

    date = _parse_published(o["published"])
    ts = int(date.timestamp())
    year = str(date.year)

    images = [
        a["url"]
        for a in obj.get("attachment", [])
        if a.get("mediaType") in _IMAGE_TYPES
    ]

    media_dir = os.path.join(conf.LOCAL_DATA_ROOT, "media_orig", year)
    post_dir = os.path.join(conf.LOCAL_DATA_ROOT, "posts", year)
    # The date is already UTC, so drop the offset and mark it with "Z".
    post_fn = f"{ts}-{date.replace(tzinfo=None).isoformat()}Z.post"

    os.makedirs(post_dir, 0o755, exist_ok=True)

    parts = [htmlunescape(obj["content"]), "\n\n#Toot\n"]

    if images:
        parts.append("\n")
        os.makedirs(media_dir, 0o755, exist_ok=True)
        for url in images:
            # Attachment URLs are rooted at "/"; make them relative so that
            # os.path.join() does not discard the archive directory.
            rel = url.lstrip("/")
            bn = os.path.basename(rel)
            dest = os.path.join(media_dir, f"{ts}-{bn}")
            try:
                os.link(os.path.join(msdata, rel), dest)
            except FileExistsError:
                # Already linked by an earlier run; the post file below is
                # rewritten anyway, so keep the import re-runnable.
                pass
            parts.append(f"[{bn}]\n")

    with open(os.path.join(post_dir, post_fn), "w", encoding="utf-8") as f:
        f.write("".join(parts))


def main():
    """Process every item of ``<archive>/outbox.json``.

    The archive directory is taken from the first command-line argument; the
    process exits with status 1 when it is missing.
    """
    if len(sys.argv) < 2:
        print(
            "specify the data directory of your mastodon archive on the command line pls",
            file=sys.stderr,
        )
        sys.exit(1)

    msdata = sys.argv[1]

    with open(os.path.join(msdata, "outbox.json"), encoding="utf-8") as f:
        outbox = json.load(f)

    for item in outbox["orderedItems"]:
        proc_post(item, msdata)


if __name__ == "__main__":
    main()