aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Chris Xiong <chirs241097@gmail.com> 2024-08-04 18:49:45 -0400
committerGravatar Chris Xiong <chirs241097@gmail.com> 2024-08-04 18:49:45 -0400
commitde061b589dc4d8fc2f8a9866ea183f565b284cf9 (patch)
tree15a2e7657e63d755150542d01e491c4c842f9512
parentd85f7c3109df027110cf8c7ed2d3de2aa128e7ec (diff)
downloadnotekins-de061b589dc4d8fc2f8a9866ea183f565b284cf9.tar.xz
Mastodon import script (only good enough for myself).
-rw-r--r--utils/mastodon_import.py58
1 files changed, 58 insertions, 0 deletions
diff --git a/utils/mastodon_import.py b/utils/mastodon_import.py
new file mode 100644
index 0000000..11925c2
--- /dev/null
+++ b/utils/mastodon_import.py
@@ -0,0 +1,58 @@
+import os
+import sys
+import json
+from config import conf
+from datetime import datetime
+
def htmlunescape(s):
    """Return *s* with a small, fixed set of HTML entities decoded.

    Only ``&lt;``, ``&gt;``, ``&#39;``, ``&quot;`` and ``&amp;`` are handled;
    any other entity or markup is left untouched.
    """
    return (
        s.replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&#39;", "'")
        .replace("&quot;", '"')
        # "&amp;" must be decoded last: decoding it earlier would turn an
        # escaped entity such as "&amp;quot;" into "&quot;" and a later
        # replace would then wrongly decode it a second time.
        .replace("&amp;", "&")
    )
+
def proc_post(o, msdata):
    """Write one outbox activity as a ``.post`` file, hard-linking its images.

    Args:
        o: One entry of the outbox's ``orderedItems`` list. The keys
            ``"published"``, ``"cc"`` and ``"object"`` are read; the object
            must provide ``"content"`` and ``"attachment"`` (a list of dicts
            with ``"mediaType"`` and ``"url"``).
        msdata: Root directory of the extracted archive; attachment URLs are
            resolved relative to it.

    Activities with more than one ``"cc"`` entry are treated as replies and
    skipped, as are activities whose ``"object"`` is not a dict.

    Raises:
        KeyError: If an expected key is missing from the activity.
        OSError: If a directory, link or post file cannot be created (for
            example when the link target already exists).
    """
    obj = o["object"]
    # Some activities carry only a reference instead of an embedded object
    # (presumably boosts, whose object is a bare URL -- verify against the
    # archive); there is no content to import for those.
    if not isinstance(obj, dict):
        return
    # Heuristic: more than one "cc" recipient means the post is a reply.
    if len(o["cc"]) > 1:
        return

    published = o["published"]
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
    # on; spell the UTC offset out so older interpreters parse it too.
    if published.endswith("Z"):
        published = published[:-1] + "+00:00"
    date = datetime.fromisoformat(published)
    ts = int(date.timestamp())

    text = htmlunescape(obj["content"])
    images = [
        a["url"]
        for a in obj["attachment"]
        if a["mediaType"] in ("image/jpeg", "image/png")
    ]

    year = str(date.year)
    media_dir = os.path.join(conf.LOCAL_DATA_ROOT, "media_orig", year)
    post_dir = os.path.join(conf.LOCAL_DATA_ROOT, "posts", year)
    # The slice drops the 6-character UTC offset ("+00:00") that isoformat()
    # appends, and "Z" is put back in its place.
    # NOTE(review): assumes every timestamp in the archive is UTC -- confirm.
    post_fn = f"{ts}-{date.isoformat()[:-6]}Z.post"
    os.makedirs(post_dir, mode=0o755, exist_ok=True)

    parts = [text, "\n\n#Toot\n"]
    if images:
        parts.append("\n")
        os.makedirs(media_dir, mode=0o755, exist_ok=True)
    for url in images:
        # Attachment URLs are rooted at "/"; strip that so os.path.join()
        # keeps the archive directory instead of discarding it.
        rel = url.lstrip("/")
        bn = os.path.basename(rel)
        os.link(os.path.join(msdata, rel),
                os.path.join(media_dir, f"{ts}-{bn}"))
        parts.append(f"[{bn}]\n")

    # Post text may contain any Unicode; do not depend on the locale encoding.
    with open(os.path.join(post_dir, post_fn), "w", encoding="utf-8") as f:
        f.write("".join(parts))
+
def main():
    """Import every activity listed in a Mastodon archive's ``outbox.json``.

    The archive directory is taken from the first command-line argument.
    When it is missing, a usage message is printed to stderr and the process
    exits with status 1. Each entry of the outbox's ``orderedItems`` list is
    passed to ``proc_post`` together with the archive directory.
    """
    if len(sys.argv) < 2:
        print("specify the data directory of your mastodon archive on the command line pls",
              file=sys.stderr)
        # sys.exit() is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)
    msdata = sys.argv[1]
    with open(os.path.join(msdata, "outbox.json"), encoding="utf-8") as f:
        outbox = json.load(f)
    for p in outbox["orderedItems"]:
        proc_post(p, msdata)


if __name__ == "__main__":
    main()