about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Chris Xiong <chirs241097@gmail.com> 2024-08-04 18:48:36 -0400
committer: Chris Xiong <chirs241097@gmail.com> 2024-08-04 18:48:36 -0400
commit: d85f7c3109df027110cf8c7ed2d3de2aa128e7ec (patch)
tree: 34348960af1f26e0f07cc3e48578023fe8cf6cdc
parent: afedaa6326d879477ddd56ab2b542d3c8af3f5b2 (diff)
download: notekins-d85f7c3109df027110cf8c7ed2d3de2aa128e7ec.tar.xz
Unescape html in tweets.
-rw-r--r-- utils/twitter_import.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/utils/twitter_import.py b/utils/twitter_import.py
index 90d0c7c..76170bb 100644
--- a/utils/twitter_import.py
+++ b/utils/twitter_import.py
@@ -14,6 +14,11 @@ def procline(x):
r = chr(0x200b) + r # extremely hacky
return r
+def htmlunescape(s):  # Return s with the entities &lt;, &gt; and &amp; replaced by <, > and &; nothing else is decoded.
+ return s.replace("&lt;", "<")\
+ .replace("&gt;", ">")\
+ .replace("&amp;", "&")  # "&amp;" is handled last so that "&amp;lt;" becomes "&lt;" instead of collapsing to "<".
+
class Tweet:
def __init__(self, tid, text, date, tags, media, urls, replying_to):
self.tid = tid
@@ -26,12 +31,13 @@ class Tweet:
def parse(json):
t = json["tweet"]
- text = t["full_text"]
+ text = htmlunescape(t["full_text"])
#tcolinks = TCORE.findall(text)
e = t["entities"]
ee = t["extended_entities"] if "extended_entities" in t else None
tid = t["id"]
tags = [tag["text"] for tag in e["hashtags"]]
+ tags = ["Tweet"] + tags
is_reply = "in_reply_to_status_id_str" in t
replying_to = None
if is_reply: