aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBobby <[email protected]>2024-10-01 00:58:46 -0400
committerBobby <[email protected]>2024-10-01 00:58:46 -0400
commit02348ba5b713f435e1e563507418412734957481 (patch)
tree0c29aac11a4fd04d29597f2d58ae9a587b70d83e
parentcb16eab87b078d69a198ff0337fa3577a011c263 (diff)
downloadyugen-02348ba5b713f435e1e563507418412734957481.tar.xz
yugen-02348ba5b713f435e1e563507418412734957481.zip
Optimized discussions
-rw-r--r--watch/utils.py112
1 files changed, 92 insertions, 20 deletions
diff --git a/watch/utils.py b/watch/utils.py
index 7455d8b..df9e36b 100644
--- a/watch/utils.py
+++ b/watch/utils.py
@@ -1,3 +1,4 @@
+from concurrent.futures import ThreadPoolExecutor, as_completed
import datetime
from difflib import SequenceMatcher
from functools import lru_cache
@@ -23,8 +24,8 @@ r = redis.Redis(
password=os.getenv("REDIS_PASSWORD"),
)
-# r.flushall()
-# print("Redis cache flushed")
+r.flushall()
+print("Redis cache flushed")
def get_episode_metadata(anime_data, episode):
episode_metadata = get_all_episode_metadata(anime_data)
@@ -526,25 +527,36 @@ def get_mal_episode_comments(mal_id, episode_number, mal_access_token):
return None
topic_id = topic_id_match.group(1)
-
- api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}"
-
- headers = {
- "Authorization": f"Bearer {mal_access_token}"
- }
-
+ api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}?limit=100"
+ headers = {"Authorization": f"Bearer {mal_access_token}"}
+
all_comments = []
next_url = api_url
+ def fetch_comments(url):
+ retries = 3
+ while retries > 0:
+ response = requests.get(url, headers=headers)
+ if response.status_code == 200:
+ return response.json()
+ elif response.status_code == 429:
+ print(f"Rate limit reached. Waiting before retrying... Retries left: {retries}")
+ retries -= 1
+ else:
+ print(f"Error fetching posts: {response.status_code}")
+ return None
+ return None
+
while next_url:
- response = requests.get(next_url, headers=headers)
- if response.status_code != 200:
- print(f"Error fetching posts: {response.status_code}")
- return None
-
- data = response.json()
- all_comments.extend(data["data"]["posts"])
- next_url = data.get("paging", {}).get("next")
+ data = fetch_comments(next_url)
+ if data:
+ all_comments.extend(data["data"]["posts"])
+ next_url = data.get("paging", {}).get("next")
+ else:
+ break # Stop if we encounter an error
+
+ if not all_comments:
+ return None
all_comments = sorted(
all_comments,
@@ -558,14 +570,74 @@ def get_mal_episode_comments(mal_id, episode_number, mal_access_token):
discussion_data["total"] = len(all_comments)
- data = {
+ result = {
"metadata": discussion_data,
"comments": all_comments
}
- store_in_redis_cache(cache_key, json.dumps(data))
+ store_in_redis_cache(cache_key, json.dumps(result), cache_time=3600) # Cache for 1 hour
+
+ return result
+
+
+# def get_mal_episode_comments(mal_id, episode_number, mal_access_token):
+# cache_key = f"anime:{mal_id}:episode:{episode_number}:comments"
+# cached_data = get_from_redis_cache(cache_key)
+
+# if cached_data:
+# return json.loads(cached_data)
+
+# discussion_data = get_mal_episode_discussion_data(mal_id, episode_number)
+
+# if not discussion_data:
+# return None
+
+# topic_id_match = re.search(r'topicid=(\d+)', discussion_data['forum_url'])
+# if not topic_id_match:
+# print(f"Could not extract topic ID from forum URL: {discussion_data['forum_url']}")
+# return None
+
+# topic_id = topic_id_match.group(1)
+
+# api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}"
+
+# headers = {
+# "Authorization": f"Bearer {mal_access_token}"
+# }
+
+# all_comments = []
+# next_url = api_url
+
+# while next_url:
+# response = requests.get(next_url, headers=headers)
+# if response.status_code != 200:
+# print(f"Error fetching posts: {response.status_code}")
+# return None
+
+# data = response.json()
+# all_comments.extend(data["data"]["posts"])
+# next_url = data.get("paging", {}).get("next")
+
+# all_comments = sorted(
+# all_comments,
+# key=lambda x: datetime.datetime.fromisoformat(x["created_at"].replace("Z", "+00:00")),
+# reverse=True
+# )
+
+# for post in all_comments:
+# decoded_text = html.unescape(post['body'])
+# post['body_html'] = parse_mixed_content(decoded_text)
+
+# discussion_data["total"] = len(all_comments)
+
+# data = {
+# "metadata": discussion_data,
+# "comments": all_comments
+# }
+
+# store_in_redis_cache(cache_key, json.dumps(data))
- return data
+# return data
def parse_mixed_content(content):
# Remove HTML comments