Optimized discussions

author: Bobby <[email protected]> 2024-10-01 00:58:46 -0400
committer: Bobby <[email protected]> 2024-10-01 00:58:46 -0400
commit: 02348ba5b713f435e1e563507418412734957481 (patch)
tree: 0c29aac11a4fd04d29597f2d58ae9a587b70d83e
parent: cb16eab87b078d69a198ff0337fa3577a011c263 (diff)
download: yugen-02348ba5b713f435e1e563507418412734957481.tar.xz yugen-02348ba5b713f435e1e563507418412734957481.zip
1 files changed, 92 insertions, 20 deletions
diff --git a/watch/utils.py b/watch/utils.py
index 7455d8b..df9e36b 100644
--- a/watch/utils.py
+++ b/watch/utils.py
@@ -1,3 +1,4 @@
+from concurrent.futures import ThreadPoolExecutor, as_completed
 import datetime
 from difflib import SequenceMatcher
 from functools import lru_cache
@@ -23,8 +24,8 @@ r = redis.Redis(
     password=os.getenv("REDIS_PASSWORD"),
 )
 
-# r.flushall()
-# print("Redis cache flushed")
+r.flushall()
+print("Redis cache flushed")
 
 def get_episode_metadata(anime_data, episode):
     episode_metadata = get_all_episode_metadata(anime_data)
@@ -526,25 +527,36 @@ def get_mal_episode_comments(mal_id, episode_number, mal_access_token):
         return None
     
     topic_id = topic_id_match.group(1)
-    
-    api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}"
-    
-    headers = {
-        "Authorization": f"Bearer {mal_access_token}"
-    }
-    
+    api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}?limit=100"
+    headers = {"Authorization": f"Bearer {mal_access_token}"}
+
     all_comments = []
     next_url = api_url
 
+    def fetch_comments(url):
+        retries = 3
+        while retries > 0:
+            response = requests.get(url, headers=headers)
+            if response.status_code == 200:
+                return response.json()
+            elif response.status_code == 429:
+                print(f"Rate limit reached. Waiting before retrying... Retries left: {retries}")
+                retries -= 1
+            else:
+                print(f"Error fetching posts: {response.status_code}")
+                return None
+        return None
+
     while next_url:
-        response = requests.get(next_url, headers=headers)
-        if response.status_code != 200:
-            print(f"Error fetching posts: {response.status_code}")
-            return None
-        
-        data = response.json()
-        all_comments.extend(data["data"]["posts"])
-        next_url = data.get("paging", {}).get("next")
+        data = fetch_comments(next_url)
+        if data:
+            all_comments.extend(data["data"]["posts"])
+            next_url = data.get("paging", {}).get("next")
+        else:
+            break  # Stop if we encounter an error
+
+    if not all_comments:
+        return None
 
     all_comments = sorted(
         all_comments,
@@ -558,14 +570,74 @@ def get_mal_episode_comments(mal_id, episode_number, mal_access_token):
 
     discussion_data["total"] = len(all_comments)
 
-    data = {
+    result = {
         "metadata": discussion_data,
         "comments": all_comments
     }
 
-    store_in_redis_cache(cache_key, json.dumps(data))
+    store_in_redis_cache(cache_key, json.dumps(result), cache_time=3600)  # Cache for 1 hour
+
+    return result
+
+
+# def get_mal_episode_comments(mal_id, episode_number, mal_access_token):
+#     cache_key = f"anime:{mal_id}:episode:{episode_number}:comments"
+#     cached_data = get_from_redis_cache(cache_key)
+
+#     if cached_data:
+#         return json.loads(cached_data)
+
+#     discussion_data = get_mal_episode_discussion_data(mal_id, episode_number)
+
+#     if not discussion_data:
+#         return None
+    
+#     topic_id_match = re.search(r'topicid=(\d+)', discussion_data['forum_url'])
+#     if not topic_id_match:
+#         print(f"Could not extract topic ID from forum URL: {discussion_data['forum_url']}")
+#         return None
+    
+#     topic_id = topic_id_match.group(1)
+    
+#     api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}"
+    
+#     headers = {
+#         "Authorization": f"Bearer {mal_access_token}"
+#     }
+    
+#     all_comments = []
+#     next_url = api_url
+
+#     while next_url:
+#         response = requests.get(next_url, headers=headers)
+#         if response.status_code != 200:
+#             print(f"Error fetching posts: {response.status_code}")
+#             return None
+        
+#         data = response.json()
+#         all_comments.extend(data["data"]["posts"])
+#         next_url = data.get("paging", {}).get("next")
+
+#     all_comments = sorted(
+#         all_comments,
+#         key=lambda x: datetime.datetime.fromisoformat(x["created_at"].replace("Z", "+00:00")),
+#         reverse=True
+#     )
+
+#     for post in all_comments:
+#         decoded_text = html.unescape(post['body'])
+#         post['body_html'] = parse_mixed_content(decoded_text)
+
+#     discussion_data["total"] = len(all_comments)
+
+#     data = {
+#         "metadata": discussion_data,
+#         "comments": all_comments
+#     }
+
+#     store_in_redis_cache(cache_key, json.dumps(data))
 
-    return data
+#     return data
 
 def parse_mixed_content(content):
     # Remove HTML comments
author	Bobby <[email protected]>	2024-10-01 00:58:46 -0400
committer	Bobby <[email protected]>	2024-10-01 00:58:46 -0400
commit	02348ba5b713f435e1e563507418412734957481 (patch)
tree	0c29aac11a4fd04d29597f2d58ae9a587b70d83e
parent	cb16eab87b078d69a198ff0337fa3577a011c263 (diff)
download	yugen-02348ba5b713f435e1e563507418412734957481.tar.xz yugen-02348ba5b713f435e1e563507418412734957481.zip