diff options
| author | Bobby <[email protected]> | 2024-10-01 00:58:46 -0400 |
|---|---|---|
| committer | Bobby <[email protected]> | 2024-10-01 00:58:46 -0400 |
| commit | 02348ba5b713f435e1e563507418412734957481 (patch) | |
| tree | 0c29aac11a4fd04d29597f2d58ae9a587b70d83e | |
| parent | cb16eab87b078d69a198ff0337fa3577a011c263 (diff) | |
| download | yugen-02348ba5b713f435e1e563507418412734957481.tar.xz yugen-02348ba5b713f435e1e563507418412734957481.zip | |
Optimized discussions
| -rw-r--r-- | watch/utils.py | 112 |
1 files changed, 92 insertions, 20 deletions
diff --git a/watch/utils.py b/watch/utils.py index 7455d8b..df9e36b 100644 --- a/watch/utils.py +++ b/watch/utils.py @@ -1,3 +1,4 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed import datetime from difflib import SequenceMatcher from functools import lru_cache @@ -23,8 +24,8 @@ r = redis.Redis( password=os.getenv("REDIS_PASSWORD"), ) -# r.flushall() -# print("Redis cache flushed") +r.flushall() +print("Redis cache flushed") def get_episode_metadata(anime_data, episode): episode_metadata = get_all_episode_metadata(anime_data) @@ -526,25 +527,36 @@ def get_mal_episode_comments(mal_id, episode_number, mal_access_token): return None topic_id = topic_id_match.group(1) - - api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}" - - headers = { - "Authorization": f"Bearer {mal_access_token}" - } - + api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}?limit=100" + headers = {"Authorization": f"Bearer {mal_access_token}"} + all_comments = [] next_url = api_url + def fetch_comments(url): + retries = 3 + while retries > 0: + response = requests.get(url, headers=headers) + if response.status_code == 200: + return response.json() + elif response.status_code == 429: + print(f"Rate limit reached. Waiting before retrying... Retries left: {retries}") + retries -= 1 + else: + print(f"Error fetching posts: {response.status_code}") + return None + return None + while next_url: - response = requests.get(next_url, headers=headers) - if response.status_code != 200: - print(f"Error fetching posts: {response.status_code}") - return None - - data = response.json() - all_comments.extend(data["data"]["posts"]) - next_url = data.get("paging", {}).get("next") + data = fetch_comments(next_url) + if data: + all_comments.extend(data["data"]["posts"]) + next_url = data.get("paging", {}).get("next") + else: + break # Stop if we encounter an error + + if not all_comments: + return None all_comments = sorted( all_comments, @@ -558,14 +570,74 @@ def get_mal_episode_comments(mal_id, episode_number, mal_access_token): discussion_data["total"] = len(all_comments) - data = { + result = { "metadata": discussion_data, "comments": all_comments } - store_in_redis_cache(cache_key, json.dumps(data)) + store_in_redis_cache(cache_key, json.dumps(result), cache_time=3600) # Cache for 1 hour + + return result + + +# def get_mal_episode_comments(mal_id, episode_number, mal_access_token): +# cache_key = f"anime:{mal_id}:episode:{episode_number}:comments" +# cached_data = get_from_redis_cache(cache_key) + +# if cached_data: +# return json.loads(cached_data) + +# discussion_data = get_mal_episode_discussion_data(mal_id, episode_number) + +# if not discussion_data: +# return None + +# topic_id_match = re.search(r'topicid=(\d+)', discussion_data['forum_url']) +# if not topic_id_match: +# print(f"Could not extract topic ID from forum URL: {discussion_data['forum_url']}") +# return None + +# topic_id = topic_id_match.group(1) + +# api_url = f"https://api.myanimelist.net/v2/forum/topic/{topic_id}" + +# headers = { +# "Authorization": f"Bearer {mal_access_token}" +# } + +# all_comments = [] +# next_url = api_url + +# while next_url: +# response = requests.get(next_url, headers=headers) +# if response.status_code != 200: +# print(f"Error fetching posts: {response.status_code}") +# return None + +# data = response.json() +# all_comments.extend(data["data"]["posts"]) +# next_url = data.get("paging", {}).get("next") + +# all_comments = sorted( +# all_comments, +# key=lambda x: datetime.datetime.fromisoformat(x["created_at"].replace("Z", "+00:00")), +# reverse=True +# ) + +# for post in all_comments: +# decoded_text = html.unescape(post['body']) +# post['body_html'] = parse_mixed_content(decoded_text) + +# discussion_data["total"] = len(all_comments) + +# data = { +# "metadata": discussion_data, +# "comments": all_comments +# } + +# store_in_redis_cache(cache_key, json.dumps(data)) - return data +# return data def parse_mixed_content(content): # Remove HTML comments |
