diff options
| author | Bobby <[email protected]> | 2026-02-26 14:28:21 +0530 |
|---|---|---|
| committer | Bobby <[email protected]> | 2026-02-26 14:28:21 +0530 |
| commit | 627c2c239e0a44b6363a9f02235a73f5e2c81d2e (patch) | |
| tree | 6c7b3cad8a5fb42c5649905a20800edf8e63f666 /utils/mal | |
| parent | aa4cf5ff588c9082282ee57074199dc7d2a37e09 (diff) | |
| download | metachan-627c2c239e0a44b6363a9f02235a73f5e2c81d2e.tar.xz metachan-627c2c239e0a44b6363a9f02235a73f5e2c81d2e.zip | |
Add MAL client and anime parsing functionality
- Implemented a new CloudflareClient to handle requests with randomized browser profiles.
- Created structures and functions for parsing anime data from MyAnimeList (MAL), including anime details, episodes, and theme songs.
- Added enums for anime types, statuses, sources, and ratings.
- Developed utility functions for making HTTP requests with rate limiting and error handling.
- Introduced image handling for anime covers and thumbnails.
- Established a comprehensive data model for anime, including fields for statistics, trailers, and external links.
Diffstat (limited to 'utils/mal')
| -rw-r--r-- | utils/mal/anime.go | 517 | ||||
| -rw-r--r-- | utils/mal/client.go | 110 | ||||
| -rw-r--r-- | utils/mal/enums.go | 62 | ||||
| -rw-r--r-- | utils/mal/episodes.go | 82 | ||||
| -rw-r--r-- | utils/mal/types.go | 144 |
5 files changed, 915 insertions, 0 deletions
diff --git a/utils/mal/anime.go b/utils/mal/anime.go new file mode 100644 index 0000000..08bf841 --- /dev/null +++ b/utils/mal/anime.go @@ -0,0 +1,517 @@ +package mal + +import ( + "fmt" + "metachan/utils/logger" + "regexp" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" +) + +var ( + producerIDPattern = regexp.MustCompile(`/anime/producer/(\d+)`) + genreIDPattern = regexp.MustCompile(`/anime/genre/(\d+)`) + youtubeIDPattern = regexp.MustCompile(`/embed/([a-zA-Z0-9_-]+)`) + themeSongTitlePattern = regexp.MustCompile(`"(.+?)"`) + themeSongArtistPattern = regexp.MustCompile(`by\s+(.+?)(?:\s+\(eps|\s*$)`) + themeSongEpisodesPattern = regexp.MustCompile(`\(eps\s+(\d+)(?:-(\d+))?\)`) + japaneseTextInParensPattern = regexp.MustCompile(`\(([^\x00-\x7F]+)\)`) + broadcastTimePattern = regexp.MustCompile(`(\w+)s?\s+at\s+(\d{2}:\d{2})\s+\((\w+)\)`) + imageResizePrefixPattern = regexp.MustCompile(`/r/\d+x\d+`) + leadingIndexPattern = regexp.MustCompile(`^#?\d+:?\s*`) + trailingEpisodeInfoPattern = regexp.MustCompile(`\s*\(eps\s.*$`) +) + +const airedDateLayout = "Jan 2, 2006" + +func extractSidebarValue(document *goquery.Document, label string) string { + var extractedValue string + document.Find("span.dark_text").Each(func(index int, selection *goquery.Selection) { + if strings.TrimSpace(selection.Text()) == label { + parentClone := selection.Parent().Clone() + parentClone.Find("span.dark_text").Remove() + extractedValue = strings.TrimSpace(parentClone.Text()) + } + }) + return extractedValue +} + +func extractSidebarMALIDs(document *goquery.Document, label string, idPattern *regexp.Regexp) []int { + var malIDs []int + document.Find("span.dark_text").Each(func(index int, selection *goquery.Selection) { + if strings.TrimSpace(selection.Text()) != label { + return + } + parentNode := selection.Parent() + if strings.Contains(parentNode.Text(), "None found") || strings.Contains(parentNode.Text(), "No genres") { + return + } + parentNode.Find("a").Each(func(linkIndex int, linkElement *goquery.Selection) { + href, exists := linkElement.Attr("href") + if !exists { + return + } + matches := idPattern.FindStringSubmatch(href) + if len(matches) > 1 { + parsedID, parseErr := strconv.Atoi(matches[1]) + if parseErr == nil { + malIDs = append(malIDs, parsedID) + } + } + }) + }) + return malIDs +} + +func extractSidebarMALIDsMultiLabel(document *goquery.Document, labels []string, idPattern *regexp.Regexp) []int { + for _, label := range labels { + malIDs := extractSidebarMALIDs(document, label, idPattern) + if len(malIDs) > 0 { + return malIDs + } + } + return nil +} + +func buildImageFromBaseURL(rawURL string) Image { + cleanedURL := imageResizePrefixPattern.ReplaceAllString(rawURL, "") + extensionIndex := strings.LastIndex(cleanedURL, ".") + if extensionIndex == -1 { + return Image{} + } + + pathBase := cleanedURL[:extensionIndex] + + return Image{ + JPG: ImageFormat{ + Small: pathBase + "t.jpg", + Medium: pathBase + ".jpg", + Large: pathBase + "l.jpg", + Original: pathBase + ".jpg", + }, + WEBP: ImageFormat{ + Small: pathBase + "t.webp", + Medium: pathBase + ".webp", + Large: pathBase + "l.webp", + Original: pathBase + ".webp", + }, + } +} + +func buildYouTubeThumbnail(videoID string) Image { + thumbnailBase := fmt.Sprintf("https://img.youtube.com/vi/%s", videoID) + return Image{ + JPG: ImageFormat{ + Small: thumbnailBase + "/default.jpg", + Medium: thumbnailBase + "/mqdefault.jpg", + Large: thumbnailBase + "/hqdefault.jpg", + Original: thumbnailBase + "/maxresdefault.jpg", + }, + WEBP: ImageFormat{ + Small: thumbnailBase + "/default.webp", + Medium: thumbnailBase + "/mqdefault.webp", + Large: thumbnailBase + "/hqdefault.webp", + Original: thumbnailBase + "/maxresdefault.webp", + }, + } +} + +func parseAiredDateString(dateString string) AiredDate { + trimmedDate := strings.TrimSpace(dateString) + if trimmedDate == "" || trimmedDate == "?" || trimmedDate == "Not available" { + return AiredDate{} + } + parsedTime, parseErr := time.Parse(airedDateLayout, trimmedDate) + if parseErr != nil { + return AiredDate{String: trimmedDate} + } + return AiredDate{ + Day: parsedTime.Day(), + Month: int(parsedTime.Month()), + Year: parsedTime.Year(), + String: trimmedDate, + } +} + +func parseIntFromText(text string) int { + cleanedText := strings.ReplaceAll(strings.TrimSpace(text), ",", "") + cleanedText = strings.TrimPrefix(cleanedText, "#") + parsedValue, _ := strconv.Atoi(cleanedText) + return parsedValue +} + +func parseFloatFromText(text string) float64 { + trimmedText := strings.TrimSpace(text) + if trimmedText == "N/A" || trimmedText == "" { + return 0 + } + parsedValue, _ := strconv.ParseFloat(trimmedText, 64) + return parsedValue +} + +func parseAnimeTitle(document *goquery.Document) Title { + var animeTitle Title + romajiTitle, _ := document.Find(`meta[property="og:title"]`).Attr("content") + animeTitle.Romaji = strings.TrimSpace(romajiTitle) + + document.Find("span.dark_text").Each(func(index int, selection *goquery.Selection) { + label := strings.TrimSpace(selection.Text()) + parentClone := selection.Parent().Clone() + parentClone.Find("span.dark_text").Remove() + value := strings.TrimSpace(parentClone.Text()) + + switch label { + case "English:": + animeTitle.English = value + case "Japanese:": + animeTitle.Japanese = value + case "Synonyms:": + if value != "" { + animeTitle.Synonyms = strings.Split(value, ", ") + } + } + }) + + return animeTitle +} + +func parseAnimeImage(document *goquery.Document) Image { + imageURL, exists := document.Find(`meta[property="og:image"]`).Attr("content") + if !exists || imageURL == "" { + return Image{} + } + return buildImageFromBaseURL(imageURL) +} + +func parseAnimeStatistics(document *goquery.Document) Statistics { + return Statistics{ + Score: parseFloatFromText(document.Find(`span[itemprop="ratingValue"]`).Text()), + ScoredBy: parseIntFromText(document.Find(`span[itemprop="ratingCount"]`).Text()), + Rank: parseIntFromText(extractSidebarValue(document, "Ranked:")), + Popularity: parseIntFromText(extractSidebarValue(document, "Popularity:")), + Members: parseIntFromText(extractSidebarValue(document, "Members:")), + Favorites: parseIntFromText(extractSidebarValue(document, "Favorites:")), + } +} + +func parseAnimeSynopsis(document *goquery.Document) string { + synopsisNode := document.Find(`p[itemprop="description"]`) + if synopsisNode.Length() == 0 { + return "" + } + synopsisText := strings.TrimSpace(synopsisNode.Text()) + if strings.Contains(synopsisText, "No synopsis information has been added") { + return "" + } + return synopsisText +} + +func parseAnimeBackground(document *goquery.Document) string { + var backgroundParts []string + document.Find("h2").Each(func(index int, heading *goquery.Selection) { + if strings.TrimSpace(heading.Text()) != "Background" { + return + } + heading.NextUntil("h2").Each(func(siblingIndex int, sibling *goquery.Selection) { + text := strings.TrimSpace(sibling.Text()) + if text != "" && !strings.Contains(text, "No background information") { + backgroundParts = append(backgroundParts, text) + } + }) + }) + return strings.Join(backgroundParts, " ") +} + +func parseAnimeTrailer(document *goquery.Document) Trailer { + trailerLink := document.Find("div.video-promotion a") + if trailerLink.Length() == 0 { + return Trailer{} + } + embedURL, _ := trailerLink.Attr("href") + youtubeMatches := youtubeIDPattern.FindStringSubmatch(embedURL) + if len(youtubeMatches) < 2 { + return Trailer{EmbedURL: embedURL, Preview: Preview{URL: embedURL}} + } + videoID := youtubeMatches[1] + return Trailer{ + YoutubeID: videoID, + EmbedURL: embedURL, + Preview: Preview{ + URL: fmt.Sprintf("https://www.youtube.com/watch?v=%s", videoID), + Thumbnail: buildYouTubeThumbnail(videoID), + }, + } +} + +func parseAnimePremiered(document *goquery.Document) Premiered { + text := extractSidebarValue(document, "Premiered:") + if text == "" || text == "?" { + return Premiered{} + } + parts := strings.SplitN(text, " ", 2) + if len(parts) != 2 { + return Premiered{} + } + year, _ := strconv.Atoi(parts[1]) + return Premiered{Season: Season(parts[0]), Year: year} +} + +func parseAnimeAired(document *goquery.Document) Aired { + text := extractSidebarValue(document, "Aired:") + if text == "" || text == "Not available" { + return Aired{} + } + parts := strings.SplitN(text, " to ", 2) + aired := Aired{String: text} + if len(parts) >= 1 { + aired.From = parseAiredDateString(parts[0]) + } + if len(parts) >= 2 { + aired.To = parseAiredDateString(parts[1]) + } + return aired +} + +func parseAnimeBroadcast(document *goquery.Document) Broadcast { + text := extractSidebarValue(document, "Broadcast:") + if text == "" { + return Broadcast{} + } + matches := broadcastTimePattern.FindStringSubmatch(text) + if len(matches) == 4 { + return Broadcast{Day: matches[1], Time: matches[2], Timezone: matches[3], String: text} + } + return Broadcast{String: text} +} + +func parseAnimeThemeSongs(document *goquery.Document, containerClass string) []ThemeSong { + var themeSongs []ThemeSong + document.Find(fmt.Sprintf("div.%s table tr", containerClass)).Each(func(index int, row *goquery.Selection) { + songText := strings.TrimSpace(row.Find("td.theme-song").Text()) + if songText == "" || strings.Contains(songText, "No opening themes") || strings.Contains(songText, "No ending themes") { + return + } + + themeSong := parseThemeSongText(songText) + + row.Find("td.theme-song-artist a").Each(func(linkIndex int, linkElement *goquery.Selection) { + href, exists := linkElement.Attr("href") + if !exists || href == "" { + return + } + siteName := strings.TrimSpace(linkElement.Text()) + if siteName == "" { + siteName, _ = linkElement.Attr("title") + } + if siteName != "" { + themeSong.Links = append(themeSong.Links, ExternalLink{Name: siteName, URL: href}) + } + }) + + themeSongs = append(themeSongs, themeSong) + }) + return themeSongs +} + +func parseThemeSongText(rawText string) ThemeSong { + text := leadingIndexPattern.ReplaceAllString(strings.TrimSpace(rawText), "") + var themeSong ThemeSong + + episodeMatches := themeSongEpisodesPattern.FindStringSubmatch(text) + if len(episodeMatches) > 1 { + themeSong.Episodes.Start, _ = strconv.Atoi(episodeMatches[1]) + if len(episodeMatches) > 2 && episodeMatches[2] != "" { + themeSong.Episodes.End, _ = strconv.Atoi(episodeMatches[2]) + } else { + themeSong.Episodes.End = themeSong.Episodes.Start + } + } + + titleMatches := themeSongTitlePattern.FindStringSubmatch(text) + if len(titleMatches) > 1 { + fullTitle := titleMatches[1] + japaneseMatches := japaneseTextInParensPattern.FindStringSubmatch(fullTitle) + if len(japaneseMatches) > 1 { + themeSong.Title.Japanese = japaneseMatches[1] + themeSong.Title.Romaji = strings.TrimSpace(japaneseTextInParensPattern.ReplaceAllString(fullTitle, "")) + } else { + themeSong.Title.Romaji = fullTitle + } + } + + artistMatches := themeSongArtistPattern.FindStringSubmatch(text) + if len(artistMatches) > 1 { + themeSong.Artist = strings.TrimSpace(trailingEpisodeInfoPattern.ReplaceAllString(artistMatches[1], "")) + } + + return themeSong +} + +func parseAnimeExternalLinks(document *goquery.Document) []ExternalLink { + var externalLinks []ExternalLink + document.Find("div.external_links a.link").Each(func(index int, linkElement *goquery.Selection) { + href, exists := linkElement.Attr("href") + if !exists || href == "" { + return + } + linkName := strings.TrimSpace(linkElement.Text()) + if linkName != "" { + externalLinks = append(externalLinks, ExternalLink{Name: linkName, URL: href}) + } + }) + return externalLinks +} + +func parseAnimeStreamingLinks(document *goquery.Document) []ExternalLink { + var streamingLinks []ExternalLink + document.Find("h2").Each(func(index int, heading *goquery.Selection) { + headingText := strings.TrimSpace(heading.Text()) + if headingText != "Available At" && headingText != "Streaming Platforms" { + return + } + heading.NextUntil("h2").Find("a").Each(func(linkIndex int, linkElement *goquery.Selection) { + href, exists := linkElement.Attr("href") + if !exists || href == "" { + return + } + linkName := strings.TrimSpace(linkElement.Text()) + if linkName == "" { + linkName, _ = linkElement.Attr("title") + } + if linkName != "" { + streamingLinks = append(streamingLinks, ExternalLink{Name: linkName, URL: href}) + } + }) + }) + return streamingLinks +} + +func parsePromotionalVideos(document *goquery.Document) []PromotionalVideo { + var videos []PromotionalVideo + document.Find("div.promotional-video section > div").Each(func(index int, videoElement *goquery.Selection) { + linkElement := videoElement.Find("a") + if linkElement.Length() == 0 { + return + } + embedURL, _ := linkElement.Attr("href") + titleText := strings.TrimSpace(linkElement.Find("span").First().Text()) + + youtubeMatches := youtubeIDPattern.FindStringSubmatch(embedURL) + if len(youtubeMatches) < 2 { + return + } + videoID := youtubeMatches[1] + + videos = append(videos, PromotionalVideo{ + Title: Title{Romaji: titleText}, + Preview: Preview{ + URL: fmt.Sprintf("https://www.youtube.com/watch?v=%s", videoID), + Thumbnail: buildYouTubeThumbnail(videoID), + }, + }) + }) + return videos +} + +func parseMusicVideos(document *goquery.Document) []MusicVideo { + var videos []MusicVideo + document.Find("div.music-video section > div").Each(func(index int, videoElement *goquery.Selection) { + linkElement := videoElement.Find("a") + if linkElement.Length() == 0 { + return + } + embedURL, _ := linkElement.Attr("href") + titleText := strings.TrimSpace(linkElement.Find("span").First().Text()) + + youtubeMatches := youtubeIDPattern.FindStringSubmatch(embedURL) + if len(youtubeMatches) < 2 { + return + } + videoID := youtubeMatches[1] + + musicVideo := MusicVideo{ + Title: Title{Romaji: titleText}, + Preview: Preview{ + URL: fmt.Sprintf("https://www.youtube.com/watch?v=%s", videoID), + Thumbnail: buildYouTubeThumbnail(videoID), + }, + } + + metadataText := strings.TrimSpace(videoElement.Find("div div").Last().Text()) + if separatorIndex := strings.Index(metadataText, " - "); separatorIndex != -1 { + musicVideo.Artist = strings.TrimSpace(metadataText[:separatorIndex]) + } + + videos = append(videos, musicVideo) + }) + return videos +} + +func parseAnimeDocument(document *goquery.Document, malID int) Anime { + pageURL, _ := document.Find(`meta[property="og:url"]`).Attr("content") + statusText := extractSidebarValue(document, "Status:") + ratingText := extractSidebarValue(document, "Rating:") + if ratingText == "None" { + ratingText = "" + } + + return Anime{ + MALID: malID, + URL: pageURL, + Image: parseAnimeImage(document), + Title: parseAnimeTitle(document), + Type: Type(extractSidebarValue(document, "Type:")), + Source: Source(extractSidebarValue(document, "Source:")), + Status: Status(statusText), + Airing: statusText == string(StatusAiring), + Rating: Rating(ratingText), + Synopsis: parseAnimeSynopsis(document), + Background: parseAnimeBackground(document), + Duration: extractSidebarValue(document, "Duration:"), + EpisodeCount: parseIntFromText(extractSidebarValue(document, "Episodes:")), + Premiered: parseAnimePremiered(document), + Aired: parseAnimeAired(document), + Broadcast: parseAnimeBroadcast(document), + Statistics: parseAnimeStatistics(document), + Trailer: parseAnimeTrailer(document), + + Openings: parseAnimeThemeSongs(document, "opnening"), + Endings: parseAnimeThemeSongs(document, "ending"), + + Genres: extractSidebarMALIDsMultiLabel(document, []string{"Genres:", "Genre:"}, genreIDPattern), + ExplicitGenres: extractSidebarMALIDs(document, "Explicit Genres:", genreIDPattern), + Themes: extractSidebarMALIDsMultiLabel(document, []string{"Themes:", "Theme:"}, genreIDPattern), + Demographics: extractSidebarMALIDsMultiLabel(document, []string{"Demographics:", "Demographic:"}, genreIDPattern), + Producers: extractSidebarMALIDs(document, "Producers:", producerIDPattern), + Studios: extractSidebarMALIDs(document, "Studios:", producerIDPattern), + Licensors: extractSidebarMALIDs(document, "Licensors:", producerIDPattern), + + External: parseAnimeExternalLinks(document), + Streaming: parseAnimeStreamingLinks(document), + } +} + +func GetAnimeByMALID(malID int) (*Anime, error) { + animePageURL := fmt.Sprintf("%s/anime/%d", malBaseURL, malID) + animeDocument, fetchErr := makeRequest(animePageURL) + if fetchErr != nil { + logger.Errorf("MALClient", "Failed to fetch anime page for MAL ID %d: %v", malID, fetchErr) + return nil, fmt.Errorf("failed to fetch anime page for MAL ID %d: %w", malID, fetchErr) + } + + anime := parseAnimeDocument(animeDocument, malID) + + videosPageURL := fmt.Sprintf("%s/anime/%d/_/video", malBaseURL, malID) + videosDocument, videosFetchErr := makeRequest(videosPageURL) + if videosFetchErr != nil { + logger.Warnf("MALClient", "Failed to fetch videos page for MAL ID %d: %v", malID, videosFetchErr) + } else { + anime.Videos = parsePromotionalVideos(videosDocument) + anime.MusicVideos = parseMusicVideos(videosDocument) + } + + return &anime, nil +}
\ No newline at end of file diff --git a/utils/mal/client.go b/utils/mal/client.go new file mode 100644 index 0000000..9bc5d98 --- /dev/null +++ b/utils/mal/client.go @@ -0,0 +1,110 @@ +package mal + +import ( + "fmt" + "math" + "metachan/utils/cfbypass" + "metachan/utils/logger" + "metachan/utils/ratelimit" + "net/http" + "time" + + "github.com/PuerkitoBio/goquery" +) + +const ( + malBaseURL = "https://myanimelist.net" + rateLimitPerSec = 4 + requestTimeout = 30 * time.Second + requestJitter = 250 * time.Millisecond + maxRetries = 3 + backoffBase = 2 * time.Second +) + +var ( + rateLimiter = ratelimit.NewRateLimiter(rateLimitPerSec, time.Second) + cloudflareClient = cfbypass.NewCloudflareClient(requestTimeout) +) + +func StopRateLimiters() { + rateLimiter.Stop() +} + +func makeRequest(targetURL string) (*goquery.Document, error) { + var retries int + + for retries < maxRetries { + rateLimiter.Wait() + time.Sleep(cfbypass.AddJitter(requestJitter)) + + request, err := http.NewRequest("GET", targetURL, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request for %s: %w", targetURL, err) + } + + for headerName, headerValue := range cloudflareClient.BrowserProfile.Headers { + if headerName == "Accept-Encoding" { + continue + } + request.Header.Set(headerName, headerValue) + } + request.Header.Set("User-Agent", cloudflareClient.BrowserProfile.UserAgent) + + response, err := cloudflareClient.HttpClient.Do(request) + if err != nil { + retries++ + if retries >= maxRetries { + return nil, fmt.Errorf("all retries exhausted for %s: %w", targetURL, err) + } + logger.Debugf("MALClient", "Request failed for %s (attempt %d/%d)", targetURL, retries, maxRetries) + time.Sleep(getBackoffDuration(retries)) + continue + } + + if response.StatusCode == http.StatusOK { + document, parseErr := goquery.NewDocumentFromReader(response.Body) + response.Body.Close() + if parseErr != nil { + return nil, fmt.Errorf("failed to parse HTML from %s: %w", targetURL, parseErr) + } + + pageTitle := document.Find("title").Text() + logger.Debugf("MALClient", "Page title for %s: %q", targetURL, pageTitle) + + htmlContent, _ := document.Html() + if len(htmlContent) > 500 { + htmlContent = htmlContent[:500] + } + logger.Debugf("MALClient", "HTML preview for %s: %s", targetURL, htmlContent) + + return document, nil + } + + response.Body.Close() + + if response.StatusCode == http.StatusNotFound { + return nil, fmt.Errorf("resource not found: %s", targetURL) + } + + if response.StatusCode >= 400 && response.StatusCode < 500 && + response.StatusCode != http.StatusTooManyRequests && + response.StatusCode != http.StatusForbidden { + return nil, fmt.Errorf("client error %d for %s", response.StatusCode, targetURL) + } + + retries++ + if retries >= maxRetries { + return nil, fmt.Errorf("all retries exhausted for %s (status %d)", targetURL, response.StatusCode) + } + + logger.Warnf("MALClient", "Status %d for %s (attempt %d/%d)", response.StatusCode, targetURL, retries, maxRetries) + time.Sleep(getBackoffDuration(retries)) + } + + return nil, fmt.Errorf("all retries exhausted for %s", targetURL) +} + +func getBackoffDuration(attempt int) time.Duration { + exponentialDelay := time.Duration(float64(backoffBase) * math.Pow(2, float64(attempt-1))) + return cfbypass.AddJitter(exponentialDelay) +}
\ No newline at end of file diff --git a/utils/mal/enums.go b/utils/mal/enums.go new file mode 100644 index 0000000..76d6f3d --- /dev/null +++ b/utils/mal/enums.go @@ -0,0 +1,62 @@ +package mal + +type Type string + +const ( + TypeTV Type = "TV" + TypeMovie Type = "Movie" + TypeOVA Type = "OVA" + TypeONA Type = "ONA" + TypeSpecial Type = "Special" + TypeMusic Type = "Music" + TypeUnknown Type = "Unknown" +) + +type Status string + +const ( + StatusAiring Status = "Currently Airing" + StatusFinished Status = "Finished Airing" + StatusNotYetAired Status = "Not yet aired" +) + +type Source string + +const ( + SourceOriginal Source = "Original" + SourceManga Source = "Manga" + SourceLightNovel Source = "Light novel" + SourceVisualNovel Source = "Visual novel" + SourceGame Source = "Game" + SourceNovel Source = "Novel" + SourceWebManga Source = "Web manga" + SourceWebNovel Source = "Web novel" + SourceCardGame Source = "Card game" + SourceFourKomaManga Source = "4-koma manga" + SourceBook Source = "Book" + SourcePictureBook Source = "Picture book" + SourceRadio Source = "Radio" + SourceMusic Source = "Music" + SourceOther Source = "Other" + SourceUnknown Source = "Unknown" +) + +type Rating string + +const ( + RatingG Rating = "G - All Ages" + RatingPG Rating = "PG - Children" + RatingPG13 Rating = "PG-13 - Teens 13 or older" + RatingR17 Rating = "R - 17+ (violence & profanity)" + RatingR Rating = "R+ - Mild Nudity" + RatingRx Rating = "Rx - Hentai" +) + +type Season string + +const ( + SeasonWinter Season = "Winter" + SeasonSpring Season = "Spring" + SeasonSummer Season = "Summer" + SeasonFall Season = "Fall" +)
\ No newline at end of file diff --git a/utils/mal/episodes.go b/utils/mal/episodes.go new file mode 100644 index 0000000..ff61686 --- /dev/null +++ b/utils/mal/episodes.go @@ -0,0 +1,82 @@ +package mal + +import ( + "fmt" + "metachan/utils/logger" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" +) + +func parseEpisodeRow(row *goquery.Selection) Episode { + numberText := strings.TrimSpace(row.Find("td.episode-number").Text()) + episodeNumber, _ := strconv.Atoi(numberText) + + titleCell := row.Find("td.episode-title") + titleLink := titleCell.Find("a") + episodeURL, _ := titleLink.Attr("href") + + englishTitle := strings.TrimSpace(titleLink.Text()) + japaneseTitle := strings.TrimSpace(titleCell.Find("span.di-ib").Text()) + + airedText := strings.TrimSpace(row.Find("td.episode-aired").Text()) + + forumLink := row.Find("td.episode-forum a") + forumURL, _ := forumLink.Attr("href") + + fillerTag := row.Find("span.filler") + recapTag := row.Find("span.recap") + + return Episode{ + Number: episodeNumber, + URL: episodeURL, + Title: Title{ + English: englishTitle, + Japanese: japaneseTitle, + }, + Aired: parseAiredDateString(airedText), + ForumURL: forumURL, + Filler: fillerTag.Length() > 0, + Recap: recapTag.Length() > 0, + } +} + +func GetAnimeEpisodesByMALID(malID int) ([]Episode, error) { + var allEpisodes []Episode + offset := 0 + + for { + pageURL := fmt.Sprintf("%s/anime/%d/_/episode?offset=%d", malBaseURL, malID, offset) + document, fetchErr := makeRequest(pageURL) + if fetchErr != nil { + if len(allEpisodes) > 0 { + logger.Warnf("MALClient", "Failed to fetch episodes page at offset %d for MAL ID %d: %v", offset, malID, fetchErr) + break + } + logger.Errorf("MALClient", "Failed to fetch episodes for MAL ID %d: %v", malID, fetchErr) + return nil, fmt.Errorf("failed to fetch episodes for MAL ID %d: %w", malID, fetchErr) + } + + episodeRows := document.Find("table.episode_list tbody tr") + if episodeRows.Length() == 0 { + break + } + + episodeRows.Each(func(index int, row *goquery.Selection) { + episode := parseEpisodeRow(row) + if episode.Number > 0 { + allEpisodes = append(allEpisodes, episode) + } + }) + + nextPageLink := document.Find("a.link-blue-box.next") + if nextPageLink.Length() == 0 { + break + } + + offset += 100 + } + + return allEpisodes, nil +}
\ No newline at end of file diff --git a/utils/mal/types.go b/utils/mal/types.go new file mode 100644 index 0000000..9226072 --- /dev/null +++ b/utils/mal/types.go @@ -0,0 +1,144 @@ +package mal + +type ImageFormat struct { + Small string + Medium string + Large string + Original string +} + +type Image struct { + JPG ImageFormat + WEBP ImageFormat +} + +type Title struct { + English string + Japanese string + Romaji string + Synonyms []string +} + +type AiredDate struct { + Day int + Month int + Year int + String string +} + +type Premiered struct { + Season Season + Year int +} + +type Aired struct { + From AiredDate + To AiredDate + String string +} + +type Broadcast struct { + Day string + Time string + Timezone string + String string +} + +type Statistics struct { + Score float64 + ScoredBy int + Rank int + Popularity int + Members int + Favorites int +} + +type Preview struct { + URL string + Thumbnail Image +} + +type Trailer struct { + YoutubeID string + EmbedURL string + Preview +} + +type EpisodeRange struct { + Start int + End int +} + +type ExternalLink struct { + Name string + URL string +} + +type ThemeSong struct { + Title Title + Artist string + Episodes EpisodeRange + Links []ExternalLink +} + +type PromotionalVideo struct { + Title Title + Preview +} + +type MusicVideo struct { + Title Title + Artist string + Preview +} + +type Episode struct { + Number int + URL string + Title Title + Aired AiredDate + Score float64 + Filler bool + Recap bool + ForumURL string + Synopsis string + Preview Preview +} + +type Anime struct { + MALID int + URL string + Image Image + Title Title + Type Type + Source Source + Status Status + Airing bool + Rating Rating + Synopsis string + Background string + Duration string + EpisodeCount int + Premiered Premiered + Aired Aired + Broadcast Broadcast + Statistics Statistics + Trailer Trailer + + Openings []ThemeSong + Endings []ThemeSong + Videos []PromotionalVideo + MusicVideos []MusicVideo + Episodes []Episode + + Genres []int + ExplicitGenres []int + Themes []int + Demographics []int + Producers []int + Studios []int + Licensors []int + + External []ExternalLink + Streaming []ExternalLink +}
\ No newline at end of file |
