aboutsummaryrefslogtreecommitdiff
path: root/utils/mal
diff options
context:
space:
mode:
authorBobby <[email protected]>2026-02-26 14:28:21 +0530
committerBobby <[email protected]>2026-02-26 14:28:21 +0530
commit627c2c239e0a44b6363a9f02235a73f5e2c81d2e (patch)
tree6c7b3cad8a5fb42c5649905a20800edf8e63f666 /utils/mal
parentaa4cf5ff588c9082282ee57074199dc7d2a37e09 (diff)
downloadmetachan-627c2c239e0a44b6363a9f02235a73f5e2c81d2e.tar.xz
metachan-627c2c239e0a44b6363a9f02235a73f5e2c81d2e.zip
Add MAL client and anime parsing functionality
- Implemented a new CloudflareClient to handle requests with randomized browser profiles. - Created structures and functions for parsing anime data from MyAnimeList (MAL), including anime details, episodes, and theme songs. - Added enums for anime types, statuses, sources, and ratings. - Developed utility functions for making HTTP requests with rate limiting and error handling. - Introduced image handling for anime covers and thumbnails. - Established a comprehensive data model for anime, including fields for statistics, trailers, and external links.
Diffstat (limited to 'utils/mal')
-rw-r--r--utils/mal/anime.go517
-rw-r--r--utils/mal/client.go110
-rw-r--r--utils/mal/enums.go62
-rw-r--r--utils/mal/episodes.go82
-rw-r--r--utils/mal/types.go144
5 files changed, 915 insertions, 0 deletions
diff --git a/utils/mal/anime.go b/utils/mal/anime.go
new file mode 100644
index 0000000..08bf841
--- /dev/null
+++ b/utils/mal/anime.go
@@ -0,0 +1,517 @@
+package mal
+
+import (
+ "fmt"
+ "metachan/utils/logger"
+ "regexp"
+ "strconv"
+ "strings"
+ "time"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+// Patterns used to pull IDs and text fragments out of scraped MAL markup.
+// They are compiled once, at package initialisation.
+var (
+	// producerIDPattern captures the numeric ID in a "/anime/producer/<id>" href.
+	producerIDPattern = regexp.MustCompile(`/anime/producer/(\d+)`)
+	// genreIDPattern captures the numeric ID in a "/anime/genre/<id>" href.
+	genreIDPattern = regexp.MustCompile(`/anime/genre/(\d+)`)
+	// youtubeIDPattern captures the video ID that follows "/embed/" in a URL.
+	youtubeIDPattern = regexp.MustCompile(`/embed/([a-zA-Z0-9_-]+)`)
+	// themeSongTitlePattern captures the first double-quoted span (non-greedy).
+	themeSongTitlePattern = regexp.MustCompile(`"(.+?)"`)
+	// themeSongArtistPattern captures what follows "by" up to an "(eps" group
+	// or the end of the text.
+	themeSongArtistPattern = regexp.MustCompile(`by\s+(.+?)(?:\s+\(eps|\s*$)`)
+	// themeSongEpisodesPattern captures "(eps N)" or "(eps N-M)".
+	themeSongEpisodesPattern = regexp.MustCompile(`\(eps\s+(\d+)(?:-(\d+))?\)`)
+	// japaneseTextInParensPattern captures a parenthesised run that consists
+	// solely of non-ASCII characters.
+	japaneseTextInParensPattern = regexp.MustCompile(`\(([^\x00-\x7F]+)\)`)
+	// broadcastTimePattern splits text such as "Sundays at 17:00 (JST)" into
+	// day, time and timezone. The day group is lazy on purpose: a greedy
+	// `(\w+)` swallows the plural "s" itself and turns the following `s?`
+	// into a no-op, so the captured day would be "Sundays" instead of "Sunday".
+	broadcastTimePattern = regexp.MustCompile(`(\w+?)s?\s+at\s+(\d{2}:\d{2})\s+\((\w+)\)`)
+	// imageResizePrefixPattern matches a "/r/<width>x<height>" path segment.
+	imageResizePrefixPattern = regexp.MustCompile(`/r/\d+x\d+`)
+	// leadingIndexPattern matches a leading "#<n>:" / "<n>:" style index and
+	// the whitespace after it.
+	leadingIndexPattern = regexp.MustCompile(`^#?\d+:?\s*`)
+	// trailingEpisodeInfoPattern matches everything from "(eps " to the end.
+	trailingEpisodeInfoPattern = regexp.MustCompile(`\s*\(eps\s.*$`)
+)
+
+// airedDateLayout is the time.Parse layout for full dates such as "Oct 4, 2020".
+const airedDateLayout = "Jan 2, 2006"
+
+func extractSidebarValue(document *goquery.Document, label string) string {
+ var extractedValue string
+ document.Find("span.dark_text").Each(func(index int, selection *goquery.Selection) {
+ if strings.TrimSpace(selection.Text()) == label {
+ parentClone := selection.Parent().Clone()
+ parentClone.Find("span.dark_text").Remove()
+ extractedValue = strings.TrimSpace(parentClone.Text())
+ }
+ })
+ return extractedValue
+}
+
+// extractSidebarMALIDs collects numeric IDs from the links of a sidebar row.
+// The row is the parent of a span.dark_text whose trimmed text equals label.
+// Rows whose text contains "None found" or "No genres" are skipped. For every
+// <a> in the row, the first capture group of idPattern applied to the href is
+// parsed as an int; links without an href, without a match, or with a
+// non-numeric group are ignored. The result is nil when nothing is found.
+func extractSidebarMALIDs(document *goquery.Document, label string, idPattern *regexp.Regexp) []int {
+	var ids []int
+
+	// collect appends the ID found in a single anchor, if any.
+	collect := func(_ int, anchor *goquery.Selection) {
+		href, ok := anchor.Attr("href")
+		if !ok {
+			return
+		}
+		groups := idPattern.FindStringSubmatch(href)
+		if len(groups) <= 1 {
+			return
+		}
+		if id, err := strconv.Atoi(groups[1]); err == nil {
+			ids = append(ids, id)
+		}
+	}
+
+	document.Find("span.dark_text").Each(func(_ int, span *goquery.Selection) {
+		if strings.TrimSpace(span.Text()) != label {
+			return
+		}
+		row := span.Parent()
+		rowText := row.Text()
+		if strings.Contains(rowText, "None found") || strings.Contains(rowText, "No genres") {
+			return
+		}
+		row.Find("a").Each(collect)
+	})
+	return ids
+}
+
+// extractSidebarMALIDsMultiLabel tries each label in order and returns the IDs
+// of the first one for which extractSidebarMALIDs yields a non-empty result.
+// It returns nil when no label produces any ID.
+func extractSidebarMALIDsMultiLabel(document *goquery.Document, labels []string, idPattern *regexp.Regexp) []int {
+	for i := range labels {
+		if found := extractSidebarMALIDs(document, labels[i], idPattern); len(found) != 0 {
+			return found
+		}
+	}
+	return nil
+}
+
+// buildImageFromBaseURL derives the JPG and WEBP size variants of an image
+// from one of its URLs.
+//
+// Any "/r/<width>x<height>" segment is removed first, then everything from the
+// last "." onward is dropped to obtain the base path. The variants are the
+// base path plus "t" (Small), nothing (Medium and Original) or "l" (Large),
+// followed by ".jpg" or ".webp".
+//
+// A zero Image is returned when the final path segment has no extension. The
+// dot must come after the last "/": otherwise it belongs to the host or a
+// directory name and cutting there would produce a bogus URL.
+func buildImageFromBaseURL(rawURL string) Image {
+	cleanedURL := imageResizePrefixPattern.ReplaceAllString(rawURL, "")
+	extensionIndex := strings.LastIndex(cleanedURL, ".")
+	if extensionIndex == -1 || extensionIndex < strings.LastIndex(cleanedURL, "/") {
+		return Image{}
+	}
+
+	pathBase := cleanedURL[:extensionIndex]
+
+	// variants builds the four size URLs for one file extension.
+	variants := func(extension string) ImageFormat {
+		return ImageFormat{
+			Small:    pathBase + "t" + extension,
+			Medium:   pathBase + extension,
+			Large:    pathBase + "l" + extension,
+			Original: pathBase + extension,
+		}
+	}
+
+	return Image{
+		JPG:  variants(".jpg"),
+		WEBP: variants(".webp"),
+	}
+}
+
+func buildYouTubeThumbnail(videoID string) Image {
+ thumbnailBase := fmt.Sprintf("https://img.youtube.com/vi/%s", videoID)
+ return Image{
+ JPG: ImageFormat{
+ Small: thumbnailBase + "/default.jpg",
+ Medium: thumbnailBase + "/mqdefault.jpg",
+ Large: thumbnailBase + "/hqdefault.jpg",
+ Original: thumbnailBase + "/maxresdefault.jpg",
+ },
+ WEBP: ImageFormat{
+ Small: thumbnailBase + "/default.webp",
+ Medium: thumbnailBase + "/mqdefault.webp",
+ Large: thumbnailBase + "/hqdefault.webp",
+ Original: thumbnailBase + "/maxresdefault.webp",
+ },
+ }
+}
+
+// parseAiredDateString converts a date as printed on a page into an AiredDate.
+//
+// Empty input, "?" and "Not available" yield the zero value. Otherwise String
+// always holds the trimmed input and the numeric fields are filled as far as
+// the text allows:
+//   - "Oct 4, 2020" sets Day, Month and Year;
+//   - "Oct 2020" sets Month and Year (Day stays 0);
+//   - "2020" sets Year only.
+//
+// Text in any other shape leaves Day, Month and Year at 0.
+func parseAiredDateString(dateString string) AiredDate {
+	trimmedDate := strings.TrimSpace(dateString)
+	switch trimmedDate {
+	case "", "?", "Not available":
+		return AiredDate{}
+	}
+
+	airedDate := AiredDate{String: trimmedDate}
+
+	if parsedTime, parseErr := time.Parse(airedDateLayout, trimmedDate); parseErr == nil {
+		airedDate.Day = parsedTime.Day()
+		airedDate.Month = int(parsedTime.Month())
+		airedDate.Year = parsedTime.Year()
+		return airedDate
+	}
+
+	// Partial dates: time.Parse fills the missing parts with defaults (day 1,
+	// January), so only the components that were really present are copied.
+	if parsedTime, parseErr := time.Parse("Jan 2006", trimmedDate); parseErr == nil {
+		airedDate.Month = int(parsedTime.Month())
+		airedDate.Year = parsedTime.Year()
+		return airedDate
+	}
+	if parsedTime, parseErr := time.Parse("2006", trimmedDate); parseErr == nil {
+		airedDate.Year = parsedTime.Year()
+	}
+	return airedDate
+}
+
+// parseIntFromText parses an integer out of display text such as "1,234" or
+// "#12": surrounding whitespace is trimmed, every comma is removed and one
+// leading "#" is stripped before the conversion. Text that is not a number
+// yields 0.
+func parseIntFromText(text string) int {
+	digits := strings.TrimSpace(text)
+	digits = strings.ReplaceAll(digits, ",", "")
+	digits = strings.TrimPrefix(digits, "#")
+	// The error is deliberately discarded: callers treat unparsable text as
+	// "no value" and use whatever strconv.Atoi returns alongside the error.
+	number, _ := strconv.Atoi(digits)
+	return number
+}
+
+// parseFloatFromText parses a float out of trimmed display text. Empty text
+// and "N/A" yield 0, as does text that is not a number.
+func parseFloatFromText(text string) float64 {
+	candidate := strings.TrimSpace(text)
+	switch candidate {
+	case "", "N/A":
+		return 0
+	}
+	// The error is deliberately discarded: callers treat unparsable text as
+	// "no value" and use whatever strconv.ParseFloat returns alongside it.
+	value, _ := strconv.ParseFloat(candidate, 64)
+	return value
+}
+
+// parseAnimeTitle collects the title variants of an anime page.
+//
+// Romaji comes from the og:title meta tag. English, Japanese and Synonyms come
+// from the sidebar rows labelled "English:", "Japanese:" and "Synonyms:".
+// Synonyms are split on ", ", so a synonym that itself contains ", " ends up
+// as several entries.
+func parseAnimeTitle(document *goquery.Document) Title {
+	var animeTitle Title
+	romajiTitle, _ := document.Find(`meta[property="og:title"]`).Attr("content")
+	animeTitle.Romaji = strings.TrimSpace(romajiTitle)
+
+	document.Find("span.dark_text").Each(func(index int, selection *goquery.Selection) {
+		// rowValue returns the row text without its label. It is evaluated
+		// lazily so the parent subtree is only cloned for the three labels
+		// handled below, not for every label in the sidebar.
+		rowValue := func() string {
+			parentClone := selection.Parent().Clone()
+			parentClone.Find("span.dark_text").Remove()
+			return strings.TrimSpace(parentClone.Text())
+		}
+
+		switch strings.TrimSpace(selection.Text()) {
+		case "English:":
+			animeTitle.English = rowValue()
+		case "Japanese:":
+			animeTitle.Japanese = rowValue()
+		case "Synonyms:":
+			if value := rowValue(); value != "" {
+				animeTitle.Synonyms = strings.Split(value, ", ")
+			}
+		}
+	})
+
+	return animeTitle
+}
+
+// parseAnimeImage builds the cover Image from the og:image meta tag. A zero
+// Image is returned when the tag or its content attribute is missing or empty.
+func parseAnimeImage(document *goquery.Document) Image {
+	if coverURL, ok := document.Find(`meta[property="og:image"]`).Attr("content"); ok && coverURL != "" {
+		return buildImageFromBaseURL(coverURL)
+	}
+	return Image{}
+}
+
+func parseAnimeStatistics(document *goquery.Document) Statistics {
+ return Statistics{
+ Score: parseFloatFromText(document.Find(`span[itemprop="ratingValue"]`).Text()),
+ ScoredBy: parseIntFromText(document.Find(`span[itemprop="ratingCount"]`).Text()),
+ Rank: parseIntFromText(extractSidebarValue(document, "Ranked:")),
+ Popularity: parseIntFromText(extractSidebarValue(document, "Popularity:")),
+ Members: parseIntFromText(extractSidebarValue(document, "Members:")),
+ Favorites: parseIntFromText(extractSidebarValue(document, "Favorites:")),
+ }
+}
+
+// parseAnimeSynopsis returns the trimmed text of the description paragraph
+// (p[itemprop="description"]). It returns "" when the paragraph is absent or
+// contains the "No synopsis information has been added" placeholder.
+func parseAnimeSynopsis(document *goquery.Document) string {
+	description := document.Find(`p[itemprop="description"]`)
+	if description.Length() > 0 {
+		text := strings.TrimSpace(description.Text())
+		if !strings.Contains(text, "No synopsis information has been added") {
+			return text
+		}
+	}
+	return ""
+}
+
+// parseAnimeBackground returns the text that follows an <h2> whose trimmed
+// text is "Background", up to the next <h2>. The trimmed text of each sibling
+// element is joined with single spaces; empty siblings and ones containing
+// "No background information" are left out.
+func parseAnimeBackground(document *goquery.Document) string {
+	var paragraphs []string
+
+	// keep records the text of one sibling unless it is empty or a placeholder.
+	keep := func(_ int, node *goquery.Selection) {
+		content := strings.TrimSpace(node.Text())
+		if content == "" || strings.Contains(content, "No background information") {
+			return
+		}
+		paragraphs = append(paragraphs, content)
+	}
+
+	document.Find("h2").Each(func(_ int, h2 *goquery.Selection) {
+		if strings.TrimSpace(h2.Text()) == "Background" {
+			h2.NextUntil("h2").Each(keep)
+		}
+	})
+	return strings.Join(paragraphs, " ")
+}
+
+// parseAnimeTrailer reads the trailer link found under div.video-promotion.
+//
+// Without such a link the zero Trailer is returned. When the href contains a
+// YouTube embed ID, the ID, a watch URL and the thumbnail set are filled in;
+// otherwise the href is used as both EmbedURL and preview URL.
+func parseAnimeTrailer(document *goquery.Document) Trailer {
+	link := document.Find("div.video-promotion a")
+	if link.Length() == 0 {
+		return Trailer{}
+	}
+
+	embedURL, _ := link.Attr("href")
+	trailer := Trailer{EmbedURL: embedURL}
+
+	groups := youtubeIDPattern.FindStringSubmatch(embedURL)
+	switch {
+	case len(groups) < 2:
+		trailer.Preview = Preview{URL: embedURL}
+	default:
+		id := groups[1]
+		trailer.YoutubeID = id
+		trailer.Preview = Preview{
+			URL:       fmt.Sprintf("https://www.youtube.com/watch?v=%s", id),
+			Thumbnail: buildYouTubeThumbnail(id),
+		}
+	}
+	return trailer
+}
+
+func parseAnimePremiered(document *goquery.Document) Premiered {
+ text := extractSidebarValue(document, "Premiered:")
+ if text == "" || text == "?" {
+ return Premiered{}
+ }
+ parts := strings.SplitN(text, " ", 2)
+ if len(parts) != 2 {
+ return Premiered{}
+ }
+ year, _ := strconv.Atoi(parts[1])
+ return Premiered{Season: Season(parts[0]), Year: year}
+}
+
+// parseAnimeAired parses the "Aired:" sidebar value. The text is split once on
+// " to "; the first part becomes From and the second, when present, becomes
+// To. String keeps the full text. An empty or "Not available" row yields the
+// zero value.
+func parseAnimeAired(document *goquery.Document) Aired {
+	raw := extractSidebarValue(document, "Aired:")
+	switch raw {
+	case "", "Not available":
+		return Aired{}
+	}
+
+	// SplitN always yields at least one element here, so index 0 is safe.
+	segments := strings.SplitN(raw, " to ", 2)
+	result := Aired{
+		String: raw,
+		From:   parseAiredDateString(segments[0]),
+	}
+	if len(segments) == 2 {
+		result.To = parseAiredDateString(segments[1])
+	}
+	return result
+}
+
+// parseAnimeBroadcast parses the "Broadcast:" sidebar value. String always
+// holds the raw text; Day, Time and Timezone are set from the three capture
+// groups of broadcastTimePattern when it matches. An empty row yields the
+// zero value.
+func parseAnimeBroadcast(document *goquery.Document) Broadcast {
+	raw := extractSidebarValue(document, "Broadcast:")
+	if raw == "" {
+		return Broadcast{}
+	}
+
+	broadcast := Broadcast{String: raw}
+	if groups := broadcastTimePattern.FindStringSubmatch(raw); len(groups) == 4 {
+		broadcast.Day, broadcast.Time, broadcast.Timezone = groups[1], groups[2], groups[3]
+	}
+	return broadcast
+}
+
+// parseAnimeThemeSongs parses the theme-song table rows inside the div with
+// the given CSS class. Rows whose td.theme-song text is empty or is one of the
+// "No opening themes" / "No ending themes" placeholders are skipped. Each
+// remaining row is parsed with parseThemeSongText, and every link in
+// td.theme-song-artist that has an href and a name (link text, or the title
+// attribute as a fallback) is attached to the song.
+func parseAnimeThemeSongs(document *goquery.Document, containerClass string) []ThemeSong {
+	var songs []ThemeSong
+	rowSelector := fmt.Sprintf("div.%s table tr", containerClass)
+
+	document.Find(rowSelector).Each(func(_ int, row *goquery.Selection) {
+		raw := strings.TrimSpace(row.Find("td.theme-song").Text())
+		switch {
+		case raw == "",
+			strings.Contains(raw, "No opening themes"),
+			strings.Contains(raw, "No ending themes"):
+			return
+		}
+
+		song := parseThemeSongText(raw)
+
+		row.Find("td.theme-song-artist a").Each(func(_ int, anchor *goquery.Selection) {
+			target, ok := anchor.Attr("href")
+			if !ok || target == "" {
+				return
+			}
+			name := strings.TrimSpace(anchor.Text())
+			if name == "" {
+				name, _ = anchor.Attr("title")
+			}
+			if name == "" {
+				return
+			}
+			song.Links = append(song.Links, ExternalLink{Name: name, URL: target})
+		})
+
+		songs = append(songs, song)
+	})
+	return songs
+}
+
+// parseThemeSongText parses one theme-song line, for example
+//
+//	1: "Title (日本語)" by Artist (eps 1-12)
+//
+// A leading index is stripped first. The result carries:
+//   - Episodes: the "(eps N)" or "(eps N-M)" range; a single episode sets
+//     Start and End to the same number;
+//   - Title: the first double-quoted span, with a parenthesised non-ASCII part
+//     moved into Japanese and the remainder kept as Romaji;
+//   - Artist: what follows "by", without any trailing "(eps ..." suffix.
+//
+// Parts that are not present stay at their zero value.
+func parseThemeSongText(rawText string) ThemeSong {
+	text := leadingIndexPattern.ReplaceAllString(strings.TrimSpace(rawText), "")
+	var themeSong ThemeSong
+
+	episodeMatches := themeSongEpisodesPattern.FindStringSubmatch(text)
+	if len(episodeMatches) > 1 {
+		themeSong.Episodes.Start, _ = strconv.Atoi(episodeMatches[1])
+		if len(episodeMatches) > 2 && episodeMatches[2] != "" {
+			themeSong.Episodes.End, _ = strconv.Atoi(episodeMatches[2])
+		} else {
+			themeSong.Episodes.End = themeSong.Episodes.Start
+		}
+	}
+
+	// The artist is searched for only after the quoted title. Scanning the
+	// whole text would treat a "by " inside the title ("Stand by Me",
+	// "Baby Baby") as the artist marker.
+	artistSource := text
+
+	// titleBounds is [matchStart, matchEnd, groupStart, groupEnd] or nil.
+	titleBounds := themeSongTitlePattern.FindStringSubmatchIndex(text)
+	if titleBounds != nil {
+		fullTitle := text[titleBounds[2]:titleBounds[3]]
+		artistSource = text[titleBounds[1]:]
+
+		japaneseMatches := japaneseTextInParensPattern.FindStringSubmatch(fullTitle)
+		if len(japaneseMatches) > 1 {
+			themeSong.Title.Japanese = japaneseMatches[1]
+			themeSong.Title.Romaji = strings.TrimSpace(japaneseTextInParensPattern.ReplaceAllString(fullTitle, ""))
+		} else {
+			themeSong.Title.Romaji = fullTitle
+		}
+	}
+
+	artistMatches := themeSongArtistPattern.FindStringSubmatch(artistSource)
+	if len(artistMatches) > 1 {
+		themeSong.Artist = strings.TrimSpace(trailingEpisodeInfoPattern.ReplaceAllString(artistMatches[1], ""))
+	}
+
+	return themeSong
+}
+
+// parseAnimeExternalLinks returns the links matched by
+// "div.external_links a.link". Links without an href or without visible text
+// are skipped.
+func parseAnimeExternalLinks(document *goquery.Document) []ExternalLink {
+	var links []ExternalLink
+	anchors := document.Find("div.external_links a.link")
+	anchors.Each(func(_ int, anchor *goquery.Selection) {
+		target, ok := anchor.Attr("href")
+		name := strings.TrimSpace(anchor.Text())
+		if !ok || target == "" || name == "" {
+			return
+		}
+		links = append(links, ExternalLink{Name: name, URL: target})
+	})
+	return links
+}
+
+// parseAnimeStreamingLinks returns the links found between an <h2> titled
+// "Available At" or "Streaming Platforms" and the next <h2>. A link needs an
+// href and a name; the name is the link text, or the title attribute when the
+// text is empty.
+func parseAnimeStreamingLinks(document *goquery.Document) []ExternalLink {
+	var links []ExternalLink
+
+	// addLink records one anchor when it has both a target and a name.
+	addLink := func(_ int, anchor *goquery.Selection) {
+		target, ok := anchor.Attr("href")
+		if !ok || target == "" {
+			return
+		}
+		name := strings.TrimSpace(anchor.Text())
+		if name == "" {
+			name, _ = anchor.Attr("title")
+		}
+		if name == "" {
+			return
+		}
+		links = append(links, ExternalLink{Name: name, URL: target})
+	}
+
+	document.Find("h2").Each(func(_ int, h2 *goquery.Selection) {
+		switch strings.TrimSpace(h2.Text()) {
+		case "Available At", "Streaming Platforms":
+			h2.NextUntil("h2").Find("a").Each(addLink)
+		}
+	})
+	return links
+}
+
+// parsePromotionalVideos parses the entries matched by
+// "div.promotional-video section > div". An entry is kept only when it holds
+// a link whose href contains a YouTube embed ID. The title is the text of the
+// first <span> inside the link.
+func parsePromotionalVideos(document *goquery.Document) []PromotionalVideo {
+	var result []PromotionalVideo
+	entries := document.Find("div.promotional-video section > div")
+	entries.Each(func(_ int, entry *goquery.Selection) {
+		anchor := entry.Find("a")
+		if anchor.Length() == 0 {
+			return
+		}
+
+		href, _ := anchor.Attr("href")
+		groups := youtubeIDPattern.FindStringSubmatch(href)
+		if len(groups) < 2 {
+			return
+		}
+		id := groups[1]
+
+		var video PromotionalVideo
+		video.Title = Title{Romaji: strings.TrimSpace(anchor.Find("span").First().Text())}
+		video.Preview = Preview{
+			URL:       fmt.Sprintf("https://www.youtube.com/watch?v=%s", id),
+			Thumbnail: buildYouTubeThumbnail(id),
+		}
+		result = append(result, video)
+	})
+	return result
+}
+
+// parseMusicVideos parses the entries matched by
+// "div.music-video section > div". An entry is kept only when it holds a link
+// whose href contains a YouTube embed ID. The title is the text of the first
+// <span> inside the link. The artist is the part before the first " - " in
+// the text of the last nested "div div" element, and stays empty when that
+// separator is missing.
+func parseMusicVideos(document *goquery.Document) []MusicVideo {
+	var result []MusicVideo
+	entries := document.Find("div.music-video section > div")
+	entries.Each(func(_ int, entry *goquery.Selection) {
+		anchor := entry.Find("a")
+		if anchor.Length() == 0 {
+			return
+		}
+
+		href, _ := anchor.Attr("href")
+		groups := youtubeIDPattern.FindStringSubmatch(href)
+		if len(groups) < 2 {
+			return
+		}
+		id := groups[1]
+
+		var video MusicVideo
+		video.Title = Title{Romaji: strings.TrimSpace(anchor.Find("span").First().Text())}
+		video.Preview = Preview{
+			URL:       fmt.Sprintf("https://www.youtube.com/watch?v=%s", id),
+			Thumbnail: buildYouTubeThumbnail(id),
+		}
+
+		metadata := strings.TrimSpace(entry.Find("div div").Last().Text())
+		if pieces := strings.SplitN(metadata, " - ", 2); len(pieces) == 2 {
+			video.Artist = strings.TrimSpace(pieces[0])
+		}
+
+		result = append(result, video)
+	})
+	return result
+}
+
+func parseAnimeDocument(document *goquery.Document, malID int) Anime {
+ pageURL, _ := document.Find(`meta[property="og:url"]`).Attr("content")
+ statusText := extractSidebarValue(document, "Status:")
+ ratingText := extractSidebarValue(document, "Rating:")
+ if ratingText == "None" {
+ ratingText = ""
+ }
+
+ return Anime{
+ MALID: malID,
+ URL: pageURL,
+ Image: parseAnimeImage(document),
+ Title: parseAnimeTitle(document),
+ Type: Type(extractSidebarValue(document, "Type:")),
+ Source: Source(extractSidebarValue(document, "Source:")),
+ Status: Status(statusText),
+ Airing: statusText == string(StatusAiring),
+ Rating: Rating(ratingText),
+ Synopsis: parseAnimeSynopsis(document),
+ Background: parseAnimeBackground(document),
+ Duration: extractSidebarValue(document, "Duration:"),
+ EpisodeCount: parseIntFromText(extractSidebarValue(document, "Episodes:")),
+ Premiered: parseAnimePremiered(document),
+ Aired: parseAnimeAired(document),
+ Broadcast: parseAnimeBroadcast(document),
+ Statistics: parseAnimeStatistics(document),
+ Trailer: parseAnimeTrailer(document),
+
+ Openings: parseAnimeThemeSongs(document, "opnening"),
+ Endings: parseAnimeThemeSongs(document, "ending"),
+
+ Genres: extractSidebarMALIDsMultiLabel(document, []string{"Genres:", "Genre:"}, genreIDPattern),
+ ExplicitGenres: extractSidebarMALIDs(document, "Explicit Genres:", genreIDPattern),
+ Themes: extractSidebarMALIDsMultiLabel(document, []string{"Themes:", "Theme:"}, genreIDPattern),
+ Demographics: extractSidebarMALIDsMultiLabel(document, []string{"Demographics:", "Demographic:"}, genreIDPattern),
+ Producers: extractSidebarMALIDs(document, "Producers:", producerIDPattern),
+ Studios: extractSidebarMALIDs(document, "Studios:", producerIDPattern),
+ Licensors: extractSidebarMALIDs(document, "Licensors:", producerIDPattern),
+
+ External: parseAnimeExternalLinks(document),
+ Streaming: parseAnimeStreamingLinks(document),
+ }
+}
+
+// GetAnimeByMALID fetches and parses the anime page for malID, then tries to
+// enrich the result with the promotional and music videos from the videos
+// page.
+//
+// A failure to fetch the main page is logged and returned as an error. A
+// failure to fetch the videos page is only logged as a warning; the anime is
+// still returned, without videos.
+func GetAnimeByMALID(malID int) (*Anime, error) {
+	mainDocument, err := makeRequest(fmt.Sprintf("%s/anime/%d", malBaseURL, malID))
+	if err != nil {
+		logger.Errorf("MALClient", "Failed to fetch anime page for MAL ID %d: %v", malID, err)
+		return nil, fmt.Errorf("failed to fetch anime page for MAL ID %d: %w", malID, err)
+	}
+
+	result := parseAnimeDocument(mainDocument, malID)
+
+	videosURL := fmt.Sprintf("%s/anime/%d/_/video", malBaseURL, malID)
+	if videosDocument, videosErr := makeRequest(videosURL); videosErr != nil {
+		logger.Warnf("MALClient", "Failed to fetch videos page for MAL ID %d: %v", malID, videosErr)
+	} else {
+		result.Videos = parsePromotionalVideos(videosDocument)
+		result.MusicVideos = parseMusicVideos(videosDocument)
+	}
+
+	return &result, nil
+} \ No newline at end of file
diff --git a/utils/mal/client.go b/utils/mal/client.go
new file mode 100644
index 0000000..9bc5d98
--- /dev/null
+++ b/utils/mal/client.go
@@ -0,0 +1,110 @@
+package mal
+
+import (
+ "fmt"
+ "math"
+ "metachan/utils/cfbypass"
+ "metachan/utils/logger"
+ "metachan/utils/ratelimit"
+ "net/http"
+ "time"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+// Settings for talking to MyAnimeList.
+const (
+ // malBaseURL is the prefix of every page URL built in this package.
+ malBaseURL = "https://myanimelist.net"
+ // rateLimitPerSec is passed to ratelimit.NewRateLimiter together with
+ // time.Second; presumably the number of requests allowed per second.
+ rateLimitPerSec = 4
+ // requestTimeout is handed to cfbypass.NewCloudflareClient.
+ requestTimeout = 30 * time.Second
+ // requestJitter is fed to cfbypass.AddJitter to obtain the pause taken
+ // before each request.
+ requestJitter = 250 * time.Millisecond
+ // maxRetries is the total number of attempts makeRequest makes per URL.
+ maxRetries = 3
+ // backoffBase is the delay that getBackoffDuration doubles per attempt.
+ backoffBase = 2 * time.Second
+)
+
+// Package-wide state shared by every request made through makeRequest.
+var (
+ // rateLimiter is waited on before each request.
+ rateLimiter = ratelimit.NewRateLimiter(rateLimitPerSec, time.Second)
+ // cloudflareClient supplies the HTTP client, browser headers and
+ // User-Agent used for each request.
+ cloudflareClient = cfbypass.NewCloudflareClient(requestTimeout)
+)
+
+// StopRateLimiters stops the package-level rate limiter by calling its Stop
+// method.
+// NOTE(review): presumably meant for application shutdown; whether requests
+// may still be issued afterwards depends on ratelimit's Stop semantics.
+func StopRateLimiters() {
+ rateLimiter.Stop()
+}
+
+func makeRequest(targetURL string) (*goquery.Document, error) {
+ var retries int
+
+ for retries < maxRetries {
+ rateLimiter.Wait()
+ time.Sleep(cfbypass.AddJitter(requestJitter))
+
+ request, err := http.NewRequest("GET", targetURL, nil)
+ if err != nil {
+ return nil, fmt.Errorf("failed to create request for %s: %w", targetURL, err)
+ }
+
+ for headerName, headerValue := range cloudflareClient.BrowserProfile.Headers {
+ if headerName == "Accept-Encoding" {
+ continue
+ }
+ request.Header.Set(headerName, headerValue)
+ }
+ request.Header.Set("User-Agent", cloudflareClient.BrowserProfile.UserAgent)
+
+ response, err := cloudflareClient.HttpClient.Do(request)
+ if err != nil {
+ retries++
+ if retries >= maxRetries {
+ return nil, fmt.Errorf("all retries exhausted for %s: %w", targetURL, err)
+ }
+ logger.Debugf("MALClient", "Request failed for %s (attempt %d/%d)", targetURL, retries, maxRetries)
+ time.Sleep(getBackoffDuration(retries))
+ continue
+ }
+
+ if response.StatusCode == http.StatusOK {
+ document, parseErr := goquery.NewDocumentFromReader(response.Body)
+ response.Body.Close()
+ if parseErr != nil {
+ return nil, fmt.Errorf("failed to parse HTML from %s: %w", targetURL, parseErr)
+ }
+
+ pageTitle := document.Find("title").Text()
+ logger.Debugf("MALClient", "Page title for %s: %q", targetURL, pageTitle)
+
+ htmlContent, _ := document.Html()
+ if len(htmlContent) > 500 {
+ htmlContent = htmlContent[:500]
+ }
+ logger.Debugf("MALClient", "HTML preview for %s: %s", targetURL, htmlContent)
+
+ return document, nil
+ }
+
+ response.Body.Close()
+
+ if response.StatusCode == http.StatusNotFound {
+ return nil, fmt.Errorf("resource not found: %s", targetURL)
+ }
+
+ if response.StatusCode >= 400 && response.StatusCode < 500 &&
+ response.StatusCode != http.StatusTooManyRequests &&
+ response.StatusCode != http.StatusForbidden {
+ return nil, fmt.Errorf("client error %d for %s", response.StatusCode, targetURL)
+ }
+
+ retries++
+ if retries >= maxRetries {
+ return nil, fmt.Errorf("all retries exhausted for %s (status %d)", targetURL, response.StatusCode)
+ }
+
+ logger.Warnf("MALClient", "Status %d for %s (attempt %d/%d)", response.StatusCode, targetURL, retries, maxRetries)
+ time.Sleep(getBackoffDuration(retries))
+ }
+
+ return nil, fmt.Errorf("all retries exhausted for %s", targetURL)
+}
+
+// getBackoffDuration returns the pause to take after the given failed attempt
+// (1-based): backoffBase doubled for every attempt after the first, passed
+// through cfbypass.AddJitter.
+func getBackoffDuration(attempt int) time.Duration {
+	multiplier := math.Pow(2, float64(attempt-1))
+	delay := time.Duration(float64(backoffBase) * multiplier)
+	return cfbypass.AddJitter(delay)
+} \ No newline at end of file
diff --git a/utils/mal/enums.go b/utils/mal/enums.go
new file mode 100644
index 0000000..76d6f3d
--- /dev/null
+++ b/utils/mal/enums.go
@@ -0,0 +1,62 @@
+package mal
+
+// Type is the media type of an anime, stored as the text of the "Type:"
+// sidebar row.
+type Type string
+
+// Known Type values.
+const (
+ TypeTV Type = "TV"
+ TypeMovie Type = "Movie"
+ TypeOVA Type = "OVA"
+ TypeONA Type = "ONA"
+ TypeSpecial Type = "Special"
+ TypeMusic Type = "Music"
+ TypeUnknown Type = "Unknown"
+)
+
+// Status is the airing status of an anime, stored as the text of the
+// "Status:" sidebar row.
+type Status string
+
+// Known Status values. StatusAiring is the value that parseAnimeDocument
+// compares against to set Anime.Airing.
+const (
+ StatusAiring Status = "Currently Airing"
+ StatusFinished Status = "Finished Airing"
+ StatusNotYetAired Status = "Not yet aired"
+)
+
+// Source is the original material of an anime, stored as the text of the
+// "Source:" sidebar row.
+type Source string
+
+// Known Source values.
+const (
+ SourceOriginal Source = "Original"
+ SourceManga Source = "Manga"
+ SourceLightNovel Source = "Light novel"
+ SourceVisualNovel Source = "Visual novel"
+ SourceGame Source = "Game"
+ SourceNovel Source = "Novel"
+ SourceWebManga Source = "Web manga"
+ SourceWebNovel Source = "Web novel"
+ SourceCardGame Source = "Card game"
+ SourceFourKomaManga Source = "4-koma manga"
+ SourceBook Source = "Book"
+ SourcePictureBook Source = "Picture book"
+ SourceRadio Source = "Radio"
+ SourceMusic Source = "Music"
+ SourceOther Source = "Other"
+ SourceUnknown Source = "Unknown"
+)
+
+// Rating is the age rating of an anime, stored as the text of the "Rating:"
+// sidebar row (a value of "None" is stored as the empty string).
+type Rating string
+
+// Known Rating values. Mind the names: RatingR17 is the "R - 17+" rating,
+// whereas RatingR is the stricter "R+" rating.
+const (
+ RatingG Rating = "G - All Ages"
+ RatingPG Rating = "PG - Children"
+ RatingPG13 Rating = "PG-13 - Teens 13 or older"
+ RatingR17 Rating = "R - 17+ (violence & profanity)"
+ RatingR Rating = "R+ - Mild Nudity"
+ RatingRx Rating = "Rx - Hentai"
+)
+
+// Season is the season in which an anime premiered, stored as the first word
+// of the "Premiered:" sidebar row.
+type Season string
+
+// Known Season values.
+const (
+ SeasonWinter Season = "Winter"
+ SeasonSpring Season = "Spring"
+ SeasonSummer Season = "Summer"
+ SeasonFall Season = "Fall"
+) \ No newline at end of file
diff --git a/utils/mal/episodes.go b/utils/mal/episodes.go
new file mode 100644
index 0000000..ff61686
--- /dev/null
+++ b/utils/mal/episodes.go
@@ -0,0 +1,82 @@
+package mal
+
+import (
+ "fmt"
+ "metachan/utils/logger"
+ "strconv"
+ "strings"
+
+ "github.com/PuerkitoBio/goquery"
+)
+
+func parseEpisodeRow(row *goquery.Selection) Episode {
+ numberText := strings.TrimSpace(row.Find("td.episode-number").Text())
+ episodeNumber, _ := strconv.Atoi(numberText)
+
+ titleCell := row.Find("td.episode-title")
+ titleLink := titleCell.Find("a")
+ episodeURL, _ := titleLink.Attr("href")
+
+ englishTitle := strings.TrimSpace(titleLink.Text())
+ japaneseTitle := strings.TrimSpace(titleCell.Find("span.di-ib").Text())
+
+ airedText := strings.TrimSpace(row.Find("td.episode-aired").Text())
+
+ forumLink := row.Find("td.episode-forum a")
+ forumURL, _ := forumLink.Attr("href")
+
+ fillerTag := row.Find("span.filler")
+ recapTag := row.Find("span.recap")
+
+ return Episode{
+ Number: episodeNumber,
+ URL: episodeURL,
+ Title: Title{
+ English: englishTitle,
+ Japanese: japaneseTitle,
+ },
+ Aired: parseAiredDateString(airedText),
+ ForumURL: forumURL,
+ Filler: fillerTag.Length() > 0,
+ Recap: recapTag.Length() > 0,
+ }
+}
+
+// GetAnimeEpisodesByMALID fetches the episode list of an anime page by page,
+// advancing the offset by 100 each time.
+//
+// Paging stops when a page has no episode rows or no "next" link. Rows whose
+// episode number is not positive are dropped. If the very first fetch fails
+// the error is logged and returned. If a later fetch fails, a warning is
+// logged and the episodes collected so far are returned without an error.
+func GetAnimeEpisodesByMALID(malID int) ([]Episode, error) {
+	var episodes []Episode
+
+	for offset := 0; ; offset += 100 {
+		document, err := makeRequest(fmt.Sprintf("%s/anime/%d/_/episode?offset=%d", malBaseURL, malID, offset))
+		if err != nil {
+			if len(episodes) == 0 {
+				logger.Errorf("MALClient", "Failed to fetch episodes for MAL ID %d: %v", malID, err)
+				return nil, fmt.Errorf("failed to fetch episodes for MAL ID %d: %w", malID, err)
+			}
+			// Keep what has been collected rather than discarding it.
+			logger.Warnf("MALClient", "Failed to fetch episodes page at offset %d for MAL ID %d: %v", offset, malID, err)
+			break
+		}
+
+		rows := document.Find("table.episode_list tbody tr")
+		if rows.Length() == 0 {
+			break
+		}
+
+		rows.Each(func(_ int, row *goquery.Selection) {
+			if parsed := parseEpisodeRow(row); parsed.Number > 0 {
+				episodes = append(episodes, parsed)
+			}
+		})
+
+		if document.Find("a.link-blue-box.next").Length() == 0 {
+			break
+		}
+	}
+
+	return episodes, nil
+} \ No newline at end of file
diff --git a/utils/mal/types.go b/utils/mal/types.go
new file mode 100644
index 0000000..9226072
--- /dev/null
+++ b/utils/mal/types.go
@@ -0,0 +1,144 @@
+package mal
+
+// ImageFormat holds the URLs of one image in several sizes for a single file
+// format.
+type ImageFormat struct {
+ Small string
+ Medium string
+ Large string
+ Original string
+}
+
+// Image groups the JPG and WEBP variants of one image.
+type Image struct {
+ JPG ImageFormat
+ WEBP ImageFormat
+}
+
+// Title holds the names under which an anime, episode, song or video is
+// known. Fields that do not apply are left empty.
+type Title struct {
+ English string
+ Japanese string
+ Romaji string
+ Synonyms []string
+}
+
+// AiredDate is a calendar date together with the text it was parsed from.
+// Day, Month and Year are 0 when the text could not be parsed.
+type AiredDate struct {
+ Day int
+ Month int
+ Year int
+ // String is the trimmed original text.
+ String string
+}
+
+// Premiered is the season and year in which an anime premiered.
+type Premiered struct {
+ Season Season
+ Year int
+}
+
+// Aired is the airing period of an anime.
+type Aired struct {
+ From AiredDate
+ // To is the zero value when the source text has no " to " part.
+ To AiredDate
+ // String is the full original text of the period.
+ String string
+}
+
+// Broadcast describes the regular broadcast slot of an anime. Day, Time and
+// Timezone are only set when the source text matched the expected
+// "<day> at <hh:mm> (<zone>)" shape.
+type Broadcast struct {
+ Day string
+ Time string
+ Timezone string
+ // String is the original text of the broadcast row.
+ String string
+}
+
+// Statistics holds the score and the community figures of an anime.
+type Statistics struct {
+ Score float64
+ ScoredBy int
+ Rank int
+ Popularity int
+ Members int
+ Favorites int
+}
+
+// Preview is a video URL with its thumbnail set.
+type Preview struct {
+ URL string
+ Thumbnail Image
+}
+
+// Trailer is the trailer of an anime. YoutubeID is empty when no YouTube ID
+// could be extracted from the embed URL.
+type Trailer struct {
+ YoutubeID string
+ EmbedURL string
+ Preview
+}
+
+// EpisodeRange is an inclusive range of episode numbers. For a single episode
+// Start and End are equal.
+type EpisodeRange struct {
+ Start int
+ End int
+}
+
+// ExternalLink is a named link to another site.
+type ExternalLink struct {
+ Name string
+ URL string
+}
+
+// ThemeSong is an opening or ending theme.
+type ThemeSong struct {
+ Title Title
+ Artist string
+ // Episodes is the range during which the song was used; it stays zero
+ // when the source text names no episodes.
+ Episodes EpisodeRange
+ Links []ExternalLink
+}
+
+// PromotionalVideo is a promotional video listed on the videos page.
+type PromotionalVideo struct {
+ Title Title
+ Preview
+}
+
+// MusicVideo is a music video listed on the videos page.
+type MusicVideo struct {
+ Title Title
+ Artist string
+ Preview
+}
+
+// Episode is one entry of an anime's episode list.
+// Score, Synopsis and Preview are not filled in by parseEpisodeRow.
+type Episode struct {
+ Number int
+ URL string
+ Title Title
+ Aired AiredDate
+ Score float64
+ Filler bool
+ Recap bool
+ ForumURL string
+ Synopsis string
+ Preview Preview
+}
+
+// Anime is the full data model of one anime as scraped from its pages.
+type Anime struct {
+ MALID int
+ URL string
+ Image Image
+ Title Title
+ Type Type
+ Source Source
+ Status Status
+ // Airing is true when Status equals StatusAiring.
+ Airing bool
+ Rating Rating
+ Synopsis string
+ Background string
+ // Duration is kept as the raw sidebar text.
+ Duration string
+ EpisodeCount int
+ Premiered Premiered
+ Aired Aired
+ Broadcast Broadcast
+ Statistics Statistics
+ Trailer Trailer
+
+ Openings []ThemeSong
+ Endings []ThemeSong
+ // Videos and MusicVideos come from the separate videos page and stay nil
+ // when that page could not be fetched.
+ Videos []PromotionalVideo
+ MusicVideos []MusicVideo
+ // Episodes is not filled in by GetAnimeByMALID.
+ Episodes []Episode
+
+ // The slices below hold numeric IDs taken from genre and producer links.
+ Genres []int
+ ExplicitGenres []int
+ Themes []int
+ Demographics []int
+ Producers []int
+ Studios []int
+ Licensors []int
+
+ External []ExternalLink
+ Streaming []ExternalLink
+} \ No newline at end of file