diff options
| author | Bobby <[email protected]> | 2026-02-26 14:28:21 +0530 |
|---|---|---|
| committer | Bobby <[email protected]> | 2026-02-26 14:28:21 +0530 |
| commit | 627c2c239e0a44b6363a9f02235a73f5e2c81d2e (patch) | |
| tree | 6c7b3cad8a5fb42c5649905a20800edf8e63f666 /utils/mal/client.go | |
| parent | aa4cf5ff588c9082282ee57074199dc7d2a37e09 (diff) | |
| download | metachan-627c2c239e0a44b6363a9f02235a73f5e2c81d2e.tar.xz metachan-627c2c239e0a44b6363a9f02235a73f5e2c81d2e.zip | |
Add MAL client and anime parsing functionality
- Implemented a new CloudflareClient to handle requests with randomized browser profiles.
- Created structures and functions for parsing anime data from MyAnimeList (MAL), including anime details, episodes, and theme songs.
- Added enums for anime types, statuses, sources, and ratings.
- Developed utility functions for making HTTP requests with rate limiting and error handling.
- Introduced image handling for anime covers and thumbnails.
- Established a comprehensive data model for anime, including fields for statistics, trailers, and external links.
Diffstat (limited to 'utils/mal/client.go')
| -rw-r--r-- | utils/mal/client.go | 110 |
1 file changed, 110 insertions, 0 deletions
diff --git a/utils/mal/client.go b/utils/mal/client.go new file mode 100644 index 0000000..9bc5d98 --- /dev/null +++ b/utils/mal/client.go @@ -0,0 +1,110 @@ +package mal + +import ( + "fmt" + "math" + "metachan/utils/cfbypass" + "metachan/utils/logger" + "metachan/utils/ratelimit" + "net/http" + "time" + + "github.com/PuerkitoBio/goquery" +) + +const ( + malBaseURL = "https://myanimelist.net" + rateLimitPerSec = 4 + requestTimeout = 30 * time.Second + requestJitter = 250 * time.Millisecond + maxRetries = 3 + backoffBase = 2 * time.Second +) + +var ( + rateLimiter = ratelimit.NewRateLimiter(rateLimitPerSec, time.Second) + cloudflareClient = cfbypass.NewCloudflareClient(requestTimeout) +) + +func StopRateLimiters() { + rateLimiter.Stop() +} + +func makeRequest(targetURL string) (*goquery.Document, error) { + var retries int + + for retries < maxRetries { + rateLimiter.Wait() + time.Sleep(cfbypass.AddJitter(requestJitter)) + + request, err := http.NewRequest("GET", targetURL, nil) + if err != nil { + return nil, fmt.Errorf("failed to create request for %s: %w", targetURL, err) + } + + for headerName, headerValue := range cloudflareClient.BrowserProfile.Headers { + if headerName == "Accept-Encoding" { + continue + } + request.Header.Set(headerName, headerValue) + } + request.Header.Set("User-Agent", cloudflareClient.BrowserProfile.UserAgent) + + response, err := cloudflareClient.HttpClient.Do(request) + if err != nil { + retries++ + if retries >= maxRetries { + return nil, fmt.Errorf("all retries exhausted for %s: %w", targetURL, err) + } + logger.Debugf("MALClient", "Request failed for %s (attempt %d/%d)", targetURL, retries, maxRetries) + time.Sleep(getBackoffDuration(retries)) + continue + } + + if response.StatusCode == http.StatusOK { + document, parseErr := goquery.NewDocumentFromReader(response.Body) + response.Body.Close() + if parseErr != nil { + return nil, fmt.Errorf("failed to parse HTML from %s: %w", targetURL, parseErr) + } + + pageTitle := 
document.Find("title").Text() + logger.Debugf("MALClient", "Page title for %s: %q", targetURL, pageTitle) + + htmlContent, _ := document.Html() + if len(htmlContent) > 500 { + htmlContent = htmlContent[:500] + } + logger.Debugf("MALClient", "HTML preview for %s: %s", targetURL, htmlContent) + + return document, nil + } + + response.Body.Close() + + if response.StatusCode == http.StatusNotFound { + return nil, fmt.Errorf("resource not found: %s", targetURL) + } + + if response.StatusCode >= 400 && response.StatusCode < 500 && + response.StatusCode != http.StatusTooManyRequests && + response.StatusCode != http.StatusForbidden { + return nil, fmt.Errorf("client error %d for %s", response.StatusCode, targetURL) + } + + retries++ + if retries >= maxRetries { + return nil, fmt.Errorf("all retries exhausted for %s (status %d)", targetURL, response.StatusCode) + } + + logger.Warnf("MALClient", "Status %d for %s (attempt %d/%d)", response.StatusCode, targetURL, retries, maxRetries) + time.Sleep(getBackoffDuration(retries)) + } + + return nil, fmt.Errorf("all retries exhausted for %s", targetURL) +} + +func getBackoffDuration(attempt int) time.Duration { + exponentialDelay := time.Duration(float64(backoffBase) * math.Pow(2, float64(attempt-1))) + return cfbypass.AddJitter(exponentialDelay) +}
\ No newline at end of file |
