aboutsummaryrefslogtreecommitdiff
path: root/utils/mal/client.go
blob: 1cbb26ab43bfe4010d7738d7c8ffd191db20c541 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package mal

import (
	"fmt"
	"math"
	"metachan/utils/cfbypass"
	"metachan/utils/logger"
	"metachan/utils/ratelimit"
	"net/http"
	"time"

	"github.com/PuerkitoBio/goquery"
)

// Tunables for requests made by this package.
const (
	// malBaseURL is the scheme and host of the MyAnimeList website.
	malBaseURL = "https://myanimelist.net"

	// rateLimitPerSec is the count handed to the rate limiter together with a
	// one-second interval.
	rateLimitPerSec = 4

	// requestTimeout is the timeout the Cloudflare client is constructed with.
	requestTimeout = 30 * time.Second

	// requestJitter is the duration passed to cfbypass.AddJitter to produce
	// the sleep that precedes every request attempt.
	requestJitter = 250 * time.Millisecond

	// maxRetries is the total number of attempts makeRequest performs before
	// giving up.
	maxRetries = 3

	// backoffBase is the delay used for the first retry; it doubles with each
	// later attempt before jitter is applied.
	backoffBase = 2 * time.Second
)

// Package-level state shared by every makeRequest call in this file.
//
// rateLimiter is built from rateLimitPerSec and a one-second interval;
// makeRequest calls its Wait method before each attempt and
// StopRateLimiters calls its Stop method.
//
// cloudflareClient is built with requestTimeout; makeRequest reads its
// BrowserProfile (Headers and UserAgent) to populate request headers and
// sends requests through its HttpClient.
//
// NOTE(review): the exact limiting and Cloudflare-handling behaviour lives in
// the ratelimit and cfbypass packages — confirm there before relying on it.
var (
	rateLimiter      = ratelimit.NewRateLimiter(rateLimitPerSec, time.Second)
	cloudflareClient = cfbypass.NewCloudflareClient(requestTimeout)
)

// StopRateLimiters stops the package-level rate limiter by calling its Stop
// method.
//
// NOTE(review): whether makeRequest can still be used after this call depends
// on how ratelimit implements Stop and Wait — confirm before calling it
// anywhere other than shutdown.
func StopRateLimiters() {
	rateLimiter.Stop()
}

// makeRequest fetches targetURL with a GET request through the shared
// Cloudflare client and parses the response body as an HTML document.
//
// Each attempt waits on the package rate limiter and sleeps for a jittered
// delay before the request is sent. At most maxRetries attempts are made:
//
//   - a transport error is retried after a backoff delay;
//   - status 200 parses the body and returns the document;
//   - status 404, and any other 4xx except 403 and 429, is returned as an
//     error immediately without retrying;
//   - every remaining status is retried after a backoff delay.
//
// It returns the parsed document, or an error if the request could not be
// built, the body could not be parsed, a non-retryable status was received,
// or all attempts were used up.
func makeRequest(targetURL string) (*goquery.Document, error) {
	for attempt := 1; attempt <= maxRetries; attempt++ {
		// Build the request before taking a rate-limit slot so that a
		// malformed URL fails immediately instead of after waiting.
		request, err := http.NewRequest(http.MethodGet, targetURL, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to create request for %s: %w", targetURL, err)
		}

		for headerName, headerValue := range cloudflareClient.BrowserProfile.Headers {
			// Header names are case-insensitive, so compare the canonical
			// form; otherwise a key such as "accept-encoding" would slip
			// through. Presumably the header is skipped so the HTTP client
			// negotiates and decodes compression itself — TODO confirm.
			if http.CanonicalHeaderKey(headerName) == "Accept-Encoding" {
				continue
			}
			request.Header.Set(headerName, headerValue)
		}
		// Set last so the profile's User-Agent wins over any entry in Headers.
		request.Header.Set("User-Agent", cloudflareClient.BrowserProfile.UserAgent)

		rateLimiter.Wait()
		time.Sleep(cfbypass.AddJitter(requestJitter))

		response, err := cloudflareClient.HttpClient.Do(request)
		if err != nil {
			if attempt >= maxRetries {
				return nil, fmt.Errorf("all retries exhausted for %s: %w", targetURL, err)
			}
			// Include the error so the cause of intermediate failures is not
			// lost when a later attempt succeeds or fails differently.
			logger.Debugf("MALClient", "Request failed for %s (attempt %d/%d): %v", targetURL, attempt, maxRetries, err)
			time.Sleep(getBackoffDuration(attempt))
			continue
		}

		status := response.StatusCode

		if status == http.StatusOK {
			document, parseErr := goquery.NewDocumentFromReader(response.Body)
			// Closed explicitly rather than deferred: a defer inside the loop
			// would only run when the function returns.
			response.Body.Close()
			if parseErr != nil {
				return nil, fmt.Errorf("failed to parse HTML from %s: %w", targetURL, parseErr)
			}
			return document, nil
		}

		// The body of a non-200 response is never read.
		response.Body.Close()

		switch {
		case status == http.StatusNotFound:
			return nil, fmt.Errorf("resource not found: %s", targetURL)
		case status >= 400 && status < 500 &&
			status != http.StatusTooManyRequests &&
			status != http.StatusForbidden:
			// Other client errors will not change on retry; 403 and 429 are
			// excluded here and fall through to the retry path below.
			return nil, fmt.Errorf("client error %d for %s", status, targetURL)
		}

		if attempt >= maxRetries {
			return nil, fmt.Errorf("all retries exhausted for %s (status %d)", targetURL, status)
		}

		logger.Warnf("MALClient", "Status %d for %s (attempt %d/%d)", status, targetURL, attempt, maxRetries)
		time.Sleep(getBackoffDuration(attempt))
	}

	// Reached only when maxRetries is not positive and the loop never runs.
	return nil, fmt.Errorf("all retries exhausted for %s", targetURL)
}

// getBackoffDuration returns the delay to sleep before the next retry.
//
// The delay is backoffBase multiplied by 2^(attempt-1), so it doubles with
// every attempt; makeRequest passes a 1-based attempt number, making the
// first retry wait roughly backoffBase. The result is passed through
// cfbypass.AddJitter before being returned.
func getBackoffDuration(attempt int) time.Duration {
	multiplier := math.Pow(2, float64(attempt-1))
	delay := float64(backoffBase) * multiplier
	return cfbypass.AddJitter(time.Duration(delay))
}