1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
package format
import (
"regexp"
"strings"
)
// GenerateSnippet returns a short plain-text preview built from a body that is
// available as plain text, as HTML, or both.
//
// bodyText is preferred. When it is empty or contains only whitespace and
// bodyHTML is non-empty, the preview is derived from StripHTML(bodyHTML)
// instead. The chosen text is trimmed of surrounding whitespace; if it is
// longer than 150 bytes it is shortened to at most 150 bytes and "..." is
// appended. The cut always falls on a rune boundary, so a multi-byte UTF-8
// character is never split in half.
func GenerateSnippet(bodyText, bodyHTML string) string {
	const maxSnippetBytes = 150

	// Trim before choosing the source so a whitespace-only plain-text part
	// does not suppress the HTML fallback and produce an empty snippet.
	text := strings.TrimSpace(bodyText)
	if text == "" && bodyHTML != "" {
		text = strings.TrimSpace(StripHTML(bodyHTML))
	}
	if len(text) <= maxSnippetBytes {
		return text
	}

	// Ranging over a string yields the byte offset at which each rune starts.
	// The last such offset that does not exceed the limit is a safe cut point;
	// slicing at a fixed byte index could emit invalid UTF-8.
	cut := 0
	for i := range text {
		if i > maxSnippetBytes {
			break
		}
		cut = i
	}
	return text[:cut] + "..."
}
// Patterns used by StripHTML. They are compiled once at package
// initialization instead of on every call.
var (
	// stripHTMLHiddenElementRes match elements whose whole content is removed
	// (style sheets, scripts, and the document head). The \b after the tag
	// name keeps "<head" from also matching "<header>".
	stripHTMLHiddenElementRes = []*regexp.Regexp{
		regexp.MustCompile(`(?i)<style\b[^>]*>[\s\S]*?</style\s*>`),
		regexp.MustCompile(`(?i)<script\b[^>]*>[\s\S]*?</script\s*>`),
		regexp.MustCompile(`(?i)<head\b[^>]*>[\s\S]*?</head\s*>`),
	}

	// stripHTMLBreakTagRe matches tags that end a visual line: any form of
	// <br> and the closing tags of p, div, tr, li and h1-h6, in any case.
	stripHTMLBreakTagRe = regexp.MustCompile(`(?i)<br\b[^>]*>|</(?:p|div|tr|li|h[1-6])\s*>`)
)

// StripHTML converts an HTML fragment or document into a single line of plain
// text.
//
// Processing steps:
//  1. <style>, <script> and <head> elements are removed together with their
//     content.
//  2. <br> tags and the closing tags of p, div, tr, li and h1-h6 are turned
//     into line breaks so adjacent blocks do not run together.
//  3. All remaining tags are dropped; text outside tags is kept, including a
//     literal '>' that does not close a tag.
//  4. Each line is trimmed, empty lines are discarded, and the remaining
//     lines are joined with single spaces.
//
// Character references such as "&amp;" are left as they are. This is a
// lightweight text extractor, not an HTML sanitizer: a literal '<' in text is
// treated as the start of a tag and swallows everything up to the next '>'.
func StripHTML(html string) string {
	text := html
	for _, re := range stripHTMLHiddenElementRes {
		text = re.ReplaceAllString(text, "")
	}
	text = stripHTMLBreakTagRe.ReplaceAllString(text, "\n")

	var result strings.Builder
	result.Grow(len(text))
	inTag := false
	for _, char := range text {
		switch {
		case char == '<':
			inTag = true
		case char == '>' && inTag:
			inTag = false
		case !inTag:
			// Also reached for a '>' seen outside any tag, which is
			// ordinary text and must not be dropped.
			result.WriteRune(char)
		}
	}

	var cleanLines []string
	for _, line := range strings.Split(result.String(), "\n") {
		if line = strings.TrimSpace(line); line != "" {
			cleanLines = append(cleanLines, line)
		}
	}
	return strings.Join(cleanLines, " ")
}
|