Compare commits

...

1 Commit
v2.2.6 ... v2

Author SHA1 Message Date
Admin
1a49cb5e75 fix(scraper): add Brotli decompression to HTTP client
Some checks failed
CI / Scraper / Lint (push) Failing after 29s
CI / Scraper / Lint (pull_request) Failing after 29s
CI / Scraper / Test (push) Failing after 38s
CI / Scraper / Docker Push (push) Has been skipped
CI / UI / Build (pull_request) Successful in 47s
CI / UI / Docker Push (pull_request) Has been skipped
CI / Scraper / Test (pull_request) Successful in 54s
CI / Scraper / Docker Push (pull_request) Has been skipped
iOS CI / Build (pull_request) Successful in 3m35s
iOS CI / Test (pull_request) Successful in 5m47s
novelfire.net responds with Content-Encoding: br when the scraper
advertises 'gzip, deflate, br'. The client only handled gzip, so
Brotli-compressed bytes were fed raw into the HTML parser producing
garbage — empty titles, zero chapters, and selector failures.

Added github.com/andybalholm/brotli and wired it into GetContent
alongside the existing gzip path.
2026-03-20 11:20:50 +05:00
3 changed files with 9 additions and 3 deletions

View File

@@ -10,6 +10,7 @@ require (
require (
github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect
+github.com/andybalholm/brotli v1.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/go-ini/ini v1.67.0 // indirect

View File

@@ -1,5 +1,7 @@
github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c h1:pxW6RcqyfI9/kWtOwnv/G+AzdKuy2ZrqINhenH4HyNs=
github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho=
+github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
+github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=

View File

@@ -10,6 +10,8 @@ import (
"os"
"strings"
"time"
+
+"github.com/andybalholm/brotli"
)
type httpClient struct {
@@ -106,16 +108,17 @@ func (c *httpClient) GetContent(ctx context.Context, req ContentRequest) (string
// net/http decompresses gzip automatically only when it sets the header
// itself; since we set Accept-Encoding explicitly we must do it ourselves.
body := resp.Body
-if strings.EqualFold(resp.Header.Get("Content-Encoding"), "gzip") {
+switch strings.ToLower(resp.Header.Get("Content-Encoding")) {
+case "gzip":
gr, gzErr := gzip.NewReader(resp.Body)
if gzErr != nil {
return "", fmt.Errorf("http: gzip reader: %w", gzErr)
}
defer gr.Close()
body = gr
+case "br":
+body = io.NopCloser(brotli.NewReader(resp.Body))
}
-// br (Brotli) decompression requires an external package; skip for now —
-// the server will fall back to gzip or plain text for unknown encodings.
raw, err := io.ReadAll(body)
if err != nil {