Files
libnovel/backend/internal/scraper/scraper.go
Admin 59e8cdb19a
Some checks failed
CI / v3 / Check ui (pull_request) Failing after 15s
CI / v3 / Test backend (pull_request) Failing after 16s
CI / v3 / Docker / backend (pull_request) Has been skipped
CI / v3 / Docker / runner (pull_request) Has been skipped
CI / v3 / Docker / ui (pull_request) Has been skipped
chore: migrate to v3, Doppler secrets, clean up legacy code
- Remove all pre-v3 code: scraper, ui-v2, backend v1, ios v1+v2, legacy CI workflows
- Flatten v3/ contents to repo root
- Add Doppler secrets management (project=libnovel, config=prd)
- Add justfile with doppler run wrappers for all docker compose commands
- Strip hardcoded env fallbacks from docker-compose.yml
- Add minimal README.md
- Clean up .gitignore
2026-03-23 17:21:12 +05:00

61 lines
2.0 KiB
Go

// Package scraper defines the NovelScraper interface and its sub-interfaces.
// Domain types live in internal/domain — this package only defines the scraping
// contract so that novelfire and any future scrapers can be swapped freely.
package scraper
import (
"context"
"github.com/libnovel/backend/internal/domain"
)
// CatalogueProvider can enumerate every novel available on a source site.
type CatalogueProvider interface {
// ScrapeCatalogue returns two receive-only channels: one carrying
// domain.CatalogueEntry values and one carrying errors.
// NOTE(review): this declaration does not state when either channel is
// closed or how ctx cancellation is reported — confirm against the
// concrete implementations before relying on it.
ScrapeCatalogue(ctx context.Context) (<-chan domain.CatalogueEntry, <-chan error)
}
// MetadataProvider can extract structured book metadata from a novel's landing page.
type MetadataProvider interface {
// ScrapeMetadata takes the URL of a book's landing page and returns the
// domain.BookMeta extracted from it, or an error.
ScrapeMetadata(ctx context.Context, bookURL string) (domain.BookMeta, error)
}
// ChapterListProvider can enumerate all chapters of a book.
// upTo > 0 stops pagination once at least upTo chapter numbers have been
// collected (early-exit optimisation for range scrapes). upTo == 0 fetches all pages.
type ChapterListProvider interface {
// ScrapeChapterList returns the chapter references found for the book at
// bookURL, or an error. A positive upTo allows the scrape to stop paginating
// once at least upTo chapter numbers have been collected; upTo == 0 requests
// every page.
// NOTE(review): behaviour for a negative upTo is not specified here — confirm.
ScrapeChapterList(ctx context.Context, bookURL string, upTo int) ([]domain.ChapterRef, error)
}
// ChapterTextProvider can extract the readable text from a single chapter page.
type ChapterTextProvider interface {
// ScrapeChapterText takes a reference to one chapter and returns the
// corresponding domain.Chapter, or an error.
ScrapeChapterText(ctx context.Context, ref domain.ChapterRef) (domain.Chapter, error)
}
// RankingProvider can enumerate novels from a ranking page.
type RankingProvider interface {
// ScrapeRanking pages through up to maxPages ranking pages.
// maxPages <= 0 means all pages.
// It returns two receive-only channels: one carrying domain.BookMeta values
// and one carrying errors.
// NOTE(review): when the channels are closed is not specified by this
// declaration — confirm against the concrete implementations.
ScrapeRanking(ctx context.Context, maxPages int) (<-chan domain.BookMeta, <-chan error)
}
// NovelScraper is the full interface a concrete novel source must implement.
// It is the union of the five provider interfaces embedded below plus
// SourceName, so any NovelScraper value can be used wherever one of those
// narrower interfaces is expected.
type NovelScraper interface {
CatalogueProvider
MetadataProvider
ChapterListProvider
ChapterTextProvider
RankingProvider
// SourceName returns the human-readable name of this scraper, e.g. "novelfire.net".
SourceName() string
}
// Selector captures the criteria used to find an element inside an HTML
// document.
type Selector struct {
	// String-valued criteria, in declaration order: Tag, Class, ID, Attr.
	// NOTE(review): presumably the element tag name, CSS class, id attribute,
	// and the attribute to read from the match — nothing in this file uses
	// them, so confirm against the consuming code.
	Tag, Class, ID, Attr string

	// Multiple is a boolean flag.
	// NOTE(review): likely selects "all matches" rather than the first match —
	// confirm against the consuming code.
	Multiple bool
}