Some checks failed
CI / v3 / Check ui (pull_request) Failing after 15s
CI / v3 / Test backend (pull_request) Failing after 16s
CI / v3 / Docker / backend (pull_request) Has been skipped
CI / v3 / Docker / runner (pull_request) Has been skipped
CI / v3 / Docker / ui (pull_request) Has been skipped
- Remove all pre-v3 code: scraper, ui-v2, backend v1, ios v1+v2, legacy CI workflows - Flatten v3/ contents to repo root - Add Doppler secrets management (project=libnovel, config=prd) - Add justfile with doppler run wrappers for all docker compose commands - Strip hardcoded env fallbacks from docker-compose.yml - Add minimal README.md - Clean up .gitignore
61 lines
2.0 KiB
Go
61 lines
2.0 KiB
Go
// Package scraper defines the NovelScraper interface and its sub-interfaces.
|
|
// Domain types live in internal/domain — this package only defines the scraping
|
|
// contract so that novelfire and any future scrapers can be swapped freely.
|
|
package scraper
|
|
|
|
import (
|
|
"context"
|
|
|
|
"github.com/libnovel/backend/internal/domain"
|
|
)
|
|
|
|
// CatalogueProvider can enumerate every novel available on a source site.
|
|
type CatalogueProvider interface {
|
|
ScrapeCatalogue(ctx context.Context) (<-chan domain.CatalogueEntry, <-chan error)
|
|
}
|
|
|
|
// MetadataProvider can extract structured book metadata from a novel's landing page.
|
|
type MetadataProvider interface {
|
|
ScrapeMetadata(ctx context.Context, bookURL string) (domain.BookMeta, error)
|
|
}
|
|
|
|
// ChapterListProvider can enumerate all chapters of a book.
|
|
// upTo > 0 stops pagination once at least upTo chapter numbers have been
|
|
// collected (early-exit optimisation for range scrapes). upTo == 0 fetches all pages.
|
|
type ChapterListProvider interface {
|
|
ScrapeChapterList(ctx context.Context, bookURL string, upTo int) ([]domain.ChapterRef, error)
|
|
}
|
|
|
|
// ChapterTextProvider can extract the readable text from a single chapter page.
|
|
type ChapterTextProvider interface {
|
|
ScrapeChapterText(ctx context.Context, ref domain.ChapterRef) (domain.Chapter, error)
|
|
}
|
|
|
|
// RankingProvider can enumerate novels from a ranking page.
|
|
type RankingProvider interface {
|
|
// ScrapeRanking pages through up to maxPages ranking pages.
|
|
// maxPages <= 0 means all pages.
|
|
ScrapeRanking(ctx context.Context, maxPages int) (<-chan domain.BookMeta, <-chan error)
|
|
}
|
|
|
|
// NovelScraper is the full interface a concrete novel source must implement.
|
|
type NovelScraper interface {
|
|
CatalogueProvider
|
|
MetadataProvider
|
|
ChapterListProvider
|
|
ChapterTextProvider
|
|
RankingProvider
|
|
|
|
// SourceName returns the human-readable name of this scraper, e.g. "novelfire.net".
|
|
SourceName() string
|
|
}
|
|
|
|
// Selector describes how to locate an element in an HTML document.
//
// NOTE(review): the field meanings below are inferred from the field names;
// the code that consumes Selector is not in this file — confirm there.
type Selector struct {
	// Tag is presumably the element's tag name.
	Tag string
	// Class is presumably the value of the element's class attribute to match.
	Class string
	// ID is presumably the value of the element's id attribute to match.
	ID string
	// Attr presumably names an attribute of the element; whether it is
	// matched on or read from is not visible here.
	Attr string
	// Multiple presumably selects every matching element instead of one.
	Multiple bool
}
|