// libnovel/backend/internal/scraper/scraper.go

// Package scraper defines the NovelScraper interface and its sub-interfaces.
// Domain types live in internal/domain — this package only defines the scraping
// contract so that novelfire and any future scrapers can be swapped freely.
package scraper

import (
	"context"

	"github.com/libnovel/backend/internal/domain"
)

// CatalogueProvider can enumerate every novel available on a source site.
type CatalogueProvider interface {
	// ScrapeCatalogue returns two receive-only channels: one yielding
	// catalogue entries and one yielding errors.
	//
	// NOTE(review): the signature does not state whether the channels are
	// closed when enumeration ends, or how cancellation of ctx is reported —
	// confirm against the concrete implementations.
	ScrapeCatalogue(ctx context.Context) (<-chan domain.CatalogueEntry, <-chan error)
}

// MetadataProvider can extract structured book metadata from a novel's landing page.
type MetadataProvider interface {
	// ScrapeMetadata takes bookURL, the address of the novel's landing page,
	// and returns the metadata extracted from it, or an error.
	ScrapeMetadata(ctx context.Context, bookURL string) (domain.BookMeta, error)
}

// ChapterListProvider can enumerate all chapters of a book.
type ChapterListProvider interface {
	// ScrapeChapterList returns references to the chapters of the book at
	// bookURL, or an error.
	//
	// upTo > 0 stops pagination once at least upTo chapter numbers have been
	// collected (early-exit optimisation for range scrapes), so the result may
	// hold more than upTo entries. upTo == 0 fetches all pages.
	//
	// NOTE(review): behaviour for upTo < 0 is not specified by this contract —
	// confirm against the concrete implementations.
	ScrapeChapterList(ctx context.Context, bookURL string, upTo int) ([]domain.ChapterRef, error)
}

// ChapterTextProvider can extract the readable text from a single chapter page.
type ChapterTextProvider interface {
	// ScrapeChapterText takes a reference to one chapter and returns the
	// corresponding domain.Chapter, or an error.
	ScrapeChapterText(ctx context.Context, ref domain.ChapterRef) (domain.Chapter, error)
}

// RankingProvider can enumerate novels from a ranking page.
type RankingProvider interface {
	// ScrapeRanking pages through up to maxPages ranking pages.
	// maxPages <= 0 means all pages.
	//
	// It returns two receive-only channels: one yielding book metadata and
	// one yielding errors.
	//
	// NOTE(review): the signature does not state whether the channels are
	// closed when paging ends, or how cancellation of ctx is reported —
	// confirm against the concrete implementations.
	ScrapeRanking(ctx context.Context, maxPages int) (<-chan domain.BookMeta, <-chan error)
}

// NovelScraper is the full interface a concrete novel source must implement.
// It is the union of the five provider interfaces embedded below plus
// SourceName, so a value satisfying NovelScraper can be used wherever any
// single provider interface is expected.
type NovelScraper interface {
	CatalogueProvider
	MetadataProvider
	ChapterListProvider
	ChapterTextProvider
	RankingProvider

	// SourceName returns the human-readable name of this scraper, e.g. "novelfire.net".
	SourceName() string
}

// Selector describes how to locate an element in an HTML document.
//
// It carries four string criteria (Tag, Class, ID, Attr) and one flag
// (Multiple). The zero value has every criterion empty and Multiple false.
//
// NOTE(review): no consumer of Selector is visible in this file. Presumably
// Tag, Class and ID match the element name, class and id, Attr names the
// attribute to read, and Multiple asks for all matches rather than the
// first — confirm against the code that interprets it.
type Selector struct {
	Tag, Class, ID, Attr string
	Multiple             bool
}