feat: ranking page pagination, popular URL, and per-page HTML disk cache
Some checks failed
CI / Lint (push) Has been cancelled
CI / Test (push) Has been cancelled
CI / Build (push) Has been cancelled

- Switch ScrapeRanking to the novelfire.net/genre-all/sort-popular URL and update DOM selectors (div.novel-item, h3.novel-title, div.genres)
- Replace 5 hardcoded refresh buttons with dynamic 100-page paginator (smart ellipsis via rankingPageNums)
- Add RankingPageCacher interface and writer methods to cache raw HTML per page under static/books/_ranking_cache/page-N.html
- ScrapeRanking serves from disk cache on hit and writes to cache on miss, skipping Browserless round-trip
- Thread writer as PageCacher through novelfire.New and main.go
- Add TestScrapeRanking_CacheHit and TestScrapeRanking_CacheMiss tests
This commit is contained in:
Admin
2026-03-01 21:32:50 +05:00
parent 73869f01fa
commit 9cf94576d8
7 changed files with 334 additions and 103 deletions

View File

@@ -88,7 +88,9 @@ func run(log *slog.Logger) error {
bc := newBrowserClient(strategy, browserCfg)
urlClient := newBrowserClient(urlStrategy, browserCfg)
nf := novelfire.New(bc, log, urlClient)
staticRoot := envOr("SCRAPER_STATIC_ROOT", "./static/books")
w := writer.New(staticRoot)
nf := novelfire.New(bc, log, urlClient, w)
workers := 0
if s := os.Getenv("SCRAPER_WORKERS"); s != "" {
@@ -103,7 +105,7 @@ func run(log *slog.Logger) error {
oCfg := orchestrator.Config{
Workers: workers,
StaticRoot: envOr("SCRAPER_STATIC_ROOT", "./static/books"),
StaticRoot: staticRoot,
}
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)

View File

@@ -2,54 +2,57 @@ package novelfire
import (
"context"
"fmt"
"os"
"path/filepath"
"testing"
"github.com/libnovel/scraper/internal/browser"
"github.com/libnovel/scraper/internal/scraper"
"github.com/libnovel/scraper/internal/writer"
)
// rankingPage1HTML is a realistic mock of novelfire.net/ranking?page=1.
// It contains two novel-item entries and a "next" link for pagination tests.
// rankingPage1HTML is a realistic mock of the popular genre listing page
// (novelfire.net/genre-all/sort-popular/status-all/all-novel?page=1).
// It contains two novel-item cards and a "next" link for pagination tests.
func rankingPage1HTML() string {
return `<!DOCTYPE html>
<html><body>
<ul class="rank-novels">
<li class="novel-item">
<figure class="cover"><a href="/book/the-iron-throne"><img data-src="/covers/iron-throne.jpg"></a></figure>
<div class="list-novel">
<div class="novel-item">
<figure class="cover"><img src="/covers/iron-throne.jpg"></figure>
<div class="item-body">
<h2 class="title"><a href="/book/the-iron-throne">The Iron Throne</a></h2>
<h3 class="novel-title"><a href="/book/the-iron-throne">The Iron Throne</a></h3>
<span class="status">Ongoing</span>
<div class="categories"><div class="scroll"><span>Fantasy</span><span>Action</span></div></div>
<div class="genres"><a>Fantasy</a><a>Action</a></div>
</div>
</li>
<li class="novel-item">
<figure class="cover"><a href="/book/shadow-mage"><img data-src="/covers/shadow-mage.jpg"></a></figure>
</div>
<div class="novel-item">
<figure class="cover"><img src="/covers/shadow-mage.jpg"></figure>
<div class="item-body">
<h2 class="title"><a href="/book/shadow-mage">Shadow Mage</a></h2>
<h3 class="novel-title"><a href="/book/shadow-mage">Shadow Mage</a></h3>
<span class="status">Completed</span>
<div class="categories"><div class="scroll"><span>Magic</span></div></div>
<div class="genres"><a>Magic</a></div>
</div>
</li>
</ul>
<a class="next" href="/ranking?page=2">Next</a>
</div>
</div>
<a class="next" href="/genre-all/sort-popular/status-all/all-novel?page=2">Next</a>
</body></html>`
}
func rankingPage2HTML() string {
return `<!DOCTYPE html>
<html><body>
<ul class="rank-novels">
<li class="novel-item">
<figure class="cover"><a href="/book/void-hunter"><img data-src="/covers/void-hunter.jpg"></a></figure>
<div class="list-novel">
<div class="novel-item">
<figure class="cover"><img src="/covers/void-hunter.jpg"></figure>
<div class="item-body">
<h2 class="title"><a href="/book/void-hunter">Void Hunter</a></h2>
<h3 class="novel-title"><a href="/book/void-hunter">Void Hunter</a></h3>
<span class="status">Ongoing</span>
<div class="categories"><div class="scroll"><span>Sci-Fi</span></div></div>
<div class="genres"><a>Sci-Fi</a></div>
</div>
</li>
</ul>
</div>
</div>
<!-- no .next link → last page -->
</body></html>`
}
@@ -108,7 +111,7 @@ func TestScrapeRanking_MultiPage(t *testing.T) {
// Use pagedStubClient for s.client so each GetContent call returns the
// next page. ScrapeRanking now calls s.client directly.
urlClient := &pagedStubClient{pages: []string{rankingPage1HTML(), rankingPage2HTML()}}
s := New(urlClient, nil, nil) // urlClient == nil → falls back to client
s := New(urlClient, nil, nil, nil) // nil cache — no disk I/O in tests
entryCh, errCh := s.ScrapeRanking(context.Background(), 0) // 0 = all pages
entries := drainRanking(t, entryCh, errCh)
@@ -132,8 +135,8 @@ func TestScrapeRanking_MultiPage(t *testing.T) {
}
}
// TestScrapeRanking_EmptyPage verifies that a page with no .rank-novels
// container produces zero entries and closes channels cleanly (no deadlock).
// TestScrapeRanking_EmptyPage verifies that a page with no .novel-item
// cards produces zero entries and closes channels cleanly (no deadlock).
func TestScrapeRanking_EmptyPage(t *testing.T) {
s := newScraper(`<!DOCTYPE html><html><body><div class="no-rankings"></div></body></html>`)
entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
@@ -185,3 +188,89 @@ func TestWriteRanking_RoundTrip(t *testing.T) {
}
}
}
// ── in-memory page cacher ─────────────────────────────────────────────────────

// memPageCacher is an in-memory scraper.RankingPageCacher used by tests.
// It keeps the cached HTML per page plus a per-page write counter so tests
// can assert exactly how often each page was persisted.
type memPageCacher struct {
	pages  map[int]string // page number → cached raw HTML
	writes map[int]int    // page number → WriteRankingPageCache call count
}

// newMemPageCacher returns an empty, ready-to-use cacher.
func newMemPageCacher() *memPageCacher {
	c := &memPageCacher{}
	c.pages = map[int]string{}
	c.writes = map[int]int{}
	return c
}

// WriteRankingPageCache stores html under page and bumps the write counter.
func (c *memPageCacher) WriteRankingPageCache(page int, html string) error {
	c.writes[page]++
	c.pages[page] = html
	return nil
}

// ReadRankingPageCache returns the cached HTML for page. A missing page
// yields ("", nil), which is the interface's contract for a cache miss.
func (c *memPageCacher) ReadRankingPageCache(page int) (string, error) {
	html, ok := c.pages[page]
	if !ok {
		return "", nil
	}
	return html, nil
}

// Compile-time proof that memPageCacher satisfies the cacher interface.
var _ scraper.RankingPageCacher = (*memPageCacher)(nil)
// TestScrapeRanking_CacheHit verifies that when a page is already in the cache
// ScrapeRanking serves from cache and does NOT call the browser client.
func TestScrapeRanking_CacheHit(t *testing.T) {
	cache := newMemPageCacher()

	// Seed the cache with page 1 so the scraper has a hit to serve.
	if err := cache.WriteRankingPageCache(1, rankingPage1HTML()); err != nil {
		t.Fatalf("cache write: %v", err)
	}
	// The seeding write above counts as one; zero the counter so any write
	// observed below must have come from the scraper itself.
	cache.writes[1] = 0

	// A client whose GetContent panics proves the network path is never taken.
	panicClient := &panicOnGetContent{}
	s := New(panicClient, nil, panicClient, cache)

	entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
	got := drainRanking(t, entryCh, errCh)

	if want := 2; len(got) != want {
		t.Fatalf("expected %d entries from cache, got %d", want, len(got))
	}
	// A hit must not be re-persisted.
	if n := cache.writes[1]; n != 0 {
		t.Errorf("expected 0 cache writes on a hit, got %d", n)
	}
}
// TestScrapeRanking_CacheMiss verifies that on a cache miss the page is fetched
// from the network and the result is written to the cache.
func TestScrapeRanking_CacheMiss(t *testing.T) {
	cache := newMemPageCacher() // nothing cached yet
	s := New(&stubClient{html: rankingPage1HTML()}, nil, nil, cache)

	entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
	got := drainRanking(t, entryCh, errCh)

	if want := 2; len(got) != want {
		t.Fatalf("expected %d entries, got %d", want, len(got))
	}
	// The fetched HTML must be persisted exactly once.
	if n := cache.writes[1]; n != 1 {
		t.Errorf("expected 1 cache write on a miss, got %d", n)
	}
	if cache.pages[1] == "" {
		t.Error("expected page 1 to be stored in cache after miss")
	}
}
// panicOnGetContent is a stub browser client whose GetContent panics.
// Tests use it to prove a code path never reaches the network: when the
// cache is consulted as expected, GetContent is never invoked and no panic fires.
type panicOnGetContent struct{}

// GetContent always panics; reaching it means the cache was bypassed.
func (p *panicOnGetContent) GetContent(_ context.Context, req browser.ContentRequest) (string, error) {
	panic(fmt.Sprintf("unexpected GetContent call for URL %s — should have been served from cache", req.URL))
}

// Strategy reports the content-fetching strategy, as the interface requires.
func (p *panicOnGetContent) Strategy() browser.Strategy { return browser.StrategyContent }

// ScrapePage is a no-op stub present only to satisfy the interface.
func (p *panicOnGetContent) ScrapePage(_ context.Context, _ browser.ScrapeRequest) (browser.ScrapeResponse, error) {
	return browser.ScrapeResponse{}, nil
}

// CDPSession is a no-op stub present only to satisfy the interface.
func (p *panicOnGetContent) CDPSession(_ context.Context, _ string, _ browser.CDPSessionFunc) error {
	return nil
}

View File

@@ -26,7 +26,7 @@ import (
const (
baseURL = "https://novelfire.net"
cataloguePath = "/genre-all/sort-new/status-all/all-novel"
rankingPath = "/ranking"
rankingPath = "/genre-all/sort-popular/status-all/all-novel"
)
// rejectResourceTypes lists Browserless resource types to block on every request.
@@ -53,20 +53,22 @@ var rejectResourceTypes = []string{
type Scraper struct {
client browser.BrowserClient
urlClient browser.BrowserClient // separate client for URL retrieval (uses browserless content strategy)
pageCache scraper.RankingPageCacher
log *slog.Logger
}
// New returns a new novelfire Scraper.
// client is used for content fetching, urlClient is used for URL retrieval (chapter list).
// If urlClient is nil, client will be used for both.
func New(client browser.BrowserClient, log *slog.Logger, urlClient browser.BrowserClient) *Scraper {
// pageCache is optional; pass nil to disable ranking page caching.
func New(client browser.BrowserClient, log *slog.Logger, urlClient browser.BrowserClient, pageCache scraper.RankingPageCacher) *Scraper {
if log == nil {
log = slog.Default()
}
if urlClient == nil {
urlClient = client
}
return &Scraper{client: client, urlClient: urlClient, log: log}
return &Scraper{client: client, urlClient: urlClient, pageCache: pageCache, log: log}
}
// SourceName implements NovelScraper.
@@ -365,9 +367,9 @@ func (s *Scraper) ScrapeChapterList(ctx context.Context, bookURL string) ([]scra
// ─── RankingProvider ───────────────────────────────────────────────────────────
// ScrapeRanking pages through up to maxPages ranking pages on novelfire.net/ranking.
// Pages are fetched one at a time, strictly sequentially: the next page is only
// requested after every entry from the current page has been sent to the channel.
// ScrapeRanking pages through up to maxPages pages of the popular-novels genre
// listing on novelfire.net (/genre-all/sort-popular/status-all/all-novel).
// Pages are fetched one at a time, strictly sequentially.
// maxPages <= 0 means "fetch all pages until no more are found".
func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scraper.BookMeta, <-chan error) {
entries := make(chan scraper.BookMeta, 32)
@@ -387,17 +389,39 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
}
pageURL := fmt.Sprintf("%s%s?page=%d", baseURL, rankingPath, page)
s.log.Info("scraping ranking page", "page", page, "url", pageURL)
// The ranking page is fully server-rendered; a direct HTTP GET is
// sufficient and avoids the Browserless round-trip overhead.
raw, err := s.client.GetContent(ctx, browser.ContentRequest{
URL: pageURL,
})
if err != nil {
s.log.Debug("ranking page fetch failed", "page", page, "url", pageURL, "err", err)
errs <- fmt.Errorf("ranking page %d: %w", page, err)
return
// Try to serve from disk cache before hitting the network.
var raw string
if s.pageCache != nil {
if cached, err := s.pageCache.ReadRankingPageCache(page); err != nil {
s.log.Warn("ranking page cache read error", "page", page, "err", err)
} else if cached != "" {
s.log.Info("serving ranking page from cache", "page", page)
raw = cached
}
}
if raw == "" {
s.log.Info("scraping popular ranking page", "page", page, "url", pageURL)
fetched, err := s.client.GetContent(ctx, browser.ContentRequest{
URL: pageURL,
WaitFor: &browser.WaitForSelector{Selector: ".novel-item", Timeout: 5000},
RejectResourceTypes: rejectResourceTypes,
GotoOptions: &browser.GotoOptions{Timeout: 60000},
})
if err != nil {
s.log.Debug("ranking page fetch failed", "page", page, "url", pageURL, "err", err)
errs <- fmt.Errorf("ranking page %d: %w", page, err)
return
}
raw = fetched
// Persist to cache for future runs.
if s.pageCache != nil {
if werr := s.pageCache.WriteRankingPageCache(page, raw); werr != nil {
s.log.Warn("ranking page cache write error", "page", page, "err", werr)
}
}
}
root, err := htmlutil.ParseHTML(raw)
@@ -406,48 +430,48 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
return
}
rankList := htmlutil.FindFirst(root, scraper.Selector{Class: "rank-novels"})
if rankList == nil {
s.log.Debug("rank-novels container not found, stopping pagination", "page", page)
// Genre listing uses div.novel-item cards (same structure as catalogue).
cards := htmlutil.FindAll(root, scraper.Selector{Tag: "div", Class: "novel-item", Multiple: true})
if len(cards) == 0 {
s.log.Debug("no novel cards found, stopping pagination", "page", page)
break
}
items := htmlutil.FindAll(rankList, scraper.Selector{Tag: "li", Class: "novel-item"})
if len(items) == 0 {
s.log.Debug("no ranking items on page, stopping pagination", "page", page)
break
}
for _, item := range items {
// Cover: <figure class="cover"><a href="/book/slug"><img data-src="..."></a></figure>
for _, card := range cards {
// Cover: <figure class="cover"><img src="..." or data-src="...">
var cover string
if fig := htmlutil.FindFirst(item, scraper.Selector{Tag: "figure", Class: "cover"}); fig != nil {
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "data-src"})
if cover != "" {
if fig := htmlutil.FindFirst(card, scraper.Selector{Tag: "figure", Class: "cover"}); fig != nil {
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "src"})
if cover == "" {
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "data-src"})
}
if cover != "" && !strings.HasPrefix(cover, "http") {
cover = baseURL + cover
}
}
// Title and URL: <h2 class="title"><a href="/book/slug">Title</a></h2>
titleNode := htmlutil.FindFirst(item, scraper.Selector{Tag: "h2", Class: "title"})
// Title and URL: <h3 class="novel-title"><a href="/book/slug">Title</a></h3>
titleNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "h3", Class: "novel-title"})
var title, bookURL string
if titleNode != nil {
linkNode := htmlutil.FindFirst(titleNode, scraper.Selector{Tag: "a"})
linkNode := htmlutil.FindFirst(titleNode, scraper.Selector{Tag: "a", Attr: "href"})
if linkNode != nil {
title = htmlutil.ExtractText(linkNode, scraper.Selector{})
href := htmlutil.ExtractText(linkNode, scraper.Selector{Attr: "href"})
href := htmlutil.ExtractText(linkNode, scraper.Selector{Tag: "a", Attr: "href"})
bookURL = resolveURL(baseURL, href)
}
}
if title == "" || bookURL == "" {
continue
}
// Status: <span class="status"> Ongoing/Completed </span>
status := htmlutil.ExtractFirst(item, scraper.Selector{Tag: "span", Class: "status"})
// Status: <span class="status">Ongoing</span>
status := strings.TrimSpace(htmlutil.ExtractFirst(card, scraper.Selector{Tag: "span", Class: "status"}))
// Genres: <div class="categories"><div class="scroll"><span>Genre1</span>...</div></div>
// Genres: <div class="genres"><a>Genre</a>...
var genres []string
categoriesNode := htmlutil.FindFirst(item, scraper.Selector{Tag: "div", Class: "categories"})
if categoriesNode != nil {
genres = htmlutil.ExtractAll(categoriesNode, scraper.Selector{Tag: "span", Multiple: true})
if genresNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "div", Class: "genres"}); genresNode != nil {
genres = htmlutil.ExtractAll(genresNode, scraper.Selector{Tag: "a", Multiple: true})
}
slug := slugFromURL(bookURL)
@@ -456,7 +480,7 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
Slug: slug,
Title: title,
Cover: cover,
Status: strings.TrimSpace(status),
Status: status,
Genres: genres,
SourceURL: bookURL,
Ranking: rank,
@@ -470,7 +494,7 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
}
}
// Stop if no next-page link exists (natural end of ranking list).
// Stop if no next-page link exists (natural end of listing).
nextHref := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "a", Class: "next", Attr: "href"})
if nextHref == "" {
s.log.Debug("no next-page link found, stopping pagination", "page", page)

View File

@@ -62,12 +62,12 @@ func (c *pagedStubClient) CDPSession(_ context.Context, _ string, _ browser.CDPS
// ── helpers ───────────────────────────────────────────────────────────────────
func newScraper(html string) *Scraper {
return New(&stubClient{html: html}, nil, &stubClient{html: html})
return New(&stubClient{html: html}, nil, &stubClient{html: html}, nil)
}
func newPagedScraper(pages ...string) *Scraper {
urlClient := &pagedStubClient{pages: pages}
return New(&stubClient{}, nil, urlClient)
return New(&stubClient{}, nil, urlClient, nil)
}
// ── ScrapeChapterText ─────────────────────────────────────────────────────────

View File

@@ -120,6 +120,16 @@ type RankingProvider interface {
ScrapeRanking(ctx context.Context, maxPages int) (<-chan BookMeta, <-chan error)
}
// RankingPageCacher persists and retrieves raw HTML for individual ranking pages.
// Implementations (e.g. writer.Writer) store files on disk so that a
// subsequent ScrapeRanking call can serve cached HTML without a network round-trip.
//
// A cache miss is reported as ("", nil), never as an error; a non-nil error
// signals a real I/O failure, which callers may log and then treat as a miss.
type RankingPageCacher interface {
	// WriteRankingPageCache stores the raw HTML string for the given page number.
	WriteRankingPageCache(page int, html string) error
	// ReadRankingPageCache returns the cached HTML for page, or ("", nil) on a miss.
	ReadRankingPageCache(page int) (string, error)
}
// NovelScraper is the full interface that a concrete novel source must implement.
// It composes all four provider interfaces.
type NovelScraper interface {

View File

@@ -546,25 +546,31 @@ const rankingTmpl = `
</div>
<!-- Pagination: fetch pages from novelfire -->
<div class="flex items-center gap-2 mb-6 flex-wrap">
<span class="text-xs text-zinc-500">Fetch pages:</span>
{{range .Pages}}
<form hx-post="/ranking/refresh"
hx-target="#ranking-refresh-status"
hx-swap="innerHTML">
<input type="hidden" name="pages" value="{{.Pages}}">
<button type="submit"
class="text-xs px-2.5 py-1 rounded-lg bg-zinc-800 hover:bg-amber-700 border border-zinc-700 hover:border-amber-600 text-zinc-300 hover:text-white transition-colors">
{{.Label}}
</button>
</form>
{{end}}
<span class="text-xs text-zinc-600 ml-1">
(source:
<div class="mb-6">
<div class="flex items-center gap-1.5 mb-2 flex-wrap">
<span class="text-xs text-zinc-500 mr-1">Fetch up to page:</span>
{{range .PageNums}}
{{if eq .Num 0}}
<span class="text-xs text-zinc-600 px-1 select-none">…</span>
{{else}}
<form hx-post="/ranking/refresh"
hx-target="#ranking-refresh-status"
hx-swap="innerHTML">
<input type="hidden" name="pages" value="{{.Num}}">
<button type="submit"
class="text-xs w-8 h-7 rounded-lg bg-zinc-800 hover:bg-amber-700 border border-zinc-700 hover:border-amber-600 text-zinc-300 hover:text-white transition-colors text-center">
{{.Num}}
</button>
</form>
{{end}}
{{end}}
</div>
<p class="text-xs text-zinc-600">
Each page = ~20 novels from
<a href="https://novelfire.net/genre-all/sort-popular/status-all/all-novel?page=1"
target="_blank" rel="noopener noreferrer"
class="text-zinc-500 hover:text-amber-400 underline underline-offset-2">novelfire.net</a>)
</span>
class="text-zinc-500 hover:text-amber-400 underline underline-offset-2">novelfire.net popular</a>
</p>
</div>
<!-- Book grid -->
@@ -693,21 +699,70 @@ func toRankingViewItems(items []writer.RankingItem, localSlugs map[string]bool)
return out
}
// refreshPage is one entry in the ranking refresh pagination bar.
type refreshPage struct {
Label string
Pages int // 0 means "all pages"
// pageNum is one entry in the ranking pagination bar.
// Num == 0 is a sentinel that renders as an ellipsis gap.
type pageNum struct {
	// Num is the 1-based page number; 0 marks an ellipsis placeholder
	// between two non-consecutive page runs (see rankingPageNums).
	Num int
}
// rankingRefreshPages defines the pagination buttons shown on the ranking page.
// Each entry fetches that many pages from novelfire.net (100 novels per page).
// Pages == 0 means fetch all pages.
var rankingRefreshPages = []refreshPage{
{"p.1 (top 100)", 1},
{"p.1–3 (top 300)", 3},
{"p.1–5 (top 500)", 5},
{"p.1–10 (top 1000)", 10},
{"All", 0},
// rankingPageNums builds the dynamic pagination list for a total-page listing
// with smart ellipsis: it always shows the first three and last three pages,
// and when total > 12 it additionally shows three-page windows around the
// quarter (total/4) and three-quarter (3*total/4) marks, compressing the
// gaps between runs with "…" separators.
//
// For a flat 100-page list it produces:
//
//	1 2 3 … 24 25 26 … 74 75 76 … 98 99 100
//
// (The remaining ranges are omitted intentionally; users click individual pages.)
func rankingPageNums(total int) []pageNum {
	if total <= 0 {
		return nil
	}

	// Collect the set of page numbers that should be visible.
	show := make(map[int]bool)

	// Always show the first three and last three pages (clamped to range).
	for i := 1; i <= 3 && i <= total; i++ {
		show[i] = true
	}
	for i := total - 2; i <= total; i++ {
		if i >= 1 {
			show[i] = true
		}
	}

	// For large ranges, also expose a few pages near the 1/4 and 3/4 marks
	// so users can jump into the middle of the listing.
	if total > 12 {
		q1 := total / 4
		q3 := total * 3 / 4
		for _, p := range []int{q1 - 1, q1, q1 + 1, q3 - 1, q3, q3 + 1} {
			if p >= 1 && p <= total {
				show[p] = true
			}
		}
	}

	// Flatten the set into a sorted slice. The set is tiny (at most 12
	// entries), so a simple insertion sort keeps this file free of extra
	// imports while staying effectively O(1).
	pages := make([]int, 0, len(show))
	for p := range show {
		i := len(pages)
		for i > 0 && pages[i-1] > p {
			i--
		}
		pages = append(pages, 0)
		copy(pages[i+1:], pages[i:])
		pages[i] = p
	}

	// Emit pages in order, inserting an ellipsis sentinel (Num == 0) wherever
	// two consecutive visible pages are not adjacent.
	out := make([]pageNum, 0, len(pages)*2)
	for i, p := range pages {
		if i > 0 && p > pages[i-1]+1 {
			out = append(out, pageNum{0}) // ellipsis gap
		}
		out = append(out, pageNum{p})
	}
	return out
}
// handleRanking serves the ranking page from the cached ranking.md file.
@@ -728,11 +783,11 @@ func (s *Server) handleRanking(w http.ResponseWriter, r *http.Request) {
_ = t.Execute(&buf, struct {
Books interface{}
CachedAt string
Pages []refreshPage
PageNums []pageNum
}{
Books: toRankingViewItems(rankingItems, s.writer.LocalSlugs()),
CachedAt: cachedAt,
Pages: rankingRefreshPages,
PageNums: rankingPageNums(100),
})
s.respond(w, r, "Rankings", buf.String())
}

View File

@@ -403,6 +403,57 @@ func (w *Writer) rankingPath() string {
return filepath.Join(w.root, "ranking.md")
}
// ─── Ranking page HTML cache ──────────────────────────────────────────────────
// rankingCacheDir returns the directory that stores per-page HTML caches,
// i.e. <root>/_ranking_cache. It lives directly under the same root as the
// per-book directories (see bookDir); presumably the underscore prefix keeps
// it from colliding with book slugs — confirm against slug generation.
func (w *Writer) rankingCacheDir() string {
	return filepath.Join(w.root, "_ranking_cache")
}
// rankingPageCachePath returns the path for a cached ranking page HTML file,
// i.e. <cache dir>/page-<page>.html.
func (w *Writer) rankingPageCachePath(page int) string {
	return filepath.Join(w.rankingCacheDir(), fmt.Sprintf("page-%d.html", page))
}
// WriteRankingPageCache persists raw HTML for the given ranking page number.
//
// The write is atomic: the HTML is first written to a temporary file in the
// cache directory and then renamed into place, so a crash or a concurrent
// reader can never observe a partially written page-N.html (which
// ReadRankingPageCache would otherwise happily serve as valid cache content).
func (w *Writer) WriteRankingPageCache(page int, html string) error {
	dir := w.rankingCacheDir()
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return fmt.Errorf("writer: mkdir ranking cache %s: %w", dir, err)
	}
	path := w.rankingPageCachePath(page)

	// Write into a sibling temp file so the final rename stays on one filesystem.
	tmp, err := os.CreateTemp(dir, fmt.Sprintf("page-%d-*.tmp", page))
	if err != nil {
		return fmt.Errorf("writer: create temp for ranking page cache %s: %w", path, err)
	}
	tmpName := tmp.Name()
	if _, err := tmp.WriteString(html); err != nil {
		tmp.Close()
		os.Remove(tmpName) // best-effort cleanup of the partial temp file
		return fmt.Errorf("writer: write ranking page cache %s: %w", path, err)
	}
	if err := tmp.Close(); err != nil {
		os.Remove(tmpName)
		return fmt.Errorf("writer: close ranking page cache temp %s: %w", tmpName, err)
	}
	// CreateTemp uses 0o600; widen to the 0o644 the cache previously used.
	if err := os.Chmod(tmpName, 0o644); err != nil {
		os.Remove(tmpName)
		return fmt.Errorf("writer: chmod ranking page cache temp %s: %w", tmpName, err)
	}
	if err := os.Rename(tmpName, path); err != nil {
		os.Remove(tmpName)
		return fmt.Errorf("writer: rename ranking page cache %s: %w", path, err)
	}
	return nil
}
// ReadRankingPageCache reads the cached HTML for the given ranking page.
// Returns ("", nil) when no cache file exists yet; any other read failure is
// wrapped and returned as an error.
func (w *Writer) ReadRankingPageCache(page int) (string, error) {
	path := w.rankingPageCachePath(page)
	data, err := os.ReadFile(path)
	switch {
	case err == nil:
		return string(data), nil
	case os.IsNotExist(err):
		// An absent file is a plain cache miss, not an error.
		return "", nil
	default:
		return "", fmt.Errorf("writer: read ranking page cache page %d: %w", page, err)
	}
}
// RankingPageCacheInfo returns os.FileInfo for a cached ranking page file,
// letting callers inspect e.g. the cache file's modification time.
// Returns (nil, nil) when the file does not exist.
func (w *Writer) RankingPageCacheInfo(page int) (os.FileInfo, error) {
	info, err := os.Stat(w.rankingPageCachePath(page))
	switch {
	case err == nil:
		return info, nil
	case os.IsNotExist(err):
		// Absent cache file → (nil, nil), mirroring ReadRankingPageCache's miss contract.
		return nil, nil
	default:
		return nil, err
	}
}
// bookDir returns the root directory for a book slug.
func (w *Writer) bookDir(slug string) string {
return filepath.Join(w.root, slug)