feat: ranking page pagination, popular URL, and per-page HTML disk cache
- Switch ScrapeRanking to novelfire.net/genre-all/sort-popular URL and updated DOM selectors (div.novel-item, h3.novel-title, div.genres)
- Replace 5 hardcoded refresh buttons with dynamic 100-page paginator (smart ellipsis via rankingPageNums)
- Add RankingPageCacher interface and writer methods to cache raw HTML per page under static/books/_ranking_cache/page-N.html
- ScrapeRanking serves from disk cache on hit and writes to cache on miss, skipping Browserless round-trip
- Thread writer as PageCacher through novelfire.New and main.go
- Add TestScrapeRanking_CacheHit and TestScrapeRanking_CacheMiss tests
This commit is contained in:
@@ -88,7 +88,9 @@ func run(log *slog.Logger) error {
|
||||
bc := newBrowserClient(strategy, browserCfg)
|
||||
urlClient := newBrowserClient(urlStrategy, browserCfg)
|
||||
|
||||
nf := novelfire.New(bc, log, urlClient)
|
||||
staticRoot := envOr("SCRAPER_STATIC_ROOT", "./static/books")
|
||||
w := writer.New(staticRoot)
|
||||
nf := novelfire.New(bc, log, urlClient, w)
|
||||
|
||||
workers := 0
|
||||
if s := os.Getenv("SCRAPER_WORKERS"); s != "" {
|
||||
@@ -103,7 +105,7 @@ func run(log *slog.Logger) error {
|
||||
|
||||
oCfg := orchestrator.Config{
|
||||
Workers: workers,
|
||||
StaticRoot: envOr("SCRAPER_STATIC_ROOT", "./static/books"),
|
||||
StaticRoot: staticRoot,
|
||||
}
|
||||
|
||||
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
@@ -2,54 +2,57 @@ package novelfire
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/libnovel/scraper/internal/browser"
|
||||
"github.com/libnovel/scraper/internal/scraper"
|
||||
"github.com/libnovel/scraper/internal/writer"
|
||||
)
|
||||
|
||||
// rankingPage1HTML is a realistic mock of novelfire.net/ranking?page=1.
|
||||
// It contains two novel-item entries and a "next" link for pagination tests.
|
||||
// rankingPage1HTML is a realistic mock of the popular genre listing page
|
||||
// (novelfire.net/genre-all/sort-popular/status-all/all-novel?page=1).
|
||||
// It contains two novel-item cards and a "next" link for pagination tests.
|
||||
func rankingPage1HTML() string {
|
||||
return `<!DOCTYPE html>
|
||||
<html><body>
|
||||
<ul class="rank-novels">
|
||||
<li class="novel-item">
|
||||
<figure class="cover"><a href="/book/the-iron-throne"><img data-src="/covers/iron-throne.jpg"></a></figure>
|
||||
<div class="list-novel">
|
||||
<div class="novel-item">
|
||||
<figure class="cover"><img src="/covers/iron-throne.jpg"></figure>
|
||||
<div class="item-body">
|
||||
<h2 class="title"><a href="/book/the-iron-throne">The Iron Throne</a></h2>
|
||||
<h3 class="novel-title"><a href="/book/the-iron-throne">The Iron Throne</a></h3>
|
||||
<span class="status">Ongoing</span>
|
||||
<div class="categories"><div class="scroll"><span>Fantasy</span><span>Action</span></div></div>
|
||||
<div class="genres"><a>Fantasy</a><a>Action</a></div>
|
||||
</div>
|
||||
</li>
|
||||
<li class="novel-item">
|
||||
<figure class="cover"><a href="/book/shadow-mage"><img data-src="/covers/shadow-mage.jpg"></a></figure>
|
||||
</div>
|
||||
<div class="novel-item">
|
||||
<figure class="cover"><img src="/covers/shadow-mage.jpg"></figure>
|
||||
<div class="item-body">
|
||||
<h2 class="title"><a href="/book/shadow-mage">Shadow Mage</a></h2>
|
||||
<h3 class="novel-title"><a href="/book/shadow-mage">Shadow Mage</a></h3>
|
||||
<span class="status">Completed</span>
|
||||
<div class="categories"><div class="scroll"><span>Magic</span></div></div>
|
||||
<div class="genres"><a>Magic</a></div>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
<a class="next" href="/ranking?page=2">Next</a>
|
||||
</div>
|
||||
</div>
|
||||
<a class="next" href="/genre-all/sort-popular/status-all/all-novel?page=2">Next</a>
|
||||
</body></html>`
|
||||
}
|
||||
|
||||
func rankingPage2HTML() string {
|
||||
return `<!DOCTYPE html>
|
||||
<html><body>
|
||||
<ul class="rank-novels">
|
||||
<li class="novel-item">
|
||||
<figure class="cover"><a href="/book/void-hunter"><img data-src="/covers/void-hunter.jpg"></a></figure>
|
||||
<div class="list-novel">
|
||||
<div class="novel-item">
|
||||
<figure class="cover"><img src="/covers/void-hunter.jpg"></figure>
|
||||
<div class="item-body">
|
||||
<h2 class="title"><a href="/book/void-hunter">Void Hunter</a></h2>
|
||||
<h3 class="novel-title"><a href="/book/void-hunter">Void Hunter</a></h3>
|
||||
<span class="status">Ongoing</span>
|
||||
<div class="categories"><div class="scroll"><span>Sci-Fi</span></div></div>
|
||||
<div class="genres"><a>Sci-Fi</a></div>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<!-- no .next link → last page -->
|
||||
</body></html>`
|
||||
}
|
||||
@@ -108,7 +111,7 @@ func TestScrapeRanking_MultiPage(t *testing.T) {
|
||||
// Use pagedStubClient for s.client so each GetContent call returns the
|
||||
// next page. ScrapeRanking now calls s.client directly.
|
||||
urlClient := &pagedStubClient{pages: []string{rankingPage1HTML(), rankingPage2HTML()}}
|
||||
s := New(urlClient, nil, nil) // urlClient == nil → falls back to client
|
||||
s := New(urlClient, nil, nil, nil) // nil cache — no disk I/O in tests
|
||||
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 0) // 0 = all pages
|
||||
entries := drainRanking(t, entryCh, errCh)
|
||||
@@ -132,8 +135,8 @@ func TestScrapeRanking_MultiPage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestScrapeRanking_EmptyPage verifies that a page with no .rank-novels
|
||||
// container produces zero entries and closes channels cleanly (no deadlock).
|
||||
// TestScrapeRanking_EmptyPage verifies that a page with no .novel-item
|
||||
// cards produces zero entries and closes channels cleanly (no deadlock).
|
||||
func TestScrapeRanking_EmptyPage(t *testing.T) {
|
||||
s := newScraper(`<!DOCTYPE html><html><body><div class="no-rankings"></div></body></html>`)
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
|
||||
@@ -185,3 +188,89 @@ func TestWriteRanking_RoundTrip(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── in-memory page cacher ─────────────────────────────────────────────────────

// memPageCacher implements RankingPageCacher on top of plain maps.
// Alongside the stored HTML it counts writes per page so tests can assert
// whether the cache was (or was not) repopulated.
type memPageCacher struct {
	pages  map[int]string
	writes map[int]int
}

// newMemPageCacher returns an empty, ready-to-use in-memory cacher.
func newMemPageCacher() *memPageCacher {
	m := &memPageCacher{}
	m.pages = map[int]string{}
	m.writes = map[int]int{}
	return m
}

// WriteRankingPageCache records html under page and bumps the per-page
// write counter. It never fails.
func (c *memPageCacher) WriteRankingPageCache(page int, html string) error {
	c.writes[page] = c.writes[page] + 1
	c.pages[page] = html
	return nil
}

// ReadRankingPageCache returns the stored HTML for page. A missing page
// yields the empty string with a nil error, which is the interface's
// contract for a cache miss.
func (c *memPageCacher) ReadRankingPageCache(page int) (string, error) {
	html := c.pages[page]
	return html, nil
}
|
||||
|
||||
var _ scraper.RankingPageCacher = (*memPageCacher)(nil) // compile-time check
|
||||
|
||||
// TestScrapeRanking_CacheHit verifies that when a page is already in the cache
|
||||
// ScrapeRanking serves from cache and does NOT call the browser client.
|
||||
func TestScrapeRanking_CacheHit(t *testing.T) {
|
||||
cache := newMemPageCacher()
|
||||
// Pre-populate the cache with page 1 HTML.
|
||||
if err := cache.WriteRankingPageCache(1, rankingPage1HTML()); err != nil {
|
||||
t.Fatalf("cache write: %v", err)
|
||||
}
|
||||
cache.writes[1] = 0 // reset write counter — we only care about fetches
|
||||
|
||||
// The stub client panics on any GetContent call so we can prove it is not used.
|
||||
panicClient := &panicOnGetContent{}
|
||||
s := New(panicClient, nil, panicClient, cache)
|
||||
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
|
||||
entries := drainRanking(t, entryCh, errCh)
|
||||
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("expected 2 entries from cache, got %d", len(entries))
|
||||
}
|
||||
// Cache should not have been written again (we served from cache).
|
||||
if cache.writes[1] != 0 {
|
||||
t.Errorf("expected 0 cache writes on a hit, got %d", cache.writes[1])
|
||||
}
|
||||
}
|
||||
|
||||
// TestScrapeRanking_CacheMiss verifies that on a cache miss the page is fetched
|
||||
// from the network and the result is written to the cache.
|
||||
func TestScrapeRanking_CacheMiss(t *testing.T) {
|
||||
cache := newMemPageCacher() // empty cache
|
||||
s := New(&stubClient{html: rankingPage1HTML()}, nil, nil, cache)
|
||||
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
|
||||
entries := drainRanking(t, entryCh, errCh)
|
||||
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("expected 2 entries, got %d", len(entries))
|
||||
}
|
||||
if cache.writes[1] != 1 {
|
||||
t.Errorf("expected 1 cache write on a miss, got %d", cache.writes[1])
|
||||
}
|
||||
if cache.pages[1] == "" {
|
||||
t.Error("expected page 1 to be stored in cache after miss")
|
||||
}
|
||||
}
|
||||
|
||||
// panicOnGetContent is a BrowserClient whose GetContent panics, letting tests
|
||||
// assert that it is never called (i.e. the cache was used instead).
|
||||
type panicOnGetContent struct{}
|
||||
|
||||
func (p *panicOnGetContent) Strategy() browser.Strategy { return browser.StrategyContent }
|
||||
func (p *panicOnGetContent) GetContent(_ context.Context, req browser.ContentRequest) (string, error) {
|
||||
panic(fmt.Sprintf("unexpected GetContent call for URL %s — should have been served from cache", req.URL))
|
||||
}
|
||||
func (p *panicOnGetContent) ScrapePage(_ context.Context, _ browser.ScrapeRequest) (browser.ScrapeResponse, error) {
|
||||
return browser.ScrapeResponse{}, nil
|
||||
}
|
||||
func (p *panicOnGetContent) CDPSession(_ context.Context, _ string, _ browser.CDPSessionFunc) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
const (
|
||||
baseURL = "https://novelfire.net"
|
||||
cataloguePath = "/genre-all/sort-new/status-all/all-novel"
|
||||
rankingPath = "/ranking"
|
||||
rankingPath = "/genre-all/sort-popular/status-all/all-novel"
|
||||
)
|
||||
|
||||
// rejectResourceTypes lists Browserless resource types to block on every request.
|
||||
@@ -53,20 +53,22 @@ var rejectResourceTypes = []string{
|
||||
type Scraper struct {
|
||||
client browser.BrowserClient
|
||||
urlClient browser.BrowserClient // separate client for URL retrieval (uses browserless content strategy)
|
||||
pageCache scraper.RankingPageCacher
|
||||
log *slog.Logger
|
||||
}
|
||||
|
||||
// New returns a new novelfire Scraper.
|
||||
// client is used for content fetching, urlClient is used for URL retrieval (chapter list).
|
||||
// If urlClient is nil, client will be used for both.
|
||||
func New(client browser.BrowserClient, log *slog.Logger, urlClient browser.BrowserClient) *Scraper {
|
||||
// pageCache is optional; pass nil to disable ranking page caching.
|
||||
func New(client browser.BrowserClient, log *slog.Logger, urlClient browser.BrowserClient, pageCache scraper.RankingPageCacher) *Scraper {
|
||||
if log == nil {
|
||||
log = slog.Default()
|
||||
}
|
||||
if urlClient == nil {
|
||||
urlClient = client
|
||||
}
|
||||
return &Scraper{client: client, urlClient: urlClient, log: log}
|
||||
return &Scraper{client: client, urlClient: urlClient, pageCache: pageCache, log: log}
|
||||
}
|
||||
|
||||
// SourceName implements NovelScraper.
|
||||
@@ -365,9 +367,9 @@ func (s *Scraper) ScrapeChapterList(ctx context.Context, bookURL string) ([]scra
|
||||
|
||||
// ─── RankingProvider ───────────────────────────────────────────────────────────
|
||||
|
||||
// ScrapeRanking pages through up to maxPages ranking pages on novelfire.net/ranking.
|
||||
// Pages are fetched one at a time, strictly sequentially: the next page is only
|
||||
// requested after every entry from the current page has been sent to the channel.
|
||||
// ScrapeRanking pages through up to maxPages pages of the popular-novels genre
|
||||
// listing on novelfire.net (/genre-all/sort-popular/status-all/all-novel).
|
||||
// Pages are fetched one at a time, strictly sequentially.
|
||||
// maxPages <= 0 means "fetch all pages until no more are found".
|
||||
func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scraper.BookMeta, <-chan error) {
|
||||
entries := make(chan scraper.BookMeta, 32)
|
||||
@@ -387,18 +389,40 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
}
|
||||
|
||||
pageURL := fmt.Sprintf("%s%s?page=%d", baseURL, rankingPath, page)
|
||||
s.log.Info("scraping ranking page", "page", page, "url", pageURL)
|
||||
|
||||
// The ranking page is fully server-rendered; a direct HTTP GET is
|
||||
// sufficient and avoids the Browserless round-trip overhead.
|
||||
raw, err := s.client.GetContent(ctx, browser.ContentRequest{
|
||||
// Try to serve from disk cache before hitting the network.
|
||||
var raw string
|
||||
if s.pageCache != nil {
|
||||
if cached, err := s.pageCache.ReadRankingPageCache(page); err != nil {
|
||||
s.log.Warn("ranking page cache read error", "page", page, "err", err)
|
||||
} else if cached != "" {
|
||||
s.log.Info("serving ranking page from cache", "page", page)
|
||||
raw = cached
|
||||
}
|
||||
}
|
||||
|
||||
if raw == "" {
|
||||
s.log.Info("scraping popular ranking page", "page", page, "url", pageURL)
|
||||
fetched, err := s.client.GetContent(ctx, browser.ContentRequest{
|
||||
URL: pageURL,
|
||||
WaitFor: &browser.WaitForSelector{Selector: ".novel-item", Timeout: 5000},
|
||||
RejectResourceTypes: rejectResourceTypes,
|
||||
GotoOptions: &browser.GotoOptions{Timeout: 60000},
|
||||
})
|
||||
if err != nil {
|
||||
s.log.Debug("ranking page fetch failed", "page", page, "url", pageURL, "err", err)
|
||||
errs <- fmt.Errorf("ranking page %d: %w", page, err)
|
||||
return
|
||||
}
|
||||
raw = fetched
|
||||
|
||||
// Persist to cache for future runs.
|
||||
if s.pageCache != nil {
|
||||
if werr := s.pageCache.WriteRankingPageCache(page, raw); werr != nil {
|
||||
s.log.Warn("ranking page cache write error", "page", page, "err", werr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
root, err := htmlutil.ParseHTML(raw)
|
||||
if err != nil {
|
||||
@@ -406,48 +430,48 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
return
|
||||
}
|
||||
|
||||
rankList := htmlutil.FindFirst(root, scraper.Selector{Class: "rank-novels"})
|
||||
if rankList == nil {
|
||||
s.log.Debug("rank-novels container not found, stopping pagination", "page", page)
|
||||
// Genre listing uses div.novel-item cards (same structure as catalogue).
|
||||
cards := htmlutil.FindAll(root, scraper.Selector{Tag: "div", Class: "novel-item", Multiple: true})
|
||||
if len(cards) == 0 {
|
||||
s.log.Debug("no novel cards found, stopping pagination", "page", page)
|
||||
break
|
||||
}
|
||||
|
||||
items := htmlutil.FindAll(rankList, scraper.Selector{Tag: "li", Class: "novel-item"})
|
||||
if len(items) == 0 {
|
||||
s.log.Debug("no ranking items on page, stopping pagination", "page", page)
|
||||
break
|
||||
}
|
||||
|
||||
for _, item := range items {
|
||||
// Cover: <figure class="cover"><a href="/book/slug"><img data-src="..."></a></figure>
|
||||
for _, card := range cards {
|
||||
// Cover: <figure class="cover"><img src="..." or data-src="...">
|
||||
var cover string
|
||||
if fig := htmlutil.FindFirst(item, scraper.Selector{Tag: "figure", Class: "cover"}); fig != nil {
|
||||
if fig := htmlutil.FindFirst(card, scraper.Selector{Tag: "figure", Class: "cover"}); fig != nil {
|
||||
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "src"})
|
||||
if cover == "" {
|
||||
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "data-src"})
|
||||
if cover != "" {
|
||||
}
|
||||
if cover != "" && !strings.HasPrefix(cover, "http") {
|
||||
cover = baseURL + cover
|
||||
}
|
||||
}
|
||||
|
||||
// Title and URL: <h2 class="title"><a href="/book/slug">Title</a></h2>
|
||||
titleNode := htmlutil.FindFirst(item, scraper.Selector{Tag: "h2", Class: "title"})
|
||||
// Title and URL: <h3 class="novel-title"><a href="/book/slug">Title</a></h3>
|
||||
titleNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "h3", Class: "novel-title"})
|
||||
var title, bookURL string
|
||||
if titleNode != nil {
|
||||
linkNode := htmlutil.FindFirst(titleNode, scraper.Selector{Tag: "a"})
|
||||
linkNode := htmlutil.FindFirst(titleNode, scraper.Selector{Tag: "a", Attr: "href"})
|
||||
if linkNode != nil {
|
||||
title = htmlutil.ExtractText(linkNode, scraper.Selector{})
|
||||
href := htmlutil.ExtractText(linkNode, scraper.Selector{Attr: "href"})
|
||||
href := htmlutil.ExtractText(linkNode, scraper.Selector{Tag: "a", Attr: "href"})
|
||||
bookURL = resolveURL(baseURL, href)
|
||||
}
|
||||
}
|
||||
if title == "" || bookURL == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Status: <span class="status"> Ongoing/Completed </span>
|
||||
status := htmlutil.ExtractFirst(item, scraper.Selector{Tag: "span", Class: "status"})
|
||||
// Status: <span class="status">Ongoing</span>
|
||||
status := strings.TrimSpace(htmlutil.ExtractFirst(card, scraper.Selector{Tag: "span", Class: "status"}))
|
||||
|
||||
// Genres: <div class="categories"><div class="scroll"><span>Genre1</span>...</div></div>
|
||||
// Genres: <div class="genres"><a>Genre</a>...
|
||||
var genres []string
|
||||
categoriesNode := htmlutil.FindFirst(item, scraper.Selector{Tag: "div", Class: "categories"})
|
||||
if categoriesNode != nil {
|
||||
genres = htmlutil.ExtractAll(categoriesNode, scraper.Selector{Tag: "span", Multiple: true})
|
||||
if genresNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "div", Class: "genres"}); genresNode != nil {
|
||||
genres = htmlutil.ExtractAll(genresNode, scraper.Selector{Tag: "a", Multiple: true})
|
||||
}
|
||||
|
||||
slug := slugFromURL(bookURL)
|
||||
@@ -456,7 +480,7 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
Slug: slug,
|
||||
Title: title,
|
||||
Cover: cover,
|
||||
Status: strings.TrimSpace(status),
|
||||
Status: status,
|
||||
Genres: genres,
|
||||
SourceURL: bookURL,
|
||||
Ranking: rank,
|
||||
@@ -470,7 +494,7 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
}
|
||||
}
|
||||
|
||||
// Stop if no next-page link exists (natural end of ranking list).
|
||||
// Stop if no next-page link exists (natural end of listing).
|
||||
nextHref := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "a", Class: "next", Attr: "href"})
|
||||
if nextHref == "" {
|
||||
s.log.Debug("no next-page link found, stopping pagination", "page", page)
|
||||
|
||||
@@ -62,12 +62,12 @@ func (c *pagedStubClient) CDPSession(_ context.Context, _ string, _ browser.CDPS
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
func newScraper(html string) *Scraper {
|
||||
return New(&stubClient{html: html}, nil, &stubClient{html: html})
|
||||
return New(&stubClient{html: html}, nil, &stubClient{html: html}, nil)
|
||||
}
|
||||
|
||||
func newPagedScraper(pages ...string) *Scraper {
|
||||
urlClient := &pagedStubClient{pages: pages}
|
||||
return New(&stubClient{}, nil, urlClient)
|
||||
return New(&stubClient{}, nil, urlClient, nil)
|
||||
}
|
||||
|
||||
// ── ScrapeChapterText ─────────────────────────────────────────────────────────
|
||||
|
||||
@@ -120,6 +120,16 @@ type RankingProvider interface {
|
||||
ScrapeRanking(ctx context.Context, maxPages int) (<-chan BookMeta, <-chan error)
|
||||
}
|
||||
|
||||
// RankingPageCacher persists and retrieves the raw HTML of individual ranking
// pages. Implementations (e.g. writer.Writer) keep the files on disk so a
// later ScrapeRanking run can reuse a page without a network round-trip.
type RankingPageCacher interface {
	// WriteRankingPageCache stores the raw HTML string for page.
	WriteRankingPageCache(page int, html string) error
	// ReadRankingPageCache returns the cached HTML for page; a miss is
	// reported as ("", nil), not as an error.
	ReadRankingPageCache(page int) (string, error)
}
|
||||
|
||||
// NovelScraper is the full interface that a concrete novel source must implement.
|
||||
// It composes all four provider interfaces.
|
||||
type NovelScraper interface {
|
||||
|
||||
@@ -546,25 +546,31 @@ const rankingTmpl = `
|
||||
</div>
|
||||
|
||||
<!-- Pagination: fetch pages from novelfire -->
|
||||
<div class="flex items-center gap-2 mb-6 flex-wrap">
|
||||
<span class="text-xs text-zinc-500">Fetch pages:</span>
|
||||
{{range .Pages}}
|
||||
<div class="mb-6">
|
||||
<div class="flex items-center gap-1.5 mb-2 flex-wrap">
|
||||
<span class="text-xs text-zinc-500 mr-1">Fetch up to page:</span>
|
||||
{{range .PageNums}}
|
||||
{{if eq .Num 0}}
|
||||
<span class="text-xs text-zinc-600 px-1 select-none">…</span>
|
||||
{{else}}
|
||||
<form hx-post="/ranking/refresh"
|
||||
hx-target="#ranking-refresh-status"
|
||||
hx-swap="innerHTML">
|
||||
<input type="hidden" name="pages" value="{{.Pages}}">
|
||||
<input type="hidden" name="pages" value="{{.Num}}">
|
||||
<button type="submit"
|
||||
class="text-xs px-2.5 py-1 rounded-lg bg-zinc-800 hover:bg-amber-700 border border-zinc-700 hover:border-amber-600 text-zinc-300 hover:text-white transition-colors">
|
||||
{{.Label}}
|
||||
class="text-xs w-8 h-7 rounded-lg bg-zinc-800 hover:bg-amber-700 border border-zinc-700 hover:border-amber-600 text-zinc-300 hover:text-white transition-colors text-center">
|
||||
{{.Num}}
|
||||
</button>
|
||||
</form>
|
||||
{{end}}
|
||||
<span class="text-xs text-zinc-600 ml-1">
|
||||
(source:
|
||||
{{end}}
|
||||
</div>
|
||||
<p class="text-xs text-zinc-600">
|
||||
Each page = ~20 novels from
|
||||
<a href="https://novelfire.net/genre-all/sort-popular/status-all/all-novel?page=1"
|
||||
target="_blank" rel="noopener noreferrer"
|
||||
class="text-zinc-500 hover:text-amber-400 underline underline-offset-2">novelfire.net</a>)
|
||||
</span>
|
||||
class="text-zinc-500 hover:text-amber-400 underline underline-offset-2">novelfire.net popular</a>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- Book grid -->
|
||||
@@ -693,21 +699,70 @@ func toRankingViewItems(items []writer.RankingItem, localSlugs map[string]bool)
|
||||
return out
|
||||
}
|
||||
|
||||
// pageNum is one entry in the ranking pagination bar.
// Num == 0 is a sentinel that renders as an ellipsis gap.
type pageNum struct {
	Num int
}

// rankingPageNums builds the dynamic pagination list for up to total pages
// with smart ellipsis: the first 3 and last 3 pages are always shown, and for
// lists longer than 12 pages a window of 3 pages around the 1/4 and 3/4 marks
// is added — runs are separated by "…" sentinels (Num == 0).
//
// For a flat 100-page list it produces:
//
//	1 2 3 … 24 25 26 … 74 75 76 … 98 99 100
//
// (The middle range is omitted intentionally; users click individual pages.)
func rankingPageNums(total int) []pageNum {
	if total <= 0 {
		return nil
	}

	// Visible page numbers, deduplicated via a set.
	show := make(map[int]bool)
	// Always show the first 3 and last 3 pages (clamped to the valid range).
	for i := 1; i <= 3 && i <= total; i++ {
		show[i] = true
	}
	for i := total - 2; i <= total; i++ {
		if i >= 1 {
			show[i] = true
		}
	}
	// For large ranges, surface a few pages near the 1/4 and 3/4 marks.
	if total > 12 {
		q1 := total / 4
		q3 := total * 3 / 4
		for _, p := range []int{q1 - 1, q1, q1 + 1, q3 - 1, q3, q3 + 1} {
			if p >= 1 && p <= total {
				show[p] = true
			}
		}
	}

	// Collect and sort the visible pages. The set holds at most a dozen
	// entries, so a simple insertion sort avoids pulling in a sort import.
	pages := make([]int, 0, len(show))
	for p := range show {
		pages = append(pages, p)
	}
	for i := 1; i < len(pages); i++ {
		for j := i; j > 0 && pages[j] < pages[j-1]; j-- {
			pages[j], pages[j-1] = pages[j-1], pages[j]
		}
	}

	// Emit pages in order, inserting an ellipsis sentinel (Num == 0)
	// wherever two consecutive visible pages are not adjacent.
	out := make([]pageNum, 0, len(pages)*2)
	for i, p := range pages {
		if i > 0 && p > pages[i-1]+1 {
			out = append(out, pageNum{0})
		}
		out = append(out, pageNum{p})
	}
	return out
}
|
||||
|
||||
// handleRanking serves the ranking page from the cached ranking.md file.
|
||||
@@ -728,11 +783,11 @@ func (s *Server) handleRanking(w http.ResponseWriter, r *http.Request) {
|
||||
_ = t.Execute(&buf, struct {
|
||||
Books interface{}
|
||||
CachedAt string
|
||||
Pages []refreshPage
|
||||
PageNums []pageNum
|
||||
}{
|
||||
Books: toRankingViewItems(rankingItems, s.writer.LocalSlugs()),
|
||||
CachedAt: cachedAt,
|
||||
Pages: rankingRefreshPages,
|
||||
PageNums: rankingPageNums(100),
|
||||
})
|
||||
s.respond(w, r, "Rankings", buf.String())
|
||||
}
|
||||
|
||||
@@ -403,6 +403,57 @@ func (w *Writer) rankingPath() string {
|
||||
return filepath.Join(w.root, "ranking.md")
|
||||
}
|
||||
|
||||
// ─── Ranking page HTML cache ──────────────────────────────────────────────────
|
||||
|
||||
// rankingCacheDir returns the directory that stores per-page HTML caches.
|
||||
func (w *Writer) rankingCacheDir() string {
|
||||
return filepath.Join(w.root, "_ranking_cache")
|
||||
}
|
||||
|
||||
// rankingPageCachePath returns the path for a cached ranking page HTML file.
|
||||
func (w *Writer) rankingPageCachePath(page int) string {
|
||||
return filepath.Join(w.rankingCacheDir(), fmt.Sprintf("page-%d.html", page))
|
||||
}
|
||||
|
||||
// WriteRankingPageCache persists raw HTML for the given ranking page number.
|
||||
func (w *Writer) WriteRankingPageCache(page int, html string) error {
|
||||
dir := w.rankingCacheDir()
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return fmt.Errorf("writer: mkdir ranking cache %s: %w", dir, err)
|
||||
}
|
||||
path := w.rankingPageCachePath(page)
|
||||
if err := os.WriteFile(path, []byte(html), 0o644); err != nil {
|
||||
return fmt.Errorf("writer: write ranking page cache %s: %w", path, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReadRankingPageCache reads the cached HTML for the given ranking page.
|
||||
// Returns ("", nil) when no cache file exists yet.
|
||||
func (w *Writer) ReadRankingPageCache(page int) (string, error) {
|
||||
data, err := os.ReadFile(w.rankingPageCachePath(page))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return "", nil
|
||||
}
|
||||
return "", fmt.Errorf("writer: read ranking page cache page %d: %w", page, err)
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
// RankingPageCacheInfo returns os.FileInfo for a cached ranking page file.
|
||||
// Returns (nil, nil) when the file does not exist.
|
||||
func (w *Writer) RankingPageCacheInfo(page int) (os.FileInfo, error) {
|
||||
info, err := os.Stat(w.rankingPageCachePath(page))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// bookDir returns the root directory for a book slug.
|
||||
func (w *Writer) bookDir(slug string) string {
|
||||
return filepath.Join(w.root, slug)
|
||||
|
||||
Reference in New Issue
Block a user