feat: ranking page pagination, popular URL, and per-page HTML disk cache
- Switch ScrapeRanking to novelfire.net/genre-all/sort-popular URL and updated DOM selectors (div.novel-item, h3.novel-title, div.genres)
- Replace 5 hardcoded refresh buttons with dynamic 100-page paginator (smart ellipsis via rankingPageNums)
- Add RankingPageCacher interface and writer methods to cache raw HTML per page under static/books/_ranking_cache/page-N.html
- ScrapeRanking serves from disk cache on hit and writes to cache on miss, skipping Browserless round-trip
- Thread writer as PageCacher through novelfire.New and main.go
- Add TestScrapeRanking_CacheHit and TestScrapeRanking_CacheMiss tests
This commit is contained in:
@@ -88,7 +88,9 @@ func run(log *slog.Logger) error {
|
||||
bc := newBrowserClient(strategy, browserCfg)
|
||||
urlClient := newBrowserClient(urlStrategy, browserCfg)
|
||||
|
||||
nf := novelfire.New(bc, log, urlClient)
|
||||
staticRoot := envOr("SCRAPER_STATIC_ROOT", "./static/books")
|
||||
w := writer.New(staticRoot)
|
||||
nf := novelfire.New(bc, log, urlClient, w)
|
||||
|
||||
workers := 0
|
||||
if s := os.Getenv("SCRAPER_WORKERS"); s != "" {
|
||||
@@ -103,7 +105,7 @@ func run(log *slog.Logger) error {
|
||||
|
||||
oCfg := orchestrator.Config{
|
||||
Workers: workers,
|
||||
StaticRoot: envOr("SCRAPER_STATIC_ROOT", "./static/books"),
|
||||
StaticRoot: staticRoot,
|
||||
}
|
||||
|
||||
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
@@ -2,54 +2,57 @@ package novelfire
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/libnovel/scraper/internal/browser"
|
||||
"github.com/libnovel/scraper/internal/scraper"
|
||||
"github.com/libnovel/scraper/internal/writer"
|
||||
)
|
||||
|
||||
// rankingPage1HTML is a realistic mock of novelfire.net/ranking?page=1.
|
||||
// It contains two novel-item entries and a "next" link for pagination tests.
|
||||
// rankingPage1HTML is a realistic mock of the popular genre listing page
|
||||
// (novelfire.net/genre-all/sort-popular/status-all/all-novel?page=1).
|
||||
// It contains two novel-item cards and a "next" link for pagination tests.
|
||||
func rankingPage1HTML() string {
|
||||
return `<!DOCTYPE html>
|
||||
<html><body>
|
||||
<ul class="rank-novels">
|
||||
<li class="novel-item">
|
||||
<figure class="cover"><a href="/book/the-iron-throne"><img data-src="/covers/iron-throne.jpg"></a></figure>
|
||||
<div class="list-novel">
|
||||
<div class="novel-item">
|
||||
<figure class="cover"><img src="/covers/iron-throne.jpg"></figure>
|
||||
<div class="item-body">
|
||||
<h2 class="title"><a href="/book/the-iron-throne">The Iron Throne</a></h2>
|
||||
<h3 class="novel-title"><a href="/book/the-iron-throne">The Iron Throne</a></h3>
|
||||
<span class="status">Ongoing</span>
|
||||
<div class="categories"><div class="scroll"><span>Fantasy</span><span>Action</span></div></div>
|
||||
<div class="genres"><a>Fantasy</a><a>Action</a></div>
|
||||
</div>
|
||||
</li>
|
||||
<li class="novel-item">
|
||||
<figure class="cover"><a href="/book/shadow-mage"><img data-src="/covers/shadow-mage.jpg"></a></figure>
|
||||
</div>
|
||||
<div class="novel-item">
|
||||
<figure class="cover"><img src="/covers/shadow-mage.jpg"></figure>
|
||||
<div class="item-body">
|
||||
<h2 class="title"><a href="/book/shadow-mage">Shadow Mage</a></h2>
|
||||
<h3 class="novel-title"><a href="/book/shadow-mage">Shadow Mage</a></h3>
|
||||
<span class="status">Completed</span>
|
||||
<div class="categories"><div class="scroll"><span>Magic</span></div></div>
|
||||
<div class="genres"><a>Magic</a></div>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
<a class="next" href="/ranking?page=2">Next</a>
|
||||
</div>
|
||||
</div>
|
||||
<a class="next" href="/genre-all/sort-popular/status-all/all-novel?page=2">Next</a>
|
||||
</body></html>`
|
||||
}
|
||||
|
||||
func rankingPage2HTML() string {
|
||||
return `<!DOCTYPE html>
|
||||
<html><body>
|
||||
<ul class="rank-novels">
|
||||
<li class="novel-item">
|
||||
<figure class="cover"><a href="/book/void-hunter"><img data-src="/covers/void-hunter.jpg"></a></figure>
|
||||
<div class="list-novel">
|
||||
<div class="novel-item">
|
||||
<figure class="cover"><img src="/covers/void-hunter.jpg"></figure>
|
||||
<div class="item-body">
|
||||
<h2 class="title"><a href="/book/void-hunter">Void Hunter</a></h2>
|
||||
<h3 class="novel-title"><a href="/book/void-hunter">Void Hunter</a></h3>
|
||||
<span class="status">Ongoing</span>
|
||||
<div class="categories"><div class="scroll"><span>Sci-Fi</span></div></div>
|
||||
<div class="genres"><a>Sci-Fi</a></div>
|
||||
</div>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<!-- no .next link → last page -->
|
||||
</body></html>`
|
||||
}
|
||||
@@ -108,7 +111,7 @@ func TestScrapeRanking_MultiPage(t *testing.T) {
|
||||
// Use pagedStubClient for s.client so each GetContent call returns the
|
||||
// next page. ScrapeRanking now calls s.client directly.
|
||||
urlClient := &pagedStubClient{pages: []string{rankingPage1HTML(), rankingPage2HTML()}}
|
||||
s := New(urlClient, nil, nil) // urlClient == nil → falls back to client
|
||||
s := New(urlClient, nil, nil, nil) // nil cache — no disk I/O in tests
|
||||
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 0) // 0 = all pages
|
||||
entries := drainRanking(t, entryCh, errCh)
|
||||
@@ -132,8 +135,8 @@ func TestScrapeRanking_MultiPage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestScrapeRanking_EmptyPage verifies that a page with no .rank-novels
|
||||
// container produces zero entries and closes channels cleanly (no deadlock).
|
||||
// TestScrapeRanking_EmptyPage verifies that a page with no .novel-item
|
||||
// cards produces zero entries and closes channels cleanly (no deadlock).
|
||||
func TestScrapeRanking_EmptyPage(t *testing.T) {
|
||||
s := newScraper(`<!DOCTYPE html><html><body><div class="no-rankings"></div></body></html>`)
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
|
||||
@@ -185,3 +188,89 @@ func TestWriteRanking_RoundTrip(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── in-memory page cacher ─────────────────────────────────────────────────────

// memPageCacher implements RankingPageCacher on top of plain maps.
// Alongside the stored HTML it counts writes per page so tests can assert
// whether the cache was (or was not) repopulated.
type memPageCacher struct {
	pages  map[int]string
	writes map[int]int
}

// newMemPageCacher returns an empty, ready-to-use in-memory cacher.
func newMemPageCacher() *memPageCacher {
	m := &memPageCacher{}
	m.pages = map[int]string{}
	m.writes = map[int]int{}
	return m
}

// WriteRankingPageCache records html under page and bumps the per-page
// write counter. It never fails.
func (c *memPageCacher) WriteRankingPageCache(page int, html string) error {
	c.writes[page] = c.writes[page] + 1
	c.pages[page] = html
	return nil
}

// ReadRankingPageCache returns the stored HTML for page. A missing page
// yields the empty string with a nil error, which is the interface's
// contract for a cache miss.
func (c *memPageCacher) ReadRankingPageCache(page int) (string, error) {
	html := c.pages[page]
	return html, nil
}
|
||||
|
||||
var _ scraper.RankingPageCacher = (*memPageCacher)(nil) // compile-time check
|
||||
|
||||
// TestScrapeRanking_CacheHit verifies that when a page is already in the cache
|
||||
// ScrapeRanking serves from cache and does NOT call the browser client.
|
||||
func TestScrapeRanking_CacheHit(t *testing.T) {
|
||||
cache := newMemPageCacher()
|
||||
// Pre-populate the cache with page 1 HTML.
|
||||
if err := cache.WriteRankingPageCache(1, rankingPage1HTML()); err != nil {
|
||||
t.Fatalf("cache write: %v", err)
|
||||
}
|
||||
cache.writes[1] = 0 // reset write counter — we only care about fetches
|
||||
|
||||
// The stub client panics on any GetContent call so we can prove it is not used.
|
||||
panicClient := &panicOnGetContent{}
|
||||
s := New(panicClient, nil, panicClient, cache)
|
||||
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
|
||||
entries := drainRanking(t, entryCh, errCh)
|
||||
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("expected 2 entries from cache, got %d", len(entries))
|
||||
}
|
||||
// Cache should not have been written again (we served from cache).
|
||||
if cache.writes[1] != 0 {
|
||||
t.Errorf("expected 0 cache writes on a hit, got %d", cache.writes[1])
|
||||
}
|
||||
}
|
||||
|
||||
// TestScrapeRanking_CacheMiss verifies that on a cache miss the page is fetched
|
||||
// from the network and the result is written to the cache.
|
||||
func TestScrapeRanking_CacheMiss(t *testing.T) {
|
||||
cache := newMemPageCacher() // empty cache
|
||||
s := New(&stubClient{html: rankingPage1HTML()}, nil, nil, cache)
|
||||
|
||||
entryCh, errCh := s.ScrapeRanking(context.Background(), 1)
|
||||
entries := drainRanking(t, entryCh, errCh)
|
||||
|
||||
if len(entries) != 2 {
|
||||
t.Fatalf("expected 2 entries, got %d", len(entries))
|
||||
}
|
||||
if cache.writes[1] != 1 {
|
||||
t.Errorf("expected 1 cache write on a miss, got %d", cache.writes[1])
|
||||
}
|
||||
if cache.pages[1] == "" {
|
||||
t.Error("expected page 1 to be stored in cache after miss")
|
||||
}
|
||||
}
|
||||
|
||||
// panicOnGetContent is a BrowserClient whose GetContent panics, letting tests
|
||||
// assert that it is never called (i.e. the cache was used instead).
|
||||
type panicOnGetContent struct{}
|
||||
|
||||
func (p *panicOnGetContent) Strategy() browser.Strategy { return browser.StrategyContent }
|
||||
func (p *panicOnGetContent) GetContent(_ context.Context, req browser.ContentRequest) (string, error) {
|
||||
panic(fmt.Sprintf("unexpected GetContent call for URL %s — should have been served from cache", req.URL))
|
||||
}
|
||||
func (p *panicOnGetContent) ScrapePage(_ context.Context, _ browser.ScrapeRequest) (browser.ScrapeResponse, error) {
|
||||
return browser.ScrapeResponse{}, nil
|
||||
}
|
||||
func (p *panicOnGetContent) CDPSession(_ context.Context, _ string, _ browser.CDPSessionFunc) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
const (
|
||||
baseURL = "https://novelfire.net"
|
||||
cataloguePath = "/genre-all/sort-new/status-all/all-novel"
|
||||
rankingPath = "/ranking"
|
||||
rankingPath = "/genre-all/sort-popular/status-all/all-novel"
|
||||
)
|
||||
|
||||
// rejectResourceTypes lists Browserless resource types to block on every request.
|
||||
@@ -53,20 +53,22 @@ var rejectResourceTypes = []string{
|
||||
type Scraper struct {
|
||||
client browser.BrowserClient
|
||||
urlClient browser.BrowserClient // separate client for URL retrieval (uses browserless content strategy)
|
||||
pageCache scraper.RankingPageCacher
|
||||
log *slog.Logger
|
||||
}
|
||||
|
||||
// New returns a new novelfire Scraper.
|
||||
// client is used for content fetching, urlClient is used for URL retrieval (chapter list).
|
||||
// If urlClient is nil, client will be used for both.
|
||||
func New(client browser.BrowserClient, log *slog.Logger, urlClient browser.BrowserClient) *Scraper {
|
||||
// pageCache is optional; pass nil to disable ranking page caching.
|
||||
func New(client browser.BrowserClient, log *slog.Logger, urlClient browser.BrowserClient, pageCache scraper.RankingPageCacher) *Scraper {
|
||||
if log == nil {
|
||||
log = slog.Default()
|
||||
}
|
||||
if urlClient == nil {
|
||||
urlClient = client
|
||||
}
|
||||
return &Scraper{client: client, urlClient: urlClient, log: log}
|
||||
return &Scraper{client: client, urlClient: urlClient, pageCache: pageCache, log: log}
|
||||
}
|
||||
|
||||
// SourceName implements NovelScraper.
|
||||
@@ -365,9 +367,9 @@ func (s *Scraper) ScrapeChapterList(ctx context.Context, bookURL string) ([]scra
|
||||
|
||||
// ─── RankingProvider ───────────────────────────────────────────────────────────
|
||||
|
||||
// ScrapeRanking pages through up to maxPages ranking pages on novelfire.net/ranking.
|
||||
// Pages are fetched one at a time, strictly sequentially: the next page is only
|
||||
// requested after every entry from the current page has been sent to the channel.
|
||||
// ScrapeRanking pages through up to maxPages pages of the popular-novels genre
|
||||
// listing on novelfire.net (/genre-all/sort-popular/status-all/all-novel).
|
||||
// Pages are fetched one at a time, strictly sequentially.
|
||||
// maxPages <= 0 means "fetch all pages until no more are found".
|
||||
func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scraper.BookMeta, <-chan error) {
|
||||
entries := make(chan scraper.BookMeta, 32)
|
||||
@@ -387,18 +389,40 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
}
|
||||
|
||||
pageURL := fmt.Sprintf("%s%s?page=%d", baseURL, rankingPath, page)
|
||||
s.log.Info("scraping ranking page", "page", page, "url", pageURL)
|
||||
|
||||
// The ranking page is fully server-rendered; a direct HTTP GET is
|
||||
// sufficient and avoids the Browserless round-trip overhead.
|
||||
raw, err := s.client.GetContent(ctx, browser.ContentRequest{
|
||||
// Try to serve from disk cache before hitting the network.
|
||||
var raw string
|
||||
if s.pageCache != nil {
|
||||
if cached, err := s.pageCache.ReadRankingPageCache(page); err != nil {
|
||||
s.log.Warn("ranking page cache read error", "page", page, "err", err)
|
||||
} else if cached != "" {
|
||||
s.log.Info("serving ranking page from cache", "page", page)
|
||||
raw = cached
|
||||
}
|
||||
}
|
||||
|
||||
if raw == "" {
|
||||
s.log.Info("scraping popular ranking page", "page", page, "url", pageURL)
|
||||
fetched, err := s.client.GetContent(ctx, browser.ContentRequest{
|
||||
URL: pageURL,
|
||||
WaitFor: &browser.WaitForSelector{Selector: ".novel-item", Timeout: 5000},
|
||||
RejectResourceTypes: rejectResourceTypes,
|
||||
GotoOptions: &browser.GotoOptions{Timeout: 60000},
|
||||
})
|
||||
if err != nil {
|
||||
s.log.Debug("ranking page fetch failed", "page", page, "url", pageURL, "err", err)
|
||||
errs <- fmt.Errorf("ranking page %d: %w", page, err)
|
||||
return
|
||||
}
|
||||
raw = fetched
|
||||
|
||||
// Persist to cache for future runs.
|
||||
if s.pageCache != nil {
|
||||
if werr := s.pageCache.WriteRankingPageCache(page, raw); werr != nil {
|
||||
s.log.Warn("ranking page cache write error", "page", page, "err", werr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
root, err := htmlutil.ParseHTML(raw)
|
||||
if err != nil {
|
||||
@@ -406,48 +430,48 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
return
|
||||
}
|
||||
|
||||
rankList := htmlutil.FindFirst(root, scraper.Selector{Class: "rank-novels"})
|
||||
if rankList == nil {
|
||||
s.log.Debug("rank-novels container not found, stopping pagination", "page", page)
|
||||
// Genre listing uses div.novel-item cards (same structure as catalogue).
|
||||
cards := htmlutil.FindAll(root, scraper.Selector{Tag: "div", Class: "novel-item", Multiple: true})
|
||||
if len(cards) == 0 {
|
||||
s.log.Debug("no novel cards found, stopping pagination", "page", page)
|
||||
break
|
||||
}
|
||||
|
||||
items := htmlutil.FindAll(rankList, scraper.Selector{Tag: "li", Class: "novel-item"})
|
||||
if len(items) == 0 {
|
||||
s.log.Debug("no ranking items on page, stopping pagination", "page", page)
|
||||
break
|
||||
}
|
||||
|
||||
for _, item := range items {
|
||||
// Cover: <figure class="cover"><a href="/book/slug"><img data-src="..."></a></figure>
|
||||
for _, card := range cards {
|
||||
// Cover: <figure class="cover"><img src="..." or data-src="...">
|
||||
var cover string
|
||||
if fig := htmlutil.FindFirst(item, scraper.Selector{Tag: "figure", Class: "cover"}); fig != nil {
|
||||
if fig := htmlutil.FindFirst(card, scraper.Selector{Tag: "figure", Class: "cover"}); fig != nil {
|
||||
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "src"})
|
||||
if cover == "" {
|
||||
cover = htmlutil.ExtractFirst(fig, scraper.Selector{Tag: "img", Attr: "data-src"})
|
||||
if cover != "" {
|
||||
}
|
||||
if cover != "" && !strings.HasPrefix(cover, "http") {
|
||||
cover = baseURL + cover
|
||||
}
|
||||
}
|
||||
|
||||
// Title and URL: <h2 class="title"><a href="/book/slug">Title</a></h2>
|
||||
titleNode := htmlutil.FindFirst(item, scraper.Selector{Tag: "h2", Class: "title"})
|
||||
// Title and URL: <h3 class="novel-title"><a href="/book/slug">Title</a></h3>
|
||||
titleNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "h3", Class: "novel-title"})
|
||||
var title, bookURL string
|
||||
if titleNode != nil {
|
||||
linkNode := htmlutil.FindFirst(titleNode, scraper.Selector{Tag: "a"})
|
||||
linkNode := htmlutil.FindFirst(titleNode, scraper.Selector{Tag: "a", Attr: "href"})
|
||||
if linkNode != nil {
|
||||
title = htmlutil.ExtractText(linkNode, scraper.Selector{})
|
||||
href := htmlutil.ExtractText(linkNode, scraper.Selector{Attr: "href"})
|
||||
href := htmlutil.ExtractText(linkNode, scraper.Selector{Tag: "a", Attr: "href"})
|
||||
bookURL = resolveURL(baseURL, href)
|
||||
}
|
||||
}
|
||||
if title == "" || bookURL == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Status: <span class="status"> Ongoing/Completed </span>
|
||||
status := htmlutil.ExtractFirst(item, scraper.Selector{Tag: "span", Class: "status"})
|
||||
// Status: <span class="status">Ongoing</span>
|
||||
status := strings.TrimSpace(htmlutil.ExtractFirst(card, scraper.Selector{Tag: "span", Class: "status"}))
|
||||
|
||||
// Genres: <div class="categories"><div class="scroll"><span>Genre1</span>...</div></div>
|
||||
// Genres: <div class="genres"><a>Genre</a>...
|
||||
var genres []string
|
||||
categoriesNode := htmlutil.FindFirst(item, scraper.Selector{Tag: "div", Class: "categories"})
|
||||
if categoriesNode != nil {
|
||||
genres = htmlutil.ExtractAll(categoriesNode, scraper.Selector{Tag: "span", Multiple: true})
|
||||
if genresNode := htmlutil.FindFirst(card, scraper.Selector{Tag: "div", Class: "genres"}); genresNode != nil {
|
||||
genres = htmlutil.ExtractAll(genresNode, scraper.Selector{Tag: "a", Multiple: true})
|
||||
}
|
||||
|
||||
slug := slugFromURL(bookURL)
|
||||
@@ -456,7 +480,7 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
Slug: slug,
|
||||
Title: title,
|
||||
Cover: cover,
|
||||
Status: strings.TrimSpace(status),
|
||||
Status: status,
|
||||
Genres: genres,
|
||||
SourceURL: bookURL,
|
||||
Ranking: rank,
|
||||
@@ -470,7 +494,7 @@ func (s *Scraper) ScrapeRanking(ctx context.Context, maxPages int) (<-chan scrap
|
||||
}
|
||||
}
|
||||
|
||||
// Stop if no next-page link exists (natural end of ranking list).
|
||||
// Stop if no next-page link exists (natural end of listing).
|
||||
nextHref := htmlutil.ExtractFirst(root, scraper.Selector{Tag: "a", Class: "next", Attr: "href"})
|
||||
if nextHref == "" {
|
||||
s.log.Debug("no next-page link found, stopping pagination", "page", page)
|
||||
|
||||
@@ -62,12 +62,12 @@ func (c *pagedStubClient) CDPSession(_ context.Context, _ string, _ browser.CDPS
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
func newScraper(html string) *Scraper {
|
||||
return New(&stubClient{html: html}, nil, &stubClient{html: html})
|
||||
return New(&stubClient{html: html}, nil, &stubClient{html: html}, nil)
|
||||
}
|
||||
|
||||
func newPagedScraper(pages ...string) *Scraper {
|
||||
urlClient := &pagedStubClient{pages: pages}
|
||||
return New(&stubClient{}, nil, urlClient)
|
||||
return New(&stubClient{}, nil, urlClient, nil)
|
||||
}
|
||||
|
||||
// ── ScrapeChapterText ─────────────────────────────────────────────────────────
|
||||
|
||||
@@ -120,6 +120,16 @@ type RankingProvider interface {
|
||||
ScrapeRanking(ctx context.Context, maxPages int) (<-chan BookMeta, <-chan error)
|
||||
}
|
||||
|
||||
// RankingPageCacher persists and retrieves the raw HTML of individual ranking
// pages. Implementations (e.g. writer.Writer) keep the files on disk so a
// later ScrapeRanking run can reuse a page without a network round-trip.
type RankingPageCacher interface {
	// WriteRankingPageCache stores the raw HTML string for page.
	WriteRankingPageCache(page int, html string) error
	// ReadRankingPageCache returns the cached HTML for page; a miss is
	// reported as ("", nil), not as an error.
	ReadRankingPageCache(page int) (string, error)
}
|
||||
|
||||
// NovelScraper is the full interface that a concrete novel source must implement.
|
||||
// It composes all four provider interfaces.
|
||||
type NovelScraper interface {
|
||||
|
||||
@@ -546,25 +546,31 @@ const rankingTmpl = `
|
||||
</div>
|
||||
|
||||
<!-- Pagination: fetch pages from novelfire -->
|
||||
<div class="flex items-center gap-2 mb-6 flex-wrap">
|
||||
<span class="text-xs text-zinc-500">Fetch pages:</span>
|
||||
{{range .Pages}}
|
||||
<div class="mb-6">
|
||||
<div class="flex items-center gap-1.5 mb-2 flex-wrap">
|
||||
<span class="text-xs text-zinc-500 mr-1">Fetch up to page:</span>
|
||||
{{range .PageNums}}
|
||||
{{if eq .Num 0}}
|
||||
<span class="text-xs text-zinc-600 px-1 select-none">…</span>
|
||||
{{else}}
|
||||
<form hx-post="/ranking/refresh"
|
||||
hx-target="#ranking-refresh-status"
|
||||
hx-swap="innerHTML">
|
||||
<input type="hidden" name="pages" value="{{.Pages}}">
|
||||
<input type="hidden" name="pages" value="{{.Num}}">
|
||||
<button type="submit"
|
||||
class="text-xs px-2.5 py-1 rounded-lg bg-zinc-800 hover:bg-amber-700 border border-zinc-700 hover:border-amber-600 text-zinc-300 hover:text-white transition-colors">
|
||||
{{.Label}}
|
||||
class="text-xs w-8 h-7 rounded-lg bg-zinc-800 hover:bg-amber-700 border border-zinc-700 hover:border-amber-600 text-zinc-300 hover:text-white transition-colors text-center">
|
||||
{{.Num}}
|
||||
</button>
|
||||
</form>
|
||||
{{end}}
|
||||
<span class="text-xs text-zinc-600 ml-1">
|
||||
(source:
|
||||
{{end}}
|
||||
</div>
|
||||
<p class="text-xs text-zinc-600">
|
||||
Each page = ~20 novels from
|
||||
<a href="https://novelfire.net/genre-all/sort-popular/status-all/all-novel?page=1"
|
||||
target="_blank" rel="noopener noreferrer"
|
||||
class="text-zinc-500 hover:text-amber-400 underline underline-offset-2">novelfire.net</a>)
|
||||
</span>
|
||||
class="text-zinc-500 hover:text-amber-400 underline underline-offset-2">novelfire.net popular</a>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<!-- Book grid -->
|
||||
@@ -693,21 +699,70 @@ func toRankingViewItems(items []writer.RankingItem, localSlugs map[string]bool)
|
||||
return out
|
||||
}
|
||||
|
||||
// pageNum is one entry in the ranking pagination bar.
// Num == 0 is a sentinel that renders as an ellipsis gap.
type pageNum struct {
	Num int
}

// rankingPageNums builds the dynamic pagination list for up to total pages
// with smart ellipsis: the first 3 and last 3 pages are always shown, and for
// lists longer than 12 pages a window of 3 pages around the 1/4 and 3/4 marks
// is added — runs are separated by "…" sentinels (Num == 0).
//
// For a flat 100-page list it produces:
//
//	1 2 3 … 24 25 26 … 74 75 76 … 98 99 100
//
// (The middle range is omitted intentionally; users click individual pages.)
func rankingPageNums(total int) []pageNum {
	if total <= 0 {
		return nil
	}

	// Visible page numbers, deduplicated via a set.
	show := make(map[int]bool)
	// Always show the first 3 and last 3 pages (clamped to the valid range).
	for i := 1; i <= 3 && i <= total; i++ {
		show[i] = true
	}
	for i := total - 2; i <= total; i++ {
		if i >= 1 {
			show[i] = true
		}
	}
	// For large ranges, surface a few pages near the 1/4 and 3/4 marks.
	if total > 12 {
		q1 := total / 4
		q3 := total * 3 / 4
		for _, p := range []int{q1 - 1, q1, q1 + 1, q3 - 1, q3, q3 + 1} {
			if p >= 1 && p <= total {
				show[p] = true
			}
		}
	}

	// Collect and sort the visible pages. The set holds at most a dozen
	// entries, so a simple insertion sort avoids pulling in a sort import.
	pages := make([]int, 0, len(show))
	for p := range show {
		pages = append(pages, p)
	}
	for i := 1; i < len(pages); i++ {
		for j := i; j > 0 && pages[j] < pages[j-1]; j-- {
			pages[j], pages[j-1] = pages[j-1], pages[j]
		}
	}

	// Emit pages in order, inserting an ellipsis sentinel (Num == 0)
	// wherever two consecutive visible pages are not adjacent.
	out := make([]pageNum, 0, len(pages)*2)
	for i, p := range pages {
		if i > 0 && p > pages[i-1]+1 {
			out = append(out, pageNum{0})
		}
		out = append(out, pageNum{p})
	}
	return out
}
|
||||
|
||||
// handleRanking serves the ranking page from the cached ranking.md file.
|
||||
@@ -728,11 +783,11 @@ func (s *Server) handleRanking(w http.ResponseWriter, r *http.Request) {
|
||||
_ = t.Execute(&buf, struct {
|
||||
Books interface{}
|
||||
CachedAt string
|
||||
Pages []refreshPage
|
||||
PageNums []pageNum
|
||||
}{
|
||||
Books: toRankingViewItems(rankingItems, s.writer.LocalSlugs()),
|
||||
CachedAt: cachedAt,
|
||||
Pages: rankingRefreshPages,
|
||||
PageNums: rankingPageNums(100),
|
||||
})
|
||||
s.respond(w, r, "Rankings", buf.String())
|
||||
}
|
||||
|
||||
@@ -403,6 +403,57 @@ func (w *Writer) rankingPath() string {
|
||||
return filepath.Join(w.root, "ranking.md")
|
||||
}
|
||||
|
||||
// ─── Ranking page HTML cache ──────────────────────────────────────────────────
|
||||
|
||||
// rankingCacheDir returns the directory that stores per-page HTML caches.
|
||||
func (w *Writer) rankingCacheDir() string {
|
||||
return filepath.Join(w.root, "_ranking_cache")
|
||||
}
|
||||
|
||||
// rankingPageCachePath returns the path for a cached ranking page HTML file.
|
||||
func (w *Writer) rankingPageCachePath(page int) string {
|
||||
return filepath.Join(w.rankingCacheDir(), fmt.Sprintf("page-%d.html", page))
|
||||
}
|
||||
|
||||
// WriteRankingPageCache persists raw HTML for the given ranking page number.
|
||||
func (w *Writer) WriteRankingPageCache(page int, html string) error {
|
||||
dir := w.rankingCacheDir()
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return fmt.Errorf("writer: mkdir ranking cache %s: %w", dir, err)
|
||||
}
|
||||
path := w.rankingPageCachePath(page)
|
||||
if err := os.WriteFile(path, []byte(html), 0o644); err != nil {
|
||||
return fmt.Errorf("writer: write ranking page cache %s: %w", path, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReadRankingPageCache reads the cached HTML for the given ranking page.
|
||||
// Returns ("", nil) when no cache file exists yet.
|
||||
func (w *Writer) ReadRankingPageCache(page int) (string, error) {
|
||||
data, err := os.ReadFile(w.rankingPageCachePath(page))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return "", nil
|
||||
}
|
||||
return "", fmt.Errorf("writer: read ranking page cache page %d: %w", page, err)
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
// RankingPageCacheInfo returns os.FileInfo for a cached ranking page file.
|
||||
// Returns (nil, nil) when the file does not exist.
|
||||
func (w *Writer) RankingPageCacheInfo(page int) (os.FileInfo, error) {
|
||||
info, err := os.Stat(w.rankingPageCachePath(page))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// bookDir returns the root directory for a book slug.
|
||||
func (w *Writer) bookDir(slug string) string {
|
||||
return filepath.Join(w.root, slug)
|
||||
|
||||
Reference in New Issue
Block a user