Files
libnovel/scraper/internal/storage/integration_test.go
Admin fb6b364382 refactor: audit, split server.go, add unit tests, and fix latent bugs
- Remove dead code: browser cdp/content_scrape strategies, writer package,
  printUsage, downloadAndStoreCoverCLI in main.go
- Fix bugs: defer-in-loop in pocketbase deleteWhere, listAll() pagination
  hard cap removed, splitChapterTitle off-by-one in date extraction
- Split server.go (~1700 lines) into focused handler files:
  handlers_audio, handlers_browse, handlers_progress, handlers_ranking,
  handlers_scrape
- Export htmlutil.AttrVal/TextContent/ResolveURL; add storage/coverutil.go
  to consolidate duplicate helpers
- Flatten deeply nested conditionals: voices() early-return guards,
  ScrapeCatalogue next-link double attr scan, chapterNumberFromKey dead
  strings.Cut line, splitChapterTitle double-nested unit/suffix loop
- Add unit tests: htmlutil (9 funcs), novelfire ScrapeMetadata (3 cases),
  orchestrator Run (5 cases), storage chapterNumberFromKey/splitChapterTitle
  (22 cases); all pass with go build/vet/test clean
2026-03-04 22:14:23 +05:00

656 lines
19 KiB
Go

//go:build integration
// Integration tests for MinioClient and PocketBaseStore against live instances.
//
// These tests require running MinIO and PocketBase services. They are gated
// behind the "integration" build tag and are never run in a normal `go test ./...`.
//
// Run with:
//
// MINIO_ENDPOINT=localhost:9000 \
// POCKETBASE_URL=http://localhost:8090 \
// go test -v -tags integration -timeout 120s \
// github.com/libnovel/scraper/internal/storage
package storage
import (
"context"
"fmt"
"log/slog"
"os"
"strings"
"testing"
"time"
)
// ─── helpers ──────────────────────────────────────────────────────────────────
// envOr returns the value of the environment variable named key. When the
// variable is unset or set to the empty string, def is returned instead.
func envOr(key, def string) string {
	val := os.Getenv(key)
	if val == "" {
		return def
	}
	return val
}
// newTestMinioClient builds a MinioClient configured from environment
// variables. The calling test is skipped when MINIO_ENDPOINT is unset; the
// credentials and bucket names fall back to built-in defaults, and SSL is
// enabled only when MINIO_USE_SSL is exactly "true". Client construction is
// bounded by a 15-second timeout and any error aborts the test.
func newTestMinioClient(t *testing.T) *MinioClient {
	t.Helper()

	addr := os.Getenv("MINIO_ENDPOINT")
	if addr == "" {
		t.Skip("MINIO_ENDPOINT not set — skipping MinIO integration test")
	}

	setupCtx, stop := context.WithTimeout(context.Background(), 15*time.Second)
	defer stop()

	client, err := NewMinioClient(setupCtx, MinioConfig{
		Endpoint:       addr,
		AccessKey:      envOr("MINIO_ACCESS_KEY", "admin"),
		SecretKey:      envOr("MINIO_SECRET_KEY", "changeme123"),
		UseSSL:         os.Getenv("MINIO_USE_SSL") == "true",
		BucketChapters: envOr("MINIO_BUCKET_CHAPTERS", "libnovel-chapters"),
		BucketAudio:    envOr("MINIO_BUCKET_AUDIO", "libnovel-audio"),
	})
	if err != nil {
		t.Fatalf("NewMinioClient: %v", err)
	}
	return client
}
// newTestPocketBaseStore builds a PocketBaseStore configured from environment
// variables. The calling test is skipped when POCKETBASE_URL is unset; admin
// credentials fall back to built-in defaults. EnsureCollections is attempted
// with a 15-second timeout, and a failure there is only logged so the test
// can still proceed.
func newTestPocketBaseStore(t *testing.T) *PocketBaseStore {
	t.Helper()

	baseURL := os.Getenv("POCKETBASE_URL")
	if baseURL == "" {
		t.Skip("POCKETBASE_URL not set — skipping PocketBase integration test")
	}

	pbStore := NewPocketBaseStore(PocketBaseConfig{
		BaseURL:       baseURL,
		AdminEmail:    envOr("POCKETBASE_ADMIN_EMAIL", "admin@libnovel.local"),
		AdminPassword: envOr("POCKETBASE_ADMIN_PASSWORD", "changeme123"),
	}, slog.Default())

	setupCtx, stop := context.WithTimeout(context.Background(), 15*time.Second)
	defer stop()

	// Deliberately non-fatal: the error is reported but does not stop the test.
	err := pbStore.EnsureCollections(setupCtx)
	if err != nil {
		t.Logf("EnsureCollections (may be harmless): %v", err)
	}
	return pbStore
}
// testSlug derives a slug of the form "test-<name>-<suffix>" from the running
// test's name. The name is lower-cased, every rune outside [a-z0-9-] is
// replaced by '-', and the result is cut to at most 30 bytes. The suffix is
// the current Unix time in milliseconds modulo 100000, so uniqueness is
// best-effort: it reduces, but does not rule out, collisions between runs.
func testSlug(t *testing.T) string {
	t.Helper()

	sanitize := func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= '0' && r <= '9', r == '-':
			return r
		default:
			return '-'
		}
	}
	name := strings.Map(sanitize, strings.ToLower(t.Name()))

	// After sanitising, the name is pure ASCII, so byte slicing is safe.
	const maxNameLen = 30
	if len(name) > maxNameLen {
		name = name[:maxNameLen]
	}

	suffix := time.Now().UnixMilli() % 100000
	return fmt.Sprintf("test-%s-%d", name, suffix)
}
// ─── MinioClient tests ────────────────────────────────────────────────────────
// TestMinioClient_ChapterRoundTrip stores a single chapter and then exercises
// GetChapter, ChapterExists, and ListChapterKeys against it. The subtests run
// sequentially and all operate on the object written by the first one.
func TestMinioClient_ChapterRoundTrip(t *testing.T) {
	const (
		volume  = 0
		chapter = 1
	)
	client := newTestMinioClient(t)
	slug := testSlug(t)
	body := "# Chapter 1\n\nHello integration world.\n"

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	t.Run("PutChapter", func(t *testing.T) {
		err := client.PutChapter(ctx, slug, volume, chapter, body)
		if err != nil {
			t.Fatalf("PutChapter: %v", err)
		}
		t.Logf("stored chapter at key: %s", chapterKey(slug, volume, chapter))
	})

	t.Run("GetChapter", func(t *testing.T) {
		fetched, err := client.GetChapter(ctx, slug, volume, chapter)
		if err != nil {
			t.Fatalf("GetChapter: %v", err)
		}
		if fetched != body {
			t.Errorf("GetChapter round-trip mismatch:\ngot: %q\nwant: %q", fetched, body)
		}
		t.Logf("retrieved %d bytes", len(fetched))
	})

	t.Run("ChapterExists", func(t *testing.T) {
		// Positive case: the chapter written above.
		if present := client.ChapterExists(ctx, slug, volume, chapter); !present {
			t.Error("ChapterExists returned false for a just-stored chapter")
		}
		// Negative case: a chapter number this test never writes.
		if present := client.ChapterExists(ctx, slug, volume, 999); present {
			t.Error("ChapterExists returned true for a chapter that was never stored")
		}
	})

	t.Run("ListChapterKeys", func(t *testing.T) {
		listed, err := client.ListChapterKeys(ctx, slug)
		if err != nil {
			t.Fatalf("ListChapterKeys: %v", err)
		}
		if len(listed) != 1 {
			t.Fatalf("ListChapterKeys returned %d keys, want 1: %v", len(listed), listed)
		}
		if want := chapterKey(slug, volume, chapter); listed[0] != want {
			t.Errorf("key = %q, want %q", listed[0], want)
		}
		t.Logf("keys: %v", listed)
	})
}
// TestMinioClient_MultiChapterList writes several chapters under one slug and
// checks that both ListChapterKeys and CountChapters report all of them.
func TestMinioClient_MultiChapterList(t *testing.T) {
	const volume = 0

	client := newTestMinioClient(t)
	slug := testSlug(t)

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	// Chapter 51 is included so the set crosses the 1-50 folder boundary.
	numbers := []int{1, 2, 51}
	want := len(numbers)

	for _, num := range numbers {
		body := fmt.Sprintf("# Chapter %d\n\nContent for chapter %d.\n", num, num)
		err := client.PutChapter(ctx, slug, volume, num, body)
		if err != nil {
			t.Fatalf("PutChapter(%d): %v", num, err)
		}
	}

	listed, err := client.ListChapterKeys(ctx, slug)
	if err != nil {
		t.Fatalf("ListChapterKeys: %v", err)
	}
	t.Logf("keys: %v", listed)
	if got := len(listed); got != want {
		t.Errorf("ListChapterKeys returned %d keys, want %d", got, want)
	}

	if total := client.CountChapters(ctx, slug); total != want {
		t.Errorf("CountChapters = %d, want %d", total, want)
	}
}
// TestMinioClient_PresignChapter stores a chapter and verifies that
// PresignChapter returns a non-empty http(s) URL that references the stored
// object's slug.
func TestMinioClient_PresignChapter(t *testing.T) {
	mc := newTestMinioClient(t)
	slug := testSlug(t)
	const vol = 0
	const n = 1
	content := "# Presign test\n\nSome content.\n"

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// The object must exist before a URL for it is requested.
	if err := mc.PutChapter(ctx, slug, vol, n, content); err != nil {
		t.Fatalf("PutChapter: %v", err)
	}

	url, err := mc.PresignChapter(ctx, slug, vol, n, 10*time.Minute)
	if err != nil {
		t.Fatalf("PresignChapter: %v", err)
	}
	if url == "" {
		t.Fatal("PresignChapter returned empty URL")
	}
	t.Logf("presigned URL: %s", url)

	// URL must be an http(s) URL and contain the slug somewhere.
	if !strings.HasPrefix(url, "http") {
		t.Errorf("URL does not start with http: %q", url)
	}
	// testSlug only emits [a-z0-9-], so no URL escaping is expected to alter
	// the slug. NOTE(review): assumes chapterKey embeds the slug verbatim in
	// the object key — confirm against chapterKey if this assertion fails.
	if !strings.Contains(url, slug) {
		t.Errorf("URL does not contain slug %q: %q", slug, url)
	}
}
// TestMinioClient_AudioRoundTrip writes a small audio payload and then checks
// that GetAudio returns the same bytes and that AudioExists distinguishes the
// stored key from one that was never written.
func TestMinioClient_AudioRoundTrip(t *testing.T) {
	client := newTestMinioClient(t)
	slug := testSlug(t)
	objectKey := AudioObjectKey(slug, 1, "af_bella")

	// Not a real MP3: just an ID3-style prefix followed by a marker string.
	payload := []byte("ID3\x03\x00\x00\x00\x00\x00\x00integration-test-audio")

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	t.Run("PutAudio", func(t *testing.T) {
		err := client.PutAudio(ctx, objectKey, payload)
		if err != nil {
			t.Fatalf("PutAudio: %v", err)
		}
		t.Logf("stored audio at key: %s", objectKey)
	})

	t.Run("GetAudio", func(t *testing.T) {
		fetched, err := client.GetAudio(ctx, objectKey)
		if err != nil {
			t.Fatalf("GetAudio: %v", err)
		}
		if same := string(fetched) == string(payload); !same {
			t.Errorf("GetAudio round-trip mismatch: got %d bytes, want %d", len(fetched), len(payload))
		}
		t.Logf("retrieved %d bytes", len(fetched))
	})

	t.Run("AudioExists", func(t *testing.T) {
		if present := client.AudioExists(ctx, objectKey); !present {
			t.Error("AudioExists returned false for a just-stored audio object")
		}
		if present := client.AudioExists(ctx, "nonexistent/key.mp3"); present {
			t.Error("AudioExists returned true for a key that was never stored")
		}
	})
}
// TestMinioClient_PresignAudio stores an audio object and checks that
// PresignAudio yields a non-empty URL beginning with "http".
func TestMinioClient_PresignAudio(t *testing.T) {
	client := newTestMinioClient(t)
	slug := testSlug(t)
	objectKey := AudioObjectKey(slug, 1, "af_bella")
	payload := []byte("ID3\x03\x00\x00\x00\x00\x00\x00presign-audio-test")

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	// The object is written first so the URL refers to something that exists.
	err := client.PutAudio(ctx, objectKey, payload)
	if err != nil {
		t.Fatalf("PutAudio: %v", err)
	}

	signed, err := client.PresignAudio(ctx, objectKey, 10*time.Minute)
	if err != nil {
		t.Fatalf("PresignAudio: %v", err)
	}
	if len(signed) == 0 {
		t.Fatal("PresignAudio returned empty URL")
	}
	if ok := strings.HasPrefix(signed, "http"); !ok {
		t.Errorf("URL does not start with http: %q", signed)
	}
	t.Logf("presigned audio URL: %s", signed)
}
// ─── PocketBaseStore tests ────────────────────────────────────────────────────
// TestPocketBaseStore_Ping checks that Ping against the configured PocketBase
// instance completes without error within ten seconds.
func TestPocketBaseStore_Ping(t *testing.T) {
	pbStore := newTestPocketBaseStore(t)

	ctx, stop := context.WithTimeout(context.Background(), 10*time.Second)
	defer stop()

	err := pbStore.Ping(ctx)
	if err != nil {
		t.Fatalf("Ping: %v", err)
	}
	t.Log("Ping succeeded")
}
// TestPocketBaseStore_BookRoundTrip creates a book record, reads it back via
// GetBook and ListBooks, updates it with a second UpsertBook call, and finally
// checks that BookMetaUpdated reports a non-zero time. The subtests run in
// order and depend on the record created by the first one.
func TestPocketBaseStore_BookRoundTrip(t *testing.T) {
	pbStore := newTestPocketBaseStore(t)
	slug := testSlug(t)

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	// Remove the test record afterwards. A fresh context is used because the
	// test's own context may already be cancelled when cleanup runs.
	t.Cleanup(func() {
		cleanupCtx, done := context.WithTimeout(context.Background(), 10*time.Second)
		defer done()
		filter := fmt.Sprintf(`slug="%s"`, slug)
		// Best-effort cleanup: a failure here must not fail the test.
		_ = pbStore.pb.deleteWhere(cleanupCtx, "books", filter)
	})

	t.Run("UpsertBook_Create", func(t *testing.T) {
		genres := []string{"Action", "Fantasy"}
		err := pbStore.UpsertBook(ctx, slug,
			"Integration Test Novel", "Test Author",
			"https://example.com/cover.jpg", "Ongoing",
			"A test summary.", "https://example.com/book/test",
			genres, 42, 7,
		)
		if err != nil {
			t.Fatalf("UpsertBook (create): %v", err)
		}
		t.Logf("created book %q", slug)
	})

	t.Run("GetBook", func(t *testing.T) {
		book, ok, err := pbStore.GetBook(ctx, slug)
		if err != nil {
			t.Fatalf("GetBook: %v", err)
		}
		if !ok {
			t.Fatal("GetBook: book not found after UpsertBook")
		}
		t.Logf("GetBook record: %v", book)

		if title := book["title"]; title != "Integration Test Novel" {
			t.Errorf("title = %v, want %q", title, "Integration Test Novel")
		}
		if author := book["author"]; author != "Test Author" {
			t.Errorf("author = %v, want %q", author, "Test Author")
		}
	})

	t.Run("ListBooks", func(t *testing.T) {
		all, err := pbStore.ListBooks(ctx)
		if err != nil {
			t.Fatalf("ListBooks: %v", err)
		}

		// Scan the listing for the record created above.
		present := false
		for i := range all {
			name, _ := all[i]["slug"].(string)
			if name == slug {
				present = true
				break
			}
		}
		if !present {
			t.Errorf("ListBooks did not return book with slug %q (total=%d)", slug, len(all))
		}
	})

	t.Run("UpsertBook_Update", func(t *testing.T) {
		err := pbStore.UpsertBook(ctx, slug,
			"Integration Test Novel", "Test Author Updated",
			"", "Completed", "", "https://example.com/book/test",
			nil, 100, 3,
		)
		if err != nil {
			t.Fatalf("UpsertBook (update): %v", err)
		}

		book, ok, err := pbStore.GetBook(ctx, slug)
		if !ok || err != nil {
			t.Fatalf("GetBook after update: found=%v err=%v", ok, err)
		}
		if author := book["author"]; author != "Test Author Updated" {
			t.Errorf("author after update = %v, want %q", author, "Test Author Updated")
		}
		if status := book["status"]; status != "Completed" {
			t.Errorf("status after update = %v, want %q", status, "Completed")
		}
	})

	t.Run("BookMetaUpdated", func(t *testing.T) {
		stamp, err := pbStore.BookMetaUpdated(ctx, slug)
		if err != nil {
			t.Fatalf("BookMetaUpdated: %v", err)
		}
		if stamp.IsZero() {
			t.Error("BookMetaUpdated returned zero time")
		}
		t.Logf("meta_updated: %s", stamp)
	})
}
// TestPocketBaseStore_ChapterIdx inserts three chapter index rows with
// UpsertChapterIdx, then checks ListChapterIdx and CountChapterIdx, and
// finally re-upserts an existing chapter to confirm that neither the listing
// nor the count grows.
func TestPocketBaseStore_ChapterIdx(t *testing.T) {
	store := newTestPocketBaseStore(t)
	slug := testSlug(t)

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Remove the rows written by this test. A fresh context is used because
	// the test's own context may already be cancelled when cleanup runs.
	t.Cleanup(func() {
		cleanCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		// Best-effort cleanup: a failure here must not fail the test.
		_ = store.pb.deleteWhere(cleanCtx, "chapters_idx", fmt.Sprintf(`slug="%s"`, slug))
	})

	chapters := []struct {
		n     int
		title string
		date  string
	}{
		{1, "Chapter 1: The Beginning", "2 days ago"},
		{2, "Chapter 2: Rising Action", "1 day ago"},
		{3, "Chapter 3: Climax", "3 hours ago"},
	}

	// Seed the index before any subtest runs; every subtest relies on it.
	for _, ch := range chapters {
		if err := store.UpsertChapterIdx(ctx, slug, ch.n, ch.title, ch.date); err != nil {
			t.Fatalf("UpsertChapterIdx(%d): %v", ch.n, err)
		}
	}

	t.Run("ListChapterIdx", func(t *testing.T) {
		rows, err := store.ListChapterIdx(ctx, slug)
		if err != nil {
			t.Fatalf("ListChapterIdx: %v", err)
		}
		if len(rows) != len(chapters) {
			t.Errorf("ListChapterIdx returned %d rows, want %d", len(rows), len(chapters))
		}
		for i, row := range rows {
			t.Logf("row[%d]: number=%v title=%v date_label=%v", i, row["number"], row["title"], row["date_label"])
		}
	})

	t.Run("CountChapterIdx", func(t *testing.T) {
		count := store.CountChapterIdx(ctx, slug)
		if count != len(chapters) {
			t.Errorf("CountChapterIdx = %d, want %d", count, len(chapters))
		}
	})

	t.Run("UpsertChapterIdx_Update", func(t *testing.T) {
		// Re-upsert chapter 2 with an updated title.
		if err := store.UpsertChapterIdx(ctx, slug, 2, "Chapter 2: Revised Title", "1 day ago"); err != nil {
			t.Fatalf("UpsertChapterIdx (update): %v", err)
		}
		rows, err := store.ListChapterIdx(ctx, slug)
		if err != nil {
			t.Fatalf("ListChapterIdx after update: %v", err)
		}
		// The listing and the count are checked separately so that each
		// failure message reports the value that was actually compared.
		if len(rows) != len(chapters) {
			t.Errorf("row count changed after update: got %d, want %d", len(rows), len(chapters))
		}
		if count := store.CountChapterIdx(ctx, slug); count != len(chapters) {
			t.Errorf("count changed after update: got %d, want %d", count, len(chapters))
		}
	})
}
// TestPocketBaseStore_Ranking writes two ranking entries with
// UpsertRankingItem, checks that ListRankingItems returns both of them, and
// verifies that RankingLastUpdated reports a non-zero time within the last
// 24 hours.
func TestPocketBaseStore_Ranking(t *testing.T) {
	pbStore := newTestPocketBaseStore(t)

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	// Distinct suffixes keep the two slugs apart even when testSlug returns
	// the same base value for both calls.
	firstSlug := testSlug(t) + "-rank1"
	secondSlug := testSlug(t) + "-rank2"

	t.Cleanup(func() {
		cleanupCtx, done := context.WithTimeout(context.Background(), 10*time.Second)
		defer done()
		for _, name := range []string{firstSlug, secondSlug} {
			filter := fmt.Sprintf(`slug="%s"`, name)
			// Best-effort cleanup: a failure here must not fail the test.
			_ = pbStore.pb.deleteWhere(cleanupCtx, "ranking", filter)
		}
	})

	entries := []RankingItem{
		{Rank: 1, Slug: firstSlug, Title: "Test Book One", SourceURL: "https://example.com/1"},
		{Rank: 2, Slug: secondSlug, Title: "Test Book Two", SourceURL: "https://example.com/2"},
	}

	t.Run("WriteRankingItem", func(t *testing.T) {
		for _, entry := range entries {
			err := pbStore.UpsertRankingItem(ctx, entry)
			if err != nil {
				t.Fatalf("UpsertRankingItem(%q): %v", entry.Slug, err)
			}
		}
		t.Log("UpsertRankingItem succeeded")
	})

	t.Run("ReadRankingItems", func(t *testing.T) {
		listed, err := pbStore.ListRankingItems(ctx)
		if err != nil {
			t.Fatalf("ListRankingItems: %v", err)
		}

		// The collection may hold unrelated rows, so only this test's two
		// slugs are counted.
		matches := 0
		for _, entry := range listed {
			switch entry.Slug {
			case firstSlug, secondSlug:
				matches++
			}
		}
		if matches != 2 {
			t.Errorf("ListRankingItems: found %d of 2 test items in %d total", matches, len(listed))
		}
		t.Logf("ListRankingItems returned %d total items, %d test items", len(listed), matches)
	})

	t.Run("RankingFreshEnough", func(t *testing.T) {
		stamp, err := pbStore.RankingLastUpdated(ctx)
		if err != nil {
			t.Fatalf("RankingLastUpdated: %v", err)
		}
		if stamp.IsZero() {
			t.Error("RankingLastUpdated returned zero time immediately after write")
		}

		isFresh := time.Since(stamp) < 24*time.Hour
		if !isFresh {
			t.Errorf("RankingLastUpdated = %s; want within 24h", stamp)
		}
		t.Logf("RankingLastUpdated = %s (fresh=%v)", stamp, isFresh)
	})
}
// TestPocketBaseStore_Progress walks a reading-progress record through its
// lifecycle: SetProgress, GetProgress, AllProgress, a second SetProgress that
// overwrites the chapter, and DeleteProgress followed by a lookup that must
// report not-found. The subtests run in order and share one record.
func TestPocketBaseStore_Progress(t *testing.T) {
	const sessionID = "integration-test-session-xyz"

	pbStore := newTestPocketBaseStore(t)
	slug := testSlug(t)

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	// Cleanup removes every progress row for the fixed test session.
	t.Cleanup(func() {
		cleanupCtx, done := context.WithTimeout(context.Background(), 10*time.Second)
		defer done()
		filter := fmt.Sprintf(`session_id="%s"`, sessionID)
		// Best-effort cleanup: a failure here must not fail the test.
		_ = pbStore.pb.deleteWhere(cleanupCtx, "progress", filter)
	})

	t.Run("SetProgress", func(t *testing.T) {
		err := pbStore.SetProgress(ctx, sessionID, slug, 5)
		if err != nil {
			t.Fatalf("SetProgress: %v", err)
		}
	})

	t.Run("GetProgress", func(t *testing.T) {
		chapter, stamp, ok, err := pbStore.GetProgress(ctx, sessionID, slug)
		if err != nil {
			t.Fatalf("GetProgress: %v", err)
		}
		if !ok {
			t.Fatal("GetProgress: not found after SetProgress")
		}
		if chapter != 5 {
			t.Errorf("chapter = %d, want 5", chapter)
		}
		if stamp.IsZero() {
			t.Error("updated time is zero")
		}
		t.Logf("chapter=%d updated=%s", chapter, stamp)
	})

	t.Run("AllProgress", func(t *testing.T) {
		entries, err := pbStore.AllProgress(ctx, sessionID)
		if err != nil {
			t.Fatalf("AllProgress: %v", err)
		}

		present := false
		for i := range entries {
			name, _ := entries[i]["slug"].(string)
			if name == slug {
				present = true
			}
		}
		if !present {
			t.Errorf("AllProgress did not include slug %q (total=%d)", slug, len(entries))
		}
	})

	t.Run("SetProgress_Update", func(t *testing.T) {
		err := pbStore.SetProgress(ctx, sessionID, slug, 12)
		if err != nil {
			t.Fatalf("SetProgress (update): %v", err)
		}

		chapter, _, ok, err := pbStore.GetProgress(ctx, sessionID, slug)
		if !ok || err != nil {
			t.Fatalf("GetProgress after update: found=%v err=%v", ok, err)
		}
		if chapter != 12 {
			t.Errorf("chapter after update = %d, want 12", chapter)
		}
	})

	t.Run("DeleteProgress", func(t *testing.T) {
		err := pbStore.DeleteProgress(ctx, sessionID, slug)
		if err != nil {
			t.Fatalf("DeleteProgress: %v", err)
		}

		// A lookup after deletion must succeed but report not-found.
		_, _, ok, err := pbStore.GetProgress(ctx, sessionID, slug)
		if err != nil {
			t.Fatalf("GetProgress after delete: %v", err)
		}
		if ok {
			t.Error("GetProgress returned found=true after DeleteProgress")
		}
		t.Log("DeleteProgress confirmed")
	})
}
// TestPocketBaseStore_AudioCache stores a cache entry with SetAudioCache,
// reads it back with GetAudioCache, and checks that a lookup for an unknown
// key reports not-found without an error.
func TestPocketBaseStore_AudioCache(t *testing.T) {
	const filename = "speech_abc123.mp3"

	pbStore := newTestPocketBaseStore(t)

	ctx, stop := context.WithTimeout(context.Background(), 30*time.Second)
	defer stop()

	// The millisecond timestamp keeps the key distinct between runs.
	entryKey := fmt.Sprintf("integration-audio-cache-test-%d", time.Now().UnixMilli())

	t.Cleanup(func() {
		cleanupCtx, done := context.WithTimeout(context.Background(), 10*time.Second)
		defer done()
		filter := fmt.Sprintf(`cache_key="%s"`, entryKey)
		// Best-effort cleanup: a failure here must not fail the test.
		_ = pbStore.pb.deleteWhere(cleanupCtx, "audio_cache", filter)
	})

	t.Run("SetAudioCache", func(t *testing.T) {
		err := pbStore.SetAudioCache(ctx, entryKey, filename)
		if err != nil {
			t.Fatalf("SetAudioCache: %v", err)
		}
	})

	t.Run("GetAudioCache", func(t *testing.T) {
		stored, ok, err := pbStore.GetAudioCache(ctx, entryKey)
		if err != nil {
			t.Fatalf("GetAudioCache: %v", err)
		}
		if !ok {
			t.Fatal("GetAudioCache: not found after SetAudioCache")
		}
		if stored != filename {
			t.Errorf("filename = %q, want %q", stored, filename)
		}
		t.Logf("filename: %s", stored)
	})

	t.Run("GetAudioCache_Miss", func(t *testing.T) {
		stored, ok, err := pbStore.GetAudioCache(ctx, "does-not-exist-ever")
		if err != nil {
			t.Fatalf("GetAudioCache (miss): %v", err)
		}
		if ok {
			t.Errorf("GetAudioCache returned found=true for missing key, filename=%q", stored)
		}
	})
}