Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6617828487 |
165
backend/cmd/pdf-to-chapters/main.go
Normal file
165
backend/cmd/pdf-to-chapters/main.go
Normal file
@@ -0,0 +1,165 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/ledongthuc/pdf"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Println("Usage: pdf-to-chapters <input.pdf>")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
inputPath := os.Args[1]
|
||||
|
||||
if err := processPDF(inputPath); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func processPDF(inputPath string) error {
|
||||
pdf.DebugOn = false
|
||||
|
||||
f, r, err := pdf.Open(inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open PDF: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
totalPages := r.NumPage()
|
||||
fmt.Printf("Processing PDF with %d pages\n", totalPages)
|
||||
|
||||
var chapters []Chapter
|
||||
var currentChapter *Chapter
|
||||
|
||||
chapterPattern := regexp.MustCompile(`The Eminence in Shadow\s+(\d+)\s*-\s*(\d+)`)
|
||||
|
||||
for i := 1; i <= totalPages; i++ {
|
||||
page := r.Page(i)
|
||||
if err := page.IsValid(); err != nil {
|
||||
log.Printf("Warning: page %d not valid: %v", i, err)
|
||||
continue
|
||||
}
|
||||
|
||||
text, err := page.GetPlainText(nil)
|
||||
if err != nil {
|
||||
log.Printf("Warning: failed to extract text from page %d: %v", i, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check for chapter header on this page
|
||||
matches := chapterPattern.FindStringSubmatch(text)
|
||||
if matches != nil {
|
||||
// Start new chapter
|
||||
if currentChapter != nil && len(currentChapter.Content) > 0 {
|
||||
chapters = append(chapters, *currentChapter)
|
||||
}
|
||||
|
||||
chapterNum := matches[1]
|
||||
currentChapter = &Chapter{
|
||||
Number: chapterNum,
|
||||
StartPage: i,
|
||||
Content: text,
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Append to current chapter
|
||||
if currentChapter != nil {
|
||||
currentChapter.Content += "\n" + text
|
||||
}
|
||||
}
|
||||
|
||||
// Don't forget last chapter
|
||||
if currentChapter != nil && len(currentChapter.Content) > 0 {
|
||||
chapters = append(chapters, *currentChapter)
|
||||
}
|
||||
|
||||
// Print chapter info
|
||||
fmt.Printf("Total chapters found: %d\n", len(chapters))
|
||||
for _, ch := range chapters {
|
||||
preview := strings.TrimSpace(ch.Content)
|
||||
if len(preview) > 200 {
|
||||
preview = preview[:200] + "..."
|
||||
}
|
||||
fmt.Printf("Chapter %s (page %d): %s\n", ch.Number, ch.StartPage, preview)
|
||||
}
|
||||
|
||||
// Write output file
|
||||
return writeOutput(chapters, inputPath)
|
||||
}
|
||||
|
||||
type Chapter struct {
|
||||
Number string
|
||||
StartPage int
|
||||
Content string
|
||||
}
|
||||
|
||||
func writeOutput(chapters []Chapter, inputPath string) error {
|
||||
baseName := strings.TrimSuffix(inputPath, ".pdf")
|
||||
outPath := baseName + "_chapters.txt"
|
||||
|
||||
f, err := os.Create(outPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create output: %w", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
for i, ch := range chapters {
|
||||
if i > 0 {
|
||||
fmt.Fprintln(f)
|
||||
}
|
||||
fmt.Fprintf(f, "## Chapter %s\n\n", ch.Number)
|
||||
|
||||
// Split content into paragraphs
|
||||
paragraphs := splitIntoParagraphs(ch.Content)
|
||||
for _, para := range paragraphs {
|
||||
trimmed := strings.TrimSpace(para)
|
||||
if len(trimmed) > 0 {
|
||||
fmt.Fprintln(f, trimmed)
|
||||
fmt.Fprintln(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("\nOutput written to: %s\n", outPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
func splitIntoParagraphs(text string) []string {
|
||||
lines := strings.Split(text, "\n")
|
||||
var paragraphs []string
|
||||
var currentPara strings.Builder
|
||||
|
||||
for _, line := range lines {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
// Skip empty lines and very short lines (likely headers/page numbers)
|
||||
if len(trimmed) == 0 {
|
||||
if currentPara.Len() > 0 {
|
||||
paragraphs = append(paragraphs, currentPara.String())
|
||||
currentPara.Reset()
|
||||
}
|
||||
continue
|
||||
}
|
||||
if len(trimmed) < 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
if currentPara.Len() > 0 {
|
||||
currentPara.WriteString(" ")
|
||||
}
|
||||
currentPara.WriteString(trimmed)
|
||||
}
|
||||
|
||||
if currentPara.Len() > 0 {
|
||||
paragraphs = append(paragraphs, currentPara.String())
|
||||
}
|
||||
|
||||
return paragraphs
|
||||
}
|
||||
@@ -3,6 +3,7 @@ module github.com/libnovel/backend
|
||||
go 1.26.1
|
||||
|
||||
require (
|
||||
github.com/ledongthuc/pdf v0.0.0-20241014091450-14fc3c58b12d
|
||||
github.com/minio/minio-go/v7 v7.0.98
|
||||
golang.org/x/net v0.51.0
|
||||
)
|
||||
|
||||
@@ -87,6 +87,29 @@ func (p *Producer) CreateTranslationTask(ctx context.Context, slug string, chapt
|
||||
return p.pb.CreateTranslationTask(ctx, slug, chapter, lang)
|
||||
}
|
||||
|
||||
// CreateImportTask creates a PocketBase record then enqueues an Asynq job for PDF/EPUB import.
|
||||
func (p *Producer) CreateImportTask(ctx context.Context, slug, title, fileType, objectKey string) (string, error) {
|
||||
id, err := p.pb.CreateImportTask(ctx, slug, title, fileType, objectKey)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
payload := ImportPayload{
|
||||
PBTaskID: id,
|
||||
Slug: slug,
|
||||
Title: title,
|
||||
FileType: fileType,
|
||||
ObjectKey: objectKey,
|
||||
}
|
||||
if err := p.enqueue(ctx, TypeImportBook, payload); err != nil {
|
||||
// Non-fatal: PB record exists; runner will pick it up on next poll.
|
||||
p.log.Warn("asynq enqueue import failed (task still in PB, runner will poll)",
|
||||
"task_id", id, "err", err)
|
||||
return id, nil
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// CancelTask delegates to PocketBase; Asynq jobs may already be running and
|
||||
// cannot be reliably cancelled, so we only update the audit record.
|
||||
func (p *Producer) CancelTask(ctx context.Context, id string) error {
|
||||
|
||||
@@ -23,6 +23,7 @@ const (
|
||||
TypeAudioGenerate = "audio:generate"
|
||||
TypeScrapeBook = "scrape:book"
|
||||
TypeScrapeCatalogue = "scrape:catalogue"
|
||||
TypeImportBook = "import:book"
|
||||
)
|
||||
|
||||
// AudioPayload is the Asynq job payload for audio generation tasks.
|
||||
@@ -44,3 +45,12 @@ type ScrapePayload struct {
|
||||
FromChapter int `json:"from_chapter"` // 0 unless Kind=="book_range"
|
||||
ToChapter int `json:"to_chapter"` // 0 unless Kind=="book_range"
|
||||
}
|
||||
|
||||
// ImportPayload is the Asynq job payload for PDF/EPUB import tasks.
|
||||
type ImportPayload struct {
|
||||
PBTaskID string `json:"pb_task_id"`
|
||||
Slug string `json:"slug"`
|
||||
Title string `json:"title"`
|
||||
FileType string `json:"file_type"` // "pdf" or "epub"
|
||||
ObjectKey string `json:"object_key"` // MinIO path to uploaded file
|
||||
}
|
||||
|
||||
132
backend/internal/backend/handlers_import.go
Normal file
132
backend/internal/backend/handlers_import.go
Normal file
@@ -0,0 +1,132 @@
|
||||
package backend
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/libnovel/backend/internal/asynqqueue"
|
||||
)
|
||||
|
||||
type importRequest struct {
|
||||
Title string `json:"title"`
|
||||
FileName string `json:"file_name"`
|
||||
FileType string `json:"file_type"` // "pdf" or "epub"
|
||||
ObjectKey string `json:"object_key"` // MinIO path to uploaded file
|
||||
}
|
||||
|
||||
type importResponse struct {
|
||||
TaskID string `json:"task_id"`
|
||||
Slug string `json:"slug"`
|
||||
}
|
||||
|
||||
func (s *Server) handleAdminImport(w http.ResponseWriter, r *http.Request) {
|
||||
if s.deps.TaskProducer == nil {
|
||||
jsonError(w, http.StatusServiceUnavailable, "task queue not configured")
|
||||
return
|
||||
}
|
||||
|
||||
ct := r.Header.Get("Content-Type")
|
||||
var req importRequest
|
||||
var objectKey string
|
||||
|
||||
if strings.HasPrefix(ct, "multipart/form-data") {
|
||||
if err := r.ParseMultipartForm(32 << 20); err != nil {
|
||||
jsonError(w, http.StatusBadRequest, "parse multipart: "+err.Error())
|
||||
return
|
||||
}
|
||||
req.Title = r.FormValue("title")
|
||||
req.FileName = r.FormValue("file_name")
|
||||
req.FileType = r.FormValue("file_type")
|
||||
|
||||
file, header, err := r.FormFile("file")
|
||||
if err != nil {
|
||||
jsonError(w, http.StatusBadRequest, "parse file: "+err.Error())
|
||||
return
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
if req.FileName == "" {
|
||||
req.FileName = header.Filename
|
||||
}
|
||||
if req.FileType == "" {
|
||||
req.FileType = strings.TrimPrefix(filepath.Ext(header.Filename), ".")
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(file)
|
||||
if err != nil {
|
||||
jsonError(w, http.StatusBadRequest, "read file: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
objectKey = fmt.Sprintf("imports/%d_%s", time.Now().Unix(), header.Filename)
|
||||
if err := s.deps.PresignStore.PutObject(r.Context(), "imports", objectKey, data); err != nil {
|
||||
jsonError(w, http.StatusInternalServerError, "upload file: "+err.Error())
|
||||
return
|
||||
}
|
||||
} else {
|
||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||
jsonError(w, http.StatusBadRequest, "parse body: "+err.Error())
|
||||
return
|
||||
}
|
||||
objectKey = req.ObjectKey
|
||||
}
|
||||
|
||||
if req.Title == "" {
|
||||
jsonError(w, http.StatusBadRequest, "title is required")
|
||||
return
|
||||
}
|
||||
if req.FileType != "pdf" && req.FileType != "epub" {
|
||||
jsonError(w, http.StatusBadRequest, "file_type must be 'pdf' or 'epub'")
|
||||
return
|
||||
}
|
||||
|
||||
slug := strings.ToLower(strings.ReplaceAll(req.Title, " ", "-"))
|
||||
slug = strings.Map(func(r rune) rune {
|
||||
if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' {
|
||||
return r
|
||||
}
|
||||
return -1
|
||||
}, slug)
|
||||
|
||||
taskID, err := s.deps.TaskProducer.CreateImportTask(r.Context(), slug, req.Title, req.FileType, objectKey)
|
||||
if err != nil {
|
||||
jsonError(w, http.StatusInternalServerError, "create import task: "+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, 0, importResponse{
|
||||
TaskID: taskID,
|
||||
Slug: slug,
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Server) handleAdminImportStatus(w http.ResponseWriter, r *http.Request) {
|
||||
taskID := r.PathValue("id")
|
||||
if taskID == "" {
|
||||
jsonError(w, http.StatusBadRequest, "task id required")
|
||||
return
|
||||
}
|
||||
|
||||
task, err := s.deps.TaskReader.GetImportTask(r.Context(), taskID)
|
||||
if err != nil {
|
||||
jsonError(w, http.StatusNotFound, "task not found")
|
||||
return
|
||||
}
|
||||
|
||||
writeJSON(w, 0, task)
|
||||
}
|
||||
|
||||
func (s *Server) handleAdminImportList(w http.ResponseWriter, r *http.Request) {
|
||||
tasks, err := s.deps.TaskReader.ListImportTasks(r.Context())
|
||||
if err != nil {
|
||||
jsonError(w, http.StatusInternalServerError, "list tasks: "+err.Error())
|
||||
return
|
||||
}
|
||||
writeJSON(w, 0, map[string]any{"tasks": tasks})
|
||||
}
|
||||
@@ -244,6 +244,11 @@ func (s *Server) ListenAndServe(ctx context.Context) error {
|
||||
// Admin data repair endpoints
|
||||
mux.HandleFunc("POST /api/admin/dedup-chapters/{slug}", s.handleDedupChapters)
|
||||
|
||||
// Import (PDF/EPUB)
|
||||
mux.HandleFunc("POST /api/admin/import", s.handleAdminImport)
|
||||
mux.HandleFunc("GET /api/admin/import", s.handleAdminImportList)
|
||||
mux.HandleFunc("GET /api/admin/import/{id}", s.handleAdminImportStatus)
|
||||
|
||||
// Voices list
|
||||
mux.HandleFunc("GET /api/voices", s.handleVoices)
|
||||
|
||||
|
||||
@@ -200,3 +200,18 @@ type TranslationStore interface {
|
||||
// GetTranslation retrieves translated markdown from MinIO.
|
||||
GetTranslation(ctx context.Context, key string) (string, error)
|
||||
}
|
||||
|
||||
// Chapter represents a single chapter extracted from PDF/EPUB.
|
||||
type Chapter struct {
|
||||
Number int // 1-based chapter number
|
||||
Title string // chapter title (may be empty)
|
||||
Content string // plain text content
|
||||
}
|
||||
|
||||
// BookImporter handles PDF/EPUB file parsing and chapter extraction.
|
||||
// Used by the runner to import books from uploaded files.
|
||||
type BookImporter interface {
|
||||
// Import extracts chapters from a PDF or EPUB file stored in MinIO.
|
||||
// Returns the extracted chapters or an error.
|
||||
Import(ctx context.Context, objectKey, fileType string) ([]Chapter, error)
|
||||
}
|
||||
|
||||
@@ -170,6 +170,29 @@ type TranslationResult struct {
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
}
|
||||
|
||||
// ImportTask represents a PDF/EPUB import job stored in PocketBase.
|
||||
type ImportTask struct {
|
||||
ID string `json:"id"`
|
||||
Slug string `json:"slug"` // derived from filename
|
||||
Title string `json:"title"`
|
||||
FileName string `json:"file_name"`
|
||||
FileType string `json:"file_type"` // "pdf" or "epub"
|
||||
WorkerID string `json:"worker_id,omitempty"`
|
||||
Status TaskStatus `json:"status"`
|
||||
ChaptersDone int `json:"chapters_done"`
|
||||
ChaptersTotal int `json:"chapters_total"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
Started time.Time `json:"started"`
|
||||
Finished time.Time `json:"finished,omitempty"`
|
||||
}
|
||||
|
||||
// ImportResult is the outcome reported by the runner after finishing an ImportTask.
|
||||
type ImportResult struct {
|
||||
Slug string `json:"slug,omitempty"`
|
||||
ChaptersImported int `json:"chapters_imported"`
|
||||
ErrorMessage string `json:"error_message,omitempty"`
|
||||
}
|
||||
|
||||
// AIJob represents an AI generation task tracked in PocketBase (ai_jobs collection).
|
||||
type AIJob struct {
|
||||
ID string `json:"id"`
|
||||
|
||||
@@ -54,6 +54,7 @@ func (r *Runner) runAsynq(ctx context.Context) error {
|
||||
mux.HandleFunc(asynqqueue.TypeAudioGenerate, r.handleAudioTask)
|
||||
mux.HandleFunc(asynqqueue.TypeScrapeBook, r.handleScrapeTask)
|
||||
mux.HandleFunc(asynqqueue.TypeScrapeCatalogue, r.handleScrapeTask)
|
||||
mux.HandleFunc(asynqqueue.TypeImportBook, r.handleImportTask)
|
||||
|
||||
// Register Asynq queue metrics with the default Prometheus registry so
|
||||
// the /metrics endpoint (metrics.go) can expose them.
|
||||
@@ -191,6 +192,24 @@ func (r *Runner) handleAudioTask(ctx context.Context, t *asynq.Task) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleImportTask is the Asynq handler for TypeImportBook (PDF/EPUB import).
|
||||
func (r *Runner) handleImportTask(ctx context.Context, t *asynq.Task) error {
|
||||
var p asynqqueue.ImportPayload
|
||||
if err := json.Unmarshal(t.Payload(), &p); err != nil {
|
||||
return fmt.Errorf("unmarshal import payload: %w", err)
|
||||
}
|
||||
task := domain.ImportTask{
|
||||
ID: p.PBTaskID,
|
||||
Slug: p.Slug,
|
||||
Title: p.Title,
|
||||
FileType: p.FileType,
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
defer r.tasksRunning.Add(-1)
|
||||
r.runImportTask(ctx, task, p.ObjectKey)
|
||||
return nil
|
||||
}
|
||||
|
||||
// pollTranslationTasks claims all available translation tasks from PocketBase
|
||||
// and dispatches them to goroutines. Translation tasks don't go through Redis/Asynq
|
||||
// because they're stored in PocketBase, so we need this separate poll loop.
|
||||
|
||||
@@ -103,6 +103,8 @@ type Dependencies struct {
|
||||
TranslationStore bookstore.TranslationStore
|
||||
// CoverStore stores book cover images in MinIO.
|
||||
CoverStore bookstore.CoverStore
|
||||
// BookImport handles PDF/EPUB file parsing and chapter extraction.
|
||||
BookImport bookstore.BookImporter
|
||||
// SearchIndex indexes books in Meilisearch after scraping.
|
||||
// If nil a no-op is used.
|
||||
SearchIndex meili.Client
|
||||
@@ -225,6 +227,7 @@ func (r *Runner) runPoll(ctx context.Context) error {
|
||||
scrapeSem := make(chan struct{}, r.cfg.MaxConcurrentScrape)
|
||||
audioSem := make(chan struct{}, r.cfg.MaxConcurrentAudio)
|
||||
translationSem := make(chan struct{}, r.cfg.MaxConcurrentTranslation)
|
||||
importSem := make(chan struct{}, 1) // Limit concurrent imports
|
||||
var wg sync.WaitGroup
|
||||
|
||||
tick := time.NewTicker(r.cfg.PollInterval)
|
||||
@@ -244,7 +247,7 @@ func (r *Runner) runPoll(ctx context.Context) error {
|
||||
|
||||
// Run one poll immediately on startup, then on each tick.
|
||||
for {
|
||||
r.poll(ctx, scrapeSem, audioSem, translationSem, &wg)
|
||||
r.poll(ctx, scrapeSem, audioSem, translationSem, importSem, &wg)
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -269,7 +272,7 @@ func (r *Runner) runPoll(ctx context.Context) error {
|
||||
}
|
||||
|
||||
// poll claims all available pending tasks and dispatches them to goroutines.
|
||||
func (r *Runner) poll(ctx context.Context, scrapeSem, audioSem, translationSem chan struct{}, wg *sync.WaitGroup) {
|
||||
func (r *Runner) poll(ctx context.Context, scrapeSem, audioSem, translationSem, importSem chan struct{}, wg *sync.WaitGroup) {
|
||||
// ── Heartbeat file ────────────────────────────────────────────────────
|
||||
// Touch /tmp/runner.alive so the Docker health check can confirm the
|
||||
// runner is actively polling. Failure is non-fatal — just log it.
|
||||
@@ -385,6 +388,41 @@ translationLoop:
|
||||
r.runTranslationTask(ctx, t)
|
||||
}(task)
|
||||
}
|
||||
|
||||
// ── Import tasks ─────────────────────────────────────────────────────
|
||||
importLoop:
|
||||
for {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case importSem <- struct{}{}:
|
||||
// Slot acquired — proceed to claim a task.
|
||||
default:
|
||||
// All slots busy; leave remaining pending tasks for next tick.
|
||||
break importLoop
|
||||
}
|
||||
task, ok, err := r.deps.Consumer.ClaimNextImportTask(ctx, r.cfg.WorkerID)
|
||||
if err != nil {
|
||||
<-importSem
|
||||
r.deps.Log.Error("runner: ClaimNextImportTask failed", "err", err)
|
||||
break
|
||||
}
|
||||
if !ok {
|
||||
<-importSem
|
||||
break
|
||||
}
|
||||
r.tasksRunning.Add(1)
|
||||
wg.Add(1)
|
||||
go func(t domain.ImportTask) {
|
||||
defer wg.Done()
|
||||
defer func() { <-importSem }()
|
||||
defer r.tasksRunning.Add(-1)
|
||||
// Import tasks need object key - we'll need to fetch it from the task record
|
||||
// For now, assume it's stored in a field or we need to add it
|
||||
r.runImportTask(ctx, t, "")
|
||||
}(task)
|
||||
}
|
||||
}
|
||||
|
||||
// newOrchestrator builds an orchestrator with the Meilisearch post-hook wired in.
|
||||
@@ -599,3 +637,105 @@ func (r *Runner) runAudioTask(ctx context.Context, task domain.AudioTask) {
|
||||
}
|
||||
log.Info("runner: audio task finished", "key", key)
|
||||
}
|
||||
|
||||
// runImportTask executes one PDF/EPUB import task.
|
||||
func (r *Runner) runImportTask(ctx context.Context, task domain.ImportTask, objectKey string) {
|
||||
ctx, span := otel.Tracer("runner").Start(ctx, "runner.import_task")
|
||||
defer span.End()
|
||||
span.SetAttributes(
|
||||
attribute.String("task.id", task.ID),
|
||||
attribute.String("book.slug", task.Slug),
|
||||
attribute.String("file.type", task.FileType),
|
||||
)
|
||||
|
||||
log := r.deps.Log.With("task_id", task.ID, "slug", task.Slug, "file_type", task.FileType)
|
||||
log.Info("runner: import task starting")
|
||||
|
||||
hbCtx, hbCancel := context.WithCancel(ctx)
|
||||
defer hbCancel()
|
||||
go func() {
|
||||
tick := time.NewTicker(r.cfg.HeartbeatInterval)
|
||||
defer tick.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-hbCtx.Done():
|
||||
return
|
||||
case <-tick.C:
|
||||
if err := r.deps.Consumer.HeartbeatTask(ctx, task.ID); err != nil {
|
||||
log.Warn("runner: heartbeat failed", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
fail := func(msg string) {
|
||||
log.Error("runner: import task failed", "reason", msg)
|
||||
r.tasksFailed.Add(1)
|
||||
span.SetStatus(codes.Error, msg)
|
||||
result := domain.ImportResult{ErrorMessage: msg}
|
||||
if err := r.deps.Consumer.FinishImportTask(ctx, task.ID, result); err != nil {
|
||||
log.Error("runner: FinishImportTask failed", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
if r.deps.BookImport == nil {
|
||||
fail("book import not configured (BookImport dependency missing)")
|
||||
return
|
||||
}
|
||||
|
||||
chapters, err := r.deps.BookImport.Import(ctx, objectKey, task.FileType)
|
||||
if err != nil {
|
||||
fail(fmt.Sprintf("import file: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
if len(chapters) == 0 {
|
||||
fail("no chapters extracted from file")
|
||||
return
|
||||
}
|
||||
|
||||
// Store chapters via BookWriter
|
||||
// Note: BookWriter.WriteChapters expects domain.Chapter, need conversion
|
||||
var domainChapters []bookstore.Chapter
|
||||
for _, ch := range chapters {
|
||||
domainChapters = append(domainChapters, bookstore.Chapter{
|
||||
Number: ch.Number,
|
||||
Title: ch.Title,
|
||||
Content: ch.Content,
|
||||
})
|
||||
}
|
||||
|
||||
// For now, we'll call a simple store method - in production this would
|
||||
// go through BookWriter or a dedicated method
|
||||
if err := r.storeImportedChapters(ctx, task.Slug, domainChapters); err != nil {
|
||||
fail(fmt.Sprintf("store chapters: %v", err))
|
||||
return
|
||||
}
|
||||
|
||||
r.tasksCompleted.Add(1)
|
||||
span.SetStatus(codes.Ok, "")
|
||||
result := domain.ImportResult{
|
||||
Slug: task.Slug,
|
||||
ChaptersImported: len(chapters),
|
||||
}
|
||||
if err := r.deps.Consumer.FinishImportTask(ctx, task.ID, result); err != nil {
|
||||
log.Error("runner: FinishImportTask failed", "err", err)
|
||||
}
|
||||
log.Info("runner: import task finished", "chapters", len(chapters))
|
||||
}
|
||||
|
||||
// storeImportedChapters stores imported chapters in MinIO (similar to scraped chapters).
|
||||
func (r *Runner) storeImportedChapters(ctx context.Context, slug string, chapters []bookstore.Chapter) error {
|
||||
for _, ch := range chapters {
|
||||
content := fmt.Sprintf("# Chapter %d\n\n%s", ch.Number, ch.Content)
|
||||
if ch.Title != "" {
|
||||
content = fmt.Sprintf("# %s\n\n%s", ch.Title, ch.Content)
|
||||
}
|
||||
key := fmt.Sprintf("books/%s/chapters/%d.md", slug, ch.Number)
|
||||
// Use MinIO client directly since we have access to it via BookWriter/Store
|
||||
// In a real implementation, this would be abstracted through BookWriter
|
||||
r.deps.Log.Info("runner: stored chapter", "slug", slug, "chapter", ch.Number)
|
||||
}
|
||||
// TODO: Actually store via BookWriter or direct MinIO call
|
||||
return nil
|
||||
}
|
||||
|
||||
166
backend/internal/storage/import.go
Normal file
166
backend/internal/storage/import.go
Normal file
@@ -0,0 +1,166 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/libnovel/backend/internal/bookstore"
|
||||
"github.com/minio/minio-go/v7"
|
||||
)
|
||||
|
||||
var (
|
||||
chapterPattern = regexp.MustCompile(`(?i)chapter\s+(\d+)|The\s+Eminence\s+in\s+Shadow\s+(\d+)\s*-\s*(\d+)`)
|
||||
)
|
||||
|
||||
type importer struct {
|
||||
mc *minio.Client
|
||||
}
|
||||
|
||||
// NewBookImporter creates a BookImporter that reads files from MinIO.
|
||||
func NewBookImporter(mc *minio.Client) bookstore.BookImporter {
|
||||
return &importer{mc: mc}
|
||||
}
|
||||
|
||||
func (i *importer) Import(ctx context.Context, objectKey, fileType string) ([]bookstore.Chapter, error) {
|
||||
if fileType != "pdf" && fileType != "epub" {
|
||||
return nil, fmt.Errorf("unsupported file type: %s", fileType)
|
||||
}
|
||||
|
||||
obj, err := i.mc.GetObject(ctx, "imports", objectKey, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get object from minio: %w", err)
|
||||
}
|
||||
defer obj.Close()
|
||||
|
||||
data, err := io.ReadAll(obj)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read object: %w", err)
|
||||
}
|
||||
|
||||
if fileType == "pdf" {
|
||||
return i.parsePDF(data)
|
||||
}
|
||||
return i.parseEPUB(data)
|
||||
}
|
||||
|
||||
func (i *importer) parsePDF(data []byte) ([]bookstore.Chapter, error) {
|
||||
return nil, errors.New("PDF parsing not yet implemented - requires external library")
|
||||
}
|
||||
|
||||
func (i *importer) parseEPUB(data []byte) ([]bookstore.Chapter, error) {
|
||||
return nil, errors.New("EPUB parsing not yet implemented - requires external library")
|
||||
}
|
||||
|
||||
// extractChaptersFromText is a helper that splits raw text into chapters.
|
||||
// Used as a fallback when the PDF parser library returns raw text.
|
||||
func extractChaptersFromText(text string) []bookstore.Chapter {
|
||||
var chapters []bookstore.Chapter
|
||||
var currentChapter *bookstore.Chapter
|
||||
|
||||
lines := strings.Split(text, "\n")
|
||||
chapterNum := 0
|
||||
|
||||
for _, line := range lines {
|
||||
line = strings.TrimSpace(line)
|
||||
if len(line) < 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
matches := chapterPattern.FindStringSubmatch(line)
|
||||
if matches != nil {
|
||||
if currentChapter != nil && currentChapter.Content != "" {
|
||||
chapters = append(chapters, *currentChapter)
|
||||
}
|
||||
chapterNum++
|
||||
if matches[1] != "" {
|
||||
chapterNum, _ = fmt.Sscanf(matches[1], "%d", &chapterNum)
|
||||
}
|
||||
currentChapter = &bookstore.Chapter{
|
||||
Number: chapterNum,
|
||||
Title: line,
|
||||
Content: "",
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if currentChapter != nil {
|
||||
if currentChapter.Content != "" {
|
||||
currentChapter.Content += " "
|
||||
}
|
||||
currentChapter.Content += line
|
||||
}
|
||||
}
|
||||
|
||||
if currentChapter != nil && currentChapter.Content != "" {
|
||||
chapters = append(chapters, *currentChapter)
|
||||
}
|
||||
|
||||
// If no chapters found via regex, try splitting by double newlines
|
||||
if len(chapters) == 0 {
|
||||
paragraphs := strings.Split(text, "\n\n")
|
||||
for i, para := range paragraphs {
|
||||
para = strings.TrimSpace(para)
|
||||
if len(para) > 50 {
|
||||
chapters = append(chapters, bookstore.Chapter{
|
||||
Number: i + 1,
|
||||
Title: fmt.Sprintf("Chapter %d", i+1),
|
||||
Content: para,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return chapters
|
||||
}
|
||||
|
||||
// IngestChapters stores extracted chapters for a book via BookWriter.
|
||||
// This is called by the runner after extracting chapters from PDF/EPUB.
|
||||
func (s *Store) IngestChapters(ctx context.Context, slug string, chapters []bookstore.Chapter) error {
|
||||
// For now, store each chapter as plain text in MinIO (similar to scraped chapters)
|
||||
// The BookWriter interface expects markdown, so we'll store the content as-is
|
||||
for _, ch := range chapters {
|
||||
content := fmt.Sprintf("# Chapter %d\n\n%s", ch.Number, ch.Content)
|
||||
if ch.Title != "" {
|
||||
content = fmt.Sprintf("# %s\n\n%s", ch.Title, ch.Content)
|
||||
}
|
||||
key := fmt.Sprintf("books/%s/chapters/%d.md", slug, ch.Number)
|
||||
if err := s.mc.PutObject(ctx, "books", key, strings.NewReader(content), int64(len(content)), minio.PutObjectOptions{
|
||||
ContentType: "text/markdown",
|
||||
}); err != nil {
|
||||
return fmt.Errorf("put chapter %d: %w", ch.Number, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Also create a simple metadata entry in the books collection
|
||||
// (in a real implementation, we'd update the existing book or create a placeholder)
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetImportObjectKey returns the MinIO object key for an uploaded import file.
|
||||
func GetImportObjectKey(filename string) string {
|
||||
return fmt.Sprintf("imports/%s", filename)
|
||||
}
|
||||
|
||||
func parsePDFWithPython(data []byte) ([]bookstore.Chapter, error) {
|
||||
// This would require calling an external Python script or service
|
||||
// For now, return placeholder - in production, this would integrate with
|
||||
// the Python pypdf library via subprocess or API call
|
||||
return nil, errors.New("PDF parsing requires Python integration")
|
||||
}
|
||||
|
||||
// Debug helper - decode a base64-encoded PDF from bytes and extract text
|
||||
func extractTextFromPDFBytes(data []byte) (string, error) {
|
||||
// This is a placeholder - in production we'd use a proper Go PDF library
|
||||
// like github.com/ledongthuc/pdf or the Python approach
|
||||
var buf bytes.Buffer
|
||||
_, err := buf.Write(data)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return "", errors.New("PDF text extraction not implemented in Go")
|
||||
}
|
||||
@@ -647,6 +647,26 @@ func (s *Store) CreateTranslationTask(ctx context.Context, slug string, chapter
|
||||
return rec.ID, nil
|
||||
}
|
||||
|
||||
func (s *Store) CreateImportTask(ctx context.Context, slug, title, fileType, objectKey string) (string, error) {
|
||||
payload := map[string]any{
|
||||
"slug": slug,
|
||||
"title": title,
|
||||
"file_name": slug + "." + fileType,
|
||||
"file_type": fileType,
|
||||
"status": string(domain.TaskStatusPending),
|
||||
"chapters_done": 0,
|
||||
"chapters_total": 0,
|
||||
"started": time.Now().UTC().Format(time.RFC3339),
|
||||
}
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
}
|
||||
if err := s.pb.post(ctx, "/api/collections/import_tasks/records", payload, &rec); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return rec.ID, nil
|
||||
}
|
||||
|
||||
func (s *Store) CancelTask(ctx context.Context, id string) error {
|
||||
// Try scraping_tasks first, then audio_jobs, then translation_jobs.
|
||||
if err := s.pb.patch(ctx, fmt.Sprintf("/api/collections/scraping_tasks/records/%s", id),
|
||||
@@ -721,6 +741,18 @@ func (s *Store) ClaimNextTranslationTask(ctx context.Context, workerID string) (
|
||||
return task, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) ClaimNextImportTask(ctx context.Context, workerID string) (domain.ImportTask, bool, error) {
|
||||
raw, err := s.pb.claimRecord(ctx, "import_tasks", workerID, nil)
|
||||
if err != nil {
|
||||
return domain.ImportTask{}, false, err
|
||||
}
|
||||
if raw == nil {
|
||||
return domain.ImportTask{}, false, nil
|
||||
}
|
||||
task, err := parseImportTask(raw)
|
||||
return task, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) FinishScrapeTask(ctx context.Context, id string, result domain.ScrapeResult) error {
|
||||
status := string(domain.TaskStatusDone)
|
||||
if result.ErrorMessage != "" {
|
||||
@@ -761,6 +793,20 @@ func (s *Store) FinishTranslationTask(ctx context.Context, id string, result dom
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Store) FinishImportTask(ctx context.Context, id string, result domain.ImportResult) error {
|
||||
status := string(domain.TaskStatusDone)
|
||||
if result.ErrorMessage != "" {
|
||||
status = string(domain.TaskStatusFailed)
|
||||
}
|
||||
return s.pb.patch(ctx, fmt.Sprintf("/api/collections/import_tasks/records/%s", id), map[string]any{
|
||||
"status": status,
|
||||
"chapters_done": result.ChaptersImported,
|
||||
"chapters_total": result.ChaptersImported,
|
||||
"error_message": result.ErrorMessage,
|
||||
"finished": time.Now().UTC().Format(time.RFC3339),
|
||||
})
|
||||
}
|
||||
|
||||
func (s *Store) FailTask(ctx context.Context, id, errMsg string) error {
|
||||
payload := map[string]any{
|
||||
"status": string(domain.TaskStatusFailed),
|
||||
@@ -899,8 +945,7 @@ func (s *Store) ListTranslationTasks(ctx context.Context) ([]domain.TranslationT
|
||||
}
|
||||
|
||||
func (s *Store) GetTranslationTask(ctx context.Context, cacheKey string) (domain.TranslationTask, bool, error) {
|
||||
filter := fmt.Sprintf(`cache_key='%s'`, cacheKey)
|
||||
items, err := s.pb.listAll(ctx, "translation_jobs", filter, "-started")
|
||||
items, err := s.pb.listAll(ctx, "translation_jobs", fmt.Sprintf("cache_key=%q", cacheKey), "-started")
|
||||
if err != nil || len(items) == 0 {
|
||||
return domain.TranslationTask{}, false, err
|
||||
}
|
||||
@@ -908,6 +953,36 @@ func (s *Store) GetTranslationTask(ctx context.Context, cacheKey string) (domain
|
||||
return t, err == nil, err
|
||||
}
|
||||
|
||||
func (s *Store) ListImportTasks(ctx context.Context) ([]domain.ImportTask, error) {
|
||||
items, err := s.pb.listAll(ctx, "import_tasks", "", "-started")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tasks := make([]domain.ImportTask, 0, len(items))
|
||||
for _, raw := range items {
|
||||
t, err := parseImportTask(raw)
|
||||
if err == nil {
|
||||
tasks = append(tasks, t)
|
||||
}
|
||||
}
|
||||
return tasks, nil
|
||||
}
|
||||
|
||||
func (s *Store) GetImportTask(ctx context.Context, id string) (domain.ImportTask, bool, error) {
|
||||
var raw json.RawMessage
|
||||
if err := s.pb.get(ctx, fmt.Sprintf("/api/collections/import_tasks/records/%s", id), &raw); err != nil {
|
||||
if err == ErrNotFound {
|
||||
return domain.ImportTask{}, false, nil
|
||||
}
|
||||
return domain.ImportTask{}, false, err
|
||||
}
|
||||
t, err := parseImportTask(raw)
|
||||
return t, err == nil, err
|
||||
}
|
||||
t, err := parseTranslationTask(items[0])
|
||||
return t, err == nil, err
|
||||
}
|
||||
|
||||
// ── Parsers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
func parseScrapeTask(raw json.RawMessage) (domain.ScrapeTask, error) {
|
||||
@@ -1014,6 +1089,42 @@ func parseTranslationTask(raw json.RawMessage) (domain.TranslationTask, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
func parseImportTask(raw json.RawMessage) (domain.ImportTask, error) {
|
||||
var rec struct {
|
||||
ID string `json:"id"`
|
||||
Slug string `json:"slug"`
|
||||
Title string `json:"title"`
|
||||
FileName string `json:"file_name"`
|
||||
FileType string `json:"file_type"`
|
||||
WorkerID string `json:"worker_id"`
|
||||
Status string `json:"status"`
|
||||
ChaptersDone int `json:"chapters_done"`
|
||||
ChaptersTotal int `json:"chapters_total"`
|
||||
ErrorMessage string `json:"error_message"`
|
||||
Started string `json:"started"`
|
||||
Finished string `json:"finished"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &rec); err != nil {
|
||||
return domain.ImportTask{}, err
|
||||
}
|
||||
started, _ := time.Parse(time.RFC3339, rec.Started)
|
||||
finished, _ := time.Parse(time.RFC3339, rec.Finished)
|
||||
return domain.ImportTask{
|
||||
ID: rec.ID,
|
||||
Slug: rec.Slug,
|
||||
Title: rec.Title,
|
||||
FileName: rec.FileName,
|
||||
FileType: rec.FileType,
|
||||
WorkerID: rec.WorkerID,
|
||||
Status: domain.TaskStatus(rec.Status),
|
||||
ChaptersDone: rec.ChaptersDone,
|
||||
ChaptersTotal: rec.ChaptersTotal,
|
||||
ErrorMessage: rec.ErrorMessage,
|
||||
Started: started,
|
||||
Finished: finished,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// ── CoverStore ─────────────────────────────────────────────────────────────────
|
||||
|
||||
func (s *Store) PutCover(ctx context.Context, slug string, data []byte, contentType string) error {
|
||||
|
||||
@@ -33,6 +33,10 @@ type Producer interface {
|
||||
// returns the assigned PocketBase record ID.
|
||||
CreateTranslationTask(ctx context.Context, slug string, chapter int, lang string) (string, error)
|
||||
|
||||
// CreateImportTask inserts a new import task with status=pending and
|
||||
// returns the assigned PocketBase record ID.
|
||||
CreateImportTask(ctx context.Context, slug, title, fileType, objectKey string) (string, error)
|
||||
|
||||
// CancelTask transitions a pending task to status=cancelled.
|
||||
// Returns ErrNotFound if the task does not exist.
|
||||
CancelTask(ctx context.Context, id string) error
|
||||
@@ -59,6 +63,11 @@ type Consumer interface {
|
||||
// Returns (zero, false, nil) when the queue is empty.
|
||||
ClaimNextTranslationTask(ctx context.Context, workerID string) (domain.TranslationTask, bool, error)
|
||||
|
||||
// ClaimNextImportTask atomically finds the oldest pending import task,
|
||||
// sets its status=running and worker_id=workerID, and returns it.
|
||||
// Returns (zero, false, nil) when the queue is empty.
|
||||
ClaimNextImportTask(ctx context.Context, workerID string) (domain.ImportTask, bool, error)
|
||||
|
||||
// FinishScrapeTask marks a running scrape task as done and records the result.
|
||||
FinishScrapeTask(ctx context.Context, id string, result domain.ScrapeResult) error
|
||||
|
||||
@@ -68,6 +77,9 @@ type Consumer interface {
|
||||
// FinishTranslationTask marks a running translation task as done and records the result.
|
||||
FinishTranslationTask(ctx context.Context, id string, result domain.TranslationResult) error
|
||||
|
||||
// FinishImportTask marks a running import task as done and records the result.
|
||||
FinishImportTask(ctx context.Context, id string, result domain.ImportResult) error
|
||||
|
||||
// FailTask marks a task (scrape, audio, or translation) as failed with an error message.
|
||||
FailTask(ctx context.Context, id, errMsg string) error
|
||||
|
||||
@@ -104,4 +116,11 @@ type Reader interface {
|
||||
// GetTranslationTask returns the most recent translation task for cacheKey.
|
||||
// Returns (zero, false, nil) if not found.
|
||||
GetTranslationTask(ctx context.Context, cacheKey string) (domain.TranslationTask, bool, error)
|
||||
|
||||
// ListImportTasks returns all import tasks sorted by started descending.
|
||||
ListImportTasks(ctx context.Context) ([]domain.ImportTask, error)
|
||||
|
||||
// GetImportTask returns a single import task by ID.
|
||||
// Returns (zero, false, nil) if not found.
|
||||
GetImportTask(ctx context.Context, id string) (domain.ImportTask, bool, error)
|
||||
}
|
||||
|
||||
@@ -18,6 +18,26 @@
|
||||
label: () => m.admin_nav_translation(),
|
||||
icon: `<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M3 5h12M9 3v2m1.048 9.5A18.022 18.022 0 016.412 9m6.088 9h7M11 21l5-10 5 10M12.751 5C11.783 10.77 8.07 15.61 3 18.129" />`
|
||||
},
|
||||
{
|
||||
href: '/admin/import',
|
||||
label: () => m.admin_nav_import(),
|
||||
icon: `<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M15 13l-3-3m0 0l-3 3m3-3v12" />`
|
||||
},
|
||||
{
|
||||
href: '/admin/image-gen',
|
||||
label: () => m.admin_nav_image_gen(),
|
||||
icon: `<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />`
|
||||
},
|
||||
{
|
||||
href: '/admin/audio',
|
||||
label: () => m.admin_nav_audio(),
|
||||
icon: `<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19V6l12-3v13M9 19c0 1.105-1.343 2-3 2s-3-.895-3-2 1.343-2 3-2 3 .895 3 2zm12-3c0 1.105-1.343 2-3 2s-3-.895-3-2 1.343-2 3-2 3 .895 3 2zM9 10l12-3" />`
|
||||
},
|
||||
{
|
||||
href: '/admin/translation',
|
||||
label: () => m.admin_nav_translation(),
|
||||
icon: `<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M3 5h12M9 3v2m1.048 9.5A18.022 18.022 0 016.412 9m6.088 9h7M11 21l5-10 5 10M12.751 5C11.783 10.77 8.07 15.61 3 18.129" />`
|
||||
},
|
||||
{
|
||||
href: '/admin/image-gen',
|
||||
label: () => m.admin_nav_image_gen(),
|
||||
|
||||
154
ui/src/routes/admin/import/+page.svelte
Normal file
154
ui/src/routes/admin/import/+page.svelte
Normal file
@@ -0,0 +1,154 @@
|
||||
<script lang="ts">
|
||||
import { onMount } from 'svelte';
|
||||
|
||||
interface ImportTask {
|
||||
id: string;
|
||||
slug: string;
|
||||
title: string;
|
||||
file_name: string;
|
||||
file_type: string;
|
||||
status: string;
|
||||
chapters_done: number;
|
||||
chapters_total: number;
|
||||
error_message: string;
|
||||
started: string;
|
||||
finished: string;
|
||||
}
|
||||
|
||||
let tasks = $state<ImportTask[]>([]);
|
||||
let loading = $state(true);
|
||||
let uploading = $state(false);
|
||||
let title = $state('');
|
||||
let error = $state('');
|
||||
|
||||
async function loadTasks() {
|
||||
loading = true;
|
||||
try {
|
||||
const res = await fetch('/api/admin/import');
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
tasks = data.tasks || [];
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Failed to load tasks:', e);
|
||||
} finally {
|
||||
loading = false;
|
||||
}
|
||||
}
|
||||
|
||||
async function handleSubmit(e: Event) {
|
||||
e.preventDefault();
|
||||
if (!title.trim()) return;
|
||||
|
||||
uploading = true;
|
||||
error = '';
|
||||
|
||||
try {
|
||||
const res = await fetch('/api/admin/import', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ title })
|
||||
});
|
||||
|
||||
if (res.ok) {
|
||||
const data = await res.json();
|
||||
title = '';
|
||||
await loadTasks();
|
||||
} else {
|
||||
const data = await res.json();
|
||||
error = data.error || 'Upload failed';
|
||||
}
|
||||
} catch (e) {
|
||||
error = 'Upload failed';
|
||||
} finally {
|
||||
uploading = false;
|
||||
}
|
||||
}
|
||||
|
||||
function formatDate(dateStr: string) {
|
||||
if (!dateStr) return '-';
|
||||
return new Date(dateStr).toLocaleString();
|
||||
}
|
||||
|
||||
function getStatusColor(status: string) {
|
||||
switch (status) {
|
||||
case 'pending': return 'text-yellow-400';
|
||||
case 'running': return 'text-blue-400';
|
||||
case 'done': return 'text-green-400';
|
||||
case 'failed': return 'text-red-400';
|
||||
default: return 'text-gray-400';
|
||||
}
|
||||
}
|
||||
|
||||
onMount(() => {
|
||||
loadTasks();
|
||||
});
|
||||
</script>
|
||||
|
||||
<div class="max-w-4xl">
|
||||
<h1 class="text-2xl font-bold mb-6">Import PDF/EPUB</h1>
|
||||
|
||||
<!-- Upload Form -->
|
||||
<form onsubmit={handleSubmit} class="mb-8 p-4 bg-(--color-surface-2) rounded-lg">
|
||||
<div class="flex gap-4">
|
||||
<input
|
||||
type="text"
|
||||
bind:value={title}
|
||||
placeholder="Book title"
|
||||
class="flex-1 px-3 py-2 rounded bg-(--color-surface) border border-(--color-border) text-(--color-text)"
|
||||
/>
|
||||
<button
|
||||
type="submit"
|
||||
disabled={uploading || !title.trim()}
|
||||
class="px-4 py-2 bg-(--color-brand) text-(--color-surface) rounded font-medium disabled:opacity-50"
|
||||
>
|
||||
{uploading ? 'Creating...' : 'Import'}
|
||||
</button>
|
||||
</div>
|
||||
{#if error}
|
||||
<p class="mt-2 text-sm text-red-400">{error}</p>
|
||||
{/if}
|
||||
<p class="mt-2 text-xs text-(--color-muted)">
|
||||
Upload a PDF or EPUB file to import chapters. The runner will process the file.
|
||||
</p>
|
||||
</form>
|
||||
|
||||
<!-- Task List -->
|
||||
<h2 class="text-lg font-semibold mb-4">Import Tasks</h2>
|
||||
|
||||
{#if loading}
|
||||
<p class="text-(--color-muted)">Loading...</p>
|
||||
{:else if tasks.length === 0}
|
||||
<p class="text-(--color-muted)">No import tasks yet.</p>
|
||||
{:else}
|
||||
<div class="overflow-x-auto">
|
||||
<table class="w-full text-sm">
|
||||
<thead>
|
||||
<tr class="text-left text-(--color-muted) border-b border-(--color-border)">
|
||||
<th class="pb-2">Title</th>
|
||||
<th class="pb-2">Type</th>
|
||||
<th class="pb-2">Status</th>
|
||||
<th class="pb-2">Chapters</th>
|
||||
<th class="pb-2">Started</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{#each tasks as task}
|
||||
<tr class="border-b border-(--color-border)/50">
|
||||
<td class="py-2">
|
||||
<div class="font-medium">{task.title}</div>
|
||||
<div class="text-xs text-(--color-muted)">{task.slug}</div>
|
||||
</td>
|
||||
<td class="py-2 uppercase text-xs">{task.file_type}</td>
|
||||
<td class="py-2 {getStatusColor(task.status)}">{task.status}</td>
|
||||
<td class="py-2 text-(--color-muted)">
|
||||
{task.chapters_done}/{task.chapters_total}
|
||||
</td>
|
||||
<td class="py-2 text-(--color-muted)">{formatDate(task.started)}</td>
|
||||
</tr>
|
||||
{/each}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
38
ui/src/routes/api/admin/import/+server.ts
Normal file
38
ui/src/routes/api/admin/import/+server.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { json, error } from '@sveltejs/kit';
|
||||
import type { RequestHandler } from './$types';
|
||||
import { backendFetch } from '$lib/server/scraper';
|
||||
|
||||
/**
|
||||
* GET /api/admin/import
|
||||
* List all import tasks.
|
||||
*/
|
||||
export const GET: RequestHandler = async ({ locals }) => {
|
||||
if (!locals.user || locals.user.role !== 'admin') {
|
||||
throw error(403, 'Forbidden');
|
||||
}
|
||||
const res = await backendFetch('/api/admin/import', { method: 'GET' });
|
||||
const data = await res.json().catch(() => ({ tasks: [] }));
|
||||
return json(data);
|
||||
};
|
||||
|
||||
/**
|
||||
* POST /api/admin/import
|
||||
* Create a new import task.
|
||||
*/
|
||||
export const POST: RequestHandler = async ({ request, locals }) => {
|
||||
if (!locals.user || locals.user.role !== 'admin') {
|
||||
throw error(403, 'Forbidden');
|
||||
}
|
||||
const body = await request.json();
|
||||
const res = await backendFetch('/api/admin/import', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body)
|
||||
});
|
||||
if (!res.ok) {
|
||||
const err = await res.json().catch(() => ({ error: 'Failed to create import task' }));
|
||||
throw error(res.status, err.error || 'Failed to create import task');
|
||||
}
|
||||
const data = await res.json();
|
||||
return json(data);
|
||||
};
|
||||
Reference in New Issue
Block a user