Compare commits

..

1 Commits

Author SHA1 Message Date
root
a8a7151fee feat: add PDF/EPUB import functionality
Some checks failed
Release / Test backend (push) Failing after 16s
Release / Check ui (push) Failing after 33s
Release / Docker (push) Has been skipped
Release / Gitea Release (push) Has been skipped
- Add ImportTask/ImportResult types to domain.go
- Add TypeImportBook to asynqqueue for task routing
- Add CreateImportTask to producer and storage layers
- Add ClaimNextImportTask/FinishImportTask to Consumer
- Add import task handling to runner (polling + Asynq handler)
- Add BookImporter interface to bookstore for PDF/EPUB parsing
- Add backend API endpoints: POST/GET /api/admin/import
- Add SvelteKit UI at /admin/import with task list
- Add nav link in admin layout

Note: PDF/EPUB parsing is a placeholder - needs external library integration.
2026-04-09 09:49:26 +05:00

View File

@@ -1,165 +0,0 @@
package main
import (
"fmt"
"log"
"os"
"regexp"
"strings"
"github.com/ledongthuc/pdf"
)
// main is the command-line entry point. It expects the path of the PDF to
// split as its first argument; without one it prints a usage line and exits
// with status 1. Any error reported by processPDF terminates the program via
// log.Fatal.
func main() {
	args := os.Args
	if len(args) < 2 {
		fmt.Println("Usage: pdf-to-chapters <input.pdf>")
		os.Exit(1)
	}

	err := processPDF(args[1])
	if err != nil {
		log.Fatal(err)
	}
}
// processPDF splits the PDF at inputPath into chapters and writes them out.
//
// Pages are visited in order (1-based). A page whose plain text matches the
// header pattern "The Eminence in Shadow <n> - <m>" starts a new chapter whose
// number is the first captured number; every following page without a header
// is appended to that chapter, separated by a newline. Pages that come before
// the first header are discarded. Pages that are reported invalid or whose
// text cannot be extracted are logged and skipped.
//
// A summary with a short preview of each chapter is printed to stdout, after
// which the chapters are passed to writeOutput. The returned error is non-nil
// when the PDF cannot be opened or when writeOutput fails.
func processPDF(inputPath string) error {
	pdf.DebugOn = false

	f, r, err := pdf.Open(inputPath)
	if err != nil {
		return fmt.Errorf("failed to open PDF: %w", err)
	}
	defer f.Close()

	totalPages := r.NumPage()
	fmt.Printf("Processing PDF with %d pages\n", totalPages)

	chapterPattern := regexp.MustCompile(`The Eminence in Shadow\s+(\d+)\s*-\s*(\d+)`)

	var (
		chapters []Chapter
		current  *Chapter
		// body accumulates the text of the chapter being collected; a builder
		// avoids re-copying the whole chapter each time a page is appended.
		body strings.Builder
	)

	// flush finalizes the chapter being collected (if any) and stores it.
	flush := func() {
		if current == nil {
			return
		}
		current.Content = body.String()
		if len(current.Content) > 0 {
			chapters = append(chapters, *current)
		}
	}

	for i := 1; i <= totalPages; i++ {
		page := r.Page(i)
		// NOTE(review): confirm that Page.IsValid exists in the pinned version
		// of github.com/ledongthuc/pdf; upstream examples check p.V.IsNull().
		if err := page.IsValid(); err != nil {
			log.Printf("Warning: page %d not valid: %v", i, err)
			continue
		}

		text, err := page.GetPlainText(nil)
		if err != nil {
			log.Printf("Warning: failed to extract text from page %d: %v", i, err)
			continue
		}

		// A chapter header on this page closes the previous chapter and opens
		// a new one that begins with this page's text.
		if matches := chapterPattern.FindStringSubmatch(text); matches != nil {
			flush()
			current = &Chapter{
				Number:    matches[1],
				StartPage: i,
			}
			body.Reset()
			body.WriteString(text)
			continue
		}

		// No header: the page continues the current chapter, if there is one.
		if current != nil {
			body.WriteByte('\n')
			body.WriteString(text)
		}
	}
	// The last chapter has no following header to close it.
	flush()

	// Print chapter info.
	fmt.Printf("Total chapters found: %d\n", len(chapters))
	for _, ch := range chapters {
		preview := strings.TrimSpace(ch.Content)
		if len(preview) > 200 {
			// Cut at the last rune boundary at or before byte 200 so that a
			// multi-byte UTF-8 character is never split in the preview.
			cut := 0
			for idx := range preview {
				if idx > 200 {
					break
				}
				cut = idx
			}
			preview = preview[:cut] + "..."
		}
		fmt.Printf("Chapter %s (page %d): %s\n", ch.Number, ch.StartPage, preview)
	}

	// Write output file.
	return writeOutput(chapters, inputPath)
}
// Chapter is one chapter extracted from the PDF.
type Chapter struct {
	// Number is the chapter number exactly as captured from the header text;
	// it is kept as a string and never parsed.
	Number string

	// StartPage is the 1-based index of the page on which the chapter header
	// was found.
	StartPage int

	// Content is the plain text of the header page followed by the text of
	// every subsequent page of the chapter, joined with newlines.
	Content string
}
// writeOutput renders chapters as text and saves them next to the input file.
//
// The output path is inputPath with a trailing ".pdf" removed (when present)
// and "_chapters.txt" appended. Each chapter is emitted as a
// "## Chapter <number>" heading followed by a blank line and then its
// paragraphs, as produced by splitIntoParagraphs, each followed by a blank
// line. Chapters after the first are preceded by one additional blank line.
// The file is created even when chapters is empty.
//
// It returns an error when the output file cannot be created, written, or
// closed. On success the output path is printed to stdout.
func writeOutput(chapters []Chapter, inputPath string) error {
	outPath := strings.TrimSuffix(inputPath, ".pdf") + "_chapters.txt"

	f, err := os.Create(outPath)
	if err != nil {
		return fmt.Errorf("failed to create output: %w", err)
	}

	// Render everything in memory first so the file is written with a single
	// call whose error can be checked, instead of many unchecked small writes.
	var out strings.Builder
	for i, ch := range chapters {
		if i > 0 {
			out.WriteByte('\n')
		}
		fmt.Fprintf(&out, "## Chapter %s\n\n", ch.Number)

		for _, para := range splitIntoParagraphs(ch.Content) {
			trimmed := strings.TrimSpace(para)
			if trimmed == "" {
				continue
			}
			out.WriteString(trimmed)
			out.WriteString("\n\n")
		}
	}

	if _, err := f.WriteString(out.String()); err != nil {
		// Best-effort close: the write error is the one worth reporting.
		_ = f.Close()
		return fmt.Errorf("failed to write output %q: %w", outPath, err)
	}
	// Close explicitly rather than via defer: a failed close on a written file
	// can mean the data never reached disk, so it must be surfaced.
	if err := f.Close(); err != nil {
		return fmt.Errorf("failed to close output %q: %w", outPath, err)
	}

	fmt.Printf("\nOutput written to: %s\n", outPath)
	return nil
}
// splitIntoParagraphs groups the lines of text into paragraphs.
//
// The text is split on "\n" and every line is whitespace-trimmed. A line that
// is empty after trimming ends the paragraph in progress. A non-empty line
// shorter than three bytes (likely a stray header or page number) is dropped
// without ending the paragraph. All remaining lines of a paragraph are joined
// with a single space. The result is nil when no paragraph was produced.
func splitIntoParagraphs(text string) []string {
	var (
		result  []string
		pending []string // trimmed lines of the paragraph in progress
	)

	// emit closes the paragraph in progress, if it has any lines.
	emit := func() {
		if len(pending) == 0 {
			return
		}
		result = append(result, strings.Join(pending, " "))
		pending = pending[:0]
	}

	for _, raw := range strings.Split(text, "\n") {
		switch line := strings.TrimSpace(raw); {
		case line == "":
			emit()
		case len(line) < 3:
			// Too short to be body text; ignore it and keep the paragraph open.
		default:
			pending = append(pending, line)
		}
	}
	emit()

	return result
}