Files
libnovel/backend/internal/storage/import.go
root 8662aed565
All checks were successful
Release / Check ui (push) Successful in 2m10s
Release / Test backend (push) Successful in 53s
Release / Docker (push) Successful in 6m7s
Release / Gitea Release (push) Successful in 23s
feat: PDF single-chapter import, EPUB numbering fix, admin chapter split tool
- parsePDF: return all text as single 'Full Text' chapter (admin splits manually)
- parseEPUB: fix chapter numbering to use sequential counter not spine index
- Remove dead code: chaptersFromBookmarks, cleanChapterText, extractChaptersFromText, chapterHeadingRE; drop pdfcpu alias and regexp imports
- Backend: POST /api/admin/books/:slug/split-chapters endpoint — splits text on '---' dividers, optional '## Title' headers, writes chapters via WriteChapter
- UI: admin panel now shows for all admin users regardless of source_url; chapter split tool shown when book has single 'Full Text' chapter, pre-fills from MinIO content
2026-04-09 23:59:24 +05:00

858 lines
22 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package storage
import (
"archive/zip"
"bytes"
"context"
"fmt"
"io"
"os"
"sort"
"strconv"
"strings"
"github.com/libnovel/backend/internal/bookstore"
"github.com/libnovel/backend/internal/domain"
minio "github.com/minio/minio-go/v7"
"github.com/pdfcpu/pdfcpu/pkg/api"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
"golang.org/x/net/html"
)
// importer is the bookstore.BookImporter implementation returned by
// NewBookImporter. It fetches uploaded files through the store's MinIO client.
type importer struct {
	mc *minioClient // MinIO client wrapper; Import reads objects via mc.client
}
// NewBookImporter returns a bookstore.BookImporter that loads uploaded files
// through the MinIO client owned by s.
func NewBookImporter(s *Store) bookstore.BookImporter {
	imp := new(importer)
	imp.mc = s.mc
	return imp
}
// Import downloads objectKey from the "imports" bucket and parses it into
// chapters. fileType must be "pdf" or "epub"; any other value is rejected
// before MinIO is contacted.
func (i *importer) Import(ctx context.Context, objectKey, fileType string) ([]bookstore.Chapter, error) {
	// Resolve the parser up front so an unsupported type fails fast.
	var parse func([]byte) ([]bookstore.Chapter, error)
	switch fileType {
	case "pdf":
		parse = parsePDF
	case "epub":
		parse = parseEPUB
	default:
		return nil, fmt.Errorf("unsupported file type: %s", fileType)
	}

	obj, err := i.mc.client.GetObject(ctx, "imports", objectKey, minio.GetObjectOptions{})
	if err != nil {
		return nil, fmt.Errorf("get object from minio: %w", err)
	}
	defer obj.Close()

	payload, err := io.ReadAll(obj)
	if err != nil {
		return nil, fmt.Errorf("read object: %w", err)
	}
	return parse(payload)
}
// AnalyzeFile parses the given PDF or EPUB data and returns the detected
// chapter count and up to 3 preview lines (the first line of the trimmed
// content of each of the first 3 chapters). It is used by the analyze-only
// endpoint so users can preview the chapter count before committing the import.
//
// Each preview is limited to 120 bytes of text followed by "…". The cut is
// moved back to a UTF-8 rune boundary so a multi-byte character is never
// split in half (which would yield an invalid string).
//
// fileType must be "pdf" or "epub"; any other value yields an error.
// Parsing runs synchronously via parsePDF / parseEPUB.
func AnalyzeFile(data []byte, fileType string) (chapterCount int, firstLines []string, err error) {
	const (
		maxPreviews   = 3
		maxPreviewLen = 120 // bytes, before the ellipsis is appended
	)

	var chapters []bookstore.Chapter
	switch fileType {
	case "pdf":
		chapters, err = parsePDF(data)
	case "epub":
		chapters, err = parseEPUB(data)
	default:
		return 0, nil, fmt.Errorf("unsupported file type: %s", fileType)
	}
	if err != nil {
		return 0, nil, err
	}

	for i, ch := range chapters {
		if i >= maxPreviews {
			break
		}
		line := strings.TrimSpace(ch.Content)
		if nl := strings.Index(line, "\n"); nl > 0 {
			line = line[:nl]
		}
		if len(line) > maxPreviewLen {
			cut := maxPreviewLen
			// line[cut] is the first byte that would be dropped. While it is a
			// UTF-8 continuation byte (10xxxxxx) the cut sits inside a rune, so
			// step back. A rune has at most 3 continuation bytes; the bound
			// keeps malformed input from dragging the cut arbitrarily far.
			for back := 0; back < 3 && cut > 0 && line[cut]&0xC0 == 0x80; back++ {
				cut--
			}
			line = line[:cut] + "…"
		}
		firstLines = append(firstLines, line)
	}
	return len(chapters), firstLines, nil
}
// decryptPDF attempts to remove encryption from a PDF by running pdfcpu's
// Decrypt with empty user and owner passwords. This covers the common
// "owner-only" protection (copy/print restrictions) where the user password
// is empty. It returns the decrypted bytes, or the pdfcpu error when
// decryption is not possible.
func decryptPDF(data []byte) ([]byte, error) {
	cfg := model.NewDefaultConfiguration()
	cfg.UserPW, cfg.OwnerPW = "", ""

	var plain bytes.Buffer
	if err := api.Decrypt(bytes.NewReader(data), &plain, cfg); err != nil {
		return nil, err
	}
	return plain.Bytes(), nil
}
// ParseImportFile parses a PDF or EPUB and returns its chapters.
// Unlike AnalyzeFile it honours ctx: the parse runs in a separate goroutine
// and the call returns early with a "parse timed out" error once ctx is done.
// The parse itself is not interrupted; it finishes in the background and its
// result is discarded. PDFs go through parsePDF, which first tries to strip
// encryption with an empty password.
func ParseImportFile(ctx context.Context, data []byte, fileType string) ([]bookstore.Chapter, error) {
	type outcome struct {
		chapters []bookstore.Chapter
		err      error
	}

	// Capacity 1 lets the worker deliver its result and exit even when the
	// caller has already stopped listening.
	done := make(chan outcome, 1)
	go func() {
		var out outcome
		switch fileType {
		case "pdf":
			out.chapters, out.err = parsePDF(data)
		case "epub":
			out.chapters, out.err = parseEPUB(data)
		default:
			out.err = fmt.Errorf("unsupported file type: %s", fileType)
		}
		done <- out
	}()

	select {
	case res := <-done:
		return res.chapters, res.err
	case <-ctx.Done():
		return nil, fmt.Errorf("parse timed out: %w", ctx.Err())
	}
}
// pdfSkipBookmarks is the set of bookmark titles treated as front/back
// matter rather than story content. parsePDF lower-cases and trims each
// bookmark title before looking it up here, so keys must be lowercase. The
// first bookmark whose title is NOT in this set marks where the body starts.
var pdfSkipBookmarks = map[string]bool{
	"cover": true, "insert": true, "title page": true, "copyright": true,
	"appendix": true, "color insert": true, "color illustrations": true,
}
// parsePDF extracts text from PDF bytes and returns it as a single chapter
// titled "Full Text" (Number 1).
//
// The full readable text is returned as one chapter so the admin can manually
// split it into chapters via the UI using --- markers.
//
// Strategy:
//  1. Try to decrypt owner-protected PDFs (empty user password). A failure is
//     ignored and the original bytes are used as-is.
//  2. Extract the raw content stream of every page into a temporary directory
//     using pdfcpu ExtractContent, then pull text out of each stream.
//  3. Pick the first body page: when bookmarks can be read, the page of the
//     first bookmark whose title is not in pdfSkipBookmarks (page 1 if there
//     is none); when bookmarks cannot be read and the PDF has more than 10
//     pages, page 11.
//  4. Concatenate the non-empty page texts from the body start onward,
//     separated by blank lines.
//
// An error is returned when extraction fails, produces no page files, or
// yields no text at all.
func parsePDF(data []byte) ([]bookstore.Chapter, error) {
	// Best effort: unencrypted PDFs (and ones we cannot decrypt) are parsed
	// from the original bytes.
	if decrypted, err := decryptPDF(data); err == nil {
		data = decrypted
	}

	conf := model.NewDefaultConfiguration()
	conf.UserPW = ""
	conf.OwnerPW = ""

	// Extract all page content streams to a temp directory.
	tmpDir, err := os.MkdirTemp("", "pdf-extract-*")
	if err != nil {
		return nil, fmt.Errorf("create temp dir: %w", err)
	}
	defer os.RemoveAll(tmpDir)

	if err := api.ExtractContent(bytes.NewReader(data), tmpDir, "out", nil, conf); err != nil {
		return nil, fmt.Errorf("extract PDF content: %w", err)
	}

	entries, err := os.ReadDir(tmpDir)
	if err != nil {
		// A filesystem failure is not the same thing as an empty PDF; report
		// it as such instead of hiding it behind "no content pages".
		return nil, fmt.Errorf("read extracted PDF content: %w", err)
	}
	if len(entries) == 0 {
		return nil, fmt.Errorf("PDF has no content pages")
	}

	// Parse page number from filename and build a page → text map.
	pageTexts := make(map[int]string, len(entries))
	maxPage := 0
	for _, e := range entries {
		pageNum := pageNumFromFilename(e.Name())
		if pageNum <= 0 {
			continue // not a per-page content file
		}
		raw, readErr := os.ReadFile(tmpDir + "/" + e.Name())
		if readErr != nil {
			continue // skip unreadable pages rather than failing the import
		}
		pageTexts[pageNum] = fixWin1252(extractTextFromContentStream(raw))
		if pageNum > maxPage {
			maxPage = pageNum
		}
	}

	// Determine the front-matter cutoff (see step 3 above).
	bodyStart := 1
	bookmarks, bmErr := api.Bookmarks(bytes.NewReader(data), conf)
	if bmErr == nil {
		for _, bm := range bookmarks {
			title := strings.ToLower(strings.TrimSpace(bm.Title))
			if !pdfSkipBookmarks[title] && bm.PageFrom > 0 {
				// First non-front-matter bookmark — body starts here.
				bodyStart = bm.PageFrom
				break
			}
		}
	} else if maxPage > 10 {
		bodyStart = 11
	}

	// Concatenate all body pages in page order.
	var sb strings.Builder
	for p := bodyStart; p <= maxPage; p++ {
		t := strings.TrimSpace(pageTexts[p])
		if t == "" {
			continue
		}
		sb.WriteString(t)
		sb.WriteString("\n\n")
	}
	text := strings.TrimSpace(sb.String())
	if text == "" {
		return nil, fmt.Errorf("could not extract any text from PDF")
	}

	return []bookstore.Chapter{{
		Number:  1,
		Title:   "Full Text",
		Content: text,
	}}, nil
}
// pageNumFromFilename returns the positive page number encoded at the end of
// a pdfcpu content-stream filename such as "out_Content_page_42.txt".
// Any directory prefix and the final extension are ignored; the number is
// whatever follows the last "_". It returns 0 when there is no "_", the
// suffix is not an integer, or the integer is not positive.
func pageNumFromFilename(name string) int {
	// LastIndex yields -1 when there is no "/", so +1 keeps the whole name.
	stem := name[strings.LastIndex(name, "/")+1:]
	if dot := strings.LastIndex(stem, "."); dot >= 0 {
		stem = stem[:dot]
	}

	sep := strings.LastIndex(stem, "_")
	if sep < 0 {
		return 0
	}
	page, err := strconv.Atoi(stem[sep+1:])
	if err != nil || page <= 0 {
		return 0
	}
	return page
}
// win1252ToUnicode maps the Windows-1252 bytes in the range 0x80–0x9F to the
// Unicode characters they represent in that encoding.
// Standard Latin-1 maps these bytes to control characters; Win-1252 maps
// them to typographic symbols that appear in publisher PDFs.
// The bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D are undefined in Windows-1252
// and therefore have no entry.
var win1252ToUnicode = map[byte]rune{
	0x80: '\u20AC', // € (euro sign)
	0x82: '\u201A', // ‚ (single low-9 quotation mark)
	0x83: '\u0192', // ƒ
	0x84: '\u201E', // „
	0x85: '\u2026', // …
	0x86: '\u2020', // †
	0x87: '\u2021', // ‡
	0x88: '\u02C6', // ˆ
	0x89: '\u2030', // ‰
	0x8A: '\u0160', // Š
	0x8B: '\u2039', // ‹ (single left-pointing angle quotation mark)
	0x8C: '\u0152', // Œ
	0x8E: '\u017D', // Ž
	0x91: '\u2018', // ‘ (left single quotation mark)
	0x92: '\u2019', // ’ (right single quotation mark / apostrophe)
	0x93: '\u201C', // “ (left double quotation mark)
	0x94: '\u201D', // ” (right double quotation mark)
	0x95: '\u2022', // • (bullet)
	0x96: '\u2013', // – (en dash)
	0x97: '\u2014', // — (em dash)
	0x98: '\u02DC', // ˜
	0x99: '\u2122', // ™
	0x9A: '\u0161', // š
	0x9B: '\u203A', // › (single right-pointing angle quotation mark)
	0x9C: '\u0153', // œ
	0x9E: '\u017E', // ž
	0x9F: '\u0178', // Ÿ
}

// fixWin1252 converts a string holding raw single-byte Windows-1252 text
// (one byte per character, as found in PDF literal strings) into valid UTF-8.
//
// Input that is already valid UTF-8 — which includes pure ASCII — is returned
// unchanged, so correctly encoded text is never rewritten. Otherwise every
// byte is treated as one Windows-1252 character:
//   - 0x00–0x7F are copied as-is;
//   - 0x80–0x9F are translated through win1252ToUnicode (bytes undefined in
//     Windows-1252 become the C1 control character with the same value);
//   - 0xA0–0xFF become the Unicode code point with the same value, since
//     Windows-1252 agrees with Latin-1 there.
//
// The result is therefore always valid UTF-8 when the input was not.
func fixWin1252(s string) string {
	// ToValidUTF8 returns its argument untouched when it is valid and a
	// (necessarily different) valid string when it is not, so equality is a
	// validity test that needs no extra import.
	if strings.ToValidUTF8(s, "") == s {
		return s
	}

	var sb strings.Builder
	sb.Grow(len(s))
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b < 0x80 {
			sb.WriteByte(b)
			continue
		}
		if r, ok := win1252ToUnicode[b]; ok {
			sb.WriteRune(r)
			continue
		}
		// 0xA0–0xFF, or one of the five undefined bytes in 0x80–0x9F.
		sb.WriteRune(rune(b))
	}
	return sb.String()
}
// extractTextFromContentStream parses a raw PDF content stream and extracts
// readable text from Tj and TJ operators.
//
// TJ arrays may contain a mix of literal strings (parenthesised) and hex glyph
// arrays. Only the literal strings are decoded — hex arrays require per-font
// ToUnicode CMaps and are skipped. Kerning adjustment numbers inside TJ arrays
// are also ignored (they're just spacing hints). Literals that contain no
// printable ASCII after unescaping are dropped.
//
// A newline is inserted on ET / Td / TD / T* operators. An operator is only
// recognised as a whole token: it must not be preceded or followed by a
// letter, digit or underscore, so the "ET" in a name such as /FET is ignored.
//
// Malformed input is tolerated: an unterminated literal string or array simply
// runs to the end of the stream instead of causing a panic.
func extractTextFromContentStream(stream []byte) string {
	s := string(stream)
	n := len(s)
	var sb strings.Builder

	for i := 0; i < n; {
		switch s[i] {
		case '[':
			// Array: [ ... ] — collect every literal string up to the closing
			// bracket; hex strings and numbers are stepped over byte by byte.
			j := i + 1
			for j < n && s[j] != ']' {
				if s[j] != '(' {
					j++
					continue
				}
				raw, next := scanPDFLiteral(s, j)
				if lit := pdfUnescapeString(raw); hasPrintableASCII(lit) {
					sb.WriteString(lit)
				}
				j = next
			}
			// Consume a directly following TJ operator, if any.
			if op := skipPDFWhitespace(s, j+1); atPDFOperator(s, op, "TJ") {
				i = op + 2
			} else {
				i = j + 1
			}
			continue

		case '(':
			// Single string: (string) Tj — emitted only when followed by Tj.
			raw, next := scanPDFLiteral(s, i)
			if lit := pdfUnescapeString(raw); hasPrintableASCII(lit) {
				// Any PDF whitespace, including line breaks, may separate the
				// operand from its operator (same rule as for TJ above).
				if op := skipPDFWhitespace(s, next); atPDFOperator(s, op, "Tj") {
					sb.WriteString(lit)
					i = op + 2
					continue
				}
			}
			i = next
			continue
		}

		// ET ends a text object; Td / TD / T* move to a new line within one.
		startsToken := i == 0 || !isAlphaNum(s[i-1])
		if startsToken && (atPDFOperator(s, i, "ET") || atPDFOperator(s, i, "Td") ||
			atPDFOperator(s, i, "TD") || atPDFOperator(s, i, "T*")) {
			sb.WriteByte('\n')
			i += 2
			continue
		}
		i++
	}
	return sb.String()
}

// scanPDFLiteral scans the parenthesised literal string whose opening '(' is
// at s[open]. It honours backslash escapes and balanced nested parentheses.
// It returns the still-escaped contents and the index of the first byte after
// the literal. For an unterminated literal the contents run to the end of s
// and next == len(s).
func scanPDFLiteral(s string, open int) (raw string, next int) {
	n := len(s)
	k := open + 1
	depth := 1
	for k < n && depth > 0 {
		switch s[k] {
		case '\\':
			k += 2 // skip the backslash and the byte it escapes
			continue
		case '(':
			depth++
		case ')':
			depth--
		}
		k++
	}
	if k > n {
		k = n // a trailing backslash pushed the cursor past the end
	}
	end := k
	if depth == 0 {
		end = k - 1 // exclude the closing ')'
	}
	return s[open+1 : end], k
}

// skipPDFWhitespace returns the index of the first byte at or after pos that
// is not a space, tab, carriage return or line feed. pos may be >= len(s).
func skipPDFWhitespace(s string, pos int) int {
	for pos < len(s) && (s[pos] == ' ' || s[pos] == '\t' || s[pos] == '\r' || s[pos] == '\n') {
		pos++
	}
	return pos
}

// atPDFOperator reports whether op occurs at s[pos:] and is not immediately
// followed by a letter, digit or underscore. pos may be >= len(s).
func atPDFOperator(s string, pos int, op string) bool {
	end := pos + len(op)
	if end > len(s) || s[pos:end] != op {
		return false
	}
	return end == len(s) || !isAlphaNum(s[end])
}
// isAlphaNum reports whether b is an ASCII letter, an ASCII digit or an
// underscore. Note that, despite the name, '_' counts as a match.
func isAlphaNum(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z',
		'A' <= b && b <= 'Z',
		'0' <= b && b <= '9':
		return true
	}
	return b == '_'
}
// hasPrintableASCII reports whether s contains at least one rune in the
// printable ASCII range 0x20 (space) through 0x7E ('~').
func hasPrintableASCII(s string) bool {
	printable := func(r rune) bool { return r >= 0x20 && r < 0x7F }
	return strings.IndexFunc(s, printable) >= 0
}
// pdfUnescapeString decodes the backslash escape sequences of a PDF literal
// string (the text between the parentheses) and returns the resulting bytes
// as a string.
//
// Recognised escapes:
//   - \n \r \t \b \f   → line feed, carriage return, tab, backspace, form feed
//   - \( \) \\         → the literal character
//   - \d, \dd, \ddd    → the byte with that octal value (at most three
//     digits; values above 0xFF are truncated to their low 8 bits)
//   - backslash followed by a line break (LF, CR or CRLF) → nothing; this is a
//     line continuation
//
// For any other escaped character the backslash is dropped and the character
// kept. A lone backslash at the very end of s is copied through unchanged.
func pdfUnescapeString(s string) string {
	if !strings.ContainsRune(s, '\\') {
		return s // fast path: nothing to decode
	}

	var sb strings.Builder
	sb.Grow(len(s))
	for i := 0; i < len(s); {
		if s[i] != '\\' || i+1 >= len(s) {
			sb.WriteByte(s[i])
			i++
			continue
		}

		c := s[i+1]
		i += 2 // i now points just past the escaped character
		switch c {
		case 'n':
			sb.WriteByte('\n')
		case 'r':
			sb.WriteByte('\r')
		case 't':
			sb.WriteByte('\t')
		case 'b':
			sb.WriteByte('\b')
		case 'f':
			sb.WriteByte('\f')
		case '\n':
			// Line continuation: the backslash and the line break vanish.
		case '\r':
			// Line continuation with CR or CRLF.
			if i < len(s) && s[i] == '\n' {
				i++
			}
		case '0', '1', '2', '3', '4', '5', '6', '7':
			// Octal escape: the first digit is c, followed by up to two more.
			val := int(c - '0')
			for extra := 0; extra < 2 && i < len(s) && s[i] >= '0' && s[i] <= '7'; extra++ {
				val = val*8 + int(s[i]-'0')
				i++
			}
			sb.WriteByte(byte(val))
		default:
			// Covers '(', ')' and '\\' as well as unknown escapes.
			sb.WriteByte(c)
		}
	}
	return sb.String()
}
// ── EPUB parsing ──────────────────────────────────────────────────────────────
// parseEPUB reads an EPUB (a zip archive) from data and returns one chapter
// per spine document that yields non-empty text.
//
// The OPF package document is located via META-INF/container.xml, its spine
// gives the reading order, and each spine document is converted to plain text
// with htmlToText. Documents that cannot be read or contain no text are
// skipped. Chapters are numbered sequentially from 1 in the order they are
// kept, so numbering has no gaps. A chapter's title comes from the NCX title
// map when present, otherwise it is "Chapter N".
func parseEPUB(data []byte) ([]bookstore.Chapter, error) {
	archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, fmt.Errorf("open EPUB zip: %w", err)
	}

	// 1. META-INF/container.xml names the OPF package document.
	opfPath, err := epubRootfilePath(archive)
	if err != nil {
		return nil, fmt.Errorf("epub container: %w", err)
	}

	// 2. The OPF spine lists the content documents in reading order.
	spineFiles, titleMap, err := epubSpine(archive, opfPath)
	if err != nil {
		return nil, fmt.Errorf("epub spine: %w", err)
	}
	if len(spineFiles) == 0 {
		return nil, fmt.Errorf("EPUB spine is empty")
	}

	// Spine hrefs are relative to the OPF's directory. LastIndex is -1 when
	// the OPF sits at the archive root, which makes the prefix empty.
	opfDir := opfPath[:strings.LastIndex(opfPath, "/")+1]

	var chapters []bookstore.Chapter
	for _, href := range spineFiles {
		raw, readErr := epubFileContent(archive, opfDir+href)
		if readErr != nil {
			continue
		}
		body := htmlToText(raw)
		if strings.TrimSpace(body) == "" {
			continue
		}

		number := len(chapters) + 1
		title := titleMap[href]
		if title == "" {
			title = fmt.Sprintf("Chapter %d", number)
		}
		chapters = append(chapters, bookstore.Chapter{
			Number:  number,
			Title:   title,
			Content: body,
		})
	}

	if len(chapters) == 0 {
		return nil, fmt.Errorf("no readable chapters found in EPUB")
	}
	return chapters, nil
}
// epubRootfilePath reads META-INF/container.xml from the archive and returns
// the value of the full-path attribute of its <rootfile> element, i.e. the
// location of the OPF package document inside the zip.
// It fails when container.xml is missing, cannot be opened or parsed, or
// carries no full-path attribute.
func epubRootfilePath(zr *zip.Reader) (string, error) {
	container := zipFile(zr, "META-INF/container.xml")
	if container == nil {
		return "", fmt.Errorf("META-INF/container.xml not found")
	}
	src, err := container.Open()
	if err != nil {
		return "", err
	}
	defer src.Close()

	// The lenient HTML parser is used on purpose; element and attribute names
	// are therefore compared case-insensitively.
	root, err := html.Parse(src)
	if err != nil {
		return "", err
	}

	var opfPath string
	var visit func(node *html.Node)
	visit = func(node *html.Node) {
		isRootfile := node.Type == html.ElementNode && strings.EqualFold(node.Data, "rootfile")
		if isRootfile {
			for _, attr := range node.Attr {
				if !strings.EqualFold(attr.Key, "full-path") {
					continue
				}
				opfPath = attr.Val
				return
			}
		}
		for child := node.FirstChild; child != nil; child = child.NextSibling {
			visit(child)
		}
	}
	visit(root)

	if opfPath == "" {
		return "", fmt.Errorf("rootfile full-path not found in container.xml")
	}
	return opfPath, nil
}
// epubSpine parses the OPF package document at opfPath and returns
//
//   - the hrefs of the spine items in reading order (hrefs are as written in
//     the manifest, i.e. relative to the OPF's directory), and
//   - a map from href to navigation title, filled from the NCX file referenced
//     by the spine's toc attribute when that file exists. The map is empty
//     when no NCX is available.
//
// When the spine resolves to no items, every manifest item that looks like
// (X)HTML is used instead, sorted by href.
//
// An error is returned when the OPF is missing from the archive or cannot be
// opened, read or parsed.
func epubSpine(zr *zip.Reader, opfPath string) ([]string, map[string]string, error) {
	f := zipFile(zr, opfPath)
	if f == nil {
		return nil, nil, fmt.Errorf("OPF file %q not found in EPUB", opfPath)
	}
	rc, err := f.Open()
	if err != nil {
		return nil, nil, err
	}
	defer rc.Close()
	opfData, err := io.ReadAll(rc)
	if err != nil {
		return nil, nil, err
	}

	// Parse OPF XML with html.Parse (handles malformed XML too). The error is
	// checked so a nil document is never walked.
	doc, err := html.Parse(bytes.NewReader(opfData))
	if err != nil {
		return nil, nil, fmt.Errorf("parse OPF %q: %w", opfPath, err)
	}

	// id → href map built from <manifest> items.
	idToHref := make(map[string]string)
	// href → nav title map (populated from the NCX further down).
	hrefTitle := make(map[string]string)

	var manifestItems []struct{ id, href, mediaType string }
	var spineIdrefs []string
	var ncxID string // manifest id of the NCX, from <spine toc="...">

	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode {
			switch strings.ToLower(n.Data) {
			case "item":
				var id, href, mt string
				for _, a := range n.Attr {
					switch strings.ToLower(a.Key) {
					case "id":
						id = a.Val
					case "href":
						href = a.Val
					case "media-type":
						mt = a.Val
					}
				}
				if id != "" && href != "" {
					manifestItems = append(manifestItems, struct{ id, href, mediaType string }{id, href, mt})
					idToHref[id] = href
				}
			case "itemref":
				for _, a := range n.Attr {
					if strings.ToLower(a.Key) == "idref" {
						spineIdrefs = append(spineIdrefs, a.Val)
					}
				}
			case "spine":
				for _, a := range n.Attr {
					if strings.ToLower(a.Key) == "toc" {
						ncxID = a.Val
					}
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)

	// Build the ordered spine href list; idrefs without a manifest entry are
	// dropped.
	var spineHrefs []string
	for _, idref := range spineIdrefs {
		if href, ok := idToHref[idref]; ok {
			spineHrefs = append(spineHrefs, href)
		}
	}

	// If the spine yielded nothing, fall back to all (X)HTML manifest items,
	// sorted by href.
	if len(spineHrefs) == 0 {
		sort.Slice(manifestItems, func(i, j int) bool {
			return manifestItems[i].href < manifestItems[j].href
		})
		for _, it := range manifestItems {
			mt := strings.ToLower(it.mediaType)
			hrefLower := strings.ToLower(it.href)
			if strings.Contains(mt, "html") || strings.HasSuffix(hrefLower, ".html") || strings.HasSuffix(hrefLower, ".xhtml") {
				spineHrefs = append(spineHrefs, it.href)
			}
		}
	}

	// Try to get chapter titles from the NCX (toc.ncx). Its href is relative
	// to the OPF's directory. Missing or unreadable NCX files are not an error.
	opfDir := ""
	if idx := strings.LastIndex(opfPath, "/"); idx >= 0 {
		opfDir = opfPath[:idx+1]
	}
	if ncxHref, ok := idToHref[ncxID]; ok {
		if ncxFile := zipFile(zr, opfDir+ncxHref); ncxFile != nil {
			if ncxRC, err := ncxFile.Open(); err == nil {
				defer ncxRC.Close()
				parseNCXTitles(ncxRC, hrefTitle)
			}
		}
	}
	return spineHrefs, hrefTitle, nil
}
// parseNCXTitles reads a toc.ncx document from r and records, for every
// navPoint, out[src] = label where
//
//   - label is the trimmed text of the first <text> element found inside the
//     navPoint, and
//   - src is the src attribute of the first <content> element found inside the
//     navPoint, with any "#fragment" removed.
//
// Only the FIRST label and FIRST content are used, so a navPoint that contains
// nested navPoints is mapped to its own target rather than to that of its last
// descendant. Nested navPoints are visited as well and get their own entries.
// navPoints lacking a label or a src are ignored. If the document cannot be
// parsed, out is left untouched.
func parseNCXTitles(r io.Reader, out map[string]string) {
	doc, err := html.Parse(r)
	if err != nil {
		return
	}

	// Each navPoint has a <navLabel><text>…</text></navLabel> and a
	// <content src="…"/> child, optionally followed by nested navPoints.
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && strings.EqualFold(n.Data, "navpoint") {
			var label, src string
			var inner func(*html.Node)
			inner = func(c *html.Node) {
				if c.Type == html.ElementNode {
					if strings.EqualFold(c.Data, "text") && label == "" {
						if c.FirstChild != nil && c.FirstChild.Type == html.TextNode {
							label = strings.TrimSpace(c.FirstChild.Data)
						}
					}
					// First content wins: later <content> elements belong to
					// nested navPoints, which are handled by walk below.
					if strings.EqualFold(c.Data, "content") && src == "" {
						for _, a := range c.Attr {
							if strings.EqualFold(a.Key, "src") {
								// Strip fragment identifier (#...).
								src = strings.SplitN(a.Val, "#", 2)[0]
							}
						}
					}
				}
				for child := c.FirstChild; child != nil; child = child.NextSibling {
					inner(child)
				}
			}
			inner(n)
			if label != "" && src != "" {
				out[src] = label
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)
}
// epubFileContent returns the uncompressed bytes of the archive entry named
// path (looked up via zipFile). It fails when no such entry exists or the
// entry cannot be opened or read.
func epubFileContent(zr *zip.Reader, path string) ([]byte, error) {
	entry := zipFile(zr, path)
	if entry == nil {
		return nil, fmt.Errorf("file %q not in EPUB", path)
	}

	src, err := entry.Open()
	if err != nil {
		return nil, err
	}
	defer src.Close()

	content, err := io.ReadAll(src)
	return content, err
}
// zipFile returns the first entry of zr whose name equals name when both are
// lower-cased, or nil when there is no such entry.
func zipFile(zr *zip.Reader, name string) *zip.File {
	want := strings.ToLower(name)
	for idx := range zr.File {
		if entry := zr.File[idx]; strings.ToLower(entry.Name) == want {
			return entry
		}
	}
	return nil
}
// htmlToText converts HTML/XHTML content to plain text suitable for storage.
//
// Text nodes are trimmed and emitted followed by a single space. Block-level
// elements (p, div, h1–h6, li, tr) start on a fresh line and are followed by
// a line break; <br> only forces a fresh line. The contents of script, style
// and head elements are dropped. Afterwards every line is trimmed, runs of
// blank lines collapse to one, and the whole result is trimmed.
// If the input cannot be parsed it is returned verbatim.
func htmlToText(data []byte) string {
	root, err := html.Parse(bytes.NewReader(data))
	if err != nil {
		return string(data)
	}

	// isBlock reports whether tag opens and closes its own line.
	isBlock := func(tag string) bool {
		switch tag {
		case "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "li", "tr":
			return true
		}
		return false
	}

	var buf strings.Builder
	var visit func(*html.Node)
	visit = func(n *html.Node) {
		tag := "" // stays empty for anything that is not an element
		switch n.Type {
		case html.TextNode:
			if t := strings.TrimSpace(n.Data); t != "" {
				buf.WriteString(t)
				buf.WriteByte(' ')
			}
		case html.ElementNode:
			tag = strings.ToLower(n.Data)
			if tag == "script" || tag == "style" || tag == "head" {
				return // skip the element and everything inside it
			}
			if isBlock(tag) || tag == "br" {
				// Start a new line unless we are already at the start of one.
				if cur := buf.String(); cur != "" && !strings.HasSuffix(cur, "\n") {
					buf.WriteByte('\n')
				}
			}
		}

		for child := n.FirstChild; child != nil; child = child.NextSibling {
			visit(child)
		}

		if isBlock(tag) {
			buf.WriteByte('\n')
		}
	}
	visit(root)

	// Trim every line and collapse consecutive blank lines into one.
	var kept []string
	prevBlank := false
	for _, ln := range strings.Split(buf.String(), "\n") {
		ln = strings.TrimSpace(ln)
		if ln == "" {
			if !prevBlank {
				kept = append(kept, "")
			}
			prevBlank = true
			continue
		}
		prevBlank = false
		kept = append(kept, ln)
	}
	return strings.TrimSpace(strings.Join(kept, "\n"))
}
// ── Chapter ingestion ─────────────────────────────────────────────────────────
// IngestChapters stores extracted chapters for the book identified by slug.
// Every chapter is turned into markdown — a "# <heading>" line, a blank line,
// then the content — and handed to WriteChapter. The heading is the chapter
// title, or "Chapter N" when the title is empty. Chapters are written in
// order; the first failure aborts the run and is returned wrapped with the
// chapter number.
func (s *Store) IngestChapters(ctx context.Context, slug string, chapters []bookstore.Chapter) error {
	for idx := range chapters {
		ch := chapters[idx]

		heading := ch.Title
		if heading == "" {
			heading = fmt.Sprintf("Chapter %d", ch.Number)
		}

		record := domain.Chapter{
			Ref:  domain.ChapterRef{Number: ch.Number, Title: ch.Title},
			Text: "# " + heading + "\n\n" + ch.Content,
		}
		if err := s.WriteChapter(ctx, slug, record); err != nil {
			return fmt.Errorf("ingest chapter %d: %w", ch.Number, err)
		}
	}
	return nil
}
// GetImportObjectKey returns the MinIO object key for an uploaded import
// file: the filename prefixed with "imports/".
func GetImportObjectKey(filename string) string {
	const prefix = "imports/"
	return prefix + filename
}