Compare commits

...

1 Commits

Author SHA1 Message Date
root
d82aa9d4b4 fix(import): decrypt owner-encrypted PDFs with pdfcpu; add imports bucket to minio-init
All checks were successful
Release / Test backend (push) Successful in 2m10s
Release / Check ui (push) Successful in 1m55s
Release / Docker (push) Successful in 7m17s
Release / Gitea Release (push) Successful in 48s
- parsePDF now attempts to strip encryption via pdfcpu (empty user password)
  before handing bytes to dslipak/pdf — fixes '256-bit encryption key' error
  on publisher PDFs that use owner-only encryption (copy/print restrictions)
- Add pdfcpu v0.11.1 as direct dependency (was already indirect)
- docker-compose.yml minio-init: add 'imports' and 'translations' buckets
  so a fresh deploy creates all required buckets
2026-04-09 20:08:12 +05:00
3 changed files with 31 additions and 0 deletions

View File

@@ -38,6 +38,7 @@ require (
github.com/minio/crc64nvme v1.1.1 // indirect
github.com/minio/md5-simd v1.1.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pdfcpu/pdfcpu v0.11.1 // indirect
github.com/philhofer/fwd v1.2.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect

View File

@@ -15,6 +15,8 @@ import (
"github.com/libnovel/backend/internal/bookstore"
"github.com/libnovel/backend/internal/domain"
minio "github.com/minio/minio-go/v7"
"github.com/pdfcpu/pdfcpu/pkg/api"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
"golang.org/x/net/html"
)
@@ -90,7 +92,33 @@ func AnalyzeFile(data []byte, fileType string) (chapterCount int, firstLines []s
// decryptPDF strips encryption from a PDF using an empty user password.
//
// It targets the common case of "owner-only" encrypted PDFs (copy/print
// restrictions), which use an empty user password and open normally in
// readers.
//
// data is the complete contents of the PDF file. On success the bytes
// written by pdfcpu are returned. If pdfcpu reports an error, the result is
// nil and the error is returned wrapped with a "decrypt PDF" prefix; the
// underlying pdfcpu error stays reachable through errors.Is / errors.As.
//
// NOTE(review): pdfcpu presumably also returns an error for input that is
// not encrypted at all — confirm. Callers should therefore treat an error
// as "keep using the original bytes" rather than as a fatal condition.
func decryptPDF(data []byte) ([]byte, error) {
	conf := model.NewDefaultConfiguration()
	// Set both passwords explicitly so the attempt never depends on
	// whatever the default configuration happens to carry.
	conf.UserPW = ""
	conf.OwnerPW = ""

	var out bytes.Buffer
	if err := api.Decrypt(bytes.NewReader(data), &out, conf); err != nil {
		return nil, fmt.Errorf("decrypt PDF: %w", err)
	}
	return out.Bytes(), nil
}
func parsePDF(data []byte) ([]bookstore.Chapter, error) {
// If the PDF is encrypted, try to decrypt it with an empty password.
// Many publisher PDFs use owner-only encryption (copy/print restrictions)
// with an empty user password, so they open normally but confuse parsers.
decrypted, err := decryptPDF(data)
if err == nil {
data = decrypted
}
// (if decryption fails we still attempt to parse — maybe it works anyway)
r, err := pdf.NewReader(bytes.NewReader(data), int64(len(data)))
if err != nil {
return nil, fmt.Errorf("open PDF: %w", err)

View File

@@ -58,6 +58,8 @@ services:
mc mb --ignore-existing local/audio;
mc mb --ignore-existing local/avatars;
mc mb --ignore-existing local/catalogue;
mc mb --ignore-existing local/translations;
mc mb --ignore-existing local/imports;
echo 'buckets ready';
"
environment: