LaTeX dynamic building – 1
This commit is contained in:
parent
f1d022b19b
commit
3ddb091d1e
@ -38,6 +38,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
# System deps
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
tzdata ca-certificates \
|
||||
qpdf \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
@ -4,6 +4,8 @@ from __future__ import annotations
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import PyPDF2
|
||||
@ -134,13 +136,93 @@ def _collect_btn_widgets(reader: PyPDF2.PdfReader):
|
||||
|
||||
return btn_widgets_by_name, export_values_by_name
|
||||
|
||||
# -----------------------------
|
||||
# PDF Flattening Helper
|
||||
# -----------------------------
|
||||
|
||||
def _flatten_pdf_with_qpdf(pdf_bytes: bytes) -> Optional[bytes]:
    """Flatten a PDF with the external ``qpdf`` tool, if it is available.

    The input is written into a private temporary directory, ``qpdf`` is run
    with ``--flatten-annotations=all --generate-appearances`` and the file it
    produces is read back. The directory, and both files inside it, is
    removed on every exit path.

    Args:
        pdf_bytes: Serialized PDF document to flatten.

    Returns:
        The flattened PDF as bytes, or ``None`` when flattening did not
        succeed (``qpdf`` not installed, non-zero exit status, no result
        within 30 seconds, or a temp-file I/O error), so the caller can try
        another strategy.
    """
    try:
        # One scratch directory instead of two NamedTemporaryFile objects:
        # the paths are always bound, nothing is held open while qpdf runs,
        # and cleanup happens exactly once.
        with tempfile.TemporaryDirectory() as workdir:
            input_path = os.path.join(workdir, 'input.pdf')
            output_path = os.path.join(workdir, 'flattened.pdf')

            with open(input_path, 'wb') as f:
                f.write(pdf_bytes)

            result = subprocess.run(
                ['qpdf', '--flatten-annotations=all', '--generate-appearances', input_path, output_path],
                capture_output=True,
                timeout=30,
            )

            # NOTE(review): the qpdf manual describes exit status 3 as
            # "warnings only"; such runs are treated as failures here, as
            # before -- confirm that this is intended.
            if result.returncode != 0:
                return None

            with open(output_path, 'rb') as f:
                return f.read()
    except (subprocess.SubprocessError, OSError):
        # Best effort: a missing binary (FileNotFoundError), a timeout
        # (TimeoutExpired) or any temp-file problem means "not flattened".
        return None
|
||||
|
||||
def _flatten_pdf_with_pdftk(pdf_bytes: bytes) -> Optional[bytes]:
    """Flatten a PDF with the external ``pdftk`` tool, if it is available.

    The input is written into a private temporary directory, ``pdftk`` is run
    as ``pdftk <input> output <output> flatten`` and the file it produces is
    read back. The directory, and both files inside it, is removed on every
    exit path.

    Args:
        pdf_bytes: Serialized PDF document to flatten.

    Returns:
        The flattened PDF as bytes, or ``None`` when flattening did not
        succeed (``pdftk`` not installed, non-zero exit status, no result
        within 30 seconds, or a temp-file I/O error), so the caller can try
        another strategy.
    """
    try:
        # One scratch directory instead of two NamedTemporaryFile objects:
        # the paths are always bound, nothing is held open while pdftk runs,
        # and cleanup happens exactly once.
        with tempfile.TemporaryDirectory() as workdir:
            input_path = os.path.join(workdir, 'input.pdf')
            output_path = os.path.join(workdir, 'flattened.pdf')

            with open(input_path, 'wb') as f:
                f.write(pdf_bytes)

            result = subprocess.run(
                ['pdftk', input_path, 'output', output_path, 'flatten'],
                capture_output=True,
                timeout=30,
            )

            if result.returncode != 0:
                return None

            with open(output_path, 'rb') as f:
                return f.read()
    except (subprocess.SubprocessError, OSError):
        # Best effort: a missing binary (FileNotFoundError), a timeout
        # (TimeoutExpired) or any temp-file problem means "not flattened".
        return None
|
||||
|
||||
# -----------------------------
|
||||
# Core: fill the PDF (direct widget update)
|
||||
# -----------------------------
|
||||
|
||||
def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None) -> bytes:
|
||||
def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None, flatten: bool = True) -> bytes:
|
||||
"""
|
||||
Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes.
|
||||
|
||||
Args:
|
||||
payload: Dictionary mit den Formulardaten
|
||||
variant: "QSM" oder "VSM"
|
||||
out_path: Optionaler Pfad zum Speichern der PDF
|
||||
flatten: Wenn True, werden Formularfelder in statischen Inhalt umgewandelt
|
||||
"""
|
||||
template_path = _get_template(variant)
|
||||
if not os.path.isfile(template_path):
|
||||
@ -329,7 +411,64 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# 3) Schreiben
|
||||
# 3) Write the PDF with filled forms
|
||||
bio = io.BytesIO()
|
||||
writer.write(bio)
|
||||
data = bio.getvalue()
|
||||
|
||||
# 4) Flatten if requested
|
||||
if flatten:
|
||||
# Try external tools first for better flattening
|
||||
flattened = _flatten_pdf_with_qpdf(data)
|
||||
if flattened:
|
||||
data = flattened
|
||||
else:
|
||||
# Try pdftk as fallback
|
||||
flattened = _flatten_pdf_with_pdftk(data)
|
||||
if flattened:
|
||||
data = flattened
|
||||
else:
|
||||
# Fallback: Remove form fields using PyPDF2 (fields won't be visible)
|
||||
# This is not ideal but better than nothing
|
||||
reader = PyPDF2.PdfReader(io.BytesIO(data))
|
||||
writer = PyPDF2.PdfWriter()
|
||||
|
||||
# Copy all pages
|
||||
for page in reader.pages:
|
||||
writer.add_page(page)
|
||||
|
||||
# Remove AcroForm to make fields non-interactive
|
||||
if "/AcroForm" in writer._root_object:
|
||||
del writer._root_object["/AcroForm"]
|
||||
|
||||
# Remove Widget annotations
|
||||
for page in writer.pages:
|
||||
if "/Annots" in page:
|
||||
annots = page["/Annots"]
|
||||
if isinstance(annots, IndirectObject):
|
||||
try:
|
||||
annots = annots.get_object()
|
||||
except:
|
||||
continue
|
||||
|
||||
new_annots = ArrayObject()
|
||||
if isinstance(annots, (list, ArrayObject)):
|
||||
for annot_ref in annots:
|
||||
try:
|
||||
annot = annot_ref.get_object() if isinstance(annot_ref, IndirectObject) else annot_ref
|
||||
if isinstance(annot, DictionaryObject):
|
||||
subtype = _to_str(annot.get("/Subtype"))
|
||||
if subtype and subtype != "Widget":
|
||||
new_annots.append(annot_ref)
|
||||
except:
|
||||
continue
|
||||
|
||||
if len(new_annots) > 0:
|
||||
page[NameObject("/Annots")] = new_annots
|
||||
else:
|
||||
if "/Annots" in page:
|
||||
del page["/Annots"]
|
||||
|
||||
bio = io.BytesIO()
|
||||
writer.write(bio)
|
||||
data = bio.getvalue()
|
||||
@ -339,5 +478,5 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
|
||||
return data
|
||||
|
||||
|
||||
def save_pdf(payload: Dict[str, Any], variant: str, out_path: str) -> None:
|
||||
_ = fill_pdf(payload, variant, out_path=out_path)
|
||||
def save_pdf(payload: Dict[str, Any], variant: str, out_path: str, flatten: bool = True) -> None:
    """Fill the form for ``variant`` and hand ``out_path`` to ``fill_pdf``.

    Thin convenience wrapper: every argument is forwarded to ``fill_pdf``
    unchanged and the bytes it returns are discarded.

    Args:
        payload: Form data, passed through to ``fill_pdf``.
        variant: Template variant, passed through to ``fill_pdf``.
        out_path: Destination path, passed to ``fill_pdf`` as ``out_path``.
        flatten: Passed to ``fill_pdf`` as ``flatten``.
    """
    fill_pdf(payload, variant, out_path=out_path, flatten=flatten)
    return None
|
||||
|
||||
141
backend/test_flattening.py
Normal file
141
backend/test_flattening.py
Normal file
@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify PDF flattening functionality.
|
||||
Tests that form fields are properly removed after filling.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
# Add src to path
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
||||
|
||||
import PyPDF2
|
||||
from pdf_filler import fill_pdf
|
||||
|
||||
def check_pdf_has_forms(pdf_path):
    """Report whether the PDF at ``pdf_path`` still contains form fields.

    Two signals are inspected, in this order:

    1. a non-empty ``/Fields`` entry in the ``/AcroForm`` of the document
       root (``trailer['/Root']``);
    2. any page annotation whose ``/Subtype`` is ``/Widget``.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A ``(has_forms, message)`` tuple. ``has_forms`` is ``True`` as soon
        as either signal is found; ``message`` is a human-readable summary.
    """

    def _resolve(obj):
        # Objects exposing get_object() (e.g. indirect references) are
        # dereferenced; plain containers such as the ``{}`` default pass
        # through untouched.
        return obj.get_object() if hasattr(obj, 'get_object') else obj

    with open(pdf_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)

        # Signal 1: AcroForm with at least one field. The root is resolved
        # explicitly because ``.get`` may return an indirect reference
        # rather than the dictionary itself.
        root = _resolve(reader.trailer.get('/Root', {}))
        if '/AcroForm' in root:
            acroform = _resolve(root['/AcroForm'])
            if '/Fields' in acroform:
                fields = _resolve(acroform['/Fields'])
                if fields:
                    return True, f"Found {len(fields)} form fields"

        # Signal 2: widget annotations left on any page.
        widget_count = 0
        for page in reader.pages:
            if '/Annots' not in page:
                continue

            annots = _resolve(page['/Annots'])
            if not isinstance(annots, (list, PyPDF2.generic.ArrayObject)):
                continue

            for annot_ref in annots:
                try:
                    annot = _resolve(annot_ref)
                    if not isinstance(annot, (dict, PyPDF2.generic.DictionaryObject)):
                        continue
                    if str(annot.get('/Subtype')) == '/Widget':
                        widget_count += 1
                except Exception:
                    # A single unreadable annotation must not abort the
                    # scan; KeyboardInterrupt/SystemExit still propagate.
                    continue

        if widget_count > 0:
            return True, f"Found {widget_count} widget annotations"

    return False, "No form fields or widgets found"
|
||||
|
||||
def test_flattening():
    """Check ``fill_pdf`` with and without flattening for each template.

    For the ``VSM`` and ``QSM`` variants the template is looked up under
    ``src/assets/<variant>.pdf`` next to this file. A variant whose template
    is missing is reported and skipped. Otherwise the template is filled
    twice into temporary files (``flatten=True`` and ``flatten=False``), each
    result is inspected with ``check_pdf_has_forms`` and a line per outcome
    is printed. Temporary files are always removed.

    Raises:
        AssertionError: After the full report has been printed, if a
            flattened PDF still contained form fields or if generating a
            PDF raised an exception. A non-flattened PDF without form
            fields is only reported as a warning.
    """
    test_payload = {
        "pa": {
            "meta": {
                "id": "TEST-001",
                "key": "test-key-123",
            },
            "applicant": {
                "name": "Test Applicant",
                "email": "test@example.com",
            },
            "project": {
                "title": "Test Project",
                "description": "This is a test project",
                "costs": [
                    {"description": "Item 1", "amountEur": 100.50},
                    {"description": "Item 2", "amountEur": 200.75},
                ],
            },
        }
    }

    def _fill_and_inspect(variant, flatten):
        """Fill into a temp file, return check_pdf_has_forms' result, clean up."""
        kind = "flattened" if flatten else "not_flattened"
        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_{kind}.pdf", delete=False) as tf:
            pdf_path = tf.name
        try:
            fill_pdf(test_payload, variant, out_path=pdf_path, flatten=flatten)
            return check_pdf_has_forms(pdf_path)
        finally:
            if os.path.exists(pdf_path):
                os.unlink(pdf_path)

    print("Testing PDF Flattening...")
    print("-" * 50)

    # Collected instead of raised immediately so the report stays complete.
    failures = []

    for variant in ["VSM", "QSM"]:
        print(f"\nTesting {variant} variant:")

        template_path = os.path.join(os.path.dirname(__file__), "src", "assets", f"{variant.lower()}.pdf")
        if not os.path.exists(template_path):
            print(f" ⚠️ Template not found at {template_path}, skipping...")
            continue

        _, msg = check_pdf_has_forms(template_path)
        print(f" Template: {msg}")

        # Flattened output (the default mode) must not keep any form fields.
        try:
            has_forms, msg = _fill_and_inspect(variant, flatten=True)
        except Exception as e:
            print(f" ❌ ERROR generating flattened PDF: {e}")
            failures.append(f"{variant}: error generating flattened PDF: {e}")
        else:
            print(f" Flattened PDF: {msg}")
            if has_forms:
                print(" ❌ FAILED: Flattened PDF still has form fields!")
                failures.append(f"{variant}: flattened PDF still has form fields")
            else:
                print(" ✅ SUCCESS: Form fields removed after flattening")

        # Non-flattened output, for comparison.
        try:
            has_forms, msg = _fill_and_inspect(variant, flatten=False)
        except Exception as e:
            print(f" ❌ ERROR generating non-flattened PDF: {e}")
            failures.append(f"{variant}: error generating non-flattened PDF: {e}")
        else:
            print(f" Non-flattened PDF: {msg}")
            if not has_forms:
                print(" ⚠️ WARNING: Non-flattened PDF has no form fields (unexpected)")
            else:
                print(" ✅ Non-flattened PDF keeps form fields as expected")

    print("\n" + "-" * 50)
    print("Test complete!")

    if failures:
        raise AssertionError("PDF flattening checks failed: " + "; ".join(failures))
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_flattening()
|
||||
Loading…
Reference in New Issue
Block a user