LaTeX dynamic building – 1

This commit is contained in:
Frederik Beimgraben 2025-09-01 14:56:56 +02:00
parent f1d022b19b
commit 3ddb091d1e
3 changed files with 285 additions and 4 deletions

View File

@ -38,6 +38,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
# System deps # System deps
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
tzdata ca-certificates \ tzdata ca-certificates \
qpdf \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
WORKDIR /app WORKDIR /app

View File

@ -4,6 +4,8 @@ from __future__ import annotations
import io import io
import os import os
import re import re
import subprocess
import tempfile
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
import PyPDF2 import PyPDF2
@ -134,13 +136,93 @@ def _collect_btn_widgets(reader: PyPDF2.PdfReader):
return btn_widgets_by_name, export_values_by_name return btn_widgets_by_name, export_values_by_name
# -----------------------------
# PDF Flattening Helper
# -----------------------------
def _flatten_pdf_with_qpdf(pdf_bytes: bytes) -> Optional[bytes]:
"""Try to flatten PDF using qpdf if available."""
try:
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
input_path = input_file.name
output_path = output_file.name
# Write input PDF
with open(input_path, 'wb') as f:
f.write(pdf_bytes)
# Try to flatten with qpdf
result = subprocess.run(
['qpdf', '--flatten-annotations=all', '--generate-appearances', input_path, output_path],
capture_output=True,
timeout=30
)
if result.returncode == 0:
with open(output_path, 'rb') as f:
flattened_bytes = f.read()
# Cleanup
os.unlink(input_path)
os.unlink(output_path)
return flattened_bytes
except (subprocess.SubprocessError, FileNotFoundError, subprocess.TimeoutExpired):
pass
finally:
# Ensure cleanup
for path in [input_path, output_path]:
if os.path.exists(path):
os.unlink(path)
return None
def _flatten_pdf_with_pdftk(pdf_bytes: bytes) -> Optional[bytes]:
"""Try to flatten PDF using pdftk if available."""
try:
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
input_path = input_file.name
output_path = output_file.name
# Write input PDF
with open(input_path, 'wb') as f:
f.write(pdf_bytes)
# Try to flatten with pdftk
result = subprocess.run(
['pdftk', input_path, 'output', output_path, 'flatten'],
capture_output=True,
timeout=30
)
if result.returncode == 0:
with open(output_path, 'rb') as f:
flattened_bytes = f.read()
# Cleanup
os.unlink(input_path)
os.unlink(output_path)
return flattened_bytes
except (subprocess.SubprocessError, FileNotFoundError, subprocess.TimeoutExpired):
pass
finally:
# Ensure cleanup
for path in [input_path, output_path]:
if os.path.exists(path):
os.unlink(path)
return None
# ----------------------------- # -----------------------------
# Kern: PDF füllen (direktes Widget-Update) # Kern: PDF füllen (direktes Widget-Update)
# ----------------------------- # -----------------------------
def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None) -> bytes: def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None, flatten: bool = True) -> bytes:
""" """
Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes. Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes.
Args:
payload: Dictionary mit den Formulardaten
variant: "QSM" oder "VSM"
out_path: Optionaler Pfad zum Speichern der PDF
flatten: Wenn True, werden Formularfelder in statischen Inhalt umgewandelt
""" """
template_path = _get_template(variant) template_path = _get_template(variant)
if not os.path.isfile(template_path): if not os.path.isfile(template_path):
@ -329,15 +411,72 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
except Exception: except Exception:
continue continue
# 3) Schreiben # 3) Write the PDF with filled forms
bio = io.BytesIO() bio = io.BytesIO()
writer.write(bio) writer.write(bio)
data = bio.getvalue() data = bio.getvalue()
# 4) Flatten if requested
if flatten:
# Try external tools first for better flattening
flattened = _flatten_pdf_with_qpdf(data)
if flattened:
data = flattened
else:
# Try pdftk as fallback
flattened = _flatten_pdf_with_pdftk(data)
if flattened:
data = flattened
else:
# Fallback: Remove form fields using PyPDF2 (fields won't be visible)
# This is not ideal but better than nothing
reader = PyPDF2.PdfReader(io.BytesIO(data))
writer = PyPDF2.PdfWriter()
# Copy all pages
for page in reader.pages:
writer.add_page(page)
# Remove AcroForm to make fields non-interactive
if "/AcroForm" in writer._root_object:
del writer._root_object["/AcroForm"]
# Remove Widget annotations
for page in writer.pages:
if "/Annots" in page:
annots = page["/Annots"]
if isinstance(annots, IndirectObject):
try:
annots = annots.get_object()
except:
continue
new_annots = ArrayObject()
if isinstance(annots, (list, ArrayObject)):
for annot_ref in annots:
try:
annot = annot_ref.get_object() if isinstance(annot_ref, IndirectObject) else annot_ref
if isinstance(annot, DictionaryObject):
subtype = _to_str(annot.get("/Subtype"))
if subtype and subtype != "Widget":
new_annots.append(annot_ref)
except:
continue
if len(new_annots) > 0:
page[NameObject("/Annots")] = new_annots
else:
if "/Annots" in page:
del page["/Annots"]
bio = io.BytesIO()
writer.write(bio)
data = bio.getvalue()
if out_path: if out_path:
with open(out_path, "wb") as out: with open(out_path, "wb") as out:
out.write(data) out.write(data)
return data return data
def save_pdf(payload: Dict[str, Any], variant: str, out_path: str, flatten: bool = True) -> None:
    """Fill the template for ``variant`` and write the result to ``out_path``.

    Thin wrapper around ``fill_pdf`` for callers that only want the file on
    disk; the bytes returned by ``fill_pdf`` are discarded.

    Args:
        payload: Form data, passed through to ``fill_pdf`` unchanged.
        variant: Template variant, passed through to ``fill_pdf``.
        out_path: Destination path of the generated PDF.
        flatten: Passed through to ``fill_pdf``; defaults to True.
    """
    fill_pdf(payload, variant, out_path=out_path, flatten=flatten)

141
backend/test_flattening.py Normal file
View File

@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
Test script to verify PDF flattening functionality.
Tests that form fields are properly removed after filling.
"""
import os
import sys
import tempfile
from pathlib import Path
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import PyPDF2
from pdf_filler import fill_pdf
def check_pdf_has_forms(pdf_path):
    """Report whether a PDF still contains interactive form content.

    Two things are checked, in this order: a document-level ``/AcroForm``
    with a non-empty ``/Fields`` array, then ``/Widget`` annotations on any
    page.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A ``(has_forms, message)`` tuple, where ``message`` is a short
        human-readable description of what was found.
    """
    with open(pdf_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)

        # Index into the trailer instead of using .get(): depending on the
        # PyPDF2 version, dict.get may return an unresolved indirect
        # reference, which does not support the membership test below.
        root = reader.trailer['/Root'] if '/Root' in reader.trailer else {}
        if '/AcroForm' in root:
            acroform = root['/AcroForm']
            if '/Fields' in acroform:
                fields = acroform['/Fields']
                if fields:
                    return True, f"Found {len(fields)} form fields"

        # No document-level fields: look for leftover widget annotations.
        widget_count = 0
        for page in reader.pages:
            if '/Annots' not in page:
                continue

            annots = page['/Annots']
            if hasattr(annots, 'get_object'):
                annots = annots.get_object()
            if not isinstance(annots, (list, PyPDF2.generic.ArrayObject)):
                continue

            for annot_ref in annots:
                try:
                    annot = annot_ref.get_object() if hasattr(annot_ref, 'get_object') else annot_ref
                    if isinstance(annot, (dict, PyPDF2.generic.DictionaryObject)):
                        subtype = annot.get('/Subtype')
                        if subtype and str(subtype) == '/Widget':
                            widget_count += 1
                except Exception:
                    # An unreadable annotation is skipped rather than aborting
                    # the scan; KeyboardInterrupt/SystemExit still propagate.
                    continue

        if widget_count > 0:
            return True, f"Found {widget_count} widget annotations"

        return False, "No form fields or widgets found"
def _fill_and_inspect(payload, variant, flatten, label):
    """Fill ``variant`` into a temporary PDF and return ``check_pdf_has_forms`` for it."""
    with tempfile.NamedTemporaryFile(suffix=f"_{variant}_{label}.pdf", delete=False) as tf:
        pdf_path = tf.name
    try:
        fill_pdf(payload, variant, out_path=pdf_path, flatten=flatten)
        return check_pdf_has_forms(pdf_path)
    finally:
        if os.path.exists(pdf_path):
            os.unlink(pdf_path)


def test_flattening():
    """Check ``fill_pdf`` with and without flattening for each template variant.

    For every variant whose template exists under ``src/assets``, a PDF is
    generated once with ``flatten=True`` and once with ``flatten=False`` and
    inspected with ``check_pdf_has_forms``. Progress is printed as before;
    variants without a template are skipped.

    Raises:
        AssertionError: If a flattened PDF still contains form fields, or if
            generating/inspecting a PDF raised. Raised after all variants
            have been processed, so the full report is always printed.
    """
    test_payload = {
        "pa": {
            "meta": {
                "id": "TEST-001",
                "key": "test-key-123"
            },
            "applicant": {
                "name": "Test Applicant",
                "email": "test@example.com"
            },
            "project": {
                "title": "Test Project",
                "description": "This is a test project",
                "costs": [
                    {"description": "Item 1", "amountEur": 100.50},
                    {"description": "Item 2", "amountEur": 200.75}
                ]
            }
        }
    }

    # Collected instead of raised immediately so every variant is reported.
    failures = []

    print("Testing PDF Flattening...")
    print("-" * 50)

    for variant in ["VSM", "QSM"]:
        print(f"\nTesting {variant} variant:")

        template_path = os.path.join(os.path.dirname(__file__), "src", "assets", f"{variant.lower()}.pdf")
        if not os.path.exists(template_path):
            print(f" ⚠️ Template not found at {template_path}, skipping...")
            continue

        _, msg = check_pdf_has_forms(template_path)
        print(f" Template: {msg}")

        # Flattened output (the default mode) must not contain form fields.
        try:
            has_forms, msg = _fill_and_inspect(test_payload, variant, True, "flattened")
            print(f" Flattened PDF: {msg}")
            if has_forms:
                print(" ❌ FAILED: Flattened PDF still has form fields!")
                failures.append(f"{variant}: flattened PDF still has form fields ({msg})")
            else:
                print(" ✅ SUCCESS: Form fields removed after flattening")
        except Exception as e:
            print(f" ❌ ERROR generating flattened PDF: {e}")
            failures.append(f"{variant}: error generating flattened PDF: {e}")

        # Non-flattened output for comparison; missing fields only warn.
        try:
            has_forms, msg = _fill_and_inspect(test_payload, variant, False, "not_flattened")
            print(f" Non-flattened PDF: {msg}")
            if not has_forms:
                print(" ⚠️ WARNING: Non-flattened PDF has no form fields (unexpected)")
            else:
                print(" ✅ Non-flattened PDF keeps form fields as expected")
        except Exception as e:
            print(f" ❌ ERROR generating non-flattened PDF: {e}")
            failures.append(f"{variant}: error generating non-flattened PDF: {e}")

    print("\n" + "-" * 50)
    print("Test complete!")

    if failures:
        raise AssertionError("PDF flattening check failed: " + "; ".join(failures))
# Script entry point: run the flattening check when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    test_flattening()