LaTeX dynamic building – 1

This commit is contained in:
Frederik Beimgraben 2025-09-01 14:56:56 +02:00
parent f1d022b19b
commit 3ddb091d1e
3 changed files with 285 additions and 4 deletions

View File

@ -38,6 +38,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
# System deps
RUN apt-get update && apt-get install -y --no-install-recommends \
tzdata ca-certificates \
qpdf \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app

View File

@ -4,6 +4,8 @@ from __future__ import annotations
import io
import os
import re
import subprocess
import tempfile
from typing import Any, Dict, Optional
import PyPDF2
@ -134,13 +136,93 @@ def _collect_btn_widgets(reader: PyPDF2.PdfReader):
return btn_widgets_by_name, export_values_by_name
# -----------------------------
# PDF Flattening Helper
# -----------------------------
def _flatten_pdf_with_qpdf(pdf_bytes: bytes) -> Optional[bytes]:
"""Try to flatten PDF using qpdf if available."""
try:
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
input_path = input_file.name
output_path = output_file.name
# Write input PDF
with open(input_path, 'wb') as f:
f.write(pdf_bytes)
# Try to flatten with qpdf
result = subprocess.run(
['qpdf', '--flatten-annotations=all', '--generate-appearances', input_path, output_path],
capture_output=True,
timeout=30
)
if result.returncode == 0:
with open(output_path, 'rb') as f:
flattened_bytes = f.read()
# Cleanup
os.unlink(input_path)
os.unlink(output_path)
return flattened_bytes
except (subprocess.SubprocessError, FileNotFoundError, subprocess.TimeoutExpired):
pass
finally:
# Ensure cleanup
for path in [input_path, output_path]:
if os.path.exists(path):
os.unlink(path)
return None
def _flatten_pdf_with_pdftk(pdf_bytes: bytes) -> Optional[bytes]:
"""Try to flatten PDF using pdftk if available."""
try:
with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
input_path = input_file.name
output_path = output_file.name
# Write input PDF
with open(input_path, 'wb') as f:
f.write(pdf_bytes)
# Try to flatten with pdftk
result = subprocess.run(
['pdftk', input_path, 'output', output_path, 'flatten'],
capture_output=True,
timeout=30
)
if result.returncode == 0:
with open(output_path, 'rb') as f:
flattened_bytes = f.read()
# Cleanup
os.unlink(input_path)
os.unlink(output_path)
return flattened_bytes
except (subprocess.SubprocessError, FileNotFoundError, subprocess.TimeoutExpired):
pass
finally:
# Ensure cleanup
for path in [input_path, output_path]:
if os.path.exists(path):
os.unlink(path)
return None
# -----------------------------
# Kern: PDF füllen (direktes Widget-Update)
# -----------------------------
def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None) -> bytes:
def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None, flatten: bool = True) -> bytes:
"""
Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes.
Args:
payload: Dictionary mit den Formulardaten
variant: "QSM" oder "VSM"
out_path: Optionaler Pfad zum Speichern der PDF
flatten: Wenn True, werden Formularfelder in statischen Inhalt umgewandelt
"""
template_path = _get_template(variant)
if not os.path.isfile(template_path):
@ -329,7 +411,64 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
except Exception:
continue
# 3) Schreiben
# 3) Write the PDF with filled forms
bio = io.BytesIO()
writer.write(bio)
data = bio.getvalue()
# 4) Flatten if requested
if flatten:
# Try external tools first for better flattening
flattened = _flatten_pdf_with_qpdf(data)
if flattened:
data = flattened
else:
# Try pdftk as fallback
flattened = _flatten_pdf_with_pdftk(data)
if flattened:
data = flattened
else:
# Fallback: Remove form fields using PyPDF2 (fields won't be visible)
# This is not ideal but better than nothing
reader = PyPDF2.PdfReader(io.BytesIO(data))
writer = PyPDF2.PdfWriter()
# Copy all pages
for page in reader.pages:
writer.add_page(page)
# Remove AcroForm to make fields non-interactive
if "/AcroForm" in writer._root_object:
del writer._root_object["/AcroForm"]
# Remove Widget annotations
for page in writer.pages:
if "/Annots" in page:
annots = page["/Annots"]
if isinstance(annots, IndirectObject):
try:
annots = annots.get_object()
except:
continue
new_annots = ArrayObject()
if isinstance(annots, (list, ArrayObject)):
for annot_ref in annots:
try:
annot = annot_ref.get_object() if isinstance(annot_ref, IndirectObject) else annot_ref
if isinstance(annot, DictionaryObject):
subtype = _to_str(annot.get("/Subtype"))
if subtype and subtype != "Widget":
new_annots.append(annot_ref)
except:
continue
if len(new_annots) > 0:
page[NameObject("/Annots")] = new_annots
else:
if "/Annots" in page:
del page["/Annots"]
bio = io.BytesIO()
writer.write(bio)
data = bio.getvalue()
@ -339,5 +478,5 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
return data
def save_pdf(payload: Dict[str, Any], variant: str, out_path: str) -> None:
_ = fill_pdf(payload, variant, out_path=out_path)
def save_pdf(payload: Dict[str, Any], variant: str, out_path: str, flatten: bool = True) -> None:
    """Generate the filled PDF and have it stored at ``out_path``.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``fill_pdf`` and the bytes it returns are discarded.

    Args:
        payload: Form data dictionary, passed through to ``fill_pdf``.
        variant: Template variant identifier, passed through to ``fill_pdf``.
        out_path: Destination path, passed to ``fill_pdf`` as ``out_path``.
        flatten: Passed through to ``fill_pdf``; defaults to True.
    """
    # NOTE(review): persisting the file is presumably done by fill_pdf when it
    # receives out_path -- that part of fill_pdf is not visible here; confirm.
    fill_pdf(payload, variant, out_path=out_path, flatten=flatten)

141
backend/test_flattening.py Normal file
View File

@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
Test script to verify PDF flattening functionality.
Tests that form fields are properly removed after filling.
"""
import os
import sys
import tempfile
from pathlib import Path
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import PyPDF2
from pdf_filler import fill_pdf
def check_pdf_has_forms(pdf_path):
    """Report whether a PDF still contains interactive form content.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A ``(has_forms, message)`` tuple. ``has_forms`` is True when the
        document catalog has an ``/AcroForm`` with a non-empty ``/Fields``
        array, or otherwise when at least one page annotation has the
        subtype ``/Widget``. ``message`` is a short human-readable summary.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
    """
    def resolve(obj):
        # Follow an indirect reference to its target; anything without a
        # ``get_object`` method (None, plain dict/list) is returned as is.
        return obj.get_object() if hasattr(obj, 'get_object') else obj

    with open(pdf_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)

        # 1) AcroForm with fields. ``.get()`` may hand back an unresolved
        # indirect reference, so resolve before testing membership.
        root = resolve(reader.trailer.get('/Root', {}))
        if '/AcroForm' in root:
            acroform = resolve(root['/AcroForm'])
            if '/Fields' in acroform:
                fields = resolve(acroform['/Fields'])
                if fields:
                    return True, f"Found {len(fields)} form fields"

        # 2) Widget annotations left on any page.
        widget_count = 0
        for page in reader.pages:
            if '/Annots' not in page:
                continue
            annots = resolve(page['/Annots'])
            if not isinstance(annots, (list, PyPDF2.generic.ArrayObject)):
                continue
            for annot_ref in annots:
                try:
                    annot = resolve(annot_ref)
                    if isinstance(annot, (dict, PyPDF2.generic.DictionaryObject)):
                        subtype = resolve(annot.get('/Subtype'))
                        if subtype and str(subtype) == '/Widget':
                            widget_count += 1
                except Exception:
                    # A malformed annotation must not abort the scan; it is
                    # simply not counted. (Not a bare ``except`` so that
                    # KeyboardInterrupt/SystemExit still propagate.)
                    continue

    if widget_count > 0:
        return True, f"Found {widget_count} widget annotations"
    return False, "No form fields or widgets found"
def test_flattening():
    """Exercise ``fill_pdf`` with and without flattening for each variant.

    For the variants "VSM" and "QSM" the template is looked up under
    ``src/assets/<variant>.pdf`` next to this file; a missing template is
    reported and skipped. For each available template a PDF is generated
    once with ``flatten=True`` and once with ``flatten=False`` into a
    temporary file, inspected with ``check_pdf_has_forms`` and removed again.
    Progress is printed as the checks run.

    Raises:
        AssertionError: After the full report has been printed, if a
            flattened PDF still contained form fields or if generating
            either PDF raised an exception. A non-flattened PDF without
            form fields is only reported as a warning.
    """
    test_payload = {
        "pa": {
            "meta": {
                "id": "TEST-001",
                "key": "test-key-123"
            },
            "applicant": {
                "name": "Test Applicant",
                "email": "test@example.com"
            },
            "project": {
                "title": "Test Project",
                "description": "This is a test project",
                "costs": [
                    {"description": "Item 1", "amountEur": 100.50},
                    {"description": "Item 2", "amountEur": 200.75}
                ]
            }
        }
    }

    print("Testing PDF Flattening...")
    print("-" * 50)

    # Collected instead of raised immediately so that every variant is still
    # checked and reported before the run is marked as failed.
    failures = []

    for variant in ["VSM", "QSM"]:
        print(f"\nTesting {variant} variant:")

        template_path = os.path.join(os.path.dirname(__file__), "src", "assets", f"{variant.lower()}.pdf")
        if not os.path.exists(template_path):
            print(f" ⚠️ Template not found at {template_path}, skipping...")
            continue

        _, msg = check_pdf_has_forms(template_path)
        print(f" Template: {msg}")

        # Flattened output (the default mode): no form content may remain.
        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_flattened.pdf", delete=False) as tf:
            flattened_path = tf.name
        try:
            fill_pdf(test_payload, variant, out_path=flattened_path, flatten=True)
            has_forms, msg = check_pdf_has_forms(flattened_path)
            print(f" Flattened PDF: {msg}")
            if has_forms:
                print(" ❌ FAILED: Flattened PDF still has form fields!")
                failures.append(f"{variant}: flattened PDF still has form fields")
            else:
                print(" ✅ SUCCESS: Form fields removed after flattening")
        except Exception as e:
            print(f" ❌ ERROR generating flattened PDF: {e}")
            failures.append(f"{variant}: error generating flattened PDF: {e}")
        finally:
            if os.path.exists(flattened_path):
                os.unlink(flattened_path)

        # Non-flattened output for comparison: form fields are expected.
        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_not_flattened.pdf", delete=False) as tf:
            not_flattened_path = tf.name
        try:
            fill_pdf(test_payload, variant, out_path=not_flattened_path, flatten=False)
            has_forms, msg = check_pdf_has_forms(not_flattened_path)
            print(f" Non-flattened PDF: {msg}")
            if not has_forms:
                print(" ⚠️ WARNING: Non-flattened PDF has no form fields (unexpected)")
            else:
                print(" ✅ Non-flattened PDF keeps form fields as expected")
        except Exception as e:
            print(f" ❌ ERROR generating non-flattened PDF: {e}")
            failures.append(f"{variant}: error generating non-flattened PDF: {e}")
        finally:
            if os.path.exists(not_flattened_path):
                os.unlink(not_flattened_path)

    print("\n" + "-" * 50)
    print("Test complete!")

    # Explicit raise rather than ``assert`` so the check survives ``python -O``.
    if failures:
        raise AssertionError("PDF flattening checks failed: " + "; ".join(failures))
if __name__ == "__main__":
test_flattening()