LaTeX dynamic building – 1
This commit is contained in:
parent
f1d022b19b
commit
3ddb091d1e
@ -38,6 +38,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
|||||||
# System deps
|
# System deps
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
tzdata ca-certificates \
|
tzdata ca-certificates \
|
||||||
|
qpdf \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|||||||
@ -4,6 +4,8 @@ from __future__ import annotations
|
|||||||
import io
|
import io
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
from typing import Any, Dict, Optional
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
@ -134,13 +136,93 @@ def _collect_btn_widgets(reader: PyPDF2.PdfReader):
|
|||||||
|
|
||||||
return btn_widgets_by_name, export_values_by_name
|
return btn_widgets_by_name, export_values_by_name
|
||||||
|
|
||||||
|
# -----------------------------
|
||||||
|
# PDF Flattening Helper
|
||||||
|
# -----------------------------
|
||||||
|
|
||||||
|
def _run_flatten_tool(pdf_bytes: bytes, build_command) -> Optional[bytes]:
    """Run an external command-line tool that flattens a PDF.

    The input bytes are written to a file inside a private scratch
    directory, the tool is executed, and the file it produced is read back.

    Args:
        pdf_bytes: Content of the PDF to flatten.
        build_command: Callable taking ``(input_path, output_path)`` and
            returning the argument list passed to ``subprocess.run``.

    Returns:
        The bytes written by the tool, or ``None`` when the tool is not
        installed, times out, exits with a non-zero status, or any file
        operation fails.
    """
    try:
        # The scratch directory is removed on every exit path, so no
        # separate cleanup code is needed (and none can reference a path
        # that was never created).
        with tempfile.TemporaryDirectory() as workdir:
            input_path = os.path.join(workdir, "input.pdf")
            output_path = os.path.join(workdir, "input_flattened.pdf")

            with open(input_path, "wb") as f:
                f.write(pdf_bytes)

            result = subprocess.run(
                build_command(input_path, output_path),
                capture_output=True,
                timeout=30,
            )
            if result.returncode != 0:
                return None

            with open(output_path, "rb") as f:
                return f.read()
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing executable (FileNotFoundError) as well as
        # an unusable temp location; SubprocessError covers TimeoutExpired.
        # Flattening is best effort, so the caller just gets None.
        return None


def _flatten_pdf_with_qpdf(pdf_bytes: bytes) -> Optional[bytes]:
    """Try to flatten PDF using qpdf if available.

    Returns the flattened PDF bytes, or ``None`` if qpdf could not be run
    successfully.
    """
    return _run_flatten_tool(
        pdf_bytes,
        lambda input_path, output_path: [
            'qpdf', '--flatten-annotations=all', '--generate-appearances',
            input_path, output_path,
        ],
    )


def _flatten_pdf_with_pdftk(pdf_bytes: bytes) -> Optional[bytes]:
    """Try to flatten PDF using pdftk if available.

    Returns the flattened PDF bytes, or ``None`` if pdftk could not be run
    successfully.
    """
    return _run_flatten_tool(
        pdf_bytes,
        lambda input_path, output_path: [
            'pdftk', input_path, 'output', output_path, 'flatten',
        ],
    )
|
||||||
|
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
# Kern: PDF füllen (direktes Widget-Update)
|
# Kern: PDF füllen (direktes Widget-Update)
|
||||||
# -----------------------------
|
# -----------------------------
|
||||||
|
|
||||||
def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None) -> bytes:
|
def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None, flatten: bool = True) -> bytes:
|
||||||
"""
|
"""
|
||||||
Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes.
|
Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
payload: Dictionary mit den Formulardaten
|
||||||
|
variant: "QSM" oder "VSM"
|
||||||
|
out_path: Optionaler Pfad zum Speichern der PDF
|
||||||
|
flatten: Wenn True, werden Formularfelder in statischen Inhalt umgewandelt
|
||||||
"""
|
"""
|
||||||
template_path = _get_template(variant)
|
template_path = _get_template(variant)
|
||||||
if not os.path.isfile(template_path):
|
if not os.path.isfile(template_path):
|
||||||
@ -329,15 +411,72 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
|
|||||||
except Exception:
|
except Exception:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 3) Schreiben
|
# 3) Write the PDF with filled forms
|
||||||
bio = io.BytesIO()
|
bio = io.BytesIO()
|
||||||
writer.write(bio)
|
writer.write(bio)
|
||||||
data = bio.getvalue()
|
data = bio.getvalue()
|
||||||
|
|
||||||
|
# 4) Flatten if requested
|
||||||
|
if flatten:
|
||||||
|
# Try external tools first for better flattening
|
||||||
|
flattened = _flatten_pdf_with_qpdf(data)
|
||||||
|
if flattened:
|
||||||
|
data = flattened
|
||||||
|
else:
|
||||||
|
# Try pdftk as fallback
|
||||||
|
flattened = _flatten_pdf_with_pdftk(data)
|
||||||
|
if flattened:
|
||||||
|
data = flattened
|
||||||
|
else:
|
||||||
|
# Fallback: Remove form fields using PyPDF2 (fields won't be visible)
|
||||||
|
# This is not ideal but better than nothing
|
||||||
|
reader = PyPDF2.PdfReader(io.BytesIO(data))
|
||||||
|
writer = PyPDF2.PdfWriter()
|
||||||
|
|
||||||
|
# Copy all pages
|
||||||
|
for page in reader.pages:
|
||||||
|
writer.add_page(page)
|
||||||
|
|
||||||
|
# Remove AcroForm to make fields non-interactive
|
||||||
|
if "/AcroForm" in writer._root_object:
|
||||||
|
del writer._root_object["/AcroForm"]
|
||||||
|
|
||||||
|
# Remove Widget annotations
|
||||||
|
for page in writer.pages:
|
||||||
|
if "/Annots" in page:
|
||||||
|
annots = page["/Annots"]
|
||||||
|
if isinstance(annots, IndirectObject):
|
||||||
|
try:
|
||||||
|
annots = annots.get_object()
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
|
||||||
|
new_annots = ArrayObject()
|
||||||
|
if isinstance(annots, (list, ArrayObject)):
|
||||||
|
for annot_ref in annots:
|
||||||
|
try:
|
||||||
|
annot = annot_ref.get_object() if isinstance(annot_ref, IndirectObject) else annot_ref
|
||||||
|
if isinstance(annot, DictionaryObject):
|
||||||
|
subtype = _to_str(annot.get("/Subtype"))
|
||||||
|
if subtype and subtype != "Widget":
|
||||||
|
new_annots.append(annot_ref)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if len(new_annots) > 0:
|
||||||
|
page[NameObject("/Annots")] = new_annots
|
||||||
|
else:
|
||||||
|
if "/Annots" in page:
|
||||||
|
del page["/Annots"]
|
||||||
|
|
||||||
|
bio = io.BytesIO()
|
||||||
|
writer.write(bio)
|
||||||
|
data = bio.getvalue()
|
||||||
if out_path:
|
if out_path:
|
||||||
with open(out_path, "wb") as out:
|
with open(out_path, "wb") as out:
|
||||||
out.write(data)
|
out.write(data)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def save_pdf(payload: Dict[str, Any], variant: str, out_path: str, flatten: bool = True) -> None:
    """Fill the template for *variant* and store the resulting PDF at *out_path*.

    Args:
        payload: Form data, forwarded unchanged to ``fill_pdf``.
        variant: Template variant, forwarded unchanged to ``fill_pdf``.
        out_path: Destination file path for the generated PDF.
        flatten: Forwarded to ``fill_pdf``; when True the form fields are
            converted to static content.
    """
    # fill_pdf writes the file itself when out_path is given; the bytes it
    # returns are not needed here.
    fill_pdf(payload, variant, out_path=out_path, flatten=flatten)
|
||||||
|
|||||||
141
backend/test_flattening.py
Normal file
141
backend/test_flattening.py
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script to verify PDF flattening functionality.
|
||||||
|
Tests that form fields are properly removed after filling.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add src to path
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
||||||
|
|
||||||
|
import PyPDF2
|
||||||
|
from pdf_filler import fill_pdf
|
||||||
|
|
||||||
|
def _resolve_pdf_object(obj):
    """Return the object behind *obj* when it exposes ``get_object()``."""
    return obj.get_object() if hasattr(obj, 'get_object') else obj


def check_pdf_has_forms(pdf_path):
    """Check if a PDF has form fields.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A ``(has_forms, message)`` tuple. ``has_forms`` is True when the
        document catalog has an ``/AcroForm`` with a non-empty ``/Fields``
        entry, or when any page carries an annotation whose ``/Subtype`` is
        ``/Widget``. ``message`` is a human-readable summary.
    """
    with open(pdf_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)

        # Check for AcroForm.
        # NOTE(review): dict.get() is not guaranteed to resolve an indirect
        # reference the way item access does, so the value is resolved
        # explicitly before membership tests are run on it.
        root = _resolve_pdf_object(reader.trailer.get('/Root', {}))
        if '/AcroForm' in root:
            acroform = _resolve_pdf_object(root['/AcroForm'])
            if '/Fields' in acroform:
                fields = _resolve_pdf_object(acroform['/Fields'])
                if fields:
                    return True, f"Found {len(fields)} form fields"

        # Check for widget annotations
        widget_count = 0
        for page in reader.pages:
            if '/Annots' not in page:
                continue
            annots = _resolve_pdf_object(page['/Annots'])
            if not isinstance(annots, (list, PyPDF2.generic.ArrayObject)):
                continue

            for annot_ref in annots:
                try:
                    annot = _resolve_pdf_object(annot_ref)
                    if isinstance(annot, (dict, PyPDF2.generic.DictionaryObject)):
                        subtype = annot.get('/Subtype')
                        if subtype and str(subtype) == '/Widget':
                            widget_count += 1
                except Exception:
                    # A single unreadable annotation must not abort the
                    # scan; KeyboardInterrupt/SystemExit still propagate.
                    continue

        if widget_count > 0:
            return True, f"Found {widget_count} widget annotations"

        return False, "No form fields or widgets found"
|
||||||
|
|
||||||
|
def test_flattening():
    """Test PDF flattening functionality.

    For each template variant that exists on disk, a PDF is generated once
    with ``flatten=True`` and once with ``flatten=False`` and then inspected
    with ``check_pdf_has_forms``. Progress is printed as before.

    Raises:
        AssertionError: If a flattened PDF still contains form fields or if
            generating either PDF raised an exception. Without this the
            function could never fail when collected by a test runner.
    """

    # Test payload
    test_payload = {
        "pa": {
            "meta": {
                "id": "TEST-001",
                "key": "test-key-123"
            },
            "applicant": {
                "name": "Test Applicant",
                "email": "test@example.com"
            },
            "project": {
                "title": "Test Project",
                "description": "This is a test project",
                "costs": [
                    {"description": "Item 1", "amountEur": 100.50},
                    {"description": "Item 2", "amountEur": 200.75}
                ]
            }
        }
    }

    print("Testing PDF Flattening...")
    print("-" * 50)

    # Collected instead of raised immediately so every variant is reported.
    failures = []

    # Test both variants
    for variant in ["VSM", "QSM"]:
        print(f"\nTesting {variant} variant:")

        # Check if template exists
        template_path = os.path.join(os.path.dirname(__file__), "src", "assets", f"{variant.lower()}.pdf")
        if not os.path.exists(template_path):
            print(f" ⚠️ Template not found at {template_path}, skipping...")
            continue

        # Check template has forms
        _, msg = check_pdf_has_forms(template_path)
        print(f" Template: {msg}")

        # Generate PDF with flattening (default)
        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_flattened.pdf", delete=False) as tf:
            flattened_path = tf.name

        try:
            fill_pdf(test_payload, variant, out_path=flattened_path, flatten=True)
            has_forms, msg = check_pdf_has_forms(flattened_path)
            print(f" Flattened PDF: {msg}")

            if has_forms:
                print(" ❌ FAILED: Flattened PDF still has form fields!")
                failures.append(f"{variant}: flattened PDF still has form fields ({msg})")
            else:
                print(" ✅ SUCCESS: Form fields removed after flattening")

        except Exception as e:
            print(f" ❌ ERROR generating flattened PDF: {e}")
            failures.append(f"{variant}: error generating flattened PDF: {e}")
        finally:
            if os.path.exists(flattened_path):
                os.unlink(flattened_path)

        # Generate PDF without flattening for comparison
        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_not_flattened.pdf", delete=False) as tf:
            not_flattened_path = tf.name

        try:
            fill_pdf(test_payload, variant, out_path=not_flattened_path, flatten=False)
            has_forms, msg = check_pdf_has_forms(not_flattened_path)
            print(f" Non-flattened PDF: {msg}")

            if not has_forms:
                # Reported as a warning only, as before: not a hard failure.
                print(" ⚠️ WARNING: Non-flattened PDF has no form fields (unexpected)")
            else:
                print(" ✅ Non-flattened PDF keeps form fields as expected")

        except Exception as e:
            print(f" ❌ ERROR generating non-flattened PDF: {e}")
            failures.append(f"{variant}: error generating non-flattened PDF: {e}")
        finally:
            if os.path.exists(not_flattened_path):
                os.unlink(not_flattened_path)

    print("\n" + "-" * 50)
    print("Test complete!")

    # Explicit raise rather than an assert statement so the check also
    # runs under ``python -O``.
    if failures:
        raise AssertionError("PDF flattening checks failed: " + "; ".join(failures))
|
||||||
|
|
||||||
|
# Run the flattening check when this file is executed directly as a script.
if __name__ == "__main__":
    test_flattening()
|
||||||
Loading…
Reference in New Issue
Block a user