LaTeX dynamic building – 1

2025-09-01 14:56:56 +02:00 · 2025-09-01 14:56:56 +02:00 · 3ddb091d1e
commit 3ddb091d1e
parent f1d022b19b
3 changed files with 285 additions and 4 deletions
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -38,6 +38,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
 # System deps
 RUN apt-get update && apt-get install -y --no-install-recommends \
    tzdata ca-certificates \
    qpdf \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
--- a/backend/src/pdf_filler.py
+++ b/backend/src/pdf_filler.py
@ -4,6 +4,8 @@ from __future__ import annotations
 import io
 import os
 import re
 import subprocess
 import tempfile
 from typing import Any, Dict, Optional
 import PyPDF2
@ -134,13 +136,93 @@ def _collect_btn_widgets(reader: PyPDF2.PdfReader):
    return btn_widgets_by_name, export_values_by_name
 # -----------------------------
 # PDF Flattening Helper
 # -----------------------------
 def _flatten_pdf_with_qpdf(pdf_bytes: bytes) -> Optional[bytes]:
    """Flatten a PDF's form fields with the external ``qpdf`` tool.

    The bytes are written to a temporary file, ``qpdf`` is invoked with
    ``--flatten-annotations=all --generate-appearances`` and the file it
    produces is read back. Both temporary files are always removed again.

    Args:
        pdf_bytes: Content of the PDF to flatten.

    Returns:
        The flattened PDF bytes, or ``None`` when ``qpdf`` is not installed,
        does not finish within 30 seconds, reports an error, or a required
        file/directory could not be found.
    """
    # Bind both names up front so the cleanup in ``finally`` never hits an
    # unbound variable when creating a temporary file fails.
    input_path = None
    output_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
            input_path = input_file.name
            input_file.write(pdf_bytes)
        # Only a unique name is needed here; qpdf overwrites the empty file.
        with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
            output_path = output_file.name

        result = subprocess.run(
            ['qpdf', '--flatten-annotations=all', '--generate-appearances', input_path, output_path],
            capture_output=True,
            timeout=30,
        )
        # qpdf exits with 0 on success and with 3 when it succeeded but
        # emitted warnings; in both cases the output file was written.
        if result.returncode in (0, 3):
            with open(output_path, 'rb') as f:
                return f.read()
    except (subprocess.SubprocessError, FileNotFoundError):
        # Tool missing, timed out or could not be run: report "not flattened"
        # so the caller can fall back to another strategy.
        return None
    finally:
        for path in (input_path, output_path):
            if path is None:
                continue
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass
    return None
 def _flatten_pdf_with_pdftk(pdf_bytes: bytes) -> Optional[bytes]:
    """Flatten a PDF's form fields with the external ``pdftk`` tool.

    The bytes are written to a temporary file, ``pdftk <in> output <out>
    flatten`` is executed and the file it produces is read back. Both
    temporary files are always removed again.

    Args:
        pdf_bytes: Content of the PDF to flatten.

    Returns:
        The flattened PDF bytes, or ``None`` when ``pdftk`` is not installed,
        does not finish within 30 seconds, exits with a non-zero status, or a
        required file/directory could not be found.
    """
    # Bind both names up front so the cleanup in ``finally`` never hits an
    # unbound variable when creating a temporary file fails.
    input_path = None
    output_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
            input_path = input_file.name
            input_file.write(pdf_bytes)
        # Only a unique name is needed here; pdftk writes the actual content.
        with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
            output_path = output_file.name

        result = subprocess.run(
            ['pdftk', input_path, 'output', output_path, 'flatten'],
            capture_output=True,
            timeout=30,
        )
        if result.returncode == 0:
            with open(output_path, 'rb') as f:
                return f.read()
    except (subprocess.SubprocessError, FileNotFoundError):
        # Tool missing, timed out or could not be run: report "not flattened"
        # so the caller can fall back to another strategy.
        return None
    finally:
        for path in (input_path, output_path):
            if path is None:
                continue
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass
    return None
 # -----------------------------
 # Kern: PDF füllen (direktes Widget-Update)
 # -----------------------------
-def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None) -> bytes:
+def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None, flatten: bool = True) -> bytes:
    """
    Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes.
    Args:
        payload: Dictionary mit den Formulardaten
        variant: "QSM" oder "VSM"
        out_path: Optionaler Pfad zum Speichern der PDF
        flatten: Wenn True, werden Formularfelder in statischen Inhalt umgewandelt
    """
    template_path = _get_template(variant)
    if not os.path.isfile(template_path):
@ -329,15 +411,72 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
                except Exception:
                    continue
-        # 3) Schreiben
+        # 3) Write the PDF with filled forms
        bio = io.BytesIO()
        writer.write(bio)
        data = bio.getvalue()
        # 4) Flatten if requested
        if flatten:
            # Try external tools first for better flattening
            flattened = _flatten_pdf_with_qpdf(data)
            if flattened:
                data = flattened
            else:
                # Try pdftk as fallback
                flattened = _flatten_pdf_with_pdftk(data)
                if flattened:
                    data = flattened
                else:
                    # Fallback: Remove form fields using PyPDF2 (fields won't be visible)
                    # This is not ideal but better than nothing
                    reader = PyPDF2.PdfReader(io.BytesIO(data))
                    writer = PyPDF2.PdfWriter()
                    # Copy all pages
                    for page in reader.pages:
                        writer.add_page(page)
                    # Remove AcroForm to make fields non-interactive
                    if "/AcroForm" in writer._root_object:
                        del writer._root_object["/AcroForm"]
                    # Remove Widget annotations
                    for page in writer.pages:
                        if "/Annots" in page:
                            annots = page["/Annots"]
                            if isinstance(annots, IndirectObject):
                                try:
                                    annots = annots.get_object()
                                except:
                                    continue
                            new_annots = ArrayObject()
                            if isinstance(annots, (list, ArrayObject)):
                                for annot_ref in annots:
                                    try:
                                        annot = annot_ref.get_object() if isinstance(annot_ref, IndirectObject) else annot_ref
                                        if isinstance(annot, DictionaryObject):
                                            subtype = _to_str(annot.get("/Subtype"))
                                            if subtype and subtype != "Widget":
                                                new_annots.append(annot_ref)
                                    except:
                                        continue
                            if len(new_annots) > 0:
                                page[NameObject("/Annots")] = new_annots
                            else:
                                if "/Annots" in page:
                                    del page["/Annots"]
                    bio = io.BytesIO()
                    writer.write(bio)
                    data = bio.getvalue()
        if out_path:
            with open(out_path, "wb") as out:
                out.write(data)
        return data
-def save_pdf(payload: Dict[str, Any], variant: str, out_path: str) -> None:
+def save_pdf(payload: Dict[str, Any], variant: str, out_path: str, flatten: bool = True) -> None:
-    _ = fill_pdf(payload, variant, out_path=out_path)
+    _ = fill_pdf(payload, variant, out_path=out_path, flatten=flatten)
--- a/backend/test_flattening.py
+++ b/backend/test_flattening.py
@ -0,0 +1,141 @@
 #!/usr/bin/env python3
 """
 Test script to verify PDF flattening functionality.
 Tests that form fields are properly removed after filling.
 """
 import os
 import sys
 import tempfile
 from pathlib import Path
 # Add src to path
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
 import PyPDF2
 from pdf_filler import fill_pdf
 def check_pdf_has_forms(pdf_path):
    """Report whether a PDF still contains interactive form content.

    The document's ``/AcroForm`` is checked first for a non-empty ``/Fields``
    array; if none is found, ``/Widget`` annotations are counted on every
    page.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A ``(has_forms, message)`` tuple where ``message`` is a short
        human-readable summary of what was found.
    """
    with open(pdf_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)

        # Use index access rather than dict-style ``.get``: indexing resolves
        # an indirect /Root reference, whereas ``.get`` may hand back the raw
        # reference object, which cannot be searched with ``in``.
        root = reader.trailer['/Root'] if '/Root' in reader.trailer else {}
        if '/AcroForm' in root:
            acroform = root['/AcroForm']
            if '/Fields' in acroform:
                fields = acroform['/Fields']
                if fields:
                    return True, f"Found {len(fields)} form fields"

        # No field list found: look for widget annotations on the pages.
        widget_count = 0
        for page in reader.pages:
            if '/Annots' not in page:
                continue
            annots = page['/Annots']
            if hasattr(annots, 'get_object'):
                annots = annots.get_object()
            if not isinstance(annots, (list, PyPDF2.generic.ArrayObject)):
                continue
            for annot_ref in annots:
                try:
                    annot = annot_ref.get_object() if hasattr(annot_ref, 'get_object') else annot_ref
                    if isinstance(annot, (dict, PyPDF2.generic.DictionaryObject)):
                        subtype = annot.get('/Subtype')
                        if subtype and str(subtype) == '/Widget':
                            widget_count += 1
                except Exception:
                    # A broken annotation must not abort the scan, but
                    # KeyboardInterrupt/SystemExit still propagate.
                    continue

        if widget_count > 0:
            return True, f"Found {widget_count} widget annotations"
        return False, "No form fields or widgets found"
 def test_flattening():
    """Fill both template variants and print whether flattening worked.

    For each of "VSM" and "QSM" whose template exists under ``src/assets``,
    the template is inspected, then a PDF is generated once with
    ``flatten=True`` and once with ``flatten=False``. The form-field status
    of each result is printed; temporary output files are removed afterwards.
    """
    cost_items = [
        {"description": "Item 1", "amountEur": 100.50},
        {"description": "Item 2", "amountEur": 200.75},
    ]
    test_payload = {
        "pa": {
            "meta": {"id": "TEST-001", "key": "test-key-123"},
            "applicant": {"name": "Test Applicant", "email": "test@example.com"},
            "project": {
                "title": "Test Project",
                "description": "This is a test project",
                "costs": cost_items,
            },
        }
    }
    separator = "-" * 50
    assets_dir = os.path.join(os.path.dirname(__file__), "src", "assets")

    print("Testing PDF Flattening...")
    print(separator)

    for variant in ("VSM", "QSM"):
        print(f"\nTesting {variant} variant:")

        # Skip variants whose template is not shipped with this checkout.
        template_path = os.path.join(assets_dir, f"{variant.lower()}.pdf")
        if not os.path.exists(template_path):
            print(f"  ⚠️  Template not found at {template_path}, skipping...")
            continue

        _, template_msg = check_pdf_has_forms(template_path)
        print(f"  Template: {template_msg}")

        # Pass 1: flattened output must no longer contain form fields.
        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_flattened.pdf", delete=False) as tmp:
            flattened_path = tmp.name
        try:
            fill_pdf(test_payload, variant, out_path=flattened_path, flatten=True)
            still_has_forms, report = check_pdf_has_forms(flattened_path)
            print(f"  Flattened PDF: {report}")
            if not still_has_forms:
                print(f"  ✅ SUCCESS: Form fields removed after flattening")
            else:
                print(f"  ❌ FAILED: Flattened PDF still has form fields!")
        except Exception as e:
            print(f"  ❌ ERROR generating flattened PDF: {e}")
        finally:
            if os.path.exists(flattened_path):
                os.unlink(flattened_path)

        # Pass 2: without flattening the form fields are expected to remain.
        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_not_flattened.pdf", delete=False) as tmp:
            not_flattened_path = tmp.name
        try:
            fill_pdf(test_payload, variant, out_path=not_flattened_path, flatten=False)
            keeps_forms, report = check_pdf_has_forms(not_flattened_path)
            print(f"  Non-flattened PDF: {report}")
            if keeps_forms:
                print(f"  ✅ Non-flattened PDF keeps form fields as expected")
            else:
                print(f"  ⚠️  WARNING: Non-flattened PDF has no form fields (unexpected)")
        except Exception as e:
            print(f"  ❌ ERROR generating non-flattened PDF: {e}")
        finally:
            if os.path.exists(not_flattened_path):
                os.unlink(not_flattened_path)

    print("\n" + separator)
    print("Test complete!")
 if __name__ == "__main__":
    test_flattening()