From 3ddb091d1e3e65f1a2b99aabaa6165ffce883c9c Mon Sep 17 00:00:00 2001
From: Frederik Beimgraben <frederik@beimgraben.net>
Date: Mon, 1 Sep 2025 14:56:56 +0200
Subject: [PATCH] =?UTF-8?q?LaTeX=20dynamic=20building=20=E2=80=93=201?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 backend/Dockerfile         |   1 +
 backend/src/pdf_filler.py  | 147 ++++++++++++++++++++++++++++++++++++-
 backend/test_flattening.py | 141 +++++++++++++++++++++++++++++++++++
 3 files changed, 285 insertions(+), 4 deletions(-)
 create mode 100644 backend/test_flattening.py

diff --git a/backend/Dockerfile b/backend/Dockerfile
index b64c81f7..ea3f6814 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -38,6 +38,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
 # System deps
 RUN apt-get update && apt-get install -y --no-install-recommends \
     tzdata ca-certificates \
+    qpdf \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
diff --git a/backend/src/pdf_filler.py b/backend/src/pdf_filler.py
index bd3b796b..3270ccc2 100644
--- a/backend/src/pdf_filler.py
+++ b/backend/src/pdf_filler.py
@@ -4,6 +4,8 @@ from __future__ import annotations
 import io
 import os
 import re
+import subprocess
+import tempfile
 from typing import Any, Dict, Optional
 
 import PyPDF2
@@ -134,13 +136,93 @@ def _collect_btn_widgets(reader: PyPDF2.PdfReader):
 
     return btn_widgets_by_name, export_values_by_name
 
+# -----------------------------
+# PDF Flattening Helper
+# -----------------------------
+
+def _flatten_pdf_with_qpdf(pdf_bytes: bytes) -> Optional[bytes]:
+    """Flatten form fields into static page content using the ``qpdf`` CLI.
+
+    Returns the flattened PDF bytes, or ``None`` when qpdf is not installed,
+    times out after 30 seconds, or exits with an error status.
+    """
+    # Bound before the ``try`` so the ``finally`` cleanup never hits an unbound
+    # name when creating a temporary file fails.
+    input_path = output_path = None
+    try:
+        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
+            input_path = input_file.name
+            input_file.write(pdf_bytes)
+        with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
+            output_path = output_file.name
+
+        result = subprocess.run(
+            ['qpdf', '--flatten-annotations=all', '--generate-appearances', input_path, output_path],
+            capture_output=True,
+            timeout=30
+        )
+
+        # qpdf exits with 3 when it succeeded with warnings; output is still written.
+        if result.returncode in (0, 3):
+            with open(output_path, 'rb') as f:
+                return f.read()
+    except (subprocess.SubprocessError, FileNotFoundError):
+        # TimeoutExpired is a SubprocessError; the caller falls back on None.
+        pass
+    finally:
+        for path in (input_path, output_path):
+            if path and os.path.exists(path):
+                os.unlink(path)
+    return None
+
+def _flatten_pdf_with_pdftk(pdf_bytes: bytes) -> Optional[bytes]:
+    """Flatten form fields into static page content using the ``pdftk`` CLI.
+
+    Returns the flattened PDF bytes, or ``None`` when pdftk is not installed,
+    times out after 30 seconds, or exits with a non-zero status.
+    """
+    # Bound before the ``try`` so the ``finally`` cleanup never hits an unbound
+    # name when creating a temporary file fails.
+    input_path = output_path = None
+    try:
+        with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as input_file:
+            input_path = input_file.name
+            input_file.write(pdf_bytes)
+        with tempfile.NamedTemporaryFile(suffix='_flattened.pdf', delete=False) as output_file:
+            output_path = output_file.name
+
+        result = subprocess.run(
+            ['pdftk', input_path, 'output', output_path, 'flatten'],
+            capture_output=True,
+            timeout=30
+        )
+
+        if result.returncode == 0:
+            with open(output_path, 'rb') as f:
+                # Temp files are removed by the ``finally`` block below.
+                return f.read()
+    except (subprocess.SubprocessError, FileNotFoundError):
+        # TimeoutExpired is a SubprocessError; the caller falls back on None.
+        pass
+    finally:
+        for path in (input_path, output_path):
+            if path and os.path.exists(path):
+                os.unlink(path)
+    return None
+
 # -----------------------------
 # Kern: PDF füllen (direktes Widget-Update)
 # -----------------------------
 
-def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None) -> bytes:
+def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = None, flatten: bool = True) -> bytes:
     """
     Payload (asdict(RootPayload) ODER dein payload["pa"]-ähnliches Dict) -> befüllte PDF-Bytes.
+
+    Args:
+        payload: Dictionary mit den Formulardaten
+        variant: "QSM" oder "VSM"
+        out_path: Optionaler Pfad zum Speichern der PDF
+        flatten: Wenn True, werden Formularfelder in statischen Inhalt umgewandelt
     """
     template_path = _get_template(variant)
     if not os.path.isfile(template_path):
@@ -329,15 +411,72 @@ def fill_pdf(payload: Dict[str, Any], variant: str, out_path: Optional[str] = No
                 except Exception:
                     continue
 
-        # 3) Schreiben
+        # 3) Write the PDF with filled forms
         bio = io.BytesIO()
         writer.write(bio)
         data = bio.getvalue()
+
+        # 4) Flatten if requested
+        if flatten:
+            # Try external tools first for better flattening
+            flattened = _flatten_pdf_with_qpdf(data)
+            if flattened:
+                data = flattened
+            else:
+                # Try pdftk as fallback
+                flattened = _flatten_pdf_with_pdftk(data)
+                if flattened:
+                    data = flattened
+                else:
+                    # Fallback: Remove form fields using PyPDF2 (fields won't be visible)
+                    # This is not ideal but better than nothing
+                    reader = PyPDF2.PdfReader(io.BytesIO(data))
+                    writer = PyPDF2.PdfWriter()
+
+                    # Copy all pages
+                    for page in reader.pages:
+                        writer.add_page(page)
+
+                    # Remove AcroForm to make fields non-interactive
+                    if "/AcroForm" in writer._root_object:
+                        del writer._root_object["/AcroForm"]
+
+                    # Remove Widget annotations
+                    for page in writer.pages:
+                        if "/Annots" in page:
+                            annots = page["/Annots"]
+                            if isinstance(annots, IndirectObject):
+                                try:
+                                    annots = annots.get_object()
+                                except:
+                                    continue
+
+                            new_annots = ArrayObject()
+                            if isinstance(annots, (list, ArrayObject)):
+                                for annot_ref in annots:
+                                    try:
+                                        annot = annot_ref.get_object() if isinstance(annot_ref, IndirectObject) else annot_ref
+                                        if isinstance(annot, DictionaryObject):
+                                            subtype = _to_str(annot.get("/Subtype"))
+                                            if subtype and subtype != "Widget":
+                                                new_annots.append(annot_ref)
+                                    except:
+                                        continue
+
+                            if len(new_annots) > 0:
+                                page[NameObject("/Annots")] = new_annots
+                            else:
+                                if "/Annots" in page:
+                                    del page["/Annots"]
+
+                    bio = io.BytesIO()
+                    writer.write(bio)
+                    data = bio.getvalue()
         if out_path:
             with open(out_path, "wb") as out:
                 out.write(data)
         return data
 
 
-def save_pdf(payload: Dict[str, Any], variant: str, out_path: str) -> None:
-    _ = fill_pdf(payload, variant, out_path=out_path)
+def save_pdf(payload: Dict[str, Any], variant: str, out_path: str, flatten: bool = True) -> None:
+    fill_pdf(payload, variant, out_path=out_path, flatten=flatten)  # returned bytes are intentionally discarded
diff --git a/backend/test_flattening.py b/backend/test_flattening.py
new file mode 100644
index 00000000..c7694e0c
--- /dev/null
+++ b/backend/test_flattening.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+"""
+Test script to verify PDF flattening functionality.
+Tests that form fields are properly removed after filling.
+"""
+
+import os
+import sys
+import tempfile
+from pathlib import Path
+
+# Add src to path
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+
+import PyPDF2
+from pdf_filler import fill_pdf
+
+def check_pdf_has_forms(pdf_path):
+    """Report whether the PDF at ``pdf_path`` still contains form elements.
+
+    Returns a ``(has_forms, message)`` tuple: ``has_forms`` is True when the
+    catalog's AcroForm lists fields or any page carries a Widget annotation.
+    """
+    with open(pdf_path, 'rb') as f:
+        reader = PyPDF2.PdfReader(f)
+        # Check for AcroForm
+        if '/AcroForm' in reader.trailer.get('/Root', {}):
+            acroform = reader.trailer['/Root']['/AcroForm']
+            fields = acroform['/Fields'] if '/Fields' in acroform else None
+            if fields:
+                return True, f"Found {len(fields)} form fields"
+
+        # Check for widget annotations
+        widget_count = 0
+        for page in reader.pages:
+            if '/Annots' not in page:
+                continue
+            annots = page['/Annots']
+            if hasattr(annots, 'get_object'):
+                annots = annots.get_object()
+            if not isinstance(annots, (list, PyPDF2.generic.ArrayObject)):
+                continue
+            for annot_ref in annots:
+                try:
+                    annot = annot_ref.get_object() if hasattr(annot_ref, 'get_object') else annot_ref
+                    if isinstance(annot, (dict, PyPDF2.generic.DictionaryObject)) and str(annot.get('/Subtype')) == '/Widget':
+                        widget_count += 1
+                except Exception:  # skip unreadable annotations, but let Ctrl-C / SystemExit through
+                    pass
+
+        if widget_count > 0:
+            return True, f"Found {widget_count} widget annotations"
+        return False, "No form fields or widgets found"
+
+def test_flattening():
+    """Fill both template variants with and without flattening and check the result.
+
+    Prints a report per variant. Raises ``AssertionError`` at the end when a
+    flattened PDF still contains form fields or when generating a PDF raised,
+    so the check can actually fail under pytest or as a script. Variants whose
+    template file is missing are skipped.
+    """
+    test_payload = {
+        "pa": {
+            "meta": {"id": "TEST-001", "key": "test-key-123"},
+            "applicant": {"name": "Test Applicant", "email": "test@example.com"},
+            "project": {
+                "title": "Test Project",
+                "description": "This is a test project",
+                "costs": [
+                    {"description": "Item 1", "amountEur": 100.50},
+                    {"description": "Item 2", "amountEur": 200.75}
+                ]
+            }
+        }
+    }
+    failures = []  # collected problems; reported together after all variants ran
+
+    print("Testing PDF Flattening...")
+    print("-" * 50)
+
+    # Test both variants
+    for variant in ["VSM", "QSM"]:
+        print(f"\nTesting {variant} variant:")
+
+        # Check if template exists
+        template_path = os.path.join(os.path.dirname(__file__), "src", "assets", f"{variant.lower()}.pdf")
+        if not os.path.exists(template_path):
+            print(f"  ⚠️  Template not found at {template_path}, skipping...")
+            continue
+
+        # Check template has forms
+        _, msg = check_pdf_has_forms(template_path)
+        print(f"  Template: {msg}")
+
+        # Generate PDF with flattening (default)
+        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_flattened.pdf", delete=False) as tf:
+            flattened_path = tf.name
+
+        try:
+            fill_pdf(test_payload, variant, out_path=flattened_path, flatten=True)
+            has_forms, msg = check_pdf_has_forms(flattened_path)
+            print(f"  Flattened PDF: {msg}")
+            if has_forms:
+                print("  ❌ FAILED: Flattened PDF still has form fields!")
+                failures.append(f"{variant}: flattened PDF still has form fields")
+            else:
+                print("  ✅ SUCCESS: Form fields removed after flattening")
+        except Exception as e:
+            print(f"  ❌ ERROR generating flattened PDF: {e}")
+            failures.append(f"{variant}: error generating flattened PDF: {e}")
+        finally:
+            if os.path.exists(flattened_path):
+                os.unlink(flattened_path)
+
+        # Generate PDF without flattening for comparison
+        with tempfile.NamedTemporaryFile(suffix=f"_{variant}_not_flattened.pdf", delete=False) as tf:
+            not_flattened_path = tf.name
+
+        try:
+            fill_pdf(test_payload, variant, out_path=not_flattened_path, flatten=False)
+            has_forms, msg = check_pdf_has_forms(not_flattened_path)
+            print(f"  Non-flattened PDF: {msg}")
+            if not has_forms:
+                print("  ⚠️  WARNING: Non-flattened PDF has no form fields (unexpected)")
+            else:
+                print("  ✅ Non-flattened PDF keeps form fields as expected")
+        except Exception as e:
+            print(f"  ❌ ERROR generating non-flattened PDF: {e}")
+            failures.append(f"{variant}: error generating non-flattened PDF: {e}")
+        finally:
+            if os.path.exists(not_flattened_path):
+                os.unlink(not_flattened_path)
+
+    print("\n" + "-" * 50)
+    print("Test complete!")
+    if failures:
+        raise AssertionError("PDF flattening check failed: " + "; ".join(failures))
+
+if __name__ == "__main__":  # allow running the check directly as a script
+    test_flattening()