stupa-pdf-api/backend/test_flattening.py

142 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Test script to verify PDF flattening functionality.
Tests that form fields are properly removed after filling.
"""
import os
import sys
import tempfile
from pathlib import Path
# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
import PyPDF2
from pdf_filler import fill_pdf
def _resolve_pdf_object(obj):
    """Return the object behind *obj* if it exposes ``get_object``, else *obj*."""
    return obj.get_object() if hasattr(obj, 'get_object') else obj


def check_pdf_has_forms(pdf_path):
    """Report whether a PDF still contains interactive form content.

    Two places are inspected, in this order:

    1. the ``/AcroForm`` entry of the document's ``/Root`` dictionary, for a
       non-empty ``/Fields`` array;
    2. every page's ``/Annots`` array, for annotations whose ``/Subtype``
       is ``/Widget``.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        A ``(has_forms, message)`` tuple. ``has_forms`` is ``True`` when form
        fields or widget annotations were found; ``message`` is a
        human-readable summary of what was (or was not) found.
    """
    # Keep the file open for as long as the reader is being queried.
    with open(pdf_path, 'rb') as f:
        reader = PyPDF2.PdfReader(f)

        # Check for AcroForm.
        # ``dict.get`` may hand back an unresolved indirect reference rather
        # than the dictionary itself, so resolve before testing membership.
        root = _resolve_pdf_object(reader.trailer.get('/Root', {}))
        if isinstance(root, dict) and '/AcroForm' in root:
            acroform = _resolve_pdf_object(root['/AcroForm'])
            if isinstance(acroform, dict) and '/Fields' in acroform:
                fields = _resolve_pdf_object(acroform['/Fields'])
                if fields:
                    return True, f"Found {len(fields)} form fields"

        # Check for widget annotations.
        widget_count = 0
        for page in reader.pages:
            if '/Annots' not in page:
                continue
            annots = _resolve_pdf_object(page['/Annots'])
            if not isinstance(annots, (list, PyPDF2.generic.ArrayObject)):
                continue
            for annot_ref in annots:
                try:
                    annot = _resolve_pdf_object(annot_ref)
                    if not isinstance(annot, (dict, PyPDF2.generic.DictionaryObject)):
                        continue
                    subtype = _resolve_pdf_object(annot.get('/Subtype'))
                    if subtype and str(subtype) == '/Widget':
                        widget_count += 1
                except Exception:
                    # Best effort: an annotation that cannot be read is
                    # skipped instead of aborting the scan. Unlike a bare
                    # ``except``, this lets KeyboardInterrupt/SystemExit
                    # propagate.
                    continue

        if widget_count > 0:
            return True, f"Found {widget_count} widget annotations"

        return False, "No form fields or widgets found"
def _fill_and_inspect(payload, variant, flatten, suffix):
    """Fill the *variant* template into a temporary file and inspect it.

    Args:
        payload: Data passed through to ``fill_pdf``.
        variant: Template variant name passed through to ``fill_pdf``.
        flatten: Value forwarded as ``fill_pdf``'s ``flatten`` argument.
        suffix: File-name suffix for the temporary output file.

    Returns:
        The ``(has_forms, message)`` tuple from ``check_pdf_has_forms`` for
        the generated file.

    The temporary file is removed again whether or not filling or
    inspection succeeds.
    """
    # delete=False: the handle is only used to reserve a unique path, which
    # is handed to fill_pdf via out_path after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tf:
        out_path = tf.name
    try:
        fill_pdf(payload, variant, out_path=out_path, flatten=flatten)
        return check_pdf_has_forms(out_path)
    finally:
        if os.path.exists(out_path):
            os.unlink(out_path)


def test_flattening():
    """Exercise ``fill_pdf`` with and without flattening and print a report.

    For each of the ``VSM`` and ``QSM`` variants whose template exists under
    ``src/assets``, the template is inspected, then a PDF is generated with
    ``flatten=True`` (expected: no form fields left) and with
    ``flatten=False`` (expected: form fields kept). Results are printed;
    nothing is returned and no assertion is raised on a failed expectation.
    """
    # Test payload
    test_payload = {
        "pa": {
            "meta": {
                "id": "TEST-001",
                "key": "test-key-123",
            },
            "applicant": {
                "name": "Test Applicant",
                "email": "test@example.com",
            },
            "project": {
                "title": "Test Project",
                "description": "This is a test project",
                "costs": [
                    {"description": "Item 1", "amountEur": 100.50},
                    {"description": "Item 2", "amountEur": 200.75},
                ],
            },
        }
    }

    print("Testing PDF Flattening...")
    print("-" * 50)

    assets_dir = os.path.join(os.path.dirname(__file__), "src", "assets")

    # Test both variants
    for variant in ["VSM", "QSM"]:
        print(f"\nTesting {variant} variant:")

        # Check if template exists
        template_path = os.path.join(assets_dir, f"{variant.lower()}.pdf")
        if not os.path.exists(template_path):
            print(f" ⚠️ Template not found at {template_path}, skipping...")
            continue

        # Check template has forms (only the summary message is reported)
        _, msg = check_pdf_has_forms(template_path)
        print(f" Template: {msg}")

        # Generate PDF with flattening (default)
        try:
            has_forms, msg = _fill_and_inspect(
                test_payload, variant, True, f"_{variant}_flattened.pdf"
            )
            print(f" Flattened PDF: {msg}")
            if has_forms:
                print(" ❌ FAILED: Flattened PDF still has form fields!")
            else:
                print(" ✅ SUCCESS: Form fields removed after flattening")
        except Exception as e:
            # Report and carry on so the other mode/variant is still checked.
            print(f" ❌ ERROR generating flattened PDF: {e}")

        # Generate PDF without flattening for comparison
        try:
            has_forms, msg = _fill_and_inspect(
                test_payload, variant, False, f"_{variant}_not_flattened.pdf"
            )
            print(f" Non-flattened PDF: {msg}")
            if not has_forms:
                print(" ⚠️ WARNING: Non-flattened PDF has no form fields (unexpected)")
            else:
                print(" ✅ Non-flattened PDF keeps form fields as expected")
        except Exception as e:
            print(f" ❌ ERROR generating non-flattened PDF: {e}")

    print("\n" + "-" * 50)
    print("Test complete!")
# Script entry point: run the flattening check only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    test_flattening()