From f63fbeeffd76cb20275998e5077d0480892d6d8d Mon Sep 17 00:00:00 2001 From: Frederik Beimgraben Date: Sun, 31 Aug 2025 18:02:28 +0200 Subject: [PATCH] Re-Add Number / Key --- pdf_field_mapping.py | 24 ++++++++++++++++++------ pdf_to_json.py | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/pdf_field_mapping.py b/pdf_field_mapping.py index 2a239b21..8f881ec0 100644 --- a/pdf_field_mapping.py +++ b/pdf_field_mapping.py @@ -9,7 +9,19 @@ _PLACEHOLDER_VALUES: set = {None, "", "-", "JJJJ-MM", "/\\Fld@default "} # --- COMMON fields (shared across variants) --- TEXT_MAPPING_COMMON: dict = { - # Applicant + # --- Meta --- + 'pa-id': { + 'required': True, + 'target-key': 'pa.meta.id', + 'type': str, + }, + 'pa-key': { + 'required': True, + 'target-key': 'pa.meta.key', + 'type': str, + }, + + # --- Applicant --- 'pa-applicant-type': { 'required': True, 'target-key': 'pa.applicant.type', @@ -56,14 +68,14 @@ TEXT_MAPPING_COMMON: dict = { ] }, - # Project core + # --- Project core --- 'pa-project-name': {'required': True, 'target-key': 'pa.project.name', 'type': str}, 'pa-start-date': {'required': True, 'target-key': 'pa.project.dates.start', 'type': str}, 'pa-end-date': {'required': False, 'target-key': 'pa.project.dates.end', 'type': str}, 'pa-participants': {'required': False, 'target-key': 'pa.project.participants', 'type': int}, 'pa-project-description': {'required': True, 'target-key': 'pa.project.description', 'type': str}, - # Participation (checkboxes) + # --- Participation (checkboxes) --- 'pa-participating-faculties-inf': {'required': False, 'target-key': 'pa.project.participation.faculties.inf', 'type': bool}, 'pa-participating-faculties-esb': {'required': False, 'target-key': 'pa.project.participation.faculties.esb', 'type': bool}, 'pa-participating-faculties-ls': {'required': False, 'target-key': 'pa.project.participation.faculties.ls', 'type': bool}, @@ -72,15 +84,15 @@ TEXT_MAPPING_COMMON: dict = { 'pa-participating-faculties-nxt': {'required': False, 'target-key': 'pa.project.participation.faculties.nxt', 'type': bool}, 'pa-participating-faculties-open': {'required': False, 'target-key': 'pa.project.participation.faculties.open', 'type': bool}, - # Costs & totals + # --- Costs & totals --- 'pa-cost-{a;1:24}-name': {'required': True, 'target-key': 'pa.project.costs[{a}].name', 'type': str}, 'pa-cost-{a;1:24}-amount-euro': {'required': True, 'target-key': 'pa.project.costs[{a}].amountEur', 'type': float}, 'pa-requested-amount-euro-sum': {'required': True, 'target-key': 'pa.project.totals.requestedAmountEur', 'type': float}, - # Attachments common + # --- Attachments common --- 'pa-anh-vergleichsangebote': {'required': False, 'target-key': 'pa.attachments.comparativeOffers', 'type': bool}, - # Misc + # --- Misc --- 'warning-not-supported': {'required': False, 'target-key': 'warning.notSupported', 'type': str}, } diff --git a/pdf_to_json.py b/pdf_to_json.py index 7a9dcb06..cbab07c0 100755 --- a/pdf_to_json.py +++ b/pdf_to_json.py @@ -17,7 +17,7 @@ import json import re from argparse import ArgumentParser from dataclasses import dataclass, asdict, field -from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple import PyPDF2 from pdf_field_mapping import ( @@ -31,6 +31,11 @@ from pdf_field_mapping import ( # Types / Data Model # ========================= +@dataclass +class Meta: + id: Optional[str] = None + key: Optional[str] = None + @dataclass class Name: first: Optional[str] = None @@ -133,7 +138,11 @@ class WarningInfo: @dataclass class RootPayload: - pa: Any = field(default_factory=dict) # will hold applicant + project + attachments + pa: Dict[str, Any] = field(default_factory=dict) # kompatibel für Dump + applicant: Applicant = field(default_factory=Applicant) + project: Project = field(default_factory=Project) + attachments: Attachments = field(default_factory=Attachments) + meta: Meta = field(default_factory=Meta) warning: WarningInfo = field(default_factory=WarningInfo) _validation: Dict[str, Any] = field(default_factory=dict) @@ -338,6 +347,13 @@ def _get(d: Mapping[str, Any], path: str, default=None): return curr def payload_to_model(payload: Dict[str, Any]) -> RootPayload: + # Meta + meta_dict = _get(payload, "pa.meta", {}) or {} + meta = Meta( + id=meta_dict.get("id"), + key=meta_dict.get("key"), + ) + # Build Applicant applicant_dict = _get(payload, "pa.applicant", {}) or {} applicant = Applicant( @@ -427,14 +443,21 @@ def payload_to_model(payload: Dict[str, Any]) -> RootPayload: warning = WarningInfo(notSupported=warning_dict.get("notSupported")) root = RootPayload( - pa={ - "applicant": asdict(applicant), - "project": asdict(project), - "attachments": asdict(attachments), - }, + applicant=applicant, + project=project, + attachments=attachments, + meta=meta, warning=warning, _validation=payload.get("_validation", {}), ) + + # Für JSON-Kompatibilität auch pa zusammenbauen + root.pa = { + "meta": asdict(meta), + "applicant": asdict(applicant), + "project": asdict(project), + "attachments": asdict(attachments), + } return root