feat: add document versioning and list API

This commit is contained in:
wren
2026-04-29 11:48:50 +08:00
parent f3b83c9979
commit b45d61fa97
14 changed files with 1693 additions and 92 deletions
@@ -6,6 +6,7 @@ into leaudit_* table format and writes via SQLAlchemy async session.
from __future__ import annotations
import json
import logging
import re
from typing import Any
@@ -157,11 +158,19 @@ class StorageAdapter:
row = _rule_result_to_row(document_id, resolved_run_id, rule_result, rule, bundle)
if rule_version_id is not None:
row["rule_version_id"] = rule_version_id
columns = ", ".join(row.keys())
placeholders = ", ".join(f":{k}" for k in row)
json_columns = {"stages", "extracted_fields", "field_positions", "remediation", "rule_meta"}
serialized_row = {
key: (json.dumps(value, ensure_ascii=False) if key in json_columns and value is not None else value)
for key, value in row.items()
}
columns = ", ".join(serialized_row.keys())
placeholders = ", ".join(
f"CAST(:{k} AS JSONB)" if k in json_columns else f":{k}"
for k in serialized_row
)
await session.execute(
text(f"INSERT INTO leaudit_rule_results ({columns}) VALUES ({placeholders})"),
row,
serialized_row,
)
# Update audit_runs summary (scores only — terminal state set by finalize_run)
@@ -371,7 +380,7 @@ class StorageAdapter:
:vlm_calls,
:duration_ms,
:requires_human_review,
:payload,
CAST(:payload AS JSONB),
:created_at,
:updated_at
)
@@ -390,7 +399,7 @@ class StorageAdapter:
"vlm_calls": task.vlm_calls,
"duration_ms": task.duration_ms,
"requires_human_review": task.requires_human_review,
"payload": task.model_dump(mode="json"),
"payload": json.dumps(task.model_dump(mode="json"), ensure_ascii=False),
"created_at": task.created_at,
"updated_at": task.updated_at,
},
@@ -527,8 +536,9 @@ def _bundle_to_extracted(bundle: ExtractionBundle) -> dict[str, Any]:
"value": fv.value,
"confidence": float(fv.confidence) if fv.confidence else 0.0,
}
if fv.position is not None:
field_data["position"] = fv.position.model_dump(mode="json")
position_payload = _field_value_position_payload(fv)
if position_payload is not None:
field_data["position"] = position_payload
fields[name] = field_data
else:
fields[name] = {"value": fv}
@@ -637,11 +647,39 @@ def _extract_relevant_field_positions(
if f in positions:
continue
fv = bundle.fields.get(f)
if fv is not None and isinstance(fv, FieldValue) and fv.position is not None:
positions[f] = fv.position.model_dump(mode="json")
if fv is not None and isinstance(fv, FieldValue):
position_payload = _field_value_position_payload(fv)
if position_payload is not None:
positions[f] = position_payload
return positions
def _field_value_position_payload(fv: FieldValue) -> dict[str, Any] | None:
    """Extract a persistable position hint from a ``FieldValue``.

    Supports both FieldValue layouts: one that exposes a structured
    ``position`` attribute and one that only carries location clues inside
    ``metadata``.

    Resolution order:
      1. ``fv.position`` -- dumped with ``model_dump(mode="json")`` when it
         offers that method, or returned as-is when it is already a dict.
      2. ``fv.metadata`` -- the recognised snake_case keys are copied into a
         new dict under camelCase names.

    Args:
        fv: The field value to inspect. Either attribute may be missing.

    Returns:
        A dict describing the position, or ``None`` when neither source
        yields any position information.
    """
    position = getattr(fv, "position", None)
    if position is not None:
        if hasattr(position, "model_dump"):
            return position.model_dump(mode="json")
        if isinstance(position, dict):
            return position
        # Unrecognised position type: fall through to the metadata clues.

    # Read ``metadata`` as defensively as ``position`` so that a FieldValue
    # variant without the attribute yields None instead of AttributeError.
    metadata = getattr(fv, "metadata", None)
    if not isinstance(metadata, dict):
        return None

    # (metadata key, payload key) pairs, in the order they are emitted.
    key_map = (
        ("match_position", "matchPosition"),
        ("matched_text", "matchedText"),
        ("page_num", "pageNum"),
        ("page_nums", "pageNums"),
        ("bbox", "bbox"),
    )
    payload: dict[str, Any] = {
        target: metadata[source]
        for source, target in key_map
        if source in metadata
    }
    # An empty payload means "no position known"; report that as None.
    return payload or None
def _rule_result_to_row(
document_id: int,
run_id: int | None,