feat: wire native leaudit upload flow

This commit is contained in:
wren
2026-04-28 16:53:16 +08:00
parent c776af598a
commit e738398eb6
26 changed files with 7364 additions and 5778 deletions
@@ -1,68 +1,7 @@
"""LeAudit Bridge 模块。
对平台暴露统一桥接入口,内部逐步从旧的手写 pipeline
迁移到原生 ``AuditCtx`` + ``AuditService`` 路线
避免在包导入阶段就强依赖原生 ``leaudit``,否则控制器扫描时会因为
桥接层的重型导入失败,导致整包路由都无法注册
"""
from fastapi_modules.fastapi_leaudit.leaudit_bridge.client_factory import (
create_ocr_client,
create_llm_client,
create_vlm_client,
)
from fastapi_modules.fastapi_leaudit.leaudit_bridge.ocr_bridge import BridgeOCRClient
from fastapi_modules.fastapi_leaudit.leaudit_bridge.pipeline import LauditPipeline, PipelineResult
from fastapi_modules.fastapi_leaudit.leaudit_bridge.rules_loader import RulesLoader
from fastapi_modules.fastapi_leaudit.leaudit_bridge.storage_adapter import StorageAdapter
def is_leaudit_mode() -> bool:
"""新平台始终使用 leaudit pipeline。"""
return True
def create_pipeline(rules_path: str | None = None) -> LauditPipeline:
"""创建旧版兼容 LauditPipeline。
当前仍保留该入口兼容旧调用方,后续正式执行链应逐步切到
``NativeRunner``。
"""
from pathlib import Path
from leaudit.doc_normalization.adapter import DocNormalizationAdapter
from leaudit.doc_normalization.doc_classifier import RulesFileRegistry
raw_ocr = create_ocr_client()
llm_client = create_llm_client()
vlm_client = create_vlm_client()
# Build registry from rules/ directory for content-based classification
registry = None
if rules_path is None:
rules_dir = Path(__file__).resolve().parents[3] / "rules"
if rules_dir.is_dir():
registry = RulesFileRegistry.from_directory(rules_dir)
ocr_client = DocNormalizationAdapter(
ocr_client=raw_ocr,
registry=registry,
llm_client=llm_client,
vlm_client=vlm_client,
force_rules_path=rules_path,
)
ocr_client = BridgeOCRClient(ocr_client, vlm_client=vlm_client)
return LauditPipeline(
ocr_client=ocr_client,
llm_client=llm_client,
)
__all__ = [
"LauditPipeline",
"PipelineResult",
"StorageAdapter",
"RulesLoader",
"create_ocr_client",
"create_llm_client",
"create_pipeline",
"is_leaudit_mode",
]
__all__ = []
@@ -49,7 +49,7 @@ class StorageAdapter:
"""Update the document's processing_status."""
async with GetAsyncSession() as session:
await session.execute(
text("UPDATE leaudit_documents SET processing_status = :s, update_time = now() WHERE id = :did"),
text("UPDATE leaudit_documents SET processing_status = :s, updated_at = now() WHERE id = :did"),
{"s": status, "did": document_id},
)
await session.commit()
@@ -61,7 +61,7 @@ class StorageAdapter:
"""Update the document's case number."""
async with GetAsyncSession() as session:
await session.execute(
text("UPDATE leaudit_documents SET document_number = :dn, update_time = now() WHERE id = :did"),
text("UPDATE leaudit_documents SET document_number = :dn, updated_at = now() WHERE id = :did"),
{"dn": document_number, "did": document_id},
)
await session.commit()
@@ -168,7 +168,7 @@ class StorageAdapter:
await session.execute(
text("""UPDATE leaudit_audit_runs SET
total_score = :ts, passed_count = :pc, failed_count = :fc,
skipped_count = :sc, update_time = now()
skipped_count = :sc, updated_at = now()
WHERE id = :rid"""),
{
"ts": evaluation.total_score,
@@ -356,8 +356,8 @@ class StorageAdapter:
duration_ms,
requires_human_review,
payload,
create_time,
update_time
created_at,
updated_at
) VALUES (
:run_id,
:document_id,
@@ -372,8 +372,8 @@ class StorageAdapter:
:duration_ms,
:requires_human_review,
:payload,
:create_time,
:update_time
:created_at,
:updated_at
)
"""
),
@@ -391,8 +391,8 @@ class StorageAdapter:
"duration_ms": task.duration_ms,
"requires_human_review": task.requires_human_review,
"payload": task.model_dump(mode="json"),
"create_time": task.created_at,
"update_time": task.updated_at,
"created_at": task.created_at,
"updated_at": task.updated_at,
},
)
await session.commit()
@@ -419,7 +419,7 @@ class StorageAdapter:
rescue_applied = :rescue_applied,
result_status = :result_status,
finished_at = CASE WHEN :finished THEN now() ELSE finished_at END,
update_time = now()
updated_at = now()
WHERE id = :rid
"""
),
@@ -463,7 +463,7 @@ class StorageAdapter:
phase = COALESCE(:phase, phase),
result_status = 'error',
finished_at = now(),
update_time = now()
updated_at = now()
WHERE id = :rid
"""
),
@@ -3,6 +3,7 @@
from __future__ import annotations
import asyncio
from concurrent.futures import ThreadPoolExecutor
import os
from pathlib import Path
import tempfile
@@ -285,7 +286,7 @@ async def _update_status_safe(document_id: int, status: str) -> None:
async with GetAsyncSession() as session:
await session.execute(
sa_text("UPDATE leaudit_documents SET processing_status = :s, update_time = now() WHERE id = :did"),
sa_text("UPDATE leaudit_documents SET processing_status = :s, updated_at = now() WHERE id = :did"),
{"s": status, "did": document_id},
)
await session.commit()
@@ -301,7 +302,7 @@ async def _update_run_status_safe(run_id: int, status: str) -> None:
async with GetAsyncSession() as session:
await session.execute(
sa_text("UPDATE leaudit_audit_runs SET status = :s, update_time = now() WHERE id = :rid"),
sa_text("UPDATE leaudit_audit_runs SET status = :s, updated_at = now() WHERE id = :rid"),
{"s": status, "rid": run_id},
)
await session.commit()
@@ -317,7 +318,7 @@ async def _update_run_phase_safe(run_id: int, phase: str | None) -> None:
async with GetAsyncSession() as session:
await session.execute(
sa_text("UPDATE leaudit_audit_runs SET phase = :p, update_time = now() WHERE id = :rid"),
sa_text("UPDATE leaudit_audit_runs SET phase = :p, updated_at = now() WHERE id = :rid"),
{"p": phase, "rid": run_id},
)
await session.commit()
@@ -344,11 +345,20 @@ def dispatch_leaudit_task(
P2: Celery 集成后改用 leaudit_process_document.apply_async(...)
当前阶段直接同步调用。
"""
return leaudit_process_document(
document_id=document_id,
file_content=file_content,
filename=filename,
upload_info=upload_info,
source_port=source_port or int(os.getenv("APP_PORT", "8000")),
rules_path=rules_path,
)
kwargs = {
"document_id": document_id,
"file_content": file_content,
"filename": filename,
"upload_info": upload_info,
"source_port": source_port or int(os.getenv("APP_PORT", "8000")),
"rules_path": rules_path,
}
try:
asyncio.get_running_loop()
except RuntimeError:
return leaudit_process_document(**kwargs)
with ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(leaudit_process_document, **kwargs)
return future.result()