feat: wire native leaudit upload flow

This commit is contained in:
wren
2026-04-28 16:53:16 +08:00
parent c776af598a
commit e738398eb6
26 changed files with 7364 additions and 5778 deletions
@@ -0,0 +1,45 @@
"""文档控制器。"""
from fastapi import File, Form, UploadFile
from fastapi_common.fastapi_common_web.controller import BaseController
from fastapi_common.fastapi_common_web.domain.responses import Result
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
from fastapi_modules.fastapi_leaudit.services import IDocumentService
from fastapi_modules.fastapi_leaudit.services.impl.documentServiceImpl import DocumentServiceImpl
class DocumentController(BaseController):
    """Controller that exposes the document endpoints."""

    def __init__(self):
        """Initialise the base controller and register the upload route.

        The base controller is created with an empty prefix and the
        document tag; a ``DocumentServiceImpl`` is attached as the
        service used by the route handler defined below.
        """
        super().__init__(prefix="", tags=["文档"])
        self.DocumentService: IDocumentService = DocumentServiceImpl()

        @self.router.post("/upload", response_model=Result[DocumentUploadVO])
        async def UploadDocument(
            file: UploadFile = File(..., description="上传文档"),
            typeId: int | None = Form(None, description="文档类型ID"),
            typeCode: str | None = Form(None, description="文档类型编码"),
            bizDocumentId: int | None = Form(None, description="业务文档ID"),
            region: str = Form("default", description="所属地区"),
            fileRole: str = Form("primary", description="文件角色"),
            createdBy: int | None = Form(None, description="上传用户ID"),
            autoRun: bool = Form(False, description="是否上传后自动触发评查"),
        ):
            """Upload a document and pass it to the document service.

            The uploaded file is read fully into memory and forwarded,
            together with the submitted form fields, to
            ``DocumentService.Upload``. The service result is returned
            wrapped in a success ``Result``.
            """
            rawBytes = await file.read()
            # Fall back to a generic name when the upload carries none.
            storedName = file.filename or "upload.bin"
            uploadResult = await self.DocumentService.Upload(
                FileName=storedName,
                FileContent=rawBytes,
                ContentType=file.content_type,
                TypeId=typeId,
                TypeCode=typeCode,
                BizDocumentId=bizDocumentId,
                Region=region,
                FileRole=fileRole,
                CreatedBy=createdBy,
                AutoRun=autoRun,
            )
            return Result.success(data=uploadResult)
@@ -11,23 +11,115 @@ class AuditRunVO(BaseModel):
runId: int = Field(..., description="运行ID")
documentId: int = Field(..., description="文档ID")
runNo: int = Field(..., description="执行序号")
documentFileId: int | None = Field(None, description="本次执行锁定的文档文件ID")
status: str = Field(..., description="状态")
phase: str | None = Field(None, description="draft/executed")
phase: str | None = Field(None, description="当前阶段")
resultStatus: str | None = Field(None, description="整体结果: pass/fail/partial/review/error")
ruleSetId: int | None = Field(None, description="规则集ID")
ruleVersionId: int | None = Field(None, description="规则版本ID")
ruleTypeId: str | None = Field(None, description="规则类型ID")
rescueApplied: bool = Field(False, description="是否触发 rescue")
totalScore: float | None = Field(None, description="总分")
passedCount: int | None = Field(None, description="通过数")
failedCount: int | None = Field(None, description="失败数")
skippedCount: int | None = Field(None, description="跳过数")
startedAt: datetime | None = Field(None, description="开始时间")
finishedAt: datetime | None = Field(None, description="结束时间")
class AuditFieldResultVO(BaseModel):
    """Extracted field result.

    Only ``fieldName`` is required; every other field defaults to ``None``.
    """

    fieldName: str = Field(..., description="字段名")
    fieldType: str | None = Field(default=None, description="字段类型")
    valueText: str | None = Field(default=None, description="字段值")
    confidence: float | None = Field(default=None, description="综合置信度")
    groundingMethod: str | None = Field(default=None, description="落地验证方法")
    fallbackValue: str | None = Field(default=None, description="兜底值")
    # The two JSON fields accept a dict, a list or a string.
    rawValueJson: dict | list | str | None = Field(default=None, description="原始抽取结果")
    metaJson: dict | list | str | None = Field(default=None, description="扩展元数据")
class AuditRunErrorVO(BaseModel):
    """Run error result.

    ``stage``, ``level`` and ``message`` are required; the remaining
    fields default to ``None``.
    """

    stage: str = Field(..., description="错误阶段")
    level: str = Field(..., description="错误级别")
    errorCode: str | None = Field(default=None, description="错误码")
    message: str = Field(..., description="错误信息")
    # Accepts a dict, a list or a string.
    detailJson: dict | list | str | None = Field(default=None, description="错误详情")
    createdAt: datetime | None = Field(default=None, description="创建时间")
class AuditRescueOutcomeVO(BaseModel):
    """Rescue outcome.

    ``ruleId`` and ``status`` are required. ``requiresHumanReview``
    defaults to ``False``; all other fields default to ``None``.
    """

    ruleId: str = Field(..., description="规则ID")
    status: str = Field(..., description="补救状态")
    diagnosis: str | None = Field(default=None, description="诊断结果")
    diagnosisConfidence: float | None = Field(default=None, description="诊断置信度")
    finalStatus: str | None = Field(default=None, description="补救最终状态")
    failureReason: str | None = Field(default=None, description="失败原因")
    llmCalls: int | None = Field(default=None, description="LLM 调用次数")
    vlmCalls: int | None = Field(default=None, description="VLM 调用次数")
    durationMs: int | None = Field(default=None, description="耗时毫秒")
    requiresHumanReview: bool = Field(default=False, description="是否需要人工复核")
    # Accepts a dict, a list or a string.
    payload: dict | list | str | None = Field(default=None, description="补救载荷")
class AuditMetricsVO(BaseModel):
    """Run metrics.

    Every field is optional and defaults to ``None``.
    """

    # Timings, as floats.
    ocrSeconds: float | None = Field(default=None, description="OCR耗时")
    normalizeSeconds: float | None = Field(default=None, description="归一化耗时")
    extractSeconds: float | None = Field(default=None, description="抽取耗时")
    evaluateSeconds: float | None = Field(default=None, description="评查耗时")
    rescueSeconds: float | None = Field(default=None, description="补救耗时")
    totalSeconds: float | None = Field(default=None, description="总耗时")
    # Counters, as integers.
    pageCount: int | None = Field(default=None, description="页数")
    subDocumentCount: int | None = Field(default=None, description="子文档数")
    fieldCount: int | None = Field(default=None, description="字段数")
    ruleCount: int | None = Field(default=None, description="规则数")
    llmCallCount: int | None = Field(default=None, description="LLM 调用次数")
    vlmCallCount: int | None = Field(default=None, description="VLM 调用次数")
    rescueRuleCount: int | None = Field(default=None, description="补救规则数")
    artifactCount: int | None = Field(default=None, description="产物数")
class AuditArtifactVO(BaseModel):
    """Audit artifact.

    Only ``artifactType`` is required; every other field defaults to ``None``.
    """

    artifactType: str = Field(..., description="产物类型")
    artifactRole: str | None = Field(default=None, description="产物角色")
    fileName: str | None = Field(default=None, description="文件名")
    fileExt: str | None = Field(default=None, description="扩展名")
    mimeType: str | None = Field(default=None, description="MIME")
    fileSize: int | None = Field(default=None, description="文件大小")
    ossUrl: str | None = Field(default=None, description="OSS 路径")
    isPersisted: bool | None = Field(default=None, description="是否已持久化")
class AuditResultVO(BaseModel):
    """Audit result response.

    ``runId``, ``documentId`` and ``status`` are required. The three
    counters default to ``0``, ``rescueApplied`` defaults to ``False``,
    the list fields default to fresh empty lists, and everything else
    defaults to ``None``.
    """

    runId: int = Field(..., description="运行ID")
    documentId: int = Field(..., description="文档ID")
    documentFileId: int | None = Field(None, description="本次执行锁定的文档文件ID")
    status: str = Field(..., description="运行状态")
    totalScore: float | None = Field(None, description="总分")
    passedCount: int = Field(0, description="通过数")
    failedCount: int = Field(0, description="失败数")
    skippedCount: int = Field(0, description="跳过数")
    # `phase` was declared twice in a row; the earlier declaration was
    # overridden by this one and has been removed as dead code.
    phase: str | None = Field(None, description="当前阶段")
    resultStatus: str | None = Field(None, description="整体结果")
    rescueApplied: bool = Field(False, description="是否执行 rescue")
    ruleSetId: int | None = Field(None, description="规则集ID")
    ruleVersionId: int | None = Field(None, description="规则版本ID")
    startedAt: datetime | None = Field(None, description="开始时间")
    finishedAt: datetime | None = Field(None, description="结束时间")
    # Rule results are plain dicts; the other collections use typed VOs.
    rules: list[dict] = Field(default_factory=list, description="规则结果列表")
    fields: list[AuditFieldResultVO] = Field(default_factory=list, description="抽取字段结果")
    errors: list[AuditRunErrorVO] = Field(default_factory=list, description="运行错误列表")
    rescueOutcomes: list[AuditRescueOutcomeVO] = Field(default_factory=list, description="补救结果列表")
    metrics: AuditMetricsVO | None = Field(None, description="运行指标")
    artifacts: list[AuditArtifactVO] = Field(default_factory=list, description="运行产物列表")
@@ -0,0 +1,21 @@
"""文档 VO。"""
from pydantic import BaseModel, Field
from fastapi_modules.fastapi_leaudit.domian.vo.auditVo import AuditRunVO
class DocumentUploadVO(BaseModel):
    """Document upload response.

    Every field is required except ``run``, which defaults to ``None``.
    """

    # Identifiers.
    documentId: int = Field(..., description="LeAudit 文档ID")
    bizDocumentId: int = Field(..., description="业务文档ID")
    fileId: int = Field(..., description="文档文件ID")
    typeId: int = Field(..., description="文档类型ID")
    typeCode: str = Field(..., description="文档类型编码")
    region: str = Field(..., description="所属地区")
    # Stored file.
    fileName: str = Field(..., description="文件名")
    ossUrl: str = Field(..., description="OSS 对象路径")
    # Processing state.
    processingStatus: str = Field(..., description="文档处理状态")
    autoRunTriggered: bool = Field(..., description="是否已自动触发评查")
    run: AuditRunVO | None = Field(default=None, description="自动触发后的运行信息")
@@ -1,68 +1,7 @@
"""LeAudit Bridge 模块。
对平台暴露统一桥接入口,内部逐步从旧的手写 pipeline
迁移到原生 ``AuditCtx`` + ``AuditService`` 路线
避免在包导入阶段就强依赖原生 ``leaudit``,否则控制器扫描时会因为
桥接层的重型导入失败,导致整包路由都无法注册
"""
from fastapi_modules.fastapi_leaudit.leaudit_bridge.client_factory import (
create_ocr_client,
create_llm_client,
create_vlm_client,
)
from fastapi_modules.fastapi_leaudit.leaudit_bridge.ocr_bridge import BridgeOCRClient
from fastapi_modules.fastapi_leaudit.leaudit_bridge.pipeline import LauditPipeline, PipelineResult
from fastapi_modules.fastapi_leaudit.leaudit_bridge.rules_loader import RulesLoader
from fastapi_modules.fastapi_leaudit.leaudit_bridge.storage_adapter import StorageAdapter
def is_leaudit_mode() -> bool:
    """Report whether the leaudit pipeline is in use.

    The new platform always uses the leaudit pipeline, so this
    unconditionally returns ``True``.
    """
    always_enabled = True
    return always_enabled
def create_pipeline(rules_path: str | None = None) -> LauditPipeline:
    """Create the legacy-compatible ``LauditPipeline``.

    This entry point is kept for older callers; the formal execution
    chain is expected to move gradually to ``NativeRunner``.

    Args:
        rules_path: Forwarded to the normalization adapter as
            ``force_rules_path``. When it is ``None``, the ``rules``
            directory under ``parents[3]`` of this file's resolved path
            is used to build a ``RulesFileRegistry``, if it exists.

    Returns:
        A ``LauditPipeline`` wired with the bridged OCR client and the
        LLM client.
    """
    from pathlib import Path

    from leaudit.doc_normalization.adapter import DocNormalizationAdapter
    from leaudit.doc_normalization.doc_classifier import RulesFileRegistry

    base_ocr = create_ocr_client()
    llm = create_llm_client()
    vlm = create_vlm_client()

    # Build registry from rules/ directory for content-based classification
    rules_registry = None
    if rules_path is None:
        default_rules_dir = Path(__file__).resolve().parents[3] / "rules"
        rules_registry = (
            RulesFileRegistry.from_directory(default_rules_dir)
            if default_rules_dir.is_dir()
            else None
        )

    normalizing_ocr = DocNormalizationAdapter(
        ocr_client=base_ocr,
        registry=rules_registry,
        llm_client=llm,
        vlm_client=vlm,
        force_rules_path=rules_path,
    )
    # Wrap the normalizing adapter in the bridge client before handing it on.
    bridged_ocr = BridgeOCRClient(normalizing_ocr, vlm_client=vlm)
    return LauditPipeline(ocr_client=bridged_ocr, llm_client=llm)
__all__ = [
"LauditPipeline",
"PipelineResult",
"StorageAdapter",
"RulesLoader",
"create_ocr_client",
"create_llm_client",
"create_pipeline",
"is_leaudit_mode",
]
__all__ = []
@@ -49,7 +49,7 @@ class StorageAdapter:
"""Update the document's processing_status."""
async with GetAsyncSession() as session:
await session.execute(
text("UPDATE leaudit_documents SET processing_status = :s, update_time = now() WHERE id = :did"),
text("UPDATE leaudit_documents SET processing_status = :s, updated_at = now() WHERE id = :did"),
{"s": status, "did": document_id},
)
await session.commit()
@@ -61,7 +61,7 @@ class StorageAdapter:
"""Update the document's case number."""
async with GetAsyncSession() as session:
await session.execute(
text("UPDATE leaudit_documents SET document_number = :dn, update_time = now() WHERE id = :did"),
text("UPDATE leaudit_documents SET document_number = :dn, updated_at = now() WHERE id = :did"),
{"dn": document_number, "did": document_id},
)
await session.commit()
@@ -168,7 +168,7 @@ class StorageAdapter:
await session.execute(
text("""UPDATE leaudit_audit_runs SET
total_score = :ts, passed_count = :pc, failed_count = :fc,
skipped_count = :sc, update_time = now()
skipped_count = :sc, updated_at = now()
WHERE id = :rid"""),
{
"ts": evaluation.total_score,
@@ -356,8 +356,8 @@ class StorageAdapter:
duration_ms,
requires_human_review,
payload,
create_time,
update_time
created_at,
updated_at
) VALUES (
:run_id,
:document_id,
@@ -372,8 +372,8 @@ class StorageAdapter:
:duration_ms,
:requires_human_review,
:payload,
:create_time,
:update_time
:created_at,
:updated_at
)
"""
),
@@ -391,8 +391,8 @@ class StorageAdapter:
"duration_ms": task.duration_ms,
"requires_human_review": task.requires_human_review,
"payload": task.model_dump(mode="json"),
"create_time": task.created_at,
"update_time": task.updated_at,
"created_at": task.created_at,
"updated_at": task.updated_at,
},
)
await session.commit()
@@ -419,7 +419,7 @@ class StorageAdapter:
rescue_applied = :rescue_applied,
result_status = :result_status,
finished_at = CASE WHEN :finished THEN now() ELSE finished_at END,
update_time = now()
updated_at = now()
WHERE id = :rid
"""
),
@@ -463,7 +463,7 @@ class StorageAdapter:
phase = COALESCE(:phase, phase),
result_status = 'error',
finished_at = now(),
update_time = now()
updated_at = now()
WHERE id = :rid
"""
),
@@ -3,6 +3,7 @@
from __future__ import annotations
import asyncio
from concurrent.futures import ThreadPoolExecutor
import os
from pathlib import Path
import tempfile
@@ -285,7 +286,7 @@ async def _update_status_safe(document_id: int, status: str) -> None:
async with GetAsyncSession() as session:
await session.execute(
sa_text("UPDATE leaudit_documents SET processing_status = :s, update_time = now() WHERE id = :did"),
sa_text("UPDATE leaudit_documents SET processing_status = :s, updated_at = now() WHERE id = :did"),
{"s": status, "did": document_id},
)
await session.commit()
@@ -301,7 +302,7 @@ async def _update_run_status_safe(run_id: int, status: str) -> None:
async with GetAsyncSession() as session:
await session.execute(
sa_text("UPDATE leaudit_audit_runs SET status = :s, update_time = now() WHERE id = :rid"),
sa_text("UPDATE leaudit_audit_runs SET status = :s, updated_at = now() WHERE id = :rid"),
{"s": status, "rid": run_id},
)
await session.commit()
@@ -317,7 +318,7 @@ async def _update_run_phase_safe(run_id: int, phase: str | None) -> None:
async with GetAsyncSession() as session:
await session.execute(
sa_text("UPDATE leaudit_audit_runs SET phase = :p, update_time = now() WHERE id = :rid"),
sa_text("UPDATE leaudit_audit_runs SET phase = :p, updated_at = now() WHERE id = :rid"),
{"p": phase, "rid": run_id},
)
await session.commit()
@@ -344,11 +345,20 @@ def dispatch_leaudit_task(
P2: Celery 集成后改用 leaudit_process_document.apply_async(...)
当前阶段直接同步调用。
"""
return leaudit_process_document(
document_id=document_id,
file_content=file_content,
filename=filename,
upload_info=upload_info,
source_port=source_port or int(os.getenv("APP_PORT", "8000")),
rules_path=rules_path,
)
kwargs = {
"document_id": document_id,
"file_content": file_content,
"filename": filename,
"upload_info": upload_info,
"source_port": source_port or int(os.getenv("APP_PORT", "8000")),
"rules_path": rules_path,
}
try:
asyncio.get_running_loop()
except RuntimeError:
return leaudit_process_document(**kwargs)
with ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(leaudit_process_document, **kwargs)
return future.result()
@@ -15,46 +15,46 @@ class LeauditAuditRun(BaseModel):
__tablename__ = "leaudit_audit_runs"
Id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
documentId: Mapped[int] = mapped_column(BigInteger, comment="关联 leaudit_documents.id")
documentFileId: Mapped[int | None] = mapped_column(BigInteger, comment="输入文件ID")
runNo: Mapped[int] = mapped_column(Integer, comment="同一文档第几次执行")
triggerSource: Mapped[str] = mapped_column(String(64), comment="upload/manual/retry/migration/batch")
triggerUserId: Mapped[int | None] = mapped_column(BigInteger, comment="触发人")
taskId: Mapped[str | None] = mapped_column(String(128), comment="Celery 任务 ID")
Id: Mapped[int] = mapped_column("id", BigInteger, primary_key=True, autoincrement=True)
documentId: Mapped[int] = mapped_column("document_id", BigInteger, comment="关联 leaudit_documents.id")
documentFileId: Mapped[int | None] = mapped_column("document_file_id", BigInteger, comment="输入文件ID")
runNo: Mapped[int] = mapped_column("run_no", Integer, comment="同一文档第几次执行")
triggerSource: Mapped[str] = mapped_column("trigger_source", String(64), comment="upload/manual/retry/migration/batch")
triggerUserId: Mapped[int | None] = mapped_column("trigger_user_id", BigInteger, comment="触发人")
taskId: Mapped[str | None] = mapped_column("task_id", String(128), comment="Celery 任务 ID")
# 状态
status: Mapped[str] = mapped_column(String(64), default="pending", comment="pending/processing/completed/failed/cancelled")
phase: Mapped[str | None] = mapped_column(String(32), comment="draft/executed")
status: Mapped[str] = mapped_column("status", String(64), default="pending", comment="pending/processing/completed/failed/cancelled")
phase: Mapped[str | None] = mapped_column("phase", String(32), comment="draft/executed")
# 规则溯源
ruleSetId: Mapped[int] = mapped_column(BigInteger, comment="关联 leaudit_rule_sets.id")
ruleVersionId: Mapped[int] = mapped_column(BigInteger, comment="关联 leaudit_rule_versions.id")
ruleTypeId: Mapped[str | None] = mapped_column(String(256), comment="LeAudit metadata.type_id")
ruleSourceOssUrl: Mapped[str | None] = mapped_column(String(2048), comment="规则 YAML OSS 地址")
ruleSourceSha256: Mapped[str | None] = mapped_column(String(64), comment="规则文件 SHA256")
ruleLocalCachePath: Mapped[str | None] = mapped_column(String(1024), comment="本地缓存路径")
ruleSetId: Mapped[int] = mapped_column("rule_set_id", BigInteger, comment="关联 leaudit_rule_sets.id")
ruleVersionId: Mapped[int] = mapped_column("rule_version_id", BigInteger, comment="关联 leaudit_rule_versions.id")
ruleTypeId: Mapped[str | None] = mapped_column("rule_type_id", String(256), comment="LeAudit metadata.type_id")
ruleSourceOssUrl: Mapped[str | None] = mapped_column("rule_source_oss_url", String(2048), comment="规则 YAML OSS 地址")
ruleSourceSha256: Mapped[str | None] = mapped_column("rule_source_sha256", String(64), comment="规则文件 SHA256")
ruleLocalCachePath: Mapped[str | None] = mapped_column("rule_local_cache_path", String(1024), comment="本地缓存路径")
# 模型快照
engineVersion: Mapped[str | None] = mapped_column(String(64))
llmProvider: Mapped[str | None] = mapped_column(String(64))
llmModel: Mapped[str | None] = mapped_column(String(128))
vlmProvider: Mapped[str | None] = mapped_column(String(64))
vlmModel: Mapped[str | None] = mapped_column(String(128))
ocrProvider: Mapped[str | None] = mapped_column(String(64))
ocrModel: Mapped[str | None] = mapped_column(String(128))
engineVersion: Mapped[str | None] = mapped_column("engine_version", String(64))
llmProvider: Mapped[str | None] = mapped_column("llm_provider", String(64))
llmModel: Mapped[str | None] = mapped_column("llm_model", String(128))
vlmProvider: Mapped[str | None] = mapped_column("vlm_provider", String(64))
vlmModel: Mapped[str | None] = mapped_column("vlm_model", String(128))
ocrProvider: Mapped[str | None] = mapped_column("ocr_provider", String(64))
ocrModel: Mapped[str | None] = mapped_column("ocr_model", String(128))
# Rescue
rescueMode: Mapped[str | None] = mapped_column(String(32), comment="off/tier1/auto")
rescueApplied: Mapped[bool] = mapped_column(Boolean, default=False, comment="是否执行 rescue")
rescueMode: Mapped[str | None] = mapped_column("rescue_mode", String(32), comment="off/tier1/auto")
rescueApplied: Mapped[bool] = mapped_column("rescue_applied", Boolean, default=False, comment="是否执行 rescue")
# 结果汇总
totalScore: Mapped[float | None] = mapped_column(Numeric(10, 2))
passedCount: Mapped[int | None] = mapped_column(Integer)
failedCount: Mapped[int | None] = mapped_column(Integer)
skippedCount: Mapped[int | None] = mapped_column(Integer)
resultStatus: Mapped[str | None] = mapped_column(String(32), comment="pass/fail/partial/error")
totalScore: Mapped[float | None] = mapped_column("total_score", Numeric(10, 2))
passedCount: Mapped[int | None] = mapped_column("passed_count", Integer)
failedCount: Mapped[int | None] = mapped_column("failed_count", Integer)
skippedCount: Mapped[int | None] = mapped_column("skipped_count", Integer)
resultStatus: Mapped[str | None] = mapped_column("result_status", String(32), comment="pass/fail/partial/error")
# 时间
startedAt: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
finishedAt: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
startedAt: Mapped[datetime | None] = mapped_column("started_at", DateTime(timezone=True))
finishedAt: Mapped[datetime | None] = mapped_column("finished_at", DateTime(timezone=True))
@@ -17,11 +17,11 @@ class LeauditDocument(BaseModel):
__tablename__ = "leaudit_documents"
Id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
bizDocumentId: Mapped[int] = mapped_column(BigInteger, unique=True, comment="关联业务 documents.id")
typeId: Mapped[int | None] = mapped_column(BigInteger, comment="文档类型ID")
processingStatus: Mapped[str | None] = mapped_column(String(64), default="waiting", comment="waiting/processing/completed/failed")
currentRunId: Mapped[int | None] = mapped_column(BigInteger, comment="最新有效 run id")
Id: Mapped[int] = mapped_column("id", BigInteger, primary_key=True, autoincrement=True)
bizDocumentId: Mapped[int] = mapped_column("biz_document_id", BigInteger, unique=True, comment="关联业务 documents.id")
typeId: Mapped[int | None] = mapped_column("type_id", BigInteger, comment="文档类型ID")
processingStatus: Mapped[str | None] = mapped_column("processing_status", String(64), default="waiting", comment="waiting/processing/completed/failed")
currentRunId: Mapped[int | None] = mapped_column("current_run_id", BigInteger, comment="最新有效 run id")
region: Mapped[str] = mapped_column(String(32), default="default", comment="所属地区: mz/yf/jy/cz/default")
@classmethod
@@ -3,6 +3,7 @@
from __future__ import annotations
from sqlalchemy import BigInteger, Boolean, String
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Mapped, mapped_column
from fastapi_common.fastapi_common_web.models import BaseModel
@@ -13,16 +14,42 @@ class LeauditDocumentFile(BaseModel):
__tablename__ = "leaudit_document_files"
Id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
documentId: Mapped[int] = mapped_column(BigInteger, comment="关联 leaudit_documents.id")
fileRole: Mapped[str] = mapped_column(String(64), comment="original/converted_pdf/merged_pdf/temp_input")
fileName: Mapped[str] = mapped_column(String(512), comment="文件名")
fileExt: Mapped[str | None] = mapped_column(String(32), comment="扩展名")
mimeType: Mapped[str | None] = mapped_column(String(128), comment="MIME")
fileSize: Mapped[int | None] = mapped_column(BigInteger, comment="文件大小")
sha256: Mapped[str | None] = mapped_column(String(64), comment="SHA256")
localPath: Mapped[str | None] = mapped_column(String(1024), comment="本地路径")
ossUrl: Mapped[str | None] = mapped_column(String(2048), comment="OSS 地址")
storageProvider: Mapped[str | None] = mapped_column(String(32), comment="oss/minio/local")
isActive: Mapped[bool] = mapped_column(Boolean, default=True, comment="当前生效文件")
createdBy: Mapped[int | None] = mapped_column(BigInteger, comment="上传人")
Id: Mapped[int] = mapped_column("id", BigInteger, primary_key=True, autoincrement=True)
documentId: Mapped[int] = mapped_column("document_id", BigInteger, comment="关联 leaudit_documents.id")
fileRole: Mapped[str] = mapped_column("file_role", String(64), comment="original/converted_pdf/merged_pdf/temp_input")
fileName: Mapped[str] = mapped_column("file_name", String(512), comment="文件名")
fileExt: Mapped[str | None] = mapped_column("file_ext", String(32), comment="扩展名")
mimeType: Mapped[str | None] = mapped_column("mime_type", String(128), comment="MIME")
fileSize: Mapped[int | None] = mapped_column("file_size", BigInteger, comment="文件大小")
sha256: Mapped[str | None] = mapped_column("sha256", String(64), comment="SHA256")
localPath: Mapped[str | None] = mapped_column("local_path", String(1024), comment="本地路径")
ossUrl: Mapped[str | None] = mapped_column("oss_url", String(2048), comment="OSS 地址")
storageProvider: Mapped[str | None] = mapped_column("storage_provider", String(32), comment="oss/minio/local")
isActive: Mapped[bool] = mapped_column("is_active", Boolean, default=True, comment="当前生效文件")
createdBy: Mapped[int | None] = mapped_column("created_by", BigInteger, comment="上传人")
@classmethod
async def deactivate_active_by_document(cls, session: AsyncSession, documentId: int) -> None:
    """Mark every currently active file of a document as inactive.

    Executes a single UPDATE on the given session; the statement is not
    committed here.

    Args:
        session: Async session used to execute the statement.
        documentId: Value matched against ``documentId``.
    """
    from sqlalchemy import update

    # Only rows of this document that are still flagged active are touched.
    activeRowFilters = (
        cls.documentId == documentId,
        cls.isActive.is_(True),
    )
    statement = update(cls).where(*activeRowFilters).values(isActive=False)
    await session.execute(statement)
@classmethod
async def count_by_document(cls, session: AsyncSession, documentId: int) -> int:
    """Count the file rows recorded for a document.

    Args:
        session: Async session used to run the query.
        documentId: Value matched against ``documentId``.

    Returns:
        The row count as an ``int``; ``0`` when the scalar result is falsy.
    """
    from sqlalchemy import func, select

    countQuery = select(func.count()).select_from(cls).where(cls.documentId == documentId)
    total = await session.scalar(countQuery)
    return int(total or 0)
@@ -1,9 +1,10 @@
"""LeAudit 服务层导出。"""
from fastapi_modules.fastapi_leaudit.services.auditService import IAuditService
from fastapi_modules.fastapi_leaudit.services.documentService import IDocumentService
from fastapi_modules.fastapi_leaudit.services.authService import IAuthService
from fastapi_modules.fastapi_leaudit.services.ossService import IOssService
from fastapi_modules.fastapi_leaudit.services.permissionService import IPermissionService
from fastapi_modules.fastapi_leaudit.services.ruleService import IRuleService
__all__ = ["IAuditService", "IAuthService", "IOssService", "IPermissionService", "IRuleService"]
__all__ = ["IAuditService", "IDocumentService", "IAuthService", "IOssService", "IPermissionService", "IRuleService"]
@@ -0,0 +1,26 @@
"""文档服务接口。"""
from abc import ABC, abstractmethod
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
class IDocumentService(ABC):
    """Abstract interface of the document service."""

    @abstractmethod
    async def Upload(
        self,
        FileName: str,
        FileContent: bytes,
        ContentType: str | None,
        TypeId: int | None = None,
        TypeCode: str | None = None,
        BizDocumentId: int | None = None,
        Region: str = "default",
        FileRole: str = "primary",
        CreatedBy: int | None = None,
        AutoRun: bool = False,
    ) -> DocumentUploadVO:
        """Upload a document and create the LeAudit document/file records.

        Args:
            FileName: Name of the uploaded file.
            FileContent: Raw bytes of the uploaded file.
            ContentType: Content type of the upload, if known.
            TypeId: Document type id.
            TypeCode: Document type code.
            BizDocumentId: Business document id.
            Region: Region the document belongs to.
            FileRole: Role of the uploaded file.
            CreatedBy: Id of the uploading user.
            AutoRun: Whether to trigger an audit run after the upload.

        Returns:
            The upload response describing the stored document.
        """
@@ -12,7 +12,15 @@ from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
from sqlalchemy import select, text
from fastapi_modules.fastapi_leaudit.domian.vo.auditVo import AuditRunVO, AuditResultVO
from fastapi_modules.fastapi_leaudit.domian.vo.auditVo import (
AuditArtifactVO,
AuditFieldResultVO,
AuditMetricsVO,
AuditRescueOutcomeVO,
AuditResultVO,
AuditRunErrorVO,
AuditRunVO,
)
from fastapi_modules.fastapi_leaudit.leaudit_bridge.fileSourceResolver import FileSourceResolver
from fastapi_modules.fastapi_leaudit.leaudit_bridge.tasks import dispatch_leaudit_task
from fastapi_modules.fastapi_leaudit.models import (
@@ -132,11 +140,18 @@ class AuditServiceImpl(IAuditService):
runId=run.Id,
documentId=run.documentId,
runNo=run.runNo,
documentFileId=run.documentFileId,
status=run.status,
phase=run.phase,
resultStatus=run.resultStatus,
ruleSetId=run.ruleSetId,
ruleVersionId=run.ruleVersionId,
ruleTypeId=run.ruleTypeId,
rescueApplied=run.rescueApplied or False,
totalScore=float(run.totalScore) if run.totalScore else None,
passedCount=run.passedCount,
failedCount=run.failedCount,
skippedCount=run.skippedCount,
startedAt=run.startedAt,
finishedAt=run.finishedAt,
)
@@ -151,11 +166,18 @@ class AuditServiceImpl(IAuditService):
runId=run.Id,
documentId=run.documentId,
runNo=run.runNo,
documentFileId=run.documentFileId,
status=run.status,
phase=run.phase,
resultStatus=run.resultStatus,
ruleSetId=run.ruleSetId,
ruleVersionId=run.ruleVersionId,
ruleTypeId=run.ruleTypeId,
rescueApplied=run.rescueApplied or False,
totalScore=float(run.totalScore) if run.totalScore else None,
passedCount=run.passedCount,
failedCount=run.failedCount,
skippedCount=run.skippedCount,
startedAt=run.startedAt,
finishedAt=run.finishedAt,
)
@@ -166,7 +188,7 @@ class AuditServiceImpl(IAuditService):
run = await session.get(LeauditAuditRun, RunId)
if not run:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "评查运行记录不存在")
result = await session.execute(
ruleResult = await session.execute(
text(
"""
SELECT
@@ -192,14 +214,204 @@ class AuditServiceImpl(IAuditService):
),
{"run_id": RunId},
)
rules = [dict(row) for row in result.mappings().all()]
fieldResult = await session.execute(
text(
"""
SELECT
field_name,
field_type,
value_text,
confidence,
grounding_method,
fallback_value,
raw_value_json,
meta_json
FROM leaudit_field_results
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
errorResult = await session.execute(
text(
"""
SELECT
stage,
level,
error_code,
message,
detail_json,
created_at
FROM leaudit_run_errors
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
rescueResult = await session.execute(
text(
"""
SELECT
rule_id,
status,
diagnosis,
diagnosis_confidence,
final_status,
failure_reason,
llm_calls,
vlm_calls,
duration_ms,
requires_human_review,
payload
FROM leaudit_rescue_outcomes
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
metricResult = await session.execute(
text(
"""
SELECT
ocr_seconds,
normalize_seconds,
extract_seconds,
evaluate_seconds,
rescue_seconds,
total_seconds,
page_count,
sub_document_count,
field_count,
rule_count,
llm_call_count,
vlm_call_count,
rescue_rule_count,
artifact_count
FROM leaudit_run_metrics
WHERE run_id = :run_id
ORDER BY id DESC
LIMIT 1
"""
),
{"run_id": RunId},
)
artifactResult = await session.execute(
text(
"""
SELECT
artifact_type,
artifact_role,
file_name,
file_ext,
mime_type,
file_size,
oss_url,
is_persisted
FROM leaudit_artifacts
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
rules = [dict(row) for row in ruleResult.mappings().all()]
fields = [
AuditFieldResultVO(
fieldName=row["field_name"],
fieldType=row["field_type"],
valueText=row["value_text"],
confidence=float(row["confidence"]) if row["confidence"] is not None else None,
groundingMethod=row["grounding_method"],
fallbackValue=row["fallback_value"],
rawValueJson=row["raw_value_json"],
metaJson=row["meta_json"],
)
for row in fieldResult.mappings().all()
]
errors = [
AuditRunErrorVO(
stage=row["stage"],
level=row["level"],
errorCode=row["error_code"],
message=row["message"],
detailJson=row["detail_json"],
createdAt=row["created_at"],
)
for row in errorResult.mappings().all()
]
rescueOutcomes = [
AuditRescueOutcomeVO(
ruleId=row["rule_id"],
status=row["status"],
diagnosis=row["diagnosis"],
diagnosisConfidence=float(row["diagnosis_confidence"]) if row["diagnosis_confidence"] is not None else None,
finalStatus=row["final_status"],
failureReason=row["failure_reason"],
llmCalls=row["llm_calls"],
vlmCalls=row["vlm_calls"],
durationMs=row["duration_ms"],
requiresHumanReview=bool(row["requires_human_review"]),
payload=row["payload"],
)
for row in rescueResult.mappings().all()
]
metricRow = metricResult.mappings().first()
metrics = (
AuditMetricsVO(
ocrSeconds=float(metricRow["ocr_seconds"]) if metricRow["ocr_seconds"] is not None else None,
normalizeSeconds=float(metricRow["normalize_seconds"]) if metricRow["normalize_seconds"] is not None else None,
extractSeconds=float(metricRow["extract_seconds"]) if metricRow["extract_seconds"] is not None else None,
evaluateSeconds=float(metricRow["evaluate_seconds"]) if metricRow["evaluate_seconds"] is not None else None,
rescueSeconds=float(metricRow["rescue_seconds"]) if metricRow["rescue_seconds"] is not None else None,
totalSeconds=float(metricRow["total_seconds"]) if metricRow["total_seconds"] is not None else None,
pageCount=metricRow["page_count"],
subDocumentCount=metricRow["sub_document_count"],
fieldCount=metricRow["field_count"],
ruleCount=metricRow["rule_count"],
llmCallCount=metricRow["llm_call_count"],
vlmCallCount=metricRow["vlm_call_count"],
rescueRuleCount=metricRow["rescue_rule_count"],
artifactCount=metricRow["artifact_count"],
)
if metricRow
else None
)
artifacts = [
AuditArtifactVO(
artifactType=row["artifact_type"],
artifactRole=row["artifact_role"],
fileName=row["file_name"],
fileExt=row["file_ext"],
mimeType=row["mime_type"],
fileSize=row["file_size"],
ossUrl=row["oss_url"],
isPersisted=row["is_persisted"],
)
for row in artifactResult.mappings().all()
]
return AuditResultVO(
runId=run.Id,
documentId=run.documentId,
documentFileId=run.documentFileId,
status=run.status,
totalScore=float(run.totalScore) if run.totalScore else None,
passedCount=run.passedCount or 0,
failedCount=run.failedCount or 0,
skippedCount=run.skippedCount or 0,
phase=run.phase,
resultStatus=run.resultStatus,
rescueApplied=run.rescueApplied or False,
ruleSetId=run.ruleSetId,
ruleVersionId=run.ruleVersionId,
startedAt=run.startedAt,
finishedAt=run.finishedAt,
rules=rules,
fields=fields,
errors=errors,
rescueOutcomes=rescueOutcomes,
metrics=metrics,
artifacts=artifacts,
)
@@ -0,0 +1,175 @@
"""文档服务实现。"""
from __future__ import annotations
import hashlib
import mimetypes
import time
from pathlib import Path
from sqlalchemy import text
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile
from fastapi_modules.fastapi_leaudit.services import IAuditService, IDocumentService, IOssService
from fastapi_modules.fastapi_leaudit.services.impl.auditServiceImpl import AuditServiceImpl
from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl
class DocumentServiceImpl(IDocumentService):
    """Document service implementation.

    Stores an uploaded file in object storage, records it as the new active
    ``LeauditDocumentFile`` of a ``LeauditDocument``, and can optionally start
    an audit run for that document right after the upload.
    """

    def __init__(
        self,
        OssService: IOssService | None = None,
        AuditService: IAuditService | None = None,
    ) -> None:
        """Wire the collaborating services.

        Args:
            OssService: Object-storage service; ``OssServiceImpl()`` is created
                when omitted.
            AuditService: Audit service; ``AuditServiceImpl()`` is created when
                omitted.
        """
        self.OssService = OssService or OssServiceImpl()
        self.AuditService = AuditService or AuditServiceImpl()

    async def Upload(
        self,
        FileName: str,
        FileContent: bytes,
        ContentType: str | None,
        TypeId: int | None = None,
        TypeCode: str | None = None,
        BizDocumentId: int | None = None,
        Region: str = "default",
        FileRole: str = "primary",
        CreatedBy: int | None = None,
        AutoRun: bool = False,
    ) -> DocumentUploadVO:
        """Upload a document and create the LeAudit document/file records.

        Args:
            FileName: Client-supplied file name. Only its final path component
                is kept, so directory parts sent by the client never reach the
                object key or the database row.
            FileContent: Raw file bytes; must be non-empty.
            ContentType: MIME type reported by the client. When falsy it is
                guessed from the file name, defaulting to
                ``application/octet-stream``.
            TypeId: Document type id. At least one of ``TypeId`` / ``TypeCode``
                is required; when both are given, both must match the same row.
            TypeCode: Document type code. Blank or whitespace-only values are
                treated as "not provided".
            BizDocumentId: Business document id to attach the file to. When
                falsy, a millisecond timestamp is used instead.
            Region: Region name; blank values fall back to ``"default"``.
            FileRole: File role; blank values fall back to ``"primary"``.
            CreatedBy: Id of the uploading user, stored on the file record.
            AutoRun: When true, an audit run is started after the records are
                committed.

        Returns:
            A ``DocumentUploadVO`` describing the stored document and file and,
            when ``AutoRun`` is set, the run returned by the audit service.

        Raises:
            LeauditException: 400 when the file name or content is empty or no
                document type was supplied; 404 when no non-deleted document
                type matches.
        """
        if not FileName:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件名不能为空")
        safeFileName = self._BaseFileName(FileName)
        if not safeFileName:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件名不能为空")
        if not FileContent:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件内容不能为空")

        # Normalise once so validation and lookup agree: a blank typeCode sent
        # together with a valid typeId must not add a `code = ''` filter.
        normalizedTypeCode = (TypeCode or "").strip() or None
        if not TypeId and normalizedTypeCode is None:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "typeId 与 typeCode 至少传一个")

        normalizedRegion = (Region or "default").strip() or "default"
        normalizedFileRole = (FileRole or "primary").strip() or "primary"
        fileExt = Path(safeFileName).suffix.lstrip(".").lower() or None
        mimeType = ContentType or mimetypes.guess_type(safeFileName)[0] or "application/octet-stream"
        fileSha256 = hashlib.sha256(FileContent).hexdigest()
        fileSize = len(FileContent)

        async with GetAsyncSession() as Session:
            typeRow = await self._ResolveDocumentType(Session, TypeId, normalizedTypeCode)
            if not typeRow:
                raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档类型不存在或已停用")
            resolvedTypeId = int(typeRow["id"])
            resolvedTypeCode = str(typeRow["code"])

            # NOTE(review): the millisecond-timestamp fallback can collide for
            # uploads arriving in the same millisecond, which would make them
            # share one document through the upsert below — confirm acceptable.
            resolvedBizDocumentId = BizDocumentId or int(time.time() * 1000)
            document = await LeauditDocument.upsert_by_biz_id(
                Session,
                bizDocumentId=resolvedBizDocumentId,
                typeId=resolvedTypeId,
                region=normalizedRegion,
                processingStatus="waiting",
            )

            # The version label is derived from how many files the document
            # already has, so the first upload becomes "v1".
            versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
            versionNo = f"v{versionCount + 1}"
            objectKey = OssPathUtils.BuildBusinessDocKey(
                Region=normalizedRegion,
                TypeCode=resolvedTypeCode,
                DocumentId=document.Id,
                Version=versionNo,
                FileRole=normalizedFileRole,
                FileName=safeFileName,
            )
            # NOTE(review): the object is stored before the DB commit; if the
            # commit fails the uploaded object is presumably left behind.
            ossUrl = await self.OssService.UploadBytes(
                ObjectKey=objectKey,
                Content=FileContent,
                ContentType=mimeType,
            )

            # Only one file per document stays active: retire the previous
            # active file(s) before inserting the new one.
            await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
            documentFile = LeauditDocumentFile(
                documentId=document.Id,
                fileRole=normalizedFileRole,
                fileName=safeFileName,
                fileExt=fileExt,
                mimeType=mimeType,
                fileSize=fileSize,
                sha256=fileSha256,
                localPath=None,
                ossUrl=ossUrl,
                storageProvider="minio",
                isActive=True,
                createdBy=CreatedBy,
            )
            Session.add(documentFile)
            await Session.flush()
            await Session.commit()
            await Session.refresh(document)
            await Session.refresh(documentFile)

            # Copy plain values while the instances are attached and freshly
            # loaded, so nothing below depends on the session still being open.
            documentId = document.Id
            storedBizDocumentId = document.bizDocumentId
            fileId = documentFile.Id
            storedFileName = documentFile.fileName
            processingStatus = document.processingStatus or "waiting"

        run = None
        if AutoRun:
            run = await self.AuditService.Run(DocumentId=documentId)
            # Report queued and in-flight runs uniformly as "running".
            processingStatus = "running" if run.status in {"pending", "running"} else run.status

        return DocumentUploadVO(
            documentId=documentId,
            bizDocumentId=storedBizDocumentId,
            fileId=fileId,
            typeId=resolvedTypeId,
            typeCode=resolvedTypeCode,
            region=normalizedRegion,
            fileName=storedFileName,
            ossUrl=ossUrl,
            processingStatus=processingStatus,
            autoRunTriggered=AutoRun,
            run=run,
        )

    @staticmethod
    def _BaseFileName(FileName: str) -> str:
        """Return the final path component of a client-supplied file name.

        Both ``/`` and ``\\`` are treated as separators regardless of the host
        OS. ``"."`` and ``".."`` are mapped to an empty string so the caller
        can reject them like a missing name.
        """
        baseName = FileName.replace("\\", "/").rsplit("/", 1)[-1]
        return "" if baseName in {".", ".."} else baseName

    @staticmethod
    async def _ResolveDocumentType(Session, TypeId: int | None, TypeCode: str | None):
        """Fetch the non-deleted document type matching the given id and/or code.

        Returns the first matching row as a mapping with ``id`` and ``code``,
        or ``None`` when nothing matches or no filter was supplied.
        """
        filters = []
        params: dict[str, object] = {}
        if TypeId is not None:
            filters.append("id = :type_id")
            params["type_id"] = TypeId
        if TypeCode is not None:
            filters.append("code = :type_code")
            params["type_code"] = TypeCode
        if not filters:
            # Never fall back to "any type" when the caller supplied nothing.
            return None
        # Only fixed SQL fragments are concatenated; values stay bound params.
        statement = text(
            "SELECT id, code FROM leaudit_document_types WHERE "
            + " AND ".join(filters)
            + " AND deleted_at IS NULL LIMIT 1"
        )
        typeResult = await Session.execute(statement, params)
        return typeResult.mappings().first()