feat: wire native leaudit upload flow

This commit is contained in:
wren
2026-04-28 16:53:16 +08:00
parent c776af598a
commit e738398eb6
26 changed files with 7364 additions and 5778 deletions
@@ -12,7 +12,15 @@ from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
from sqlalchemy import select, text
from fastapi_modules.fastapi_leaudit.domian.vo.auditVo import AuditRunVO, AuditResultVO
from fastapi_modules.fastapi_leaudit.domian.vo.auditVo import (
AuditArtifactVO,
AuditFieldResultVO,
AuditMetricsVO,
AuditRescueOutcomeVO,
AuditResultVO,
AuditRunErrorVO,
AuditRunVO,
)
from fastapi_modules.fastapi_leaudit.leaudit_bridge.fileSourceResolver import FileSourceResolver
from fastapi_modules.fastapi_leaudit.leaudit_bridge.tasks import dispatch_leaudit_task
from fastapi_modules.fastapi_leaudit.models import (
@@ -132,11 +140,18 @@ class AuditServiceImpl(IAuditService):
runId=run.Id,
documentId=run.documentId,
runNo=run.runNo,
documentFileId=run.documentFileId,
status=run.status,
phase=run.phase,
resultStatus=run.resultStatus,
ruleSetId=run.ruleSetId,
ruleVersionId=run.ruleVersionId,
ruleTypeId=run.ruleTypeId,
rescueApplied=run.rescueApplied or False,
totalScore=float(run.totalScore) if run.totalScore else None,
passedCount=run.passedCount,
failedCount=run.failedCount,
skippedCount=run.skippedCount,
startedAt=run.startedAt,
finishedAt=run.finishedAt,
)
@@ -151,11 +166,18 @@ class AuditServiceImpl(IAuditService):
runId=run.Id,
documentId=run.documentId,
runNo=run.runNo,
documentFileId=run.documentFileId,
status=run.status,
phase=run.phase,
resultStatus=run.resultStatus,
ruleSetId=run.ruleSetId,
ruleVersionId=run.ruleVersionId,
ruleTypeId=run.ruleTypeId,
rescueApplied=run.rescueApplied or False,
totalScore=float(run.totalScore) if run.totalScore else None,
passedCount=run.passedCount,
failedCount=run.failedCount,
skippedCount=run.skippedCount,
startedAt=run.startedAt,
finishedAt=run.finishedAt,
)
@@ -166,7 +188,7 @@ class AuditServiceImpl(IAuditService):
run = await session.get(LeauditAuditRun, RunId)
if not run:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "评查运行记录不存在")
result = await session.execute(
ruleResult = await session.execute(
text(
"""
SELECT
@@ -192,14 +214,204 @@ class AuditServiceImpl(IAuditService):
),
{"run_id": RunId},
)
rules = [dict(row) for row in result.mappings().all()]
fieldResult = await session.execute(
text(
"""
SELECT
field_name,
field_type,
value_text,
confidence,
grounding_method,
fallback_value,
raw_value_json,
meta_json
FROM leaudit_field_results
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
errorResult = await session.execute(
text(
"""
SELECT
stage,
level,
error_code,
message,
detail_json,
created_at
FROM leaudit_run_errors
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
rescueResult = await session.execute(
text(
"""
SELECT
rule_id,
status,
diagnosis,
diagnosis_confidence,
final_status,
failure_reason,
llm_calls,
vlm_calls,
duration_ms,
requires_human_review,
payload
FROM leaudit_rescue_outcomes
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
metricResult = await session.execute(
text(
"""
SELECT
ocr_seconds,
normalize_seconds,
extract_seconds,
evaluate_seconds,
rescue_seconds,
total_seconds,
page_count,
sub_document_count,
field_count,
rule_count,
llm_call_count,
vlm_call_count,
rescue_rule_count,
artifact_count
FROM leaudit_run_metrics
WHERE run_id = :run_id
ORDER BY id DESC
LIMIT 1
"""
),
{"run_id": RunId},
)
artifactResult = await session.execute(
text(
"""
SELECT
artifact_type,
artifact_role,
file_name,
file_ext,
mime_type,
file_size,
oss_url,
is_persisted
FROM leaudit_artifacts
WHERE run_id = :run_id
ORDER BY id ASC
"""
),
{"run_id": RunId},
)
rules = [dict(row) for row in ruleResult.mappings().all()]
fields = [
AuditFieldResultVO(
fieldName=row["field_name"],
fieldType=row["field_type"],
valueText=row["value_text"],
confidence=float(row["confidence"]) if row["confidence"] is not None else None,
groundingMethod=row["grounding_method"],
fallbackValue=row["fallback_value"],
rawValueJson=row["raw_value_json"],
metaJson=row["meta_json"],
)
for row in fieldResult.mappings().all()
]
errors = [
AuditRunErrorVO(
stage=row["stage"],
level=row["level"],
errorCode=row["error_code"],
message=row["message"],
detailJson=row["detail_json"],
createdAt=row["created_at"],
)
for row in errorResult.mappings().all()
]
rescueOutcomes = [
AuditRescueOutcomeVO(
ruleId=row["rule_id"],
status=row["status"],
diagnosis=row["diagnosis"],
diagnosisConfidence=float(row["diagnosis_confidence"]) if row["diagnosis_confidence"] is not None else None,
finalStatus=row["final_status"],
failureReason=row["failure_reason"],
llmCalls=row["llm_calls"],
vlmCalls=row["vlm_calls"],
durationMs=row["duration_ms"],
requiresHumanReview=bool(row["requires_human_review"]),
payload=row["payload"],
)
for row in rescueResult.mappings().all()
]
metricRow = metricResult.mappings().first()
metrics = (
AuditMetricsVO(
ocrSeconds=float(metricRow["ocr_seconds"]) if metricRow["ocr_seconds"] is not None else None,
normalizeSeconds=float(metricRow["normalize_seconds"]) if metricRow["normalize_seconds"] is not None else None,
extractSeconds=float(metricRow["extract_seconds"]) if metricRow["extract_seconds"] is not None else None,
evaluateSeconds=float(metricRow["evaluate_seconds"]) if metricRow["evaluate_seconds"] is not None else None,
rescueSeconds=float(metricRow["rescue_seconds"]) if metricRow["rescue_seconds"] is not None else None,
totalSeconds=float(metricRow["total_seconds"]) if metricRow["total_seconds"] is not None else None,
pageCount=metricRow["page_count"],
subDocumentCount=metricRow["sub_document_count"],
fieldCount=metricRow["field_count"],
ruleCount=metricRow["rule_count"],
llmCallCount=metricRow["llm_call_count"],
vlmCallCount=metricRow["vlm_call_count"],
rescueRuleCount=metricRow["rescue_rule_count"],
artifactCount=metricRow["artifact_count"],
)
if metricRow
else None
)
artifacts = [
AuditArtifactVO(
artifactType=row["artifact_type"],
artifactRole=row["artifact_role"],
fileName=row["file_name"],
fileExt=row["file_ext"],
mimeType=row["mime_type"],
fileSize=row["file_size"],
ossUrl=row["oss_url"],
isPersisted=row["is_persisted"],
)
for row in artifactResult.mappings().all()
]
return AuditResultVO(
runId=run.Id,
documentId=run.documentId,
documentFileId=run.documentFileId,
status=run.status,
totalScore=float(run.totalScore) if run.totalScore else None,
passedCount=run.passedCount or 0,
failedCount=run.failedCount or 0,
skippedCount=run.skippedCount or 0,
phase=run.phase,
resultStatus=run.resultStatus,
rescueApplied=run.rescueApplied or False,
ruleSetId=run.ruleSetId,
ruleVersionId=run.ruleVersionId,
startedAt=run.startedAt,
finishedAt=run.finishedAt,
rules=rules,
fields=fields,
errors=errors,
rescueOutcomes=rescueOutcomes,
metrics=metrics,
artifacts=artifacts,
)
@@ -0,0 +1,175 @@
"""文档服务实现。"""
from __future__ import annotations
import hashlib
import mimetypes
import time
from pathlib import Path
from sqlalchemy import text
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile
from fastapi_modules.fastapi_leaudit.services import IAuditService, IDocumentService, IOssService
from fastapi_modules.fastapi_leaudit.services.impl.auditServiceImpl import AuditServiceImpl
from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl
class DocumentServiceImpl(IDocumentService):
    """Document service implementation.

    Uploads file content through the injected OSS service, records the
    LeAudit document / document-file rows, and can optionally start an
    audit run for the uploaded document.
    """

    def __init__(
        self,
        OssService: IOssService | None = None,
        AuditService: IAuditService | None = None,
    ) -> None:
        """Store collaborators, falling back to the default implementations."""
        self.OssService = OssService or OssServiceImpl()
        self.AuditService = AuditService or AuditServiceImpl()

    @staticmethod
    async def _ResolveDocumentType(
        Session,
        TypeId: int | None,
        TypeCode: str | None,
    ) -> tuple[int, str]:
        """Look up a non-deleted document type by id and/or code.

        Args:
            Session: Open async session used to run the lookup.
            TypeId: Type primary key, or None to skip the id filter.
            TypeCode: Type code, or None to skip the code filter.

        Returns:
            The ``(id, code)`` pair of the first matching row.

        Raises:
            LeauditException: 404 when no matching row exists.
        """
        # Only fixed SQL fragments are concatenated; every value is bound.
        conditions: list[str] = []
        params: dict[str, object] = {}
        if TypeId is not None:
            conditions.append("id = :type_id")
            params["type_id"] = TypeId
        if TypeCode is not None:
            conditions.append("code = :type_code")
            params["type_code"] = TypeCode
        conditions.append("deleted_at IS NULL")

        statement = text(
            "SELECT id, code FROM leaudit_document_types WHERE "
            + " AND ".join(conditions)
            + " LIMIT 1"
        )
        typeRow = (await Session.execute(statement, params)).mappings().first()
        if not typeRow:
            raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档类型不存在或已停用")
        return int(typeRow["id"]), str(typeRow["code"])

    async def Upload(
        self,
        FileName: str,
        FileContent: bytes,
        ContentType: str | None,
        TypeId: int | None = None,
        TypeCode: str | None = None,
        BizDocumentId: int | None = None,
        Region: str = "default",
        FileRole: str = "primary",
        CreatedBy: int | None = None,
        AutoRun: bool = False,
    ) -> DocumentUploadVO:
        """Upload a file and create the LeAudit document / file records.

        Args:
            FileName: Original file name; must be non-empty.
            FileContent: Raw file bytes; must be non-empty.
            ContentType: MIME type supplied by the caller; when falsy it is
                guessed from the file name, then defaults to
                ``application/octet-stream``.
            TypeId: Document type id. A falsy value counts as "not given".
            TypeCode: Document type code. Blank values count as "not given".
            BizDocumentId: Business document id; when falsy a
                millisecond-timestamp id is generated.
            Region: Region label; blank values fall back to ``"default"``.
            FileRole: File role label; blank values fall back to ``"primary"``.
            CreatedBy: Id stored as the creator of the file row.
            AutoRun: When true, start an audit run after the commit.

        Returns:
            A DocumentUploadVO describing the stored document, file and,
            when ``AutoRun`` is set, the started run.

        Raises:
            LeauditException: 400 on missing name / content / type selector,
                404 when the document type cannot be resolved.
        """
        if not FileName:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件名不能为空")
        if not FileContent:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件内容不能为空")

        # Normalize the type selectors once so validation and lookup agree:
        # a blank code (or a zero id) must not add an unsatisfiable filter
        # next to an otherwise valid selector.
        typeId = TypeId or None
        typeCode = (TypeCode or "").strip() or None
        if typeId is None and typeCode is None:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "typeId 与 typeCode 至少传一个")

        normalizedRegion = (Region or "default").strip() or "default"
        normalizedFileRole = (FileRole or "primary").strip() or "primary"
        fileExt = Path(FileName).suffix.lstrip(".").lower() or None
        mimeType = ContentType or mimetypes.guess_type(FileName)[0] or "application/octet-stream"
        fileSha256 = hashlib.sha256(FileContent).hexdigest()
        fileSize = len(FileContent)

        async with GetAsyncSession() as Session:
            resolvedTypeId, resolvedTypeCode = await self._ResolveDocumentType(
                Session, typeId, typeCode
            )

            # Fall back to a millisecond timestamp when no business id is given.
            # NOTE(review): two uploads in the same millisecond would share an
            # id — confirm whether that is acceptable for upsert_by_biz_id.
            resolvedBizDocumentId = BizDocumentId or int(time.time() * 1000)
            document = await LeauditDocument.upsert_by_biz_id(
                Session,
                bizDocumentId=resolvedBizDocumentId,
                typeId=resolvedTypeId,
                region=normalizedRegion,
                processingStatus="waiting",
            )

            # The version label is derived from the number of existing files.
            versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
            versionNo = f"v{versionCount + 1}"
            objectKey = OssPathUtils.BuildBusinessDocKey(
                Region=normalizedRegion,
                TypeCode=resolvedTypeCode,
                DocumentId=document.Id,
                Version=versionNo,
                FileRole=normalizedFileRole,
                FileName=FileName,
            )
            # NOTE(review): the object is uploaded before the DB commit; a
            # failed commit would leave it without a file row — confirm
            # whether cleanup is required.
            ossUrl = await self.OssService.UploadBytes(
                ObjectKey=objectKey,
                Content=FileContent,
                ContentType=mimeType,
            )

            # Deactivate the previously active file(s) before adding the new one.
            await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
            documentFile = LeauditDocumentFile(
                documentId=document.Id,
                fileRole=normalizedFileRole,
                fileName=FileName,
                fileExt=fileExt,
                mimeType=mimeType,
                fileSize=fileSize,
                sha256=fileSha256,
                localPath=None,
                ossUrl=ossUrl,
                storageProvider="minio",
                isActive=True,
                createdBy=CreatedBy,
            )
            Session.add(documentFile)
            await Session.flush()
            await Session.commit()
            # Reload both rows after the commit before reading their attributes.
            await Session.refresh(document)
            await Session.refresh(documentFile)

            run = None
            processingStatus = document.processingStatus or "waiting"
            if AutoRun:
                run = await self.AuditService.Run(DocumentId=document.Id)
                # Report pending and running runs uniformly as "running".
                processingStatus = "running" if run.status in {"pending", "running"} else run.status

            return DocumentUploadVO(
                documentId=document.Id,
                bizDocumentId=document.bizDocumentId,
                fileId=documentFile.Id,
                typeId=resolvedTypeId,
                typeCode=resolvedTypeCode,
                region=normalizedRegion,
                fileName=documentFile.fileName,
                ossUrl=ossUrl,
                processingStatus=processingStatus,
                autoRunTriggered=AutoRun,
                run=run,
            )