fix: improve cross-review upload and OCR failure handling

This commit is contained in:
wren
2026-05-12 17:57:47 +08:00
parent cf8d4d4261
commit 5e1a55430a
6 changed files with 66 additions and 6 deletions
@@ -2,7 +2,7 @@
from typing import Any from typing import Any
from fastapi import Depends, File, Query, UploadFile from fastapi import Depends, File, Form, Query, UploadFile
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi_common.fastapi_common_security.security import verify_access_token from fastapi_common.fastapi_common_security.security import verify_access_token
@@ -139,6 +139,8 @@ class CrossReviewController(BaseController):
async def UploadTaskDocument( async def UploadTaskDocument(
TaskId: int, TaskId: int,
file: UploadFile = File(..., description="上传文档"), file: UploadFile = File(..., description="上传文档"),
typeId: int | None = Form(None, description="文档类型ID"),
groupId: int | None = Form(None, description="文档子类型ID"),
payload: dict[str, Any] = Depends(verify_access_token), payload: dict[str, Any] = Depends(verify_access_token),
): ):
"""向交叉评查任务补传文档。""" """向交叉评查任务补传文档。"""
@@ -151,6 +153,8 @@ class CrossReviewController(BaseController):
FileName=file.filename or "upload.bin", FileName=file.filename or "upload.bin",
FileContent=content, FileContent=content,
ContentType=file.content_type, ContentType=file.content_type,
TypeId=typeId,
GroupId=groupId,
) )
return Result.success(data=Data, message="交叉评查任务文档上传成功") return Result.success(data=Data, message="交叉评查任务文档上传成功")
@@ -93,6 +93,8 @@ class DocumentListItemVO(BaseModel):
currentRunId: int | None = Field(None, description="当前运行ID") currentRunId: int | None = Field(None, description="当前运行ID")
runStatus: str | None = Field(None, description="当前运行状态") runStatus: str | None = Field(None, description="当前运行状态")
resultStatus: str | None = Field(None, description="当前结果状态") resultStatus: str | None = Field(None, description="当前结果状态")
latestErrorCode: str | None = Field(None, description="最新失败错误码")
latestErrorMessage: str | None = Field(None, description="最新失败错误信息")
totalScore: float | None = Field(None, description="总分") totalScore: float | None = Field(None, description="总分")
passedCount: int | None = Field(None, description="通过数") passedCount: int | None = Field(None, description="通过数")
failedCount: int | None = Field(None, description="失败数") failedCount: int | None = Field(None, description="失败数")
@@ -486,6 +486,7 @@ class StorageAdapter:
run_id: int | None, run_id: int | None,
phase: str | None, phase: str | None,
message: str, message: str,
error_code: str = "AUDIT_RUN_FAILED",
detail_json: dict[str, Any] | None = None, detail_json: dict[str, Any] | None = None,
) -> None: ) -> None:
"""记录运行失败并更新主表。""" """记录运行失败并更新主表。"""
@@ -496,7 +497,7 @@ class StorageAdapter:
stage=phase or "persist", stage=phase or "persist",
messages=[message], messages=[message],
level="fatal", level="fatal",
error_code="AUDIT_RUN_FAILED", error_code=error_code,
detail_json=detail_json, detail_json=detail_json,
) )
async with GetAsyncSession() as session: async with GetAsyncSession() as session:
@@ -12,6 +12,7 @@ from typing import Any, Dict, Optional
import fitz import fitz
from fastapi_common.fastapi_common_logger import logger from fastapi_common.fastapi_common_logger import logger
from leaudit.converters import doc2pdf from leaudit.converters import doc2pdf
from leaudit.ocr.chandra_client import ChandraOCRError
from sqlalchemy import select from sqlalchemy import select
from fastapi_admin.celery_app import celery_app from fastapi_admin.celery_app import celery_app
@@ -45,6 +46,48 @@ from fastapi_modules.fastapi_leaudit.models import (
log = logger log = logger
def _classify_run_failure(exc: Exception) -> tuple[str, str, dict[str, Any]]:
"""将底层异常归一化为可落库、可展示的失败信息。"""
raw_message = str(exc).strip() or exc.__class__.__name__
detail: dict[str, Any] = {
"rawMessage": raw_message,
"errorType": type(exc).__name__,
}
if isinstance(exc, ChandraOCRError):
lower_message = raw_message.lower()
detail["service"] = "ocr"
if "all connection attempts failed" in lower_message or "connect" in lower_message:
return (
"OCR_SERVICE_UNAVAILABLE",
"OCR服务暂时不可用,文档未完成识别,请稍后重试。",
{**detail, "reason": "connection_failed"},
)
if "timeout" in lower_message:
return (
"OCR_SERVICE_TIMEOUT",
"OCR服务处理超时,文档未完成识别,请稍后重试。",
{**detail, "reason": "timeout"},
)
if "ocr api returned" in lower_message:
return (
"OCR_SERVICE_BAD_RESPONSE",
"OCR服务响应异常,文档未完成识别,请稍后重试。",
{**detail, "reason": "bad_status"},
)
return (
"OCR_PROCESSING_FAILED",
"OCR处理失败,文档未完成识别,请稍后重试。",
{**detail, "reason": "ocr_failed"},
)
return (
"AUDIT_RUN_FAILED",
raw_message,
detail,
)
def leaudit_process_document( def leaudit_process_document(
document_id: int, document_id: int,
file_content: bytes, file_content: bytes,
@@ -164,6 +207,7 @@ def leaudit_process_document(
except Exception as e: except Exception as e:
log.error(f"[任务ID: {task_id}] leaudit管线失败: {e}", exc_info=True) log.error(f"[任务ID: {task_id}] leaudit管线失败: {e}", exc_info=True)
error_code, user_message, error_detail = _classify_run_failure(e)
try: try:
loop.run_until_complete(_update_status_safe(document_id, "failed")) loop.run_until_complete(_update_status_safe(document_id, "failed"))
if 'run_id' in locals(): if 'run_id' in locals():
@@ -175,11 +219,12 @@ def leaudit_process_document(
document_id, document_id,
run_id=run_id, run_id=run_id,
phase=failed_phase, phase=failed_phase,
message=str(e), message=user_message,
error_code=error_code,
detail_json={ detail_json={
"taskId": task_id, "taskId": task_id,
"filename": filename, "filename": filename,
"errorType": type(e).__name__, **error_detail,
}, },
) )
) )
@@ -107,6 +107,8 @@ class ICrossReviewService(ABC):
FileName: str, FileName: str,
FileContent: bytes, FileContent: bytes,
ContentType: str | None, ContentType: str | None,
TypeId: int | None = None,
GroupId: int | None = None,
) -> CrossReviewTaskDocumentUploadVO: ) -> CrossReviewTaskDocumentUploadVO:
"""向交叉评查任务补传文档。""" """向交叉评查任务补传文档。"""
... ...
@@ -832,6 +832,8 @@ class CrossReviewServiceImpl(ICrossReviewService):
FileName: str, FileName: str,
FileContent: bytes, FileContent: bytes,
ContentType: str | None, ContentType: str | None,
TypeId: int | None = None,
GroupId: int | None = None,
) -> CrossReviewTaskDocumentUploadVO: ) -> CrossReviewTaskDocumentUploadVO:
"""向交叉评查任务补传文档。""" """向交叉评查任务补传文档。"""
async with GetAsyncSession() as session: async with GetAsyncSession() as session:
@@ -857,12 +859,16 @@ class CrossReviewServiceImpl(ICrossReviewService):
if not taskMeta: if not taskMeta:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "交叉评查任务不存在") raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "交叉评查任务不存在")
resolvedTypeId = int(TypeId) if TypeId is not None else self._to_int(taskMeta.get("doc_type_id"))
resolvedGroupId = int(GroupId) if GroupId is not None else None
uploadResult = await self.DocumentService.Upload( uploadResult = await self.DocumentService.Upload(
FileName=FileName, FileName=FileName,
FileContent=FileContent, FileContent=FileContent,
ContentType=ContentType, ContentType=ContentType,
TypeId=self._to_int(taskMeta.get("doc_type_id")), TypeId=resolvedTypeId,
TypeCode=taskMeta.get("doc_type_code"), TypeCode=None if resolvedTypeId is not None else taskMeta.get("doc_type_code"),
GroupId=resolvedGroupId,
CreatedBy=CurrentUserId, CreatedBy=CurrentUserId,
AutoRun=True, AutoRun=True,
) )