fix: improve cross-review upload and OCR failure handling

This commit is contained in:
wren
2026-05-12 17:57:47 +08:00
parent cf8d4d4261
commit 5e1a55430a
6 changed files with 66 additions and 6 deletions
@@ -486,6 +486,7 @@ class StorageAdapter:
run_id: int | None,
phase: str | None,
message: str,
error_code: str = "AUDIT_RUN_FAILED",
detail_json: dict[str, Any] | None = None,
) -> None:
"""记录运行失败并更新主表。"""
@@ -496,7 +497,7 @@ class StorageAdapter:
stage=phase or "persist",
messages=[message],
level="fatal",
error_code="AUDIT_RUN_FAILED",
error_code=error_code,
detail_json=detail_json,
)
async with GetAsyncSession() as session:
@@ -12,6 +12,7 @@ from typing import Any, Dict, Optional
import fitz
from fastapi_common.fastapi_common_logger import logger
from leaudit.converters import doc2pdf
from leaudit.ocr.chandra_client import ChandraOCRError
from sqlalchemy import select
from fastapi_admin.celery_app import celery_app
@@ -45,6 +46,48 @@ from fastapi_modules.fastapi_leaudit.models import (
log = logger
def _classify_run_failure(exc: Exception) -> tuple[str, str, dict[str, Any]]:
    """Normalize a low-level exception into persistable, displayable failure info.

    Args:
        exc: The exception that aborted the run.

    Returns:
        A ``(error_code, user_message, detail)`` tuple:

        * ``error_code`` -- ``AUDIT_RUN_FAILED`` for any non-OCR exception,
          otherwise one of the ``OCR_*`` codes chosen below.
        * ``user_message`` -- for OCR errors, a fixed user-facing message;
          otherwise the raw exception text.
        * ``detail`` -- a dict that always carries ``rawMessage`` and
          ``errorType``; OCR errors additionally carry ``service`` and
          ``reason``.
    """
    # An exception with an empty/blank message still needs something to show,
    # so fall back to the exception class name.
    raw_message = str(exc).strip() or exc.__class__.__name__
    detail: dict[str, Any] = {
        "rawMessage": raw_message,
        "errorType": type(exc).__name__,
    }

    if not isinstance(exc, ChandraOCRError):
        # Non-OCR failures keep the generic code and surface the raw text.
        return "AUDIT_RUN_FAILED", raw_message, detail

    detail["service"] = "ocr"
    lower_message = raw_message.lower()

    # Ordered rules: the first rule with any keyword present in the lowercased
    # message wins. Connection problems are checked before timeouts, so a
    # message mentioning both is reported as a connection failure.
    rules: tuple[tuple[tuple[str, ...], str, str, str], ...] = (
        (
            ("all connection attempts failed", "connect"),
            "OCR_SERVICE_UNAVAILABLE",
            "OCR服务暂时不可用,文档未完成识别,请稍后重试。",
            "connection_failed",
        ),
        (
            # "timed out" covers timeout errors whose text never contains the
            # single word "timeout" (e.g. "The read operation timed out").
            ("timeout", "timed out"),
            "OCR_SERVICE_TIMEOUT",
            "OCR服务处理超时,文档未完成识别,请稍后重试。",
            "timeout",
        ),
        (
            ("ocr api returned",),
            "OCR_SERVICE_BAD_RESPONSE",
            "OCR服务响应异常,文档未完成识别,请稍后重试。",
            "bad_status",
        ),
    )
    for keywords, error_code, user_message, reason in rules:
        if any(keyword in lower_message for keyword in keywords):
            return error_code, user_message, {**detail, "reason": reason}

    # OCR error that matched none of the known patterns.
    return (
        "OCR_PROCESSING_FAILED",
        "OCR处理失败,文档未完成识别,请稍后重试。",
        {**detail, "reason": "ocr_failed"},
    )
def leaudit_process_document(
document_id: int,
file_content: bytes,
@@ -164,6 +207,7 @@ def leaudit_process_document(
except Exception as e:
log.error(f"[任务ID: {task_id}] leaudit管线失败: {e}", exc_info=True)
error_code, user_message, error_detail = _classify_run_failure(e)
try:
loop.run_until_complete(_update_status_safe(document_id, "failed"))
if 'run_id' in locals():
@@ -175,11 +219,12 @@ def leaudit_process_document(
document_id,
run_id=run_id,
phase=failed_phase,
message=str(e),
message=user_message,
error_code=error_code,
detail_json={
"taskId": task_id,
"filename": filename,
"errorType": type(e).__name__,
**error_detail,
},
)
)