feat: add async worker queues and retry controls

This commit is contained in:
wren
2026-04-29 11:48:09 +08:00
parent e738398eb6
commit f3b83c9979
16 changed files with 1316 additions and 96 deletions
@@ -9,10 +9,17 @@ Keeps docauditai-specific fixes outside ``services/leaudit/**``:
from __future__ import annotations
import asyncio
import logging
from io import BytesIO
from pathlib import Path
from fastapi_admin.config import (
LEAUDIT_SIGNATURE_PROBE_CONCURRENCY,
LEAUDIT_SIGNATURE_PROBE_RETRY_BACKOFF_BASE_SECONDS,
LEAUDIT_SIGNATURE_PROBE_RETRY_MAX_ATTEMPTS,
LEAUDIT_SIGNATURE_PROBE_TIMEOUT,
)
from leaudit.ocr.base import BaseOCRClient
from leaudit.ocr.models import OcrResult, VisualManifestItem
@@ -120,18 +127,20 @@ async def _inject_docx_signature_candidates(
if parent_key:
parent_to_items.setdefault(parent_key, []).append(item)
for parent_key, items in parent_to_items.items():
sem = asyncio.Semaphore(max(1, int(LEAUDIT_SIGNATURE_PROBE_CONCURRENCY)))
async def _probe_parent(parent_key: str, items: list[VisualManifestItem]) -> None:
if any((it.label or "") == "signature" for it in items):
continue
return
parent_bytes = ocr_result.get_image_bytes(parent_key)
if not parent_bytes:
continue
return
try:
image = Image.open(BytesIO(parent_bytes))
except Exception as exc:
log.warning("failed to open parent image %s: %s", parent_key, exc)
continue
return
width, height = image.size
for candidate_bbox in _signature_candidate_boxes(items, width, height):
@@ -139,11 +148,13 @@ async def _inject_docx_signature_candidates(
crop = image.crop(tuple(candidate_bbox))
buf = BytesIO()
crop.save(buf, format="PNG")
result = await _classify_signature_candidate(
vlm_client,
buf.getvalue(),
"这是合同签章页里疑似法人签名的候选区域,请优先判断是否为手写签名。",
)
async with sem:
result = await _classify_signature_candidate(
vlm_client,
buf.getvalue(),
"这是合同签章页里疑似法人签名的候选区域,请优先判断是否为手写签名。",
parent_key=parent_key,
)
except Exception as exc:
log.warning("signature probe failed for %s: %s", parent_key, exc)
continue
@@ -166,33 +177,66 @@ async def _inject_docx_signature_candidates(
)
break
if parent_to_items:
await asyncio.gather(
*(_probe_parent(parent_key, items) for parent_key, items in parent_to_items.items()),
return_exceptions=False,
)
async def _classify_signature_candidate(
    vlm_client: object,
    image_bytes: bytes,
    user_hint: str,
    *,
    parent_key: str | None = None,
) -> object:
    """Classify one signature candidate crop, retrying on failure.

    The first attempt calls ``vlm_client.classify_visual`` directly. Every
    later attempt builds a new ``ResilientQwenVLMClient`` from the caller
    client's ``base_url`` / ``api_key`` / ``model`` / ``timeout`` attributes
    and closes it again before the next attempt starts. Each call is bounded
    by ``LEAUDIT_SIGNATURE_PROBE_TIMEOUT`` seconds via ``asyncio.wait_for``.

    Args:
        vlm_client: Client exposing an awaitable ``classify_visual`` method.
        image_bytes: Encoded image of the candidate region.
        user_hint: Prompt hint forwarded to ``classify_visual``.
        parent_key: Key of the parent image, used only in log and error text.

    Returns:
        Whatever ``classify_visual`` returns on the first successful attempt.

    Raises:
        RuntimeError: When every attempt failed; chained from the last error.
    """
    # Clamp configuration so a bad value can never disable the timeout,
    # skip the loop entirely, or produce a negative sleep.
    timeout = max(1, int(LEAUDIT_SIGNATURE_PROBE_TIMEOUT))
    max_attempts = max(1, int(LEAUDIT_SIGNATURE_PROBE_RETRY_MAX_ATTEMPTS))
    backoff_base = max(0.0, float(LEAUDIT_SIGNATURE_PROBE_RETRY_BACKOFF_BASE_SECONDS))
    last_error: Exception | None = None

    for attempt in range(max_attempts):
        current_client = vlm_client
        fresh = None
        try:
            if attempt > 0:
                # Imported lazily: only needed once a retry actually happens.
                from fastapi_modules.fastapi_leaudit.leaudit_bridge.resilient_clients import ResilientQwenVLMClient

                # NOTE(review): retry_max_attempts=1 / zero backoff presumably
                # keeps the client's own retry from nesting inside this loop
                # - confirm against ResilientQwenVLMClient.
                fresh = ResilientQwenVLMClient(
                    base_url=getattr(vlm_client, "base_url"),
                    api_key=getattr(vlm_client, "api_key", ""),
                    model=getattr(vlm_client, "model"),
                    timeout=getattr(vlm_client, "timeout", 90.0),
                    retry_max_attempts=1,
                    retry_backoff_base_seconds=0.0,
                )
                current_client = fresh
            return await asyncio.wait_for(
                current_client.classify_visual(image_bytes, user_hint=user_hint),
                timeout=timeout,
            )
        except Exception as exc:
            last_error = exc
        finally:
            # Release the per-attempt client before any backoff sleep so it
            # is not held open while this coroutine is merely waiting.
            if fresh is not None:
                await fresh.close()

        if attempt < max_attempts - 1:
            delay = backoff_base * (2 ** attempt)
            log.warning(
                "signature probe attempt %s/%s failed for %s, retrying after %.2fs (timeout=%ss): %s",
                attempt + 1,
                max_attempts,
                parent_key or "-",
                delay,
                timeout,
                last_error,
            )
            await asyncio.sleep(delay)

    # repr() keeps the exception type visible; str() of a timeout error is
    # empty and would leave callers with a blank failure message.
    raise RuntimeError(
        f"signature probe failed after {max_attempts} attempt(s) "
        f"for {parent_key or '-'}: {last_error!r}"
    ) from last_error
def _signature_candidate_boxes(