feat: add async worker queues and retry controls
This commit is contained in:
@@ -9,10 +9,17 @@ Keeps docauditai-specific fixes outside ``services/leaudit/**``:
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi_admin.config import (
|
||||
LEAUDIT_SIGNATURE_PROBE_CONCURRENCY,
|
||||
LEAUDIT_SIGNATURE_PROBE_RETRY_BACKOFF_BASE_SECONDS,
|
||||
LEAUDIT_SIGNATURE_PROBE_RETRY_MAX_ATTEMPTS,
|
||||
LEAUDIT_SIGNATURE_PROBE_TIMEOUT,
|
||||
)
|
||||
from leaudit.ocr.base import BaseOCRClient
|
||||
from leaudit.ocr.models import OcrResult, VisualManifestItem
|
||||
|
||||
@@ -120,18 +127,20 @@ async def _inject_docx_signature_candidates(
|
||||
if parent_key:
|
||||
parent_to_items.setdefault(parent_key, []).append(item)
|
||||
|
||||
for parent_key, items in parent_to_items.items():
|
||||
sem = asyncio.Semaphore(max(1, int(LEAUDIT_SIGNATURE_PROBE_CONCURRENCY)))
|
||||
|
||||
async def _probe_parent(parent_key: str, items: list[VisualManifestItem]) -> None:
|
||||
if any((it.label or "") == "signature" for it in items):
|
||||
continue
|
||||
return
|
||||
parent_bytes = ocr_result.get_image_bytes(parent_key)
|
||||
if not parent_bytes:
|
||||
continue
|
||||
return
|
||||
|
||||
try:
|
||||
image = Image.open(BytesIO(parent_bytes))
|
||||
except Exception as exc:
|
||||
log.warning("failed to open parent image %s: %s", parent_key, exc)
|
||||
continue
|
||||
return
|
||||
|
||||
width, height = image.size
|
||||
for candidate_bbox in _signature_candidate_boxes(items, width, height):
|
||||
@@ -139,11 +148,13 @@ async def _inject_docx_signature_candidates(
|
||||
crop = image.crop(tuple(candidate_bbox))
|
||||
buf = BytesIO()
|
||||
crop.save(buf, format="PNG")
|
||||
result = await _classify_signature_candidate(
|
||||
vlm_client,
|
||||
buf.getvalue(),
|
||||
"这是合同签章页里疑似法人签名的候选区域,请优先判断是否为手写签名。",
|
||||
)
|
||||
async with sem:
|
||||
result = await _classify_signature_candidate(
|
||||
vlm_client,
|
||||
buf.getvalue(),
|
||||
"这是合同签章页里疑似法人签名的候选区域,请优先判断是否为手写签名。",
|
||||
parent_key=parent_key,
|
||||
)
|
||||
except Exception as exc:
|
||||
log.warning("signature probe failed for %s: %s", parent_key, exc)
|
||||
continue
|
||||
@@ -166,33 +177,66 @@ async def _inject_docx_signature_candidates(
|
||||
)
|
||||
break
|
||||
|
||||
if parent_to_items:
|
||||
await asyncio.gather(
|
||||
*(_probe_parent(parent_key, items) for parent_key, items in parent_to_items.items()),
|
||||
return_exceptions=False,
|
||||
)
|
||||
|
||||
|
||||
async def _classify_signature_candidate(
|
||||
vlm_client: object,
|
||||
image_bytes: bytes,
|
||||
user_hint: str,
|
||||
*,
|
||||
parent_key: str | None = None,
|
||||
) -> object:
|
||||
"""Classify with one retry using a fresh VLM client when needed."""
|
||||
try:
|
||||
return await vlm_client.classify_visual(image_bytes, user_hint=user_hint)
|
||||
except Exception as exc:
|
||||
log.warning("signature probe primary VLM failed, retrying fresh client: %s", exc)
|
||||
"""Classify with configurable retry using a fresh VLM client when needed."""
|
||||
timeout = max(1, int(LEAUDIT_SIGNATURE_PROBE_TIMEOUT))
|
||||
max_attempts = max(1, int(LEAUDIT_SIGNATURE_PROBE_RETRY_MAX_ATTEMPTS))
|
||||
backoff_base = max(0.0, float(LEAUDIT_SIGNATURE_PROBE_RETRY_BACKOFF_BASE_SECONDS))
|
||||
last_error: Exception | None = None
|
||||
|
||||
try:
|
||||
from leaudit.llm.qwen_vlm_client import QwenVLMClient
|
||||
|
||||
fresh = QwenVLMClient(
|
||||
base_url=getattr(vlm_client, "base_url"),
|
||||
api_key=getattr(vlm_client, "api_key", ""),
|
||||
model=getattr(vlm_client, "model"),
|
||||
timeout=getattr(vlm_client, "timeout", 90.0),
|
||||
)
|
||||
for attempt in range(max_attempts):
|
||||
current_client = vlm_client
|
||||
fresh = None
|
||||
try:
|
||||
return await fresh.classify_visual(image_bytes, user_hint=user_hint)
|
||||
if attempt > 0:
|
||||
from fastapi_modules.fastapi_leaudit.leaudit_bridge.resilient_clients import ResilientQwenVLMClient
|
||||
|
||||
fresh = ResilientQwenVLMClient(
|
||||
base_url=getattr(vlm_client, "base_url"),
|
||||
api_key=getattr(vlm_client, "api_key", ""),
|
||||
model=getattr(vlm_client, "model"),
|
||||
timeout=getattr(vlm_client, "timeout", 90.0),
|
||||
retry_max_attempts=1,
|
||||
retry_backoff_base_seconds=0.0,
|
||||
)
|
||||
current_client = fresh
|
||||
|
||||
return await asyncio.wait_for(
|
||||
current_client.classify_visual(image_bytes, user_hint=user_hint),
|
||||
timeout=timeout,
|
||||
)
|
||||
except Exception as exc:
|
||||
last_error = exc
|
||||
if attempt < max_attempts - 1:
|
||||
log.warning(
|
||||
"signature probe attempt %s/%s failed for %s, retrying after %.2fs (timeout=%ss): %s",
|
||||
attempt + 1,
|
||||
max_attempts,
|
||||
parent_key or "-",
|
||||
backoff_base * (2 ** attempt),
|
||||
timeout,
|
||||
exc,
|
||||
)
|
||||
await asyncio.sleep(backoff_base * (2 ** attempt))
|
||||
continue
|
||||
finally:
|
||||
await fresh.close()
|
||||
except Exception as exc:
|
||||
raise RuntimeError(exc) from exc
|
||||
if fresh is not None:
|
||||
await fresh.close()
|
||||
|
||||
raise RuntimeError(last_error) from last_error
|
||||
|
||||
|
||||
def _signature_candidate_boxes(
|
||||
|
||||
Reference in New Issue
Block a user