后端:稳定租户链路与 VLM 图片质量检测 #9
@@ -239,7 +239,7 @@ class ResilientQwenVLMClient(QwenVLMClient):
|
||||
body = response.json()
|
||||
text = (body.get("choices") or [{}])[0].get("message", {}).get("content", "")
|
||||
parsed = _parse_json_loose(text)
|
||||
return parsed if isinstance(parsed, dict) else {}
|
||||
return parsed if isinstance(parsed, dict) else {"result": text, "reason": text}
|
||||
|
||||
|
||||
class ResilientChandraOCRClient(ChandraOCRClient):
|
||||
|
||||
@@ -6,6 +6,7 @@ from typing import Any
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
import logging
|
||||
import json
|
||||
|
||||
import fitz
|
||||
from leaudit.converters import doc2pdf
|
||||
@@ -30,9 +31,11 @@ _PAGE_QUALITY_VLM_PROMPT = """
|
||||
你是文档扫描图片质量检测员。请判断这 1 页文档图片是否适合继续做 OCR 与合同/公文评查。
|
||||
|
||||
判定标准:
|
||||
1. pass:文字主体清晰、方向正常、没有明显截断,能稳定阅读。
|
||||
2. review:存在轻微模糊、倾斜、阴影、低对比度、局部遮挡、轻微截断,建议人工确认但仍可能可读。
|
||||
3. reject:严重模糊、重影、过曝/过暗、页面大面积缺失、关键文字不可辨认、方向严重错误、空白页或非文档页,建议重拍。
|
||||
1. 必须同时检查整页扫描质量,以及页面内所有内嵌照片、证据照片、现场照片、截图、印章和签名图片的清晰度。
|
||||
2. pass:文字主体清晰、方向正常、没有明显截断;页面内嵌照片/证据照片也能辨认关键视觉信息。
|
||||
3. review:存在轻微模糊、倾斜、阴影、低对比度、局部遮挡、轻微截断;或内嵌照片/证据照片主体明显发虚、牌匾/场所/人物/关键物证不易辨认,建议人工确认但仍可能可用。
|
||||
4. reject:严重模糊、重影、过曝/过暗、页面大面积缺失、关键文字不可辨认、方向严重错误、空白页或非文档页;或内嵌证据照片主体无法辨认、关键证据信息不可用,建议重拍。
|
||||
5. 即使页面周边文字清楚,只要内嵌证据照片明显模糊,也不能判 pass,至少判 review,严重时判 reject。
|
||||
|
||||
只输出 JSON,不要输出 Markdown,不要解释额外文本:
|
||||
{"status":"pass|review|reject","score":0.0到1.0,"reason":"20字以内中文原因"}
|
||||
@@ -495,12 +498,28 @@ class PageQualityServiceImpl(IPageQualityService):
|
||||
logger.warning("VLM page quality detection failed: %s", exc)
|
||||
return "review", 0.5, "VLM图片质量检测失败,需人工确认"
|
||||
|
||||
status = str((result or {}).get("status") or "").strip().lower()
|
||||
result_dict = self._coerce_vlm_result(result)
|
||||
status = self._normalize_quality_status(
|
||||
self._first_non_empty(
|
||||
result_dict,
|
||||
("status", "quality_status", "qualityStatus", "result", "label", "decision", "conclusion"),
|
||||
)
|
||||
)
|
||||
reason = self._normalize_quality_reason(
|
||||
self._first_non_empty(
|
||||
result_dict,
|
||||
("reason", "quality_reason", "qualityReason", "message", "msg", "detail", "explanation", "description"),
|
||||
)
|
||||
)
|
||||
if status is None and reason:
|
||||
status = self._normalize_quality_status(reason)
|
||||
if status not in {"pass", "review", "reject"}:
|
||||
return "review", 0.5, "VLM返回结果不可用,需人工确认"
|
||||
|
||||
score = self._normalize_quality_score((result or {}).get("score"), status)
|
||||
reason = str((result or {}).get("reason") or "").strip() or None
|
||||
score = self._normalize_quality_score(
|
||||
self._first_non_empty(result_dict, ("score", "quality_score", "qualityScore", "confidence")),
|
||||
status,
|
||||
)
|
||||
if status != "pass" and not reason:
|
||||
reason = "页面图片质量需人工确认"
|
||||
return status, score, reason
|
||||
@@ -526,6 +545,56 @@ class PageQualityServiceImpl(IPageQualityService):
|
||||
return defaults[status]
|
||||
return max(0.0, min(1.0, score))
|
||||
|
||||
def _coerce_vlm_result(self, result: Any) -> dict[str, Any]:
    """Coerce a raw VLM reply into a dict of candidate fields.

    Args:
        result: Whatever the VLM client returned. A dict is passed through
            unchanged; a string is parsed as JSON; anything else is ignored.

    Returns:
        * the dict itself when ``result`` is already a dict;
        * the decoded object when the string is (or contains) a JSON object,
          including replies wrapped in a Markdown code fence or surrounded
          by extra prose;
        * ``{"result": text}`` when the string is valid JSON but not an
          object (e.g. a list or a bare scalar);
        * ``{"result": text, "reason": text}`` when no JSON can be decoded,
          so the raw text is still available as both fields;
        * ``{}`` for empty strings and unsupported types.
    """
    if isinstance(result, dict):
        return result
    if not isinstance(result, str):
        return {}

    text_result = result.strip()
    if not text_result:
        return {}

    # Try the whole reply first, then the outermost "{...}" span. The second
    # attempt recovers JSON that the model wrapped in ```json fences or
    # prefixed/suffixed with explanatory text.
    candidates = [text_result]
    start = text_result.find("{")
    end = text_result.rfind("}")
    if 0 <= start < end and (start, end) != (0, len(text_result) - 1):
        candidates.append(text_result[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
        # Valid JSON but not an object: keep the raw text as the result only.
        return {"result": text_result}

    return {"result": text_result, "reason": text_result}
|
||||
|
||||
def _first_non_empty(self, payload: dict[str, Any], keys: tuple[str, ...]) -> Any:
    """Return the first usable value found under any of ``keys``.

    Args:
        payload: Mapping to look values up in.
        keys: Candidate key names, checked in order.

    Returns:
        The first value that is not ``None`` and whose string form is not
        blank after stripping; ``None`` when no key yields such a value.
        Falsy-but-meaningful values such as ``0`` are returned, because the
        check is on the stringified value rather than on truthiness.
    """
    for key in keys:
        value = payload.get(key)
        if value is not None and str(value).strip():
            return value
    return None
|
||||
|
||||
def _normalize_quality_status(self, raw_status: Any) -> str | None:
|
||||
text_status = str(raw_status or "").strip().lower()
|
||||
if not text_status:
|
||||
return None
|
||||
compact_status = text_status.replace(" ", "").replace("_", "").replace("-", "")
|
||||
if compact_status in {"pass", "passed", "ok", "good", "clear", "readable"}:
|
||||
return "pass"
|
||||
if compact_status in {"review", "warn", "warning", "manual", "uncertain", "suspect", "suspicious"}:
|
||||
return "review"
|
||||
if compact_status in {"reject", "rejected", "fail", "failed", "bad", "unreadable", "retake"}:
|
||||
return "reject"
|
||||
|
||||
reject_keywords = ("不通过", "拒绝", "重拍", "不可读", "无法辨认", "无法识别", "严重", "大面积缺失", "空白页")
|
||||
review_keywords = ("复核", "人工", "疑似", "轻微", "建议确认", "建议人工", "模糊", "不清晰", "低对比", "发虚")
|
||||
pass_keywords = ("通过", "合格", "清晰", "可读")
|
||||
if any(keyword in text_status for keyword in reject_keywords):
|
||||
return "reject"
|
||||
if any(keyword in text_status for keyword in review_keywords):
|
||||
return "review"
|
||||
if any(keyword in text_status for keyword in pass_keywords):
|
||||
return "pass"
|
||||
return None
|
||||
|
||||
def _normalize_quality_reason(self, raw_reason: Any) -> str | None:
|
||||
reason = str(raw_reason or "").strip()
|
||||
if not reason:
|
||||
return None
|
||||
return reason[:80]
|
||||
|
||||
def _document_service(self):
|
||||
if self.DocumentService is None:
|
||||
from fastapi_modules.fastapi_leaudit.services.impl.documentServiceImpl import DocumentServiceImpl
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import pytest
|
||||
import httpx
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.leaudit_bridge.resilient_clients import ResilientQwenVLMClient
|
||||
from fastapi_modules.fastapi_leaudit.services.impl.pageQualityServiceImpl import PageQualityServiceImpl
|
||||
|
||||
|
||||
@@ -32,6 +34,58 @@ async def test_vlm_page_quality_reject_result_is_used():
|
||||
assert score == 0.18
|
||||
assert "严重模糊" in reason
|
||||
assert "只输出 JSON" in service.VlmClient.prompts[0][0]
|
||||
assert "内嵌照片" in service.VlmClient.prompts[0][0]
|
||||
assert "即使页面周边文字清楚" in service.VlmClient.prompts[0][0]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vlm_page_quality_embedded_evidence_blur_cannot_pass():
    """A blurry embedded evidence photo reported via alias keys yields ``review``.

    The fake reply uses ``quality_status`` / ``quality_score`` / ``message``
    with a Chinese status phrase and a string score.
    """
    service = PageQualityServiceImpl()
    service.VlmClient = _FakeVlmClient(
        {
            "quality_status": "疑似模糊",
            "quality_score": "0.42",
            "message": "内嵌证据照片主体发虚,门头文字不易辨认",
        }
    )

    status, score, reason = await service._classify_page_image_by_vlm(b"image-bytes")

    assert status == "review"
    assert score == 0.42
    assert "内嵌证据照片" in reason
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vlm_page_quality_chinese_reject_status_is_supported():
    """A Chinese "不通过" verdict under ``result`` is classified as ``reject``.

    The score is read from ``confidence`` and the reason from ``detail``.
    """
    service = PageQualityServiceImpl()
    service.VlmClient = _FakeVlmClient(
        {
            "result": "不通过",
            "confidence": 0.1,
            "detail": "证据照片严重模糊,关键场所无法辨认",
        }
    )

    status, score, reason = await service._classify_page_image_by_vlm(b"image-bytes")

    assert status == "reject"
    assert score == 0.1
    assert "严重模糊" in reason
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vlm_page_quality_json_string_result_is_supported():
    """A VLM reply delivered as a JSON string is decoded and used as-is."""
    service = PageQualityServiceImpl()
    service.VlmClient = _FakeVlmClient(
        '{"status":"review","score":0.33,"reason":"页面内照片模糊"}'
    )

    status, score, reason = await service._classify_page_image_by_vlm(b"image-bytes")

    assert status == "review"
    assert score == 0.33
    assert reason == "页面内照片模糊"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@@ -56,3 +110,32 @@ async def test_vlm_page_quality_error_falls_back_to_review_not_pass():
|
||||
assert status == "review"
|
||||
assert score == 0.5
|
||||
assert "VLM图片质量检测失败" in reason
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_resilient_vlm_extract_multifield_keeps_raw_text_when_json_parse_fails(monkeypatch):
    """Non-JSON model output is preserved under ``result`` and ``reason``.

    ``_post_with_retry`` is replaced with a stub returning a canned chat
    completion whose content is plain Chinese text, so no network is used.
    """
    client = ResilientQwenVLMClient(base_url="http://example.test", api_key="x", model="vlm-test")

    async def fake_post_with_retry(payload):
        # Canned chat-completion response with non-JSON message content.
        return httpx.Response(
            200,
            json={
                "choices": [
                    {
                        "message": {
                            "content": "疑似模糊:内嵌证据照片主体发虚,建议人工复核",
                        }
                    }
                ]
            },
        )

    monkeypatch.setattr(client, "_post_with_retry", fake_post_with_retry)

    result = await client.extract_multifield(
        prompt="图片质量检测",
        images_data_urls=["data:image/png;base64,xxx"],
    )

    assert result["result"].startswith("疑似模糊")
    assert "内嵌证据照片" in result["reason"]
|
||||
|
||||
Reference in New Issue
Block a user