fix: improve page quality vlm detection

2026-05-22 14:41:42 +08:00
parent 842b362150
commit 9434f2b22b
3 changed files with 159 additions and 7 deletions
@@ -1,5 +1,7 @@
 import pytest
+import httpx

+from fastapi_modules.fastapi_leaudit.leaudit_bridge.resilient_clients import ResilientQwenVLMClient
 from fastapi_modules.fastapi_leaudit.services.impl.pageQualityServiceImpl import PageQualityServiceImpl


@@ -32,6 +34,58 @@ async def test_vlm_page_quality_reject_result_is_used():
    assert score == 0.18
    assert "严重模糊" in reason
    assert "只输出 JSON" in service.VlmClient.prompts[0][0]
+    assert "内嵌照片" in service.VlmClient.prompts[0][0]
+    assert "即使页面周边文字清楚" in service.VlmClient.prompts[0][0]
+
+
+@pytest.mark.asyncio
+async def test_vlm_page_quality_embedded_evidence_blur_cannot_pass():
+    """Expect ("review", 0.42) for a "疑似模糊" verdict carrying a string score."""
+    vlm_answer = {
+        "quality_status": "疑似模糊",
+        "quality_score": "0.42",
+        "message": "内嵌证据照片主体发虚，门头文字不易辨认",
+    }
+    page_quality = PageQualityServiceImpl()
+    page_quality.VlmClient = _FakeVlmClient(vlm_answer)
+    classify = page_quality._classify_page_image_by_vlm
+
+    label, confidence, explanation = await classify(b"image-bytes")
+    assert label == "review"
+    assert confidence == 0.42
+    assert "内嵌证据照片" in explanation
+
+
+@pytest.mark.asyncio
+async def test_vlm_page_quality_chinese_reject_status_is_supported():
+    """Expect ("reject", 0.1) when the fake VLM answers with result "不通过"."""
+    vlm_answer = {
+        "result": "不通过",
+        "confidence": 0.1,
+        "detail": "证据照片严重模糊，关键场所无法辨认",
+    }
+    page_quality = PageQualityServiceImpl()
+    page_quality.VlmClient = _FakeVlmClient(vlm_answer)
+    classify = page_quality._classify_page_image_by_vlm
+
+    label, confidence, explanation = await classify(b"image-bytes")
+    assert label == "reject"
+    assert confidence == 0.1
+    assert "严重模糊" in explanation
+
+
+@pytest.mark.asyncio
+async def test_vlm_page_quality_json_string_result_is_supported():
+    """Expect ("review", 0.33, reason) when the fake VLM returns a JSON string."""
+    raw_reply = '{"status":"review","score":0.33,"reason":"页面内照片模糊"}'
+    page_quality = PageQualityServiceImpl()
+    page_quality.VlmClient = _FakeVlmClient(raw_reply)
+    classify = page_quality._classify_page_image_by_vlm
+
+    label, confidence, explanation = await classify(b"image-bytes")
+    assert label == "review"
+    assert confidence == 0.33
+    assert explanation == "页面内照片模糊"


@pytest.mark.asyncio
@@ -56,3 +110,32 @@ async def test_vlm_page_quality_error_falls_back_to_review_not_pass():
    assert status == "review"
    assert score == 0.5
    assert "VLM图片质量检测失败" in reason
+
+
+@pytest.mark.asyncio
+async def test_resilient_vlm_extract_multifield_keeps_raw_text_when_json_parse_fails(monkeypatch):
+    """Expect plain (non-JSON) VLM text to appear in both "result" and "reason"."""
+
+    plain_text_reply = "疑似模糊：内嵌证据照片主体发虚，建议人工复核"
+    completion_body = {"choices": [{"message": {"content": plain_text_reply}}]}
+
+    async def fake_post_with_retry(payload):
+        # Replacement for _post_with_retry: a 200 response with the canned body.
+        return httpx.Response(200, json=completion_body)
+
+    vlm = ResilientQwenVLMClient(
+        base_url="http://example.test",
+        api_key="x",
+        model="vlm-test",
+    )
+    monkeypatch.setattr(vlm, "_post_with_retry", fake_post_with_retry)
+
+    request_kwargs = {
+        "prompt": "图片质量检测",
+        "images_data_urls": ["data:image/png;base64,xxx"],
+    }
+    extracted = await vlm.extract_multifield(**request_kwargs)
+
+    # Both fields are checked against the canned text returned above.
+    assert extracted["result"].startswith("疑似模糊")
+    assert "内嵌证据照片" in extracted["reason"]