fix: stabilize backend services and frontend pointer

This commit is contained in:
wren
2026-05-22 12:21:43 +08:00
parent 5366868c5f
commit 0af41e370c
9 changed files with 265 additions and 16 deletions
@@ -508,6 +508,7 @@ class ContractTemplateServiceImpl(IContractTemplateService):
ORDER BY c.name ASC
"""
)
(sql,) = self._bind_expanding(sql, params)
rows = (await session.execute(sql, params)).mappings().all()
return [
@@ -26,6 +26,18 @@ from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServ
logger = logging.getLogger(__name__)
# Prompt sent to the VLM by _classify_page_image_by_vlm for single-page scan
# quality screening. It instructs the model to answer with JSON only, carrying
# "status" (pass / review / reject), "score" (0.0 to 1.0) and a short Chinese
# "reason". The text is runtime data: editing it changes model behavior.
_PAGE_QUALITY_VLM_PROMPT = """
你是文档扫描图片质量检测员。请判断这 1 页文档图片是否适合继续做 OCR 与合同/公文评查。
判定标准:
1. pass:文字主体清晰、方向正常、没有明显截断,能稳定阅读。
2. review:存在轻微模糊、倾斜、阴影、低对比度、局部遮挡、轻微截断,建议人工确认但仍可能可读。
3. reject:严重模糊、重影、过曝/过暗、页面大面积缺失、关键文字不可辨认、方向严重错误、空白页或非文档页,建议重拍。
只输出 JSON,不要输出 Markdown,不要解释额外文本:
{"status":"pass|review|reject","score":0.0到1.0,"reason":"20字以内中文原因"}
""".strip()
class PageQualityServiceImpl(IPageQualityService):
"""页级图片质量服务实现。"""
@@ -33,6 +45,7 @@ class PageQualityServiceImpl(IPageQualityService):
def __init__(self) -> None:
    """Build the OSS service up front and leave the other collaborators unset."""
    self.OssService = OssServiceImpl()
    # Both start as None; VlmClient is filled in by _vlm_client on first use,
    # and DocumentService is presumably handled the same way by
    # _document_service (its body is not fully visible here).
    self.DocumentService = self.VlmClient = None
async def DispatchForDocument(
self,
@@ -282,7 +295,7 @@ class PageQualityServiceImpl(IPageQualityService):
reject_pages = 0
async with GetAsyncSession() as session:
for page_num, page_image in page_images:
status, score, reason = self._classify_page_image(page_image)
status, score, reason = await self._classify_page_image_by_vlm(page_image)
if status == "review":
review_pages += 1
elif status == "reject":
@@ -466,13 +479,52 @@ class PageQualityServiceImpl(IPageQualityService):
finally:
doc.close()
def _classify_page_image(self, image_bytes: bytes) -> tuple[str, float, str | None]:
size = len(image_bytes)
if size < 25_000:
return "reject", 0.2, "页面图像内容过少或清晰度较低,建议重拍"
if size < 60_000:
return "review", 0.45, "页面疑似存在模糊,建议人工确认"
return "pass", 0.9, None
async def _classify_page_image_by_vlm(self, image_bytes: bytes) -> tuple[str, float, str | None]:
    """Judge the quality of a single page image with the VLM.

    A VLM problem must never be treated as a pass: a missing client, a
    failing call, a payload that is not a mapping, or an unrecognized
    status all degrade to ``"review"`` with a score of 0.5.

    Args:
        image_bytes: Encoded image content for one page.

    Returns:
        A ``(status, score, reason)`` tuple where ``status`` is one of
        ``"pass"``, ``"review"`` or ``"reject"``, ``score`` lies in
        ``[0.0, 1.0]``, and ``reason`` is ``None`` only for a pass that
        came back without an explanation.
    """
    from collections.abc import Mapping

    client = self._vlm_client()
    if client is None:
        return "review", 0.5, "VLM未配置,需人工确认图片质量"
    try:
        result = await client.extract_multifield(
            prompt=_PAGE_QUALITY_VLM_PROMPT,
            images_data_urls=[self._image_data_url(image_bytes)],
            max_tokens=300,
        )
    except Exception as exc:
        logger.warning("VLM page quality detection failed: %s", exc)
        return "review", 0.5, "VLM图片质量检测失败,需人工确认"
    # A list, string or None cannot be read with .get(); treat any such
    # payload as unusable instead of letting AttributeError escape.
    if not isinstance(result, Mapping):
        return "review", 0.5, "VLM返回结果不可用,需人工确认"
    status = str(result.get("status") or "").strip().lower()
    if status not in {"pass", "review", "reject"}:
        return "review", 0.5, "VLM返回结果不可用,需人工确认"
    score = self._normalize_quality_score(result.get("score"), status)
    reason = str(result.get("reason") or "").strip() or None
    # Non-pass verdicts always carry an explanation.
    if status != "pass" and not reason:
        reason = "页面图片质量需人工确认"
    return status, score, reason
def _vlm_client(self):
if self.VlmClient is None:
from fastapi_modules.fastapi_leaudit.leaudit_bridge.client_factory import create_vlm_client
self.VlmClient = create_vlm_client()
return self.VlmClient
def _image_data_url(self, image_bytes: bytes) -> str:
import base64
encoded = base64.b64encode(image_bytes).decode()
return f"data:image/png;base64,{encoded}"
def _normalize_quality_score(self, raw_score: Any, status: str) -> float:
defaults = {"pass": 0.9, "review": 0.5, "reject": 0.2}
try:
score = float(raw_score)
except (TypeError, ValueError):
return defaults[status]
return max(0.0, min(1.0, score))
def _document_service(self):
if self.DocumentService is None:
@@ -54,6 +54,8 @@ class RagChatServiceImpl(IRagChatService):
# Class-level dictionaries keyed by string; presumably per-task bookkeeping
# shared by all instances — confirm against the methods that use them.
_task_done: dict[str, bool] = {}
_task_locks: dict[str, asyncio.Lock] = {}
_title_tasks: dict[str, asyncio.Task] = {}
# Set to True by _ensure_rag_chat_schema once rag_chat_app.tenant_code has
# been found or created, so later calls return without touching the database.
_chat_schema_checked = False
# Held by _ensure_rag_chat_schema while it runs the catalog lookup and DDL.
_chat_schema_lock = asyncio.Lock()
def __init__(self) -> None:
self.TenantResolver = TenantResolver()
@@ -731,8 +733,34 @@ class RagChatServiceImpl(IRagChatService):
)
async def _ensure_rag_chat_schema(self, session) -> None:
    """Make sure ``rag_chat_app.tenant_code`` and its partial index exist.

    The outcome is cached on the class, so after the first success later
    calls return without issuing any SQL. No DDL runs when the column is
    already present.

    Args:
        session: Async database session used for the catalog lookup and the
            DDL. Statements run in whatever transaction the session has
            open; this method does not commit.
    """
    cls = self.__class__
    if cls._chat_schema_checked:
        return
    async with cls._chat_schema_lock:
        # Re-check after acquiring the lock: another coroutine may have
        # completed the work while this one was waiting.
        if cls._chat_schema_checked:
            return
        exists = (
            await session.execute(
                text(
                    """
                    SELECT 1
                    FROM information_schema.columns
                    WHERE table_schema = current_schema()
                    AND table_name = 'rag_chat_app'
                    AND column_name = 'tenant_code'
                    """
                )
            )
        ).scalar_one_or_none()
        if exists:
            cls._chat_schema_checked = True
            return
        # Fail fast instead of queueing behind long-running statements on the
        # table. SET LOCAL applies until the current transaction ends.
        await session.execute(text("SET LOCAL lock_timeout = '1000ms'"))
        # IF NOT EXISTS keeps this safe when another worker process adds the
        # column between the lookup above and this statement.
        await session.execute(text("ALTER TABLE rag_chat_app ADD COLUMN IF NOT EXISTS tenant_code VARCHAR(64) NULL"))
        await session.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chat_app_tenant_code ON rag_chat_app(tenant_code) WHERE deleted_at IS NULL"))
        cls._chat_schema_checked = True
@staticmethod
def _tenant_context_is_global(tenant_context: dict[str, str | None]) -> bool:
@@ -57,6 +57,8 @@ class RagDatasetServiceImpl(IRagDatasetService):
ORDER BY dataset_id, is_default DESC, sort_order ASC, id ASC
) a ON a.dataset_id = d.id
"""
# Set to True by _ensure_rag_tenant_schema once tenant_code has been found or
# created on both rag_dataset and rag_chat_app; later calls then return early.
_tenant_schema_checked = False
# Held by _ensure_rag_tenant_schema while it runs the catalog lookup and DDL.
_tenant_schema_lock = asyncio.Lock()
def __init__(self) -> None:
self.TenantResolver = TenantResolver()
@@ -1038,10 +1040,39 @@ class RagDatasetServiceImpl(IRagDatasetService):
raise LeauditException(StatusCodeEnum.HTTP_403_FORBIDDEN, "当前用户只能管理本地区知识库")
async def _ensure_rag_tenant_schema(self, session) -> None:
    """Make sure ``tenant_code`` exists on ``rag_dataset`` and ``rag_chat_app``.

    The outcome is cached on the class, so after the first success later
    calls return without issuing any SQL. When both columns are already
    present no DDL runs; otherwise the missing columns are added and both
    partial indexes are ensured.

    Args:
        session: Async database session used for the catalog lookup and the
            DDL. Statements run in whatever transaction the session has
            open; this method does not commit.
    """
    cls = self.__class__
    if cls._tenant_schema_checked:
        return
    async with cls._tenant_schema_lock:
        # Re-check after acquiring the lock: another coroutine may have
        # completed the work while this one was waiting.
        if cls._tenant_schema_checked:
            return
        columns = (
            await session.execute(
                text(
                    """
                    SELECT table_name
                    FROM information_schema.columns
                    WHERE table_schema = current_schema()
                    AND table_name IN ('rag_dataset', 'rag_chat_app')
                    AND column_name = 'tenant_code'
                    """
                )
            )
        ).scalars().all()
        existing = set(columns)
        if existing == {"rag_dataset", "rag_chat_app"}:
            cls._tenant_schema_checked = True
            return
        # Fail fast instead of queueing behind long-running statements on the
        # tables. SET LOCAL applies until the current transaction ends.
        await session.execute(text("SET LOCAL lock_timeout = '1000ms'"))
        # IF NOT EXISTS keeps the ALTERs safe when another worker process
        # adds a column between the lookup above and these statements.
        if "rag_dataset" not in existing:
            await session.execute(text("ALTER TABLE rag_dataset ADD COLUMN IF NOT EXISTS tenant_code VARCHAR(64) NULL"))
        if "rag_chat_app" not in existing:
            await session.execute(text("ALTER TABLE rag_chat_app ADD COLUMN IF NOT EXISTS tenant_code VARCHAR(64) NULL"))
        await session.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_dataset_tenant_code ON rag_dataset(tenant_code) WHERE deleted_at IS NULL"))
        await session.execute(text("CREATE INDEX IF NOT EXISTS idx_rag_chat_app_tenant_code ON rag_chat_app(tenant_code) WHERE deleted_at IS NULL"))
        cls._tenant_schema_checked = True
def _dataset_tenant_filter_sql(
self,