feat: add document versioning and list API

2026-04-29 11:48:50 +08:00
parent f3b83c9979
commit b45d61fa97
14 changed files with 1693 additions and 92 deletions
@@ -5,7 +5,7 @@ from fastapi import File, Form, UploadFile
 from fastapi_common.fastapi_common_web.controller import BaseController
 from fastapi_common.fastapi_common_web.domain.responses import Result

-from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
+from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentListPageVO, DocumentUploadVO
 from fastapi_modules.fastapi_leaudit.services import IDocumentService
 from fastapi_modules.fastapi_leaudit.services.impl.documentServiceImpl import DocumentServiceImpl

@@ -22,11 +22,11 @@ class DocumentController(BaseController):
            file: UploadFile = File(..., description="上传文档"),
            typeId: int | None = Form(None, description="文档类型ID"),
            typeCode: str | None = Form(None, description="文档类型编码"),
-            bizDocumentId: int | None = Form(None, description="业务文档ID"),
            region: str = Form("default", description="所属地区"),
            fileRole: str = Form("primary", description="文件角色"),
            createdBy: int | None = Form(None, description="上传用户ID"),
            autoRun: bool = Form(False, description="是否上传后自动触发评查"),
+            speed: str = Form("normal", description="执行速度档位：urgent/normal"),
        ):
            """上传文档并建立评查输入。"""
            Content = await file.read()
@@ -36,10 +36,32 @@ class DocumentController(BaseController):
                ContentType=file.content_type,
                TypeId=typeId,
                TypeCode=typeCode,
-                BizDocumentId=bizDocumentId,
                Region=region,
                FileRole=fileRole,
                CreatedBy=createdBy,
                AutoRun=autoRun,
+                Speed=speed,
+            )
+            return Result.success(data=Data)
+
+        @self.router.get("/documents/list", response_model=Result[DocumentListPageVO])
+        async def ListDocuments(
+            page: int = 1,
+            pageSize: int = 20,
+            keyword: str | None = None,
+            typeCode: str | None = None,
+            region: str | None = None,
+            processingStatus: str | None = None,
+            resultStatus: str | None = None,
+        ):
+            """List documents (latest version only, each with a summary of its history versions)."""
+            Data = await self.DocumentService.ListDocuments(  # query params are forwarded unchanged
+                Page=page,
+                PageSize=pageSize,
+                Keyword=keyword,
+                TypeCode=typeCode,
+                Region=region,
+                ProcessingStatus=processingStatus,
+                ResultStatus=resultStatus,
             )
             return Result.success(data=Data)
@@ -9,13 +9,76 @@ class DocumentUploadVO(BaseModel):
    """文档上传响应。"""

    documentId: int = Field(..., description="LeAudit 文档ID")
-    bizDocumentId: int = Field(..., description="业务文档ID")
+    internalDocumentNo: int = Field(..., description="平台内部追踪号（兼容旧字段）")
+    versionGroupKey: str = Field(..., description="文档版本归档组键")
+    versionNo: int = Field(..., description="当前文档版本号")
+    previousVersionId: int | None = Field(None, description="上一版本文档ID")
+    rootVersionId: int = Field(..., description="文档版本链根文档ID")
+    duplicateUpload: bool = Field(..., description="是否命中同名同内容的重复上传")
    fileId: int = Field(..., description="文档文件ID")
    typeId: int = Field(..., description="文档类型ID")
    typeCode: str = Field(..., description="文档类型编码")
    region: str = Field(..., description="所属地区")
    fileName: str = Field(..., description="文件名")
    ossUrl: str = Field(..., description="OSS 对象路径")
+    speed: str = Field(..., description="执行速度档位：urgent/normal")
    processingStatus: str = Field(..., description="文档处理状态")
    autoRunTriggered: bool = Field(..., description="是否已自动触发评查")
    run: AuditRunVO | None = Field(None, description="自动触发后的运行信息")
+
+
+class DocumentHistoryVersionVO(BaseModel):
+    """Summary of one historical document version."""
+
+    documentId: int = Field(..., description="文档ID")
+    fileId: int | None = Field(None, description="文件ID")
+    versionNo: int = Field(..., description="版本号")
+    fileName: str | None = Field(None, description="文件名")
+    fileExt: str | None = Field(None, description="文件扩展名")
+    processingStatus: str | None = Field(None, description="处理状态")
+    runStatus: str | None = Field(None, description="最新运行状态")
+    resultStatus: str | None = Field(None, description="最新结果状态")
+    updatedAt: str | None = Field(None, description="更新时间")  # a string, not a datetime; the list service fills it via .isoformat()
+
+
+class DocumentListItemVO(BaseModel):
+    """One item of the document list."""
+
+    documentId: int = Field(..., description="文档ID")
+    internalDocumentNo: int = Field(..., description="平台内部追踪号")  # the list query selects this from biz_document_id
+    versionGroupKey: str = Field(..., description="版本归档组键")
+    versionNo: int = Field(..., description="当前版本号")
+    rootVersionId: int = Field(..., description="根版本文档ID")
+    previousVersionId: int | None = Field(None, description="上一版本文档ID")
+    typeId: int | None = Field(None, description="文档类型ID")
+    typeCode: str | None = Field(None, description="文档类型编码")
+    region: str = Field(..., description="区域")
+    normalizedName: str | None = Field(None, description="归一化名称")
+    fileId: int | None = Field(None, description="文件ID")
+    fileName: str | None = Field(None, description="文件名")
+    fileExt: str | None = Field(None, description="文件扩展名")
+    mimeType: str | None = Field(None, description="MIME类型")
+    fileSize: int | None = Field(None, description="文件大小")
+    ossUrl: str | None = Field(None, description="OSS路径")
+    processingStatus: str | None = Field(None, description="处理状态")
+    currentRunId: int | None = Field(None, description="当前运行ID")
+    runStatus: str | None = Field(None, description="当前运行状态")
+    resultStatus: str | None = Field(None, description="当前结果状态")
+    totalScore: float | None = Field(None, description="总分")
+    passedCount: int | None = Field(None, description="通过数")
+    failedCount: int | None = Field(None, description="失败数")
+    skippedCount: int | None = Field(None, description="跳过数")
+    updatedAt: str | None = Field(None, description="更新时间")  # a string, not a datetime; the list service fills it via .isoformat()
+    hasHistory: bool = Field(False, description="是否存在历史版本")
+    totalVersions: int = Field(1, description="总版本数")
+    historyVersions: list[DocumentHistoryVersionVO] = Field(default_factory=list, description="历史版本摘要")
+
+
+class DocumentListPageVO(BaseModel):
+    """Paginated result of the document list."""
+
+    total: int = Field(..., description="总数")
+    page: int = Field(..., description="当前页")
+    pageSize: int = Field(..., description="每页数量")
+    totalPages: int = Field(..., description="总页数")
+    documents: list[DocumentListItemVO] = Field(default_factory=list, description="文档列表")
@@ -6,6 +6,7 @@ into leaudit_* table format and writes via SQLAlchemy async session.

 from __future__ import annotations

+import json
 import logging
 import re
 from typing import Any
@@ -157,11 +158,19 @@ class StorageAdapter:
                row = _rule_result_to_row(document_id, resolved_run_id, rule_result, rule, bundle)
                if rule_version_id is not None:
                    row["rule_version_id"] = rule_version_id
-                columns = ", ".join(row.keys())
-                placeholders = ", ".join(f":{k}" for k in row)
+                json_columns = {"stages", "extracted_fields", "field_positions", "remediation", "rule_meta"}
+                serialized_row = {
+                    key: (json.dumps(value, ensure_ascii=False) if key in json_columns and value is not None else value)
+                    for key, value in row.items()
+                }
+                columns = ", ".join(serialized_row.keys())
+                placeholders = ", ".join(
+                    f"CAST(:{k} AS JSONB)" if k in json_columns else f":{k}"
+                    for k in serialized_row
+                )
                await session.execute(
                    text(f"INSERT INTO leaudit_rule_results ({columns}) VALUES ({placeholders})"),
-                    row,
+                    serialized_row,
                )

            # Update audit_runs summary (scores only — terminal state set by finalize_run)
@@ -371,7 +380,7 @@ class StorageAdapter:
                            :vlm_calls,
                            :duration_ms,
                            :requires_human_review,
-                            :payload,
+                            CAST(:payload AS JSONB),
                            :created_at,
                            :updated_at
                        )
@@ -390,7 +399,7 @@ class StorageAdapter:
                        "vlm_calls": task.vlm_calls,
                        "duration_ms": task.duration_ms,
                        "requires_human_review": task.requires_human_review,
-                        "payload": task.model_dump(mode="json"),
+                        "payload": json.dumps(task.model_dump(mode="json"), ensure_ascii=False),
                        "created_at": task.created_at,
                        "updated_at": task.updated_at,
                    },
@@ -527,8 +536,9 @@ def _bundle_to_extracted(bundle: ExtractionBundle) -> dict[str, Any]:
                "value": fv.value,
                "confidence": float(fv.confidence) if fv.confidence else 0.0,
            }
-            if fv.position is not None:
-                field_data["position"] = fv.position.model_dump(mode="json")
+            position_payload = _field_value_position_payload(fv)
+            if position_payload is not None:
+                field_data["position"] = position_payload
            fields[name] = field_data
        else:
            fields[name] = {"value": fv}
@@ -637,11 +647,39 @@ def _extract_relevant_field_positions(
            if f in positions:
                continue
            fv = bundle.fields.get(f)
-            if fv is not None and isinstance(fv, FieldValue) and fv.position is not None:
-                positions[f] = fv.position.model_dump(mode="json")
+            if fv is not None and isinstance(fv, FieldValue):
+                position_payload = _field_value_position_payload(fv)
+                if position_payload is not None:
+                    positions[f] = position_payload
    return positions


+def _field_value_position_payload(fv: FieldValue) -> dict[str, Any] | None:
+    """Extract persistable position hints from a FieldValue, supporting both old and new native leaudit FieldValue shapes."""
+    position = getattr(fv, "position", None)  # getattr: tolerates a FieldValue without a .position attribute
+    if position is not None:
+        if hasattr(position, "model_dump"):
+            return position.model_dump(mode="json")  # presumably a pydantic-style model; TODO confirm
+        if isinstance(position, dict):
+            return position  # returned as-is (same object, not a copy)
+
+    metadata = fv.metadata if isinstance(fv.metadata, dict) else {}  # NOTE(review): unlike .position, .metadata is accessed directly; confirm every FieldValue variant defines it
+    payload: dict[str, Any] = {}  # fallback: rebuild the payload from metadata keys (snake_case -> camelCase)
+
+    if "match_position" in metadata:
+        payload["matchPosition"] = metadata.get("match_position")
+    if "matched_text" in metadata:
+        payload["matchedText"] = metadata.get("matched_text")
+    if "page_num" in metadata:
+        payload["pageNum"] = metadata.get("page_num")
+    if "page_nums" in metadata:
+        payload["pageNums"] = metadata.get("page_nums")
+    if "bbox" in metadata:
+        payload["bbox"] = metadata.get("bbox")
+
+    return payload or None  # an empty payload becomes None; callers test `is not None` before storing
+
+
 def _rule_result_to_row(
    document_id: int,
    run_id: int | None,
@@ -1,11 +1,12 @@
-"""LeAudit 域文档镜像模型 —— leaudit_documents 表。
+"""LeAudit 文档主模型 —— leaudit_documents 表。

-通过 biz_document_id 关联业务 documents 表，不复制业务字段。
+当前平台把它作为 LeAudit 自己的文档主表使用，不再依赖旧系统文档表。
+``biz_document_id`` 字段仅保留为内部追踪号，避免直接改库。
 """

 from __future__ import annotations

-from sqlalchemy import BigInteger, String, ForeignKey
+from sqlalchemy import BigInteger, Boolean, Integer, String
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy.orm import Mapped, mapped_column

@@ -13,33 +14,27 @@ from fastapi_common.fastapi_common_web.models import BaseModel


 class LeauditDocument(BaseModel):
-    """LeAudit 文档镜像表。"""
+    """LeAudit 平台内部文档主表。"""

    __tablename__ = "leaudit_documents"

    Id: Mapped[int] = mapped_column("id", BigInteger, primary_key=True, autoincrement=True)
-    bizDocumentId: Mapped[int] = mapped_column("biz_document_id", BigInteger, unique=True, comment="关联业务 documents.id")
+    bizDocumentId: Mapped[int] = mapped_column("biz_document_id", BigInteger, unique=True, comment="内部追踪号（兼容旧字段名）")
    typeId: Mapped[int | None] = mapped_column("type_id", BigInteger, comment="文档类型ID")
    processingStatus: Mapped[str | None] = mapped_column("processing_status", String(64), default="waiting", comment="waiting/processing/completed/failed")
    currentRunId: Mapped[int | None] = mapped_column("current_run_id", BigInteger, comment="最新有效 run id")
    region: Mapped[str] = mapped_column(String(32), default="default", comment="所属地区: mz/yf/jy/cz/default")
+    versionGroupKey: Mapped[str | None] = mapped_column("version_group_key", String(64), comment="文档版本归档组键")
+    versionNo: Mapped[int] = mapped_column("version_no", Integer, default=1, comment="同一文档系列中的版本号")
+    previousVersionId: Mapped[int | None] = mapped_column("previous_version_id", BigInteger, comment="上一版本文档ID")
+    rootVersionId: Mapped[int | None] = mapped_column("root_version_id", BigInteger, comment="首版本文档ID")
+    isLatestVersion: Mapped[bool] = mapped_column("is_latest_version", Boolean, default=True, comment="是否当前最新版本")
+    normalizedName: Mapped[str | None] = mapped_column("normalized_name", String(512), comment="归一化文件名（不含扩展名）")

    @classmethod
-    async def get_by_biz_id(cls, session: AsyncSession, bizDocumentId: int) -> "LeauditDocument | None":
-        """按业务文档 ID 查询。"""
-        from sqlalchemy import select
-        return await session.scalar(select(cls).where(cls.bizDocumentId == bizDocumentId))
-
-    @classmethod
-    async def upsert_by_biz_id(cls, session: AsyncSession, bizDocumentId: int, **fields) -> "LeauditDocument":
-        """按业务文档 ID 创建或更新。"""
-        from sqlalchemy import select
-        doc = await session.scalar(select(cls).where(cls.bizDocumentId == bizDocumentId))
-        if doc is None:
-            doc = cls(bizDocumentId=bizDocumentId, **fields)
-            session.add(doc)
-        else:
-            for k, v in fields.items():
-                setattr(doc, k, v)
+    async def create_new(cls, session: AsyncSession, **fields) -> "LeauditDocument":
+        """Create a new platform-side document row for every upload."""
+        doc = cls(**fields)  # fields are passed straight to the model constructor as keyword arguments
+        session.add(doc)
         await session.flush()  # flush only, no commit here; presumably so the autoincrement id is populated (the upload service reads document.Id next)
         return doc
@@ -2,7 +2,7 @@

 from abc import ABC, abstractmethod

-from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
+from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentListPageVO, DocumentUploadVO


 class IDocumentService(ABC):
@@ -16,11 +16,25 @@ class IDocumentService(ABC):
        ContentType: str | None,
        TypeId: int | None = None,
        TypeCode: str | None = None,
-        BizDocumentId: int | None = None,
        Region: str = "default",
        FileRole: str = "primary",
        CreatedBy: int | None = None,
        AutoRun: bool = False,
+        Speed: str = "normal",
    ) -> DocumentUploadVO:
        """上传文档并建立 LeAudit document/file 记录。"""
        ...
+
+    @abstractmethod
+    async def ListDocuments(
+        self,
+        Page: int = 1,
+        PageSize: int = 20,
+        Keyword: str | None = None,
+        TypeCode: str | None = None,
+        Region: str | None = None,
+        ProcessingStatus: str | None = None,
+        ResultStatus: str | None = None,
+    ) -> DocumentListPageVO:
+        """List documents (latest versions only, with history-version summaries attached)."""
+        ...
@@ -2,9 +2,13 @@

 from __future__ import annotations

+from datetime import datetime
 import hashlib
 import mimetypes
+import re
 import time
+import unicodedata
+import uuid
 from pathlib import Path

 from sqlalchemy import text
@@ -14,7 +18,12 @@ from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
 from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
 from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils

-from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
+from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import (
+    DocumentHistoryVersionVO,
+    DocumentListItemVO,
+    DocumentListPageVO,
+    DocumentUploadVO,
+)
 from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile
 from fastapi_modules.fastapi_leaudit.services import IAuditService, IDocumentService, IOssService
 from fastapi_modules.fastapi_leaudit.services.impl.auditServiceImpl import AuditServiceImpl
@@ -39,11 +48,11 @@ class DocumentServiceImpl(IDocumentService):
        ContentType: str | None,
        TypeId: int | None = None,
        TypeCode: str | None = None,
-        BizDocumentId: int | None = None,
        Region: str = "default",
        FileRole: str = "primary",
        CreatedBy: int | None = None,
        AutoRun: bool = False,
+        Speed: str = "normal",
    ) -> DocumentUploadVO:
        """上传文档并建立 LeAudit document/file 记录。"""
        if not FileName:
@@ -59,6 +68,9 @@ class DocumentServiceImpl(IDocumentService):
        mimeType = ContentType or mimetypes.guess_type(FileName)[0] or "application/octet-stream"
        fileSha256 = hashlib.sha256(FileContent).hexdigest()
        fileSize = len(FileContent)
+        normalizedSpeed = _normalize_speed(Speed)
+        normalizedName = _normalize_document_name(FileName)
+        uploadedAt = datetime.now()

        async with GetAsyncSession() as Session:
            if TypeId is not None and TypeCode is not None:
@@ -107,69 +119,386 @@ class DocumentServiceImpl(IDocumentService):

            resolvedTypeId = int(typeRow["id"])
            resolvedTypeCode = str(typeRow["code"])
-            resolvedBizDocumentId = BizDocumentId or int(time.time() * 1000)
+            duplicateUpload = False
+            previousVersionId: int | None = None
+            rootVersionId: int | None = None
+            versionGroupKey: str | None = None
+            versionNo = 1

-            document = await LeauditDocument.upsert_by_biz_id(
-                Session,
-                bizDocumentId=resolvedBizDocumentId,
-                typeId=resolvedTypeId,
-                region=normalizedRegion,
-                processingStatus="waiting",
-            )
+            latestCandidate = None
+            if normalizedFileRole == "primary":
+                latestCandidate = await _find_latest_version_candidate(
+                    Session,
+                    type_id=resolvedTypeId,
+                    region=normalizedRegion,
+                    normalized_name=normalizedName,
+                )

-            versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
-            versionNo = f"v{versionCount + 1}"
-            objectKey = OssPathUtils.BuildBusinessDocKey(
-                Region=normalizedRegion,
-                TypeCode=resolvedTypeCode,
-                DocumentId=document.Id,
-                Version=versionNo,
-                FileRole=normalizedFileRole,
-                FileName=FileName,
-            )
-            ossUrl = await self.OssService.UploadBytes(
-                ObjectKey=objectKey,
-                Content=FileContent,
-                ContentType=mimeType,
-            )
+            if latestCandidate and latestCandidate["sha256"] == fileSha256:
+                duplicateUpload = True
+                document = await Session.get(LeauditDocument, int(latestCandidate["document_id"]))
+                documentFile = await Session.get(LeauditDocumentFile, int(latestCandidate["file_id"]))
+                if document is None or documentFile is None:
+                    raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "重复上传版本定位失败")
+                await Session.commit()
+            else:
+                internalDocumentNo = time.time_ns()
+                if latestCandidate:
+                    previousVersionId = int(latestCandidate["document_id"])
+                    rootVersionId = int(latestCandidate["root_version_id"] or latestCandidate["document_id"])
+                    versionGroupKey = str(latestCandidate["version_group_key"])
+                    versionNo = int(latestCandidate["version_no"]) + 1
+                    previousDocument = await Session.get(LeauditDocument, previousVersionId)
+                    if previousDocument is not None:
+                        previousDocument.isLatestVersion = False
+                else:
+                    versionGroupKey = uuid.uuid4().hex

-            await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
-            documentFile = LeauditDocumentFile(
-                documentId=document.Id,
-                fileRole=normalizedFileRole,
-                fileName=FileName,
-                fileExt=fileExt,
-                mimeType=mimeType,
-                fileSize=fileSize,
-                sha256=fileSha256,
-                localPath=None,
-                ossUrl=ossUrl,
-                storageProvider="minio",
-                isActive=True,
-                createdBy=CreatedBy,
-            )
-            Session.add(documentFile)
-            await Session.flush()
-            await Session.commit()
-            await Session.refresh(document)
-            await Session.refresh(documentFile)
+                document = await LeauditDocument.create_new(
+                    Session,
+                    bizDocumentId=internalDocumentNo,
+                    typeId=resolvedTypeId,
+                    region=normalizedRegion,
+                    processingStatus="waiting",
+                    versionGroupKey=versionGroupKey,
+                    versionNo=versionNo,
+                    previousVersionId=previousVersionId,
+                    rootVersionId=rootVersionId,
+                    isLatestVersion=True,
+                    normalizedName=normalizedName,
+                )
+                if document.rootVersionId is None:
+                    document.rootVersionId = document.Id
+                    rootVersionId = document.Id
+                else:
+                    rootVersionId = document.rootVersionId
+
+                versionLabel = f"v{document.versionNo}"
+                objectKey = OssPathUtils.BuildBusinessDocKey(
+                    Region=normalizedRegion,
+                    TypeCode=resolvedTypeCode,
+                    DocumentId=document.Id,
+                    Version=versionLabel,
+                    FileRole=normalizedFileRole,
+                    FileName=FileName,
+                    Year=uploadedAt.year,
+                    Month=uploadedAt.month,
+                )
+                ossUrl = await self.OssService.UploadBytes(
+                    ObjectKey=objectKey,
+                    Content=FileContent,
+                    ContentType=mimeType,
+                )
+
+                versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
+                _ = versionCount  # single-version-per-document in current model; kept for future extension
+                await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
+                documentFile = LeauditDocumentFile(
+                    documentId=document.Id,
+                    fileRole=normalizedFileRole,
+                    fileName=FileName,
+                    fileExt=fileExt,
+                    mimeType=mimeType,
+                    fileSize=fileSize,
+                    sha256=fileSha256,
+                    localPath=None,
+                    ossUrl=ossUrl,
+                    storageProvider="minio",
+                    isActive=True,
+                    createdBy=CreatedBy,
+                )
+                Session.add(documentFile)
+                await Session.flush()
+                await Session.commit()
+                await Session.refresh(document)
+                await Session.refresh(documentFile)
+
+            ossUrl = documentFile.ossUrl or ""

        run = None
        processingStatus = document.processingStatus or "waiting"
        if AutoRun:
-            run = await self.AuditService.Run(DocumentId=document.Id)
+            run = await self.AuditService.Run(
+                DocumentId=document.Id,
+                Speed=Speed,
+                Force=duplicateUpload,
+            )
            processingStatus = "running" if run.status in {"pending", "running"} else run.status

        return DocumentUploadVO(
            documentId=document.Id,
-            bizDocumentId=document.bizDocumentId,
+            internalDocumentNo=document.bizDocumentId,
+            versionGroupKey=document.versionGroupKey or "",
+            versionNo=int(document.versionNo or 1),
+            previousVersionId=document.previousVersionId,
+            rootVersionId=int(document.rootVersionId or document.Id),
+            duplicateUpload=duplicateUpload,
            fileId=documentFile.Id,
            typeId=resolvedTypeId,
            typeCode=resolvedTypeCode,
            region=normalizedRegion,
            fileName=documentFile.fileName,
            ossUrl=ossUrl,
+            speed=normalizedSpeed,
            processingStatus=processingStatus,
            autoRunTriggered=AutoRun,
            run=run,
        )
+
+    async def ListDocuments(
+        self,
+        Page: int = 1,
+        PageSize: int = 20,
+        Keyword: str | None = None,
+        TypeCode: str | None = None,
+        Region: str | None = None,
+        ProcessingStatus: str | None = None,
+        ResultStatus: str | None = None,
+    ) -> DocumentListPageVO:
+        """获取文档列表（仅最新版本，附历史版本摘要）。"""
+        page = max(1, int(Page))
+        page_size = max(1, min(int(PageSize), 100))
+        offset = (page - 1) * page_size
+
+        filters = ["d.is_latest_version = true", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'"]
+        params: dict[str, object] = {"limit": page_size, "offset": offset}
+
+        if Keyword:
+            filters.append("(f.file_name ILIKE :keyword OR d.normalized_name ILIKE :keyword)")
+            params["keyword"] = f"%{Keyword.strip()}%"
+        if TypeCode:
+            filters.append("dt.code = :type_code")
+            params["type_code"] = TypeCode.strip()
+        if Region:
+            filters.append("d.region = :region")
+            params["region"] = Region.strip()
+        if ProcessingStatus:
+            filters.append("d.processing_status = :processing_status")
+            params["processing_status"] = ProcessingStatus.strip()
+        if ResultStatus:
+            filters.append("ar.result_status = :result_status")
+            params["result_status"] = ResultStatus.strip()
+
+        where_clause = " AND ".join(filters)
+
+        count_sql = text(
+            f"""
+            SELECT COUNT(*)
+            FROM leaudit_documents d
+            JOIN leaudit_document_files f
+              ON f.document_id = d.id
+            LEFT JOIN leaudit_document_types dt
+              ON dt.id = d.type_id
+            LEFT JOIN leaudit_audit_runs ar
+              ON ar.id = d.current_run_id
+            WHERE {where_clause}
+            """
+        )
+
+        list_sql = text(
+            f"""
+            SELECT
+                d.id AS document_id,
+                d.biz_document_id AS internal_document_no,
+                d.version_group_key,
+                d.version_no,
+                d.root_version_id,
+                d.previous_version_id,
+                d.type_id,
+                dt.code AS type_code,
+                d.region,
+                d.normalized_name,
+                d.processing_status,
+                d.current_run_id,
+                d.updated_at,
+                f.id AS file_id,
+                f.file_name,
+                f.file_ext,
+                f.mime_type,
+                f.file_size,
+                f.oss_url,
+                ar.status AS run_status,
+                ar.result_status,
+                ar.total_score,
+                ar.passed_count,
+                ar.failed_count,
+                ar.skipped_count,
+                vc.total_versions,
+                COALESCE(vc.total_versions, 1) > 1 AS has_history
+            FROM leaudit_documents d
+            JOIN leaudit_document_files f
+              ON f.document_id = d.id
+            LEFT JOIN leaudit_document_types dt
+              ON dt.id = d.type_id
+            LEFT JOIN leaudit_audit_runs ar
+              ON ar.id = d.current_run_id
+            LEFT JOIN (
+                SELECT version_group_key, COUNT(*) AS total_versions
+                FROM leaudit_documents
+                WHERE deleted_at IS NULL
+                GROUP BY version_group_key
+            ) vc
+              ON vc.version_group_key = d.version_group_key
+            WHERE {where_clause}
+            ORDER BY d.updated_at DESC, d.id DESC
+            LIMIT :limit OFFSET :offset
+            """
+        )
+
+        history_sql = text(
+            """
+            SELECT
+                d.version_group_key,
+                d.id AS document_id,
+                d.version_no,
+                d.processing_status,
+                d.updated_at,
+                f.id AS file_id,
+                f.file_name,
+                f.file_ext,
+                ar.status AS run_status,
+                ar.result_status
+            FROM leaudit_documents d
+            JOIN leaudit_document_files f
+              ON f.document_id = d.id
+             AND f.is_active = true
+             AND f.file_role = 'primary'
+            LEFT JOIN leaudit_audit_runs ar
+              ON ar.id = d.current_run_id
+            WHERE d.version_group_key = ANY(:group_keys)
+              AND d.is_latest_version = false
+              AND d.deleted_at IS NULL
+            ORDER BY d.version_group_key, d.version_no DESC, d.id DESC
+            """
+        )
+
+        async with GetAsyncSession() as Session:
+            total = int((await Session.execute(count_sql, params)).scalar_one())
+            rows = (await Session.execute(list_sql, params)).mappings().all()
+
+            history_by_group: dict[str, list[DocumentHistoryVersionVO]] = {}
+            group_keys = [str(row["version_group_key"]) for row in rows if row["version_group_key"]]
+            if group_keys:
+                history_rows = (
+                    await Session.execute(history_sql, {"group_keys": group_keys})
+                ).mappings().all()
+                for row in history_rows:
+                    history_by_group.setdefault(str(row["version_group_key"]), []).append(
+                        DocumentHistoryVersionVO(
+                            documentId=int(row["document_id"]),
+                            fileId=int(row["file_id"]) if row["file_id"] is not None else None,
+                            versionNo=int(row["version_no"]),
+                            fileName=row["file_name"],
+                            fileExt=row["file_ext"],
+                            processingStatus=row["processing_status"],
+                            runStatus=row["run_status"],
+                            resultStatus=row["result_status"],
+                            updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None,
+                        )
+                    )
+
+        documents: list[DocumentListItemVO] = []
+        for row in rows:
+            group_key = str(row["version_group_key"] or "")
+            documents.append(
+                DocumentListItemVO(
+                    documentId=int(row["document_id"]),
+                    internalDocumentNo=int(row["internal_document_no"]),
+                    versionGroupKey=group_key,
+                    versionNo=int(row["version_no"] or 1),
+                    rootVersionId=int(row["root_version_id"] or row["document_id"]),
+                    previousVersionId=int(row["previous_version_id"]) if row["previous_version_id"] is not None else None,
+                    typeId=int(row["type_id"]) if row["type_id"] is not None else None,
+                    typeCode=row["type_code"],
+                    region=row["region"],
+                    normalizedName=row["normalized_name"],
+                    fileId=int(row["file_id"]) if row["file_id"] is not None else None,
+                    fileName=row["file_name"],
+                    fileExt=row["file_ext"],
+                    mimeType=row["mime_type"],
+                    fileSize=int(row["file_size"]) if row["file_size"] is not None else None,
+                    ossUrl=row["oss_url"],
+                    processingStatus=row["processing_status"],
+                    currentRunId=int(row["current_run_id"]) if row["current_run_id"] is not None else None,
+                    runStatus=row["run_status"],
+                    resultStatus=row["result_status"],
+                    totalScore=float(row["total_score"]) if row["total_score"] is not None else None,
+                    passedCount=int(row["passed_count"]) if row["passed_count"] is not None else None,
+                    failedCount=int(row["failed_count"]) if row["failed_count"] is not None else None,
+                    skippedCount=int(row["skipped_count"]) if row["skipped_count"] is not None else None,
+                    updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None,
+                    hasHistory=bool(row["has_history"]),
+                    totalVersions=int(row["total_versions"] or 1),
+                    historyVersions=history_by_group.get(group_key, []),
+                )
+            )
+
+        total_pages = (total + page_size - 1) // page_size if total else 0
+        return DocumentListPageVO(
+            total=total,
+            page=page,
+            pageSize=page_size,
+            totalPages=total_pages,
+            documents=documents,
+        )
+
+
+async def _find_latest_version_candidate(
+    session,
+    *,
+    type_id: int,
+    region: str,
+    normalized_name: str,
+) -> dict | None:
+    """Fetch the newest same-name document that owns an active primary file.
+
+    Only non-deleted documents flagged as the latest version are considered;
+    ties are broken by highest version number, then highest document id.
+
+    Args:
+        session: Database session whose ``execute`` is awaited with the query.
+        type_id: Document type to match.
+        region: Region to match.
+        normalized_name: Name key compared against ``normalized_name``.
+
+    Returns:
+        A dict with ``document_id``, ``version_group_key``, ``version_no``,
+        ``root_version_id``, ``file_id`` and ``sha256`` for the top-ranked
+        row, or ``None`` when no row matches.
+    """
+    lookup_sql = text(
+        """
+            SELECT
+                d.id AS document_id,
+                d.version_group_key,
+                d.version_no,
+                d.root_version_id,
+                f.id AS file_id,
+                f.sha256
+            FROM leaudit_documents d
+            JOIN leaudit_document_files f
+              ON f.document_id = d.id
+             AND f.is_active = true
+             AND f.file_role = 'primary'
+            WHERE d.type_id = :type_id
+              AND d.region = :region
+              AND d.normalized_name = :normalized_name
+              AND d.is_latest_version = true
+              AND d.deleted_at IS NULL
+            ORDER BY d.version_no DESC, d.id DESC
+            LIMIT 1
+            """
+    )
+    bind_params = {"type_id": type_id, "region": region, "normalized_name": normalized_name}
+    matches = (await session.execute(lookup_sql, bind_params)).mappings()
+    candidate = matches.first()
+    # Copy into a plain dict so callers are not handed a driver row object.
+    return None if not candidate else dict(candidate)
+
+
+def _normalize_speed(speed: str | None) -> str:
+    """Map a client speed hint to "urgent" or "normal" (the fallback tier)."""
+    # None, blank and unrecognized values all fall through to "normal".
+    urgent_aliases = ("urgent", "high", "fast", "emergency", "紧急")
+    token = "" if not speed else speed.strip().lower()
+    return "urgent" if token in urgent_aliases else "normal"
+
+
+def _normalize_document_name(file_name: str) -> str:
+    """Build a stable name key for same-name version matching.
+
+    The extension is dropped, the stem is NFKC-normalized and lowercased, and
+    runs of whitespace, "_" and "-" collapse to one space. Trailing duplicate
+    markers -- a "(N)" counter, "副本", or a separate word "copy" -- are then
+    removed repeatedly, so stacked forms such as "report - copy (2)" and
+    "report (2) - copy" both reduce to "report".
+
+    Args:
+        file_name: Original file name (a path is accepted; only the stem of
+            the final component is used).
+
+    Returns:
+        The normalized key, or "untitled" when nothing usable remains.
+    """
+    stem = unicodedata.normalize("NFKC", Path(file_name).stem).lower()
+    name = re.sub(r"[\s_\-]+", " ", stem).strip()
+    # NFKC has already folded full-width parentheses to ASCII, and "-"/"_"
+    # are spaces by now. "copy" must be preceded by a separator so that words
+    # merely ending in it ("photocopy") are not truncated.
+    duplicate_marker = re.compile(r"\s*(?:\(\d+\)|副本)$|\s+copy$")
+    while True:
+        trimmed = duplicate_marker.sub("", name)
+        # Stop once stable; never strip the whole name, otherwise files named
+        # only "(1)" or "副本" would all share the "untitled" key.
+        if not trimmed or trimmed == name:
+            break
+        name = trimmed
+    return name or "untitled"