feat: add document versioning and list API

This commit is contained in:
wren
2026-04-29 11:48:50 +08:00
parent f3b83c9979
commit b45d61fa97
14 changed files with 1693 additions and 92 deletions
@@ -5,7 +5,7 @@ from fastapi import File, Form, UploadFile
from fastapi_common.fastapi_common_web.controller import BaseController
from fastapi_common.fastapi_common_web.domain.responses import Result
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentListPageVO, DocumentUploadVO
from fastapi_modules.fastapi_leaudit.services import IDocumentService
from fastapi_modules.fastapi_leaudit.services.impl.documentServiceImpl import DocumentServiceImpl
@@ -22,11 +22,11 @@ class DocumentController(BaseController):
file: UploadFile = File(..., description="上传文档"),
typeId: int | None = Form(None, description="文档类型ID"),
typeCode: str | None = Form(None, description="文档类型编码"),
bizDocumentId: int | None = Form(None, description="业务文档ID"),
region: str = Form("default", description="所属地区"),
fileRole: str = Form("primary", description="文件角色"),
createdBy: int | None = Form(None, description="上传用户ID"),
autoRun: bool = Form(False, description="是否上传后自动触发评查"),
speed: str = Form("normal", description="执行速度档位:urgent/normal"),
):
"""上传文档并建立评查输入。"""
Content = await file.read()
@@ -36,10 +36,32 @@ class DocumentController(BaseController):
ContentType=file.content_type,
TypeId=typeId,
TypeCode=typeCode,
BizDocumentId=bizDocumentId,
Region=region,
FileRole=fileRole,
CreatedBy=createdBy,
AutoRun=autoRun,
Speed=speed,
)
return Result.success(data=Data)
@self.router.get("/documents/list", response_model=Result[DocumentListPageVO])
async def ListDocuments(
    page: int = 1,
    pageSize: int = 20,
    keyword: str | None = None,
    typeCode: str | None = None,
    region: str | None = None,
    processingStatus: str | None = None,
    resultStatus: str | None = None,
):
    """List documents (latest versions only, with history summaries attached)."""
    # Map the camelCase query parameters onto the service's PascalCase
    # keyword arguments in one place, then delegate.
    ServiceArgs = {
        "Page": page,
        "PageSize": pageSize,
        "Keyword": keyword,
        "TypeCode": typeCode,
        "Region": region,
        "ProcessingStatus": processingStatus,
        "ResultStatus": resultStatus,
    }
    PageData = await self.DocumentService.ListDocuments(**ServiceArgs)
    return Result.success(data=PageData)
@@ -9,13 +9,76 @@ class DocumentUploadVO(BaseModel):
"""文档上传响应。"""
documentId: int = Field(..., description="LeAudit 文档ID")
bizDocumentId: int = Field(..., description="业务文档ID")
internalDocumentNo: int = Field(..., description="平台内部追踪号(兼容旧字段)")
versionGroupKey: str = Field(..., description="文档版本归档组键")
versionNo: int = Field(..., description="当前文档版本号")
previousVersionId: int | None = Field(None, description="上一版本文档ID")
rootVersionId: int = Field(..., description="文档版本链根文档ID")
duplicateUpload: bool = Field(..., description="是否命中同名同内容的重复上传")
fileId: int = Field(..., description="文档文件ID")
typeId: int = Field(..., description="文档类型ID")
typeCode: str = Field(..., description="文档类型编码")
region: str = Field(..., description="所属地区")
fileName: str = Field(..., description="文件名")
ossUrl: str = Field(..., description="OSS 对象路径")
speed: str = Field(..., description="执行速度档位:urgent/normal")
processingStatus: str = Field(..., description="文档处理状态")
autoRunTriggered: bool = Field(..., description="是否已自动触发评查")
run: AuditRunVO | None = Field(None, description="自动触发后的运行信息")
class DocumentHistoryVersionVO(BaseModel):
"""Summary of one historical version of a document."""
# Only documentId and versionNo are required; every other field may be None.
documentId: int = Field(..., description="文档ID")
fileId: int | None = Field(None, description="文件ID")
versionNo: int = Field(..., description="版本号")
fileName: str | None = Field(None, description="文件名")
fileExt: str | None = Field(None, description="文件扩展名")
processingStatus: str | None = Field(None, description="处理状态")
runStatus: str | None = Field(None, description="最新运行状态")
resultStatus: str | None = Field(None, description="最新结果状态")
# Timestamp carried as a string rather than a datetime.
updatedAt: str | None = Field(None, description="更新时间")
class DocumentListItemVO(BaseModel):
"""One item of the document list."""
# --- identity and version-chain fields ---
documentId: int = Field(..., description="文档ID")
internalDocumentNo: int = Field(..., description="平台内部追踪号")
versionGroupKey: str = Field(..., description="版本归档组键")
versionNo: int = Field(..., description="当前版本号")
rootVersionId: int = Field(..., description="根版本文档ID")
previousVersionId: int | None = Field(None, description="上一版本文档ID")
typeId: int | None = Field(None, description="文档类型ID")
typeCode: str | None = Field(None, description="文档类型编码")
region: str = Field(..., description="区域")
normalizedName: str | None = Field(None, description="归一化名称")
# --- file fields ---
fileId: int | None = Field(None, description="文件ID")
fileName: str | None = Field(None, description="文件名")
fileExt: str | None = Field(None, description="文件扩展名")
mimeType: str | None = Field(None, description="MIME类型")
fileSize: int | None = Field(None, description="文件大小")
ossUrl: str | None = Field(None, description="OSS路径")
# --- processing / audit-run fields ---
processingStatus: str | None = Field(None, description="处理状态")
currentRunId: int | None = Field(None, description="当前运行ID")
runStatus: str | None = Field(None, description="当前运行状态")
resultStatus: str | None = Field(None, description="当前结果状态")
totalScore: float | None = Field(None, description="总分")
passedCount: int | None = Field(None, description="通过数")
failedCount: int | None = Field(None, description="失败数")
skippedCount: int | None = Field(None, description="跳过数")
# Timestamp carried as a string rather than a datetime.
updatedAt: str | None = Field(None, description="更新时间")
# --- version history; defaults describe a document with a single version ---
hasHistory: bool = Field(False, description="是否存在历史版本")
totalVersions: int = Field(1, description="总版本数")
historyVersions: list[DocumentHistoryVersionVO] = Field(default_factory=list, description="历史版本摘要")
class DocumentListPageVO(BaseModel):
"""Paginated result of the document list."""
# Pagination metadata is required; the item list defaults to empty.
total: int = Field(..., description="总数")
page: int = Field(..., description="当前页")
pageSize: int = Field(..., description="每页数量")
totalPages: int = Field(..., description="总页数")
documents: list[DocumentListItemVO] = Field(default_factory=list, description="文档列表")
@@ -6,6 +6,7 @@ into leaudit_* table format and writes via SQLAlchemy async session.
from __future__ import annotations
import json
import logging
import re
from typing import Any
@@ -157,11 +158,19 @@ class StorageAdapter:
row = _rule_result_to_row(document_id, resolved_run_id, rule_result, rule, bundle)
if rule_version_id is not None:
row["rule_version_id"] = rule_version_id
columns = ", ".join(row.keys())
placeholders = ", ".join(f":{k}" for k in row)
json_columns = {"stages", "extracted_fields", "field_positions", "remediation", "rule_meta"}
serialized_row = {
key: (json.dumps(value, ensure_ascii=False) if key in json_columns and value is not None else value)
for key, value in row.items()
}
columns = ", ".join(serialized_row.keys())
placeholders = ", ".join(
f"CAST(:{k} AS JSONB)" if k in json_columns else f":{k}"
for k in serialized_row
)
await session.execute(
text(f"INSERT INTO leaudit_rule_results ({columns}) VALUES ({placeholders})"),
row,
serialized_row,
)
# Update audit_runs summary (scores only — terminal state set by finalize_run)
@@ -371,7 +380,7 @@ class StorageAdapter:
:vlm_calls,
:duration_ms,
:requires_human_review,
:payload,
CAST(:payload AS JSONB),
:created_at,
:updated_at
)
@@ -390,7 +399,7 @@ class StorageAdapter:
"vlm_calls": task.vlm_calls,
"duration_ms": task.duration_ms,
"requires_human_review": task.requires_human_review,
"payload": task.model_dump(mode="json"),
"payload": json.dumps(task.model_dump(mode="json"), ensure_ascii=False),
"created_at": task.created_at,
"updated_at": task.updated_at,
},
@@ -527,8 +536,9 @@ def _bundle_to_extracted(bundle: ExtractionBundle) -> dict[str, Any]:
"value": fv.value,
"confidence": float(fv.confidence) if fv.confidence else 0.0,
}
if fv.position is not None:
field_data["position"] = fv.position.model_dump(mode="json")
position_payload = _field_value_position_payload(fv)
if position_payload is not None:
field_data["position"] = position_payload
fields[name] = field_data
else:
fields[name] = {"value": fv}
@@ -637,11 +647,39 @@ def _extract_relevant_field_positions(
if f in positions:
continue
fv = bundle.fields.get(f)
if fv is not None and isinstance(fv, FieldValue) and fv.position is not None:
positions[f] = fv.position.model_dump(mode="json")
if fv is not None and isinstance(fv, FieldValue):
position_payload = _field_value_position_payload(fv)
if position_payload is not None:
positions[f] = position_payload
return positions
def _field_value_position_payload(fv: FieldValue) -> dict[str, Any] | None:
"""兼容原生 leaudit 新旧 FieldValue 结构,提取可落库的位置线索。"""
position = getattr(fv, "position", None)
if position is not None:
if hasattr(position, "model_dump"):
return position.model_dump(mode="json")
if isinstance(position, dict):
return position
metadata = fv.metadata if isinstance(fv.metadata, dict) else {}
payload: dict[str, Any] = {}
if "match_position" in metadata:
payload["matchPosition"] = metadata.get("match_position")
if "matched_text" in metadata:
payload["matchedText"] = metadata.get("matched_text")
if "page_num" in metadata:
payload["pageNum"] = metadata.get("page_num")
if "page_nums" in metadata:
payload["pageNums"] = metadata.get("page_nums")
if "bbox" in metadata:
payload["bbox"] = metadata.get("bbox")
return payload or None
def _rule_result_to_row(
document_id: int,
run_id: int | None,
@@ -1,11 +1,12 @@
"""LeAudit 文档镜像模型 —— leaudit_documents 表。
"""LeAudit 文档模型 —— leaudit_documents 表。
通过 biz_document_id 关联业务 documents 表,不复制业务字段
当前平台把它作为 LeAudit 自己的文档主表使用,不再依赖旧系统文档表
``biz_document_id`` 字段仅保留为内部追踪号,避免直接改库。
"""
from __future__ import annotations
from sqlalchemy import BigInteger, String, ForeignKey
from sqlalchemy import BigInteger, Boolean, Integer, String
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Mapped, mapped_column
@@ -13,33 +14,27 @@ from fastapi_common.fastapi_common_web.models import BaseModel
class LeauditDocument(BaseModel):
"""LeAudit 文档镜像表。"""
"""LeAudit 平台内部文档主表。"""
__tablename__ = "leaudit_documents"
Id: Mapped[int] = mapped_column("id", BigInteger, primary_key=True, autoincrement=True)
bizDocumentId: Mapped[int] = mapped_column("biz_document_id", BigInteger, unique=True, comment="关联业务 documents.id")
bizDocumentId: Mapped[int] = mapped_column("biz_document_id", BigInteger, unique=True, comment="内部追踪号(兼容旧字段名)")
typeId: Mapped[int | None] = mapped_column("type_id", BigInteger, comment="文档类型ID")
processingStatus: Mapped[str | None] = mapped_column("processing_status", String(64), default="waiting", comment="waiting/processing/completed/failed")
currentRunId: Mapped[int | None] = mapped_column("current_run_id", BigInteger, comment="最新有效 run id")
region: Mapped[str] = mapped_column(String(32), default="default", comment="所属地区: mz/yf/jy/cz/default")
versionGroupKey: Mapped[str | None] = mapped_column("version_group_key", String(64), comment="文档版本归档组键")
versionNo: Mapped[int] = mapped_column("version_no", Integer, default=1, comment="同一文档系列中的版本号")
previousVersionId: Mapped[int | None] = mapped_column("previous_version_id", BigInteger, comment="上一版本文档ID")
rootVersionId: Mapped[int | None] = mapped_column("root_version_id", BigInteger, comment="首版本文档ID")
isLatestVersion: Mapped[bool] = mapped_column("is_latest_version", Boolean, default=True, comment="是否当前最新版本")
normalizedName: Mapped[str | None] = mapped_column("normalized_name", String(512), comment="归一化文件名(不含扩展名)")
@classmethod
async def get_by_biz_id(cls, session: AsyncSession, bizDocumentId: int) -> "LeauditDocument | None":
"""按业务文档 ID 查询。"""
from sqlalchemy import select
return await session.scalar(select(cls).where(cls.bizDocumentId == bizDocumentId))
@classmethod
async def upsert_by_biz_id(cls, session: AsyncSession, bizDocumentId: int, **fields) -> "LeauditDocument":
"""按业务文档 ID 创建或更新。"""
from sqlalchemy import select
doc = await session.scalar(select(cls).where(cls.bizDocumentId == bizDocumentId))
if doc is None:
doc = cls(bizDocumentId=bizDocumentId, **fields)
session.add(doc)
else:
for k, v in fields.items():
setattr(doc, k, v)
async def create_new(cls, session: AsyncSession, **fields) -> "LeauditDocument":
    """Create a new platform-side document row for every upload.

    The new instance is built from ``fields``, added to ``session`` and
    flushed. No commit is issued here.
    """
    new_row = cls(**fields)
    session.add(new_row)
    await session.flush()
    return new_row
@@ -2,7 +2,7 @@
from abc import ABC, abstractmethod
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentListPageVO, DocumentUploadVO
class IDocumentService(ABC):
@@ -16,11 +16,25 @@ class IDocumentService(ABC):
ContentType: str | None,
TypeId: int | None = None,
TypeCode: str | None = None,
BizDocumentId: int | None = None,
Region: str = "default",
FileRole: str = "primary",
CreatedBy: int | None = None,
AutoRun: bool = False,
Speed: str = "normal",
) -> DocumentUploadVO:
"""上传文档并建立 LeAudit document/file 记录。"""
...
@abstractmethod
async def ListDocuments(
self,
Page: int = 1,
PageSize: int = 20,
Keyword: str | None = None,
TypeCode: str | None = None,
Region: str | None = None,
ProcessingStatus: str | None = None,
ResultStatus: str | None = None,
) -> DocumentListPageVO:
"""Return a page of the document list (latest versions only, with history summaries attached)."""
...
@@ -2,9 +2,13 @@
from __future__ import annotations
from datetime import datetime
import hashlib
import mimetypes
import re
import time
import unicodedata
import uuid
from pathlib import Path
from sqlalchemy import text
@@ -14,7 +18,12 @@ from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import DocumentUploadVO
from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import (
DocumentHistoryVersionVO,
DocumentListItemVO,
DocumentListPageVO,
DocumentUploadVO,
)
from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile
from fastapi_modules.fastapi_leaudit.services import IAuditService, IDocumentService, IOssService
from fastapi_modules.fastapi_leaudit.services.impl.auditServiceImpl import AuditServiceImpl
@@ -39,11 +48,11 @@ class DocumentServiceImpl(IDocumentService):
ContentType: str | None,
TypeId: int | None = None,
TypeCode: str | None = None,
BizDocumentId: int | None = None,
Region: str = "default",
FileRole: str = "primary",
CreatedBy: int | None = None,
AutoRun: bool = False,
Speed: str = "normal",
) -> DocumentUploadVO:
"""上传文档并建立 LeAudit document/file 记录。"""
if not FileName:
@@ -59,6 +68,9 @@ class DocumentServiceImpl(IDocumentService):
mimeType = ContentType or mimetypes.guess_type(FileName)[0] or "application/octet-stream"
fileSha256 = hashlib.sha256(FileContent).hexdigest()
fileSize = len(FileContent)
normalizedSpeed = _normalize_speed(Speed)
normalizedName = _normalize_document_name(FileName)
uploadedAt = datetime.now()
async with GetAsyncSession() as Session:
if TypeId is not None and TypeCode is not None:
@@ -107,69 +119,386 @@ class DocumentServiceImpl(IDocumentService):
resolvedTypeId = int(typeRow["id"])
resolvedTypeCode = str(typeRow["code"])
resolvedBizDocumentId = BizDocumentId or int(time.time() * 1000)
duplicateUpload = False
previousVersionId: int | None = None
rootVersionId: int | None = None
versionGroupKey: str | None = None
versionNo = 1
document = await LeauditDocument.upsert_by_biz_id(
Session,
bizDocumentId=resolvedBizDocumentId,
typeId=resolvedTypeId,
region=normalizedRegion,
processingStatus="waiting",
)
latestCandidate = None
if normalizedFileRole == "primary":
latestCandidate = await _find_latest_version_candidate(
Session,
type_id=resolvedTypeId,
region=normalizedRegion,
normalized_name=normalizedName,
)
versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
versionNo = f"v{versionCount + 1}"
objectKey = OssPathUtils.BuildBusinessDocKey(
Region=normalizedRegion,
TypeCode=resolvedTypeCode,
DocumentId=document.Id,
Version=versionNo,
FileRole=normalizedFileRole,
FileName=FileName,
)
ossUrl = await self.OssService.UploadBytes(
ObjectKey=objectKey,
Content=FileContent,
ContentType=mimeType,
)
if latestCandidate and latestCandidate["sha256"] == fileSha256:
duplicateUpload = True
document = await Session.get(LeauditDocument, int(latestCandidate["document_id"]))
documentFile = await Session.get(LeauditDocumentFile, int(latestCandidate["file_id"]))
if document is None or documentFile is None:
raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "重复上传版本定位失败")
await Session.commit()
else:
internalDocumentNo = time.time_ns()
if latestCandidate:
previousVersionId = int(latestCandidate["document_id"])
rootVersionId = int(latestCandidate["root_version_id"] or latestCandidate["document_id"])
versionGroupKey = str(latestCandidate["version_group_key"])
versionNo = int(latestCandidate["version_no"]) + 1
previousDocument = await Session.get(LeauditDocument, previousVersionId)
if previousDocument is not None:
previousDocument.isLatestVersion = False
else:
versionGroupKey = uuid.uuid4().hex
await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
documentFile = LeauditDocumentFile(
documentId=document.Id,
fileRole=normalizedFileRole,
fileName=FileName,
fileExt=fileExt,
mimeType=mimeType,
fileSize=fileSize,
sha256=fileSha256,
localPath=None,
ossUrl=ossUrl,
storageProvider="minio",
isActive=True,
createdBy=CreatedBy,
)
Session.add(documentFile)
await Session.flush()
await Session.commit()
await Session.refresh(document)
await Session.refresh(documentFile)
document = await LeauditDocument.create_new(
Session,
bizDocumentId=internalDocumentNo,
typeId=resolvedTypeId,
region=normalizedRegion,
processingStatus="waiting",
versionGroupKey=versionGroupKey,
versionNo=versionNo,
previousVersionId=previousVersionId,
rootVersionId=rootVersionId,
isLatestVersion=True,
normalizedName=normalizedName,
)
if document.rootVersionId is None:
document.rootVersionId = document.Id
rootVersionId = document.Id
else:
rootVersionId = document.rootVersionId
versionLabel = f"v{document.versionNo}"
objectKey = OssPathUtils.BuildBusinessDocKey(
Region=normalizedRegion,
TypeCode=resolvedTypeCode,
DocumentId=document.Id,
Version=versionLabel,
FileRole=normalizedFileRole,
FileName=FileName,
Year=uploadedAt.year,
Month=uploadedAt.month,
)
ossUrl = await self.OssService.UploadBytes(
ObjectKey=objectKey,
Content=FileContent,
ContentType=mimeType,
)
versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id)
_ = versionCount # single-version-per-document in current model; kept for future extension
await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id)
documentFile = LeauditDocumentFile(
documentId=document.Id,
fileRole=normalizedFileRole,
fileName=FileName,
fileExt=fileExt,
mimeType=mimeType,
fileSize=fileSize,
sha256=fileSha256,
localPath=None,
ossUrl=ossUrl,
storageProvider="minio",
isActive=True,
createdBy=CreatedBy,
)
Session.add(documentFile)
await Session.flush()
await Session.commit()
await Session.refresh(document)
await Session.refresh(documentFile)
ossUrl = documentFile.ossUrl or ""
run = None
processingStatus = document.processingStatus or "waiting"
if AutoRun:
run = await self.AuditService.Run(DocumentId=document.Id)
run = await self.AuditService.Run(
DocumentId=document.Id,
Speed=Speed,
Force=duplicateUpload,
)
processingStatus = "running" if run.status in {"pending", "running"} else run.status
return DocumentUploadVO(
documentId=document.Id,
bizDocumentId=document.bizDocumentId,
internalDocumentNo=document.bizDocumentId,
versionGroupKey=document.versionGroupKey or "",
versionNo=int(document.versionNo or 1),
previousVersionId=document.previousVersionId,
rootVersionId=int(document.rootVersionId or document.Id),
duplicateUpload=duplicateUpload,
fileId=documentFile.Id,
typeId=resolvedTypeId,
typeCode=resolvedTypeCode,
region=normalizedRegion,
fileName=documentFile.fileName,
ossUrl=ossUrl,
speed=normalizedSpeed,
processingStatus=processingStatus,
autoRunTriggered=AutoRun,
run=run,
)
async def ListDocuments(
    self,
    Page: int = 1,
    PageSize: int = 20,
    Keyword: str | None = None,
    TypeCode: str | None = None,
    Region: str | None = None,
    ProcessingStatus: str | None = None,
    ResultStatus: str | None = None,
) -> DocumentListPageVO:
    """Return one page of the document list.

    Only rows flagged as the latest version, not soft-deleted, and having an
    active ``primary`` file are listed. Each item carries summaries of the
    non-latest versions that share its ``version_group_key``.

    Args:
        Page: 1-based page number; values below 1 are treated as 1.
        PageSize: Items per page, clamped to the range 1..100.
        Keyword: Substring matched case-insensitively against the file name
            and the normalized name. It is matched literally: ``%``, ``_``
            and ``\\`` are escaped and do not act as wildcards.
        TypeCode: Exact document type code.
        Region: Exact region.
        ProcessingStatus: Exact document processing status.
        ResultStatus: Exact result status of the document's current run.

    Filters that are ``None``, empty or whitespace-only are ignored.

    Returns:
        The requested page together with the total count and page count.
    """
    page = max(1, int(Page))
    page_size = max(1, min(int(PageSize), 100))
    offset = (page - 1) * page_size

    filters = [
        "d.is_latest_version = true",
        "d.deleted_at IS NULL",
        "f.is_active = true",
        "f.file_role = 'primary'",
    ]
    filter_params: dict[str, object] = {}

    keyword = (Keyword or "").strip()
    if keyword:
        # Backslash is the default LIKE escape character in PostgreSQL.
        # Escape it first, then the wildcards, so user input such as
        # "report_v2" or "100%" is matched literally.
        escaped_keyword = (
            keyword.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        )
        filters.append("(f.file_name ILIKE :keyword OR d.normalized_name ILIKE :keyword)")
        filter_params["keyword"] = f"%{escaped_keyword}%"

    exact_filters = (
        ("dt.code = :type_code", "type_code", TypeCode),
        ("d.region = :region", "region", Region),
        ("d.processing_status = :processing_status", "processing_status", ProcessingStatus),
        ("ar.result_status = :result_status", "result_status", ResultStatus),
    )
    for condition, bind_name, raw_value in exact_filters:
        # Strip before testing, so a whitespace-only value does not become an
        # ``= ''`` predicate that silently matches nothing.
        value = (raw_value or "").strip()
        if value:
            filters.append(condition)
            filter_params[bind_name] = value

    where_clause = " AND ".join(filters)
    # The count query has no LIMIT/OFFSET, so only the list query receives
    # the paging binds.
    page_params: dict[str, object] = {**filter_params, "limit": page_size, "offset": offset}

    count_sql = text(
        f"""
        SELECT COUNT(*)
        FROM leaudit_documents d
        JOIN leaudit_document_files f
          ON f.document_id = d.id
        LEFT JOIN leaudit_document_types dt
          ON dt.id = d.type_id
        LEFT JOIN leaudit_audit_runs ar
          ON ar.id = d.current_run_id
        WHERE {where_clause}
        """
    )
    list_sql = text(
        f"""
        SELECT
            d.id AS document_id,
            d.biz_document_id AS internal_document_no,
            d.version_group_key,
            d.version_no,
            d.root_version_id,
            d.previous_version_id,
            d.type_id,
            dt.code AS type_code,
            d.region,
            d.normalized_name,
            d.processing_status,
            d.current_run_id,
            d.updated_at,
            f.id AS file_id,
            f.file_name,
            f.file_ext,
            f.mime_type,
            f.file_size,
            f.oss_url,
            ar.status AS run_status,
            ar.result_status,
            ar.total_score,
            ar.passed_count,
            ar.failed_count,
            ar.skipped_count,
            vc.total_versions,
            COALESCE(vc.total_versions, 1) > 1 AS has_history
        FROM leaudit_documents d
        JOIN leaudit_document_files f
          ON f.document_id = d.id
        LEFT JOIN leaudit_document_types dt
          ON dt.id = d.type_id
        LEFT JOIN leaudit_audit_runs ar
          ON ar.id = d.current_run_id
        LEFT JOIN (
            SELECT version_group_key, COUNT(*) AS total_versions
            FROM leaudit_documents
            WHERE deleted_at IS NULL
            GROUP BY version_group_key
        ) vc
          ON vc.version_group_key = d.version_group_key
        WHERE {where_clause}
        ORDER BY d.updated_at DESC, d.id DESC
        LIMIT :limit OFFSET :offset
        """
    )
    history_sql = text(
        """
        SELECT
            d.version_group_key,
            d.id AS document_id,
            d.version_no,
            d.processing_status,
            d.updated_at,
            f.id AS file_id,
            f.file_name,
            f.file_ext,
            ar.status AS run_status,
            ar.result_status
        FROM leaudit_documents d
        JOIN leaudit_document_files f
          ON f.document_id = d.id
         AND f.is_active = true
         AND f.file_role = 'primary'
        LEFT JOIN leaudit_audit_runs ar
          ON ar.id = d.current_run_id
        WHERE d.version_group_key = ANY(:group_keys)
          AND d.is_latest_version = false
          AND d.deleted_at IS NULL
        ORDER BY d.version_group_key, d.version_no DESC, d.id DESC
        """
    )

    async with GetAsyncSession() as Session:
        total = int((await Session.execute(count_sql, filter_params)).scalar_one())
        rows = (await Session.execute(list_sql, page_params)).mappings().all()

        # Fetch the history of every group on this page in a single query.
        history_by_group: dict[str, list[DocumentHistoryVersionVO]] = {}
        group_keys = [str(row["version_group_key"]) for row in rows if row["version_group_key"]]
        if group_keys:
            history_rows = (
                await Session.execute(history_sql, {"group_keys": group_keys})
            ).mappings().all()
            for row in history_rows:
                history_by_group.setdefault(str(row["version_group_key"]), []).append(
                    DocumentHistoryVersionVO(
                        documentId=int(row["document_id"]),
                        fileId=int(row["file_id"]) if row["file_id"] is not None else None,
                        versionNo=int(row["version_no"]),
                        fileName=row["file_name"],
                        fileExt=row["file_ext"],
                        processingStatus=row["processing_status"],
                        runStatus=row["run_status"],
                        resultStatus=row["result_status"],
                        updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None,
                    )
                )

    documents: list[DocumentListItemVO] = []
    for row in rows:
        # Rows without a group key get "" and therefore an empty history.
        group_key = str(row["version_group_key"] or "")
        documents.append(
            DocumentListItemVO(
                documentId=int(row["document_id"]),
                internalDocumentNo=int(row["internal_document_no"]),
                versionGroupKey=group_key,
                versionNo=int(row["version_no"] or 1),
                # A row with no stored root id is reported as its own root.
                rootVersionId=int(row["root_version_id"] or row["document_id"]),
                previousVersionId=int(row["previous_version_id"]) if row["previous_version_id"] is not None else None,
                typeId=int(row["type_id"]) if row["type_id"] is not None else None,
                typeCode=row["type_code"],
                region=row["region"],
                normalizedName=row["normalized_name"],
                fileId=int(row["file_id"]) if row["file_id"] is not None else None,
                fileName=row["file_name"],
                fileExt=row["file_ext"],
                mimeType=row["mime_type"],
                fileSize=int(row["file_size"]) if row["file_size"] is not None else None,
                ossUrl=row["oss_url"],
                processingStatus=row["processing_status"],
                currentRunId=int(row["current_run_id"]) if row["current_run_id"] is not None else None,
                runStatus=row["run_status"],
                resultStatus=row["result_status"],
                totalScore=float(row["total_score"]) if row["total_score"] is not None else None,
                passedCount=int(row["passed_count"]) if row["passed_count"] is not None else None,
                failedCount=int(row["failed_count"]) if row["failed_count"] is not None else None,
                skippedCount=int(row["skipped_count"]) if row["skipped_count"] is not None else None,
                updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None,
                hasHistory=bool(row["has_history"]),
                totalVersions=int(row["total_versions"] or 1),
                historyVersions=history_by_group.get(group_key, []),
            )
        )

    # Ceiling division; an empty result reports zero pages.
    total_pages = (total + page_size - 1) // page_size if total else 0
    return DocumentListPageVO(
        total=total,
        page=page,
        pageSize=page_size,
        totalPages=total_pages,
        documents=documents,
    )
async def _find_latest_version_candidate(
    session,
    *,
    type_id: int,
    region: str,
    normalized_name: str,
) -> dict | None:
    """Look up the current latest version that a new upload would succeed.

    The match is on document type, region and normalized name, restricted to
    rows flagged as latest version, not soft-deleted, and joined to their
    active ``primary`` file. The highest version number wins, with the
    largest id as tie-breaker.

    Returns:
        A dict with ``document_id``, ``version_group_key``, ``version_no``,
        ``root_version_id``, ``file_id`` and ``sha256``, or ``None`` when no
        row matches.
    """
    lookup_sql = text(
        """
        SELECT
            d.id AS document_id,
            d.version_group_key,
            d.version_no,
            d.root_version_id,
            f.id AS file_id,
            f.sha256
        FROM leaudit_documents d
        JOIN leaudit_document_files f
          ON f.document_id = d.id
         AND f.is_active = true
         AND f.file_role = 'primary'
        WHERE d.type_id = :type_id
          AND d.region = :region
          AND d.normalized_name = :normalized_name
          AND d.is_latest_version = true
          AND d.deleted_at IS NULL
        ORDER BY d.version_no DESC, d.id DESC
        LIMIT 1
        """
    )
    bind_values = {
        "type_id": type_id,
        "region": region,
        "normalized_name": normalized_name,
    }
    cursor = await session.execute(lookup_sql, bind_values)
    newest = cursor.mappings().first()
    if not newest:
        return None
    return dict(newest)
def _normalize_speed(speed: str | None) -> str:
"""Normalize front-end speed selection to urgent/normal."""
normalized = (speed or "").strip().lower()
if normalized in {"urgent", "high", "fast", "emergency", "紧急"}:
return "urgent"
return "normal"
def _normalize_document_name(file_name: str) -> str:
"""Build a stable name key for same-name version matching."""
stem = Path(file_name).stem
name = unicodedata.normalize("NFKC", stem).strip().lower()
name = re.sub(r"[\s_\-]+", " ", name)
name = re.sub(r"(?:\(|)\d+(?:\)|)$", "", name).strip()
name = re.sub(r"(?:[-_\s]*副本|[-_\s]*copy)$", "", name).strip()
name = re.sub(r"\s+", " ", name).strip()
return name or "untitled"