"""文档服务实现。""" from __future__ import annotations import json import logging logger = logging.getLogger(__name__) from typing import Any from datetime import date as date_type, datetime import hashlib import mimetypes import re import tempfile import time import unicodedata import uuid from copy import deepcopy from pathlib import Path import fitz from leaudit.converters import doc2pdf from sqlalchemy import bindparam, text from docx import Document as DocxDocument from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils from fastapi_modules.fastapi_leaudit.domian.vo.documentVo import ( ContractTemplateUploadVO, DocumentAttachmentVO, DocumentDetailVO, DocumentHistoryVersionVO, DocumentListItemVO, DocumentListPageVO, DocumentStatusItemVO, DocumentTypeRootCreateDTO, DocumentTypeRootItemVO, DocumentTypeRootUpdateDTO, DocumentUpdateDTO, DocumentTypeCreateDTO, DocumentTypeItemVO, DocumentTypeUpdateDTO, DocumentUploadVO, ) from fastapi_modules.fastapi_leaudit.domian.vo.reviewPointVo import ( DocumentConfirmVO, ReviewPointAuditVO, ReviewPointInfoVO, ReviewPointResultVO, ReviewPointStatsVO, ReviewPointsAggregateVO, ) from fastapi_modules.fastapi_leaudit.domian.vo.pageQualityVo import PageQualitySummaryVO from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile from fastapi_modules.fastapi_leaudit.leaudit_bridge.fileSourceResolver import FileSourceResolver from fastapi_modules.fastapi_leaudit.services import IAuditService, IDocumentService, IOssService from fastapi_modules.fastapi_leaudit.services.impl.auditServiceImpl import AuditServiceImpl from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl from fastapi_modules.fastapi_leaudit.services.impl.pageQualityServiceImpl import PageQualityServiceImpl from fastapi_modules.fastapi_leaudit.services.impl.ruleGroupSupport import sync_group_bindings_from_doc_type from fastapi_modules.fastapi_leaudit.services.impl.ssoUserCompat import SsoUserCompat from fastapi_modules.fastapi_leaudit.services.impl.tenantResolver import TenantResolver class DocumentServiceImpl(IDocumentService): """文档服务实现。""" def __init__( self, OssService: IOssService | None = None, AuditService: IAuditService | None = None, ) -> None: self.OssService = OssService or OssServiceImpl() self.AuditService = AuditService or AuditServiceImpl() self.PageQualityService = PageQualityServiceImpl() self.TenantResolver = TenantResolver() _PUBLIC_REGION = "公共" async def Upload( self, FileName: str, FileContent: bytes, ContentType: str | None, TypeId: int | None = None, TypeCode: str | None = None, GroupId: int | None = None, Region: str | None = None, FileRole: str = "primary", CreatedBy: int | None = None, TenantCode: str | None = None, TenantName: str | None = None, Attachments: list[tuple[str, bytes, str | None]] | None = None, AutoRun: bool = False, Speed: str = "normal", ReviewScope: str = "standard", ) -> DocumentUploadVO: """上传文档并建立 LeAudit document/file 记录。""" if not FileName: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件名不能为空") if not FileContent: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件内容不能为空") if not TypeId and not TypeCode: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "typeId 与 typeCode 至少传一个") normalizedRegion = await self._resolve_document_region( Region=Region, TenantCode=TenantCode, TenantName=TenantName, ) currentUserContext: dict[str, Any] | None = None if CreatedBy is not None: currentUserContext = await self._getCurrentUserContext(CreatedBy) normalizedRegion = self._resolve_upload_region( currentUserContext, requestedRegion=normalizedRegion, requestedTenantCode=TenantCode, ) resolvedTenant = await self.TenantResolver.Resolve( RawValue=normalizedRegion, Source="document_upload_response", PreferredTenantCode=str(TenantCode or "").strip() or None, FallbackTenantName=TenantName, ) normalizedFileRole = (FileRole or "primary").strip() or "primary" fileExt = Path(FileName).suffix.lstrip(".").lower() or None mimeType = ContentType or mimetypes.guess_type(FileName)[0] or "application/octet-stream" fileSha256 = hashlib.sha256(FileContent).hexdigest() fileSize = len(FileContent) normalizedSpeed = _normalize_speed(Speed) normalizedName = _normalize_document_name(FileName) uploadedAt = datetime.now() async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) if TypeId is not None and TypeCode is not None: typeResult = await Session.execute( text( """ SELECT id, code FROM leaudit_document_types WHERE id = :type_id AND code = :type_code AND deleted_at IS NULL LIMIT 1 """ ), {"type_id": TypeId, "type_code": TypeCode}, ) elif TypeId is not None: typeResult = await Session.execute( text( """ SELECT id, code FROM leaudit_document_types WHERE id = :type_id AND deleted_at IS NULL LIMIT 1 """ ), {"type_id": TypeId}, ) else: typeResult = await Session.execute( text( """ SELECT id, code FROM leaudit_document_types WHERE code = :type_code AND deleted_at IS NULL LIMIT 1 """ ), {"type_code": TypeCode}, ) typeRow = typeResult.mappings().first() if not typeRow: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档类型不存在或已停用") resolvedTypeId = int(typeRow["id"]) resolvedTypeCode = str(typeRow["code"]) resolvedGroupId = await self._resolveDocumentGroupId(Session, resolvedTypeId, GroupId) resolvedRootGroupId = await self._resolveDocumentRootGroupId(Session, resolvedTypeId, resolvedGroupId) duplicateUpload = False previousVersionId: int | None = None rootVersionId: int | None = None versionGroupKey: str | None = None versionNo = 1 latestCandidate = None if normalizedFileRole == "primary": latestCandidate = await _find_latest_version_candidate( Session, type_id=resolvedTypeId, root_group_id=resolvedRootGroupId, tenant_code=resolvedTenant.tenant_code, region=normalizedRegion, normalized_name=normalizedName, file_ext=fileExt, ) internalDocumentNo = time.time_ns() if latestCandidate: previousVersionId = int(latestCandidate["document_id"]) rootVersionId = int(latestCandidate["root_version_id"] or latestCandidate["document_id"]) versionGroupKey = str(latestCandidate["version_group_key"]) versionNo = int(latestCandidate["version_no"]) + 1 previousDocument = await Session.get(LeauditDocument, previousVersionId) if previousDocument is not None: previousDocument.isLatestVersion = False else: versionGroupKey = uuid.uuid4().hex document = await LeauditDocument.create_new( Session, bizDocumentId=internalDocumentNo, typeId=resolvedTypeId, groupId=resolvedGroupId, tenantCode=resolvedTenant.tenant_code, region=normalizedRegion, processingStatus="waiting", versionGroupKey=versionGroupKey, versionNo=versionNo, previousVersionId=previousVersionId, rootVersionId=rootVersionId, isLatestVersion=True, normalizedName=normalizedName, reviewScope=ReviewScope, ) if document.rootVersionId is None: document.rootVersionId = document.Id rootVersionId = document.Id else: rootVersionId = document.rootVersionId versionLabel = f"v{document.versionNo}" objectKey = OssPathUtils.BuildBusinessDocKey( Region=normalizedRegion, TypeCode=resolvedTypeCode, DocumentId=document.Id, Version=versionLabel, FileRole=normalizedFileRole, FileName=FileName, Year=uploadedAt.year, Month=uploadedAt.month, ) ossUrl = await self.OssService.UploadBytes( ObjectKey=objectKey, Content=FileContent, ContentType=mimeType, ) versionCount = await LeauditDocumentFile.count_by_document(Session, document.Id) _ = versionCount # single-version-per-document in current model; kept for future extension await LeauditDocumentFile.deactivate_active_by_document(Session, document.Id) documentFile = LeauditDocumentFile( documentId=document.Id, fileRole=normalizedFileRole, fileName=FileName, fileExt=fileExt, mimeType=mimeType, fileSize=fileSize, sha256=fileSha256, localPath=None, ossUrl=ossUrl, storageProvider="minio", isActive=True, createdBy=CreatedBy, ) Session.add(documentFile) await Session.flush() await Session.commit() await Session.refresh(document) await Session.refresh(documentFile) if normalizedFileRole == "primary" and Attachments: await self._appendAttachmentFiles( Session=Session, DocumentId=document.Id, TypeCode=resolvedTypeCode, Region=normalizedRegion, VersionNo=int(document.versionNo or 1), Files=Attachments, CreatedBy=CreatedBy, ) ossUrl = documentFile.ossUrl or "" run = None pageQualityRun = None processingStatus = document.processingStatus or "waiting" if AutoRun: run = await self.AuditService.Run( DocumentId=document.Id, Speed=Speed, Force=duplicateUpload, TriggerUserId=CreatedBy, ) processingStatus = "running" if run.status in {"pending", "running"} else run.status if normalizedFileRole == "primary": try: pageQualityRun = await self.PageQualityService.DispatchForDocument( DocumentId=document.Id, TriggerUserId=CreatedBy, Force=duplicateUpload, Speed=normalizedSpeed, ) except Exception as exc: logger.warning("dispatch page quality failed for document_id=%s: %s", document.Id, exc) return DocumentUploadVO( documentId=document.Id, internalDocumentNo=document.bizDocumentId, versionGroupKey=document.versionGroupKey or "", versionNo=int(document.versionNo or 1), previousVersionId=document.previousVersionId, rootVersionId=int(document.rootVersionId or document.Id), duplicateUpload=duplicateUpload, fileId=documentFile.Id, typeId=resolvedTypeId, typeCode=resolvedTypeCode, groupId=resolvedGroupId, region=normalizedRegion, tenantCode=resolvedTenant.tenant_code, tenantName=resolvedTenant.tenant_name or normalizedRegion, fileName=documentFile.fileName, ossUrl=ossUrl, speed=normalizedSpeed, processingStatus=processingStatus, autoRunTriggered=AutoRun, pageQualityRunId=pageQualityRun.runId if pageQualityRun else None, pageQualityRunStatus=pageQualityRun.status if pageQualityRun else None, pageQualitySummaryStatus=None, run=run, ) async def ListDocuments( self, CurrentUserId: int, Page: int = 1, PageSize: int = 20, Keyword: str | None = None, DocumentNumber: str | None = None, TypeCode: str | None = None, TypeIds: list[int] | None = None, EntryModuleId: int | None = None, Region: str | None = None, TenantCode: str | None = None, ProcessingStatus: str | None = None, ResultStatus: str | None = None, AuditStatus: int | None = None, UserId: int | None = None, DateFrom: str | None = None, DateTo: str | None = None, ) -> DocumentListPageVO: """获取文档列表(仅最新版本,附历史版本摘要)。""" page = max(1, int(Page)) page_size = max(1, min(int(PageSize), 100)) offset = (page - 1) * page_size async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) documentColumns = await self._loadDocumentColumns(Session) currentUser = await self._getCurrentUserContext(CurrentUserId) requestedTenant = await self.TenantResolver.Resolve( RawValue=(Region or "").strip() or None, Source="document_list_scope", PreferredTenantCode=str(TenantCode or "").strip() or None, ) filters = [ "d.is_latest_version = true", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'", ] params: dict[str, object] = {"limit": page_size, "offset": offset} filters.extend( self._buildDocumentScopeFilters( CurrentUserId=CurrentUserId, CurrentUser=currentUser, Params=params, RequestedRegion=requestedTenant.tenant_name or requestedTenant.normalized_value or Region, RequestedTenantCode=requestedTenant.tenant_code or TenantCode, RequestedUserId=UserId, DocumentAlias="d", FileAlias="f", ) ) if Keyword: if "document_number" in documentColumns: filters.append("(f.file_name ILIKE :keyword OR d.normalized_name ILIKE :keyword OR d.document_number ILIKE :keyword)") else: filters.append("(f.file_name ILIKE :keyword OR d.normalized_name ILIKE :keyword)") params["keyword"] = f"%{Keyword.strip()}%" if DocumentNumber and "document_number" in documentColumns: filters.append("d.document_number ILIKE :document_number") params["document_number"] = f"%{DocumentNumber.strip()}%" if TypeCode: filters.append("dt.code = :type_code") params["type_code"] = TypeCode.strip() if TypeIds: normalizedTypeIds = [int(typeId) for typeId in TypeIds if int(typeId) > 0] if normalizedTypeIds: filters.append("d.type_id = ANY(:type_ids)") params["type_ids"] = normalizedTypeIds if EntryModuleId is not None and int(EntryModuleId) > 0: filters.append( "COALESCE(eg.entry_module_id, eg_parent.entry_module_id, dt.entry_module_id) = :entry_module_id" ) params["entry_module_id"] = int(EntryModuleId) if ProcessingStatus: filters.append("d.processing_status = :processing_status") params["processing_status"] = ProcessingStatus.strip() if ResultStatus: filters.append("ar.result_status = :result_status") params["result_status"] = ResultStatus.strip() if DateFrom: filters.append("d.created_at >= :date_from") params["date_from"] = date_type.fromisoformat(DateFrom.strip()) if DateTo: filters.append("d.created_at < (CAST(:date_to AS date) + INTERVAL '1 day')") params["date_to"] = date_type.fromisoformat(DateTo.strip()) filters.append("COALESCE(d.review_scope, 'standard') != 'cross_review'") where_clause = " AND ".join(filters) derivedAuditStatusExpr = """ CASE WHEN LOWER(COALESCE(ar.status, '')) IN ('queued', 'running') OR LOWER(COALESCE(d.processing_status, '')) = 'running' THEN 2 WHEN LOWER(COALESCE(d.processing_status, '')) IN ('waiting', 'pending') THEN 0 WHEN LOWER(COALESCE(d.processing_status, '')) = 'failed' THEN -1 WHEN COALESCE(ar.failed_count, 0) > 0 THEN -1 WHEN COALESCE(ar.passed_count, 0) > 0 THEN 1 ELSE 0 END """.strip() persistedOrDerivedAuditStatusExpr = ( f"COALESCE(d.audit_status, {derivedAuditStatusExpr})" if "audit_status" in documentColumns else derivedAuditStatusExpr ) if AuditStatus is not None: filters.append(f"{persistedOrDerivedAuditStatusExpr} = :audit_status") params["audit_status"] = int(AuditStatus) where_clause = " AND ".join(filters) groupIdSelectExpr = "d.group_id" if "group_id" in documentColumns else "NULL::bigint" # Transitional display fallback: if the document itself has not yet persisted a child group, # but its document type currently maps to exactly one active child group, surface it in list/detail UI. resolvedGroupIdExpr = f"COALESCE({groupIdSelectExpr}, dg.inferred_group_id)" resolvedGroupNameExpr = ( f"CASE WHEN {groupIdSelectExpr} IS NOT NULL THEN eg.name ELSE dg.inferred_group_name END" ) optionalSelects = [ f"{resolvedGroupIdExpr} AS group_id", "d.document_number AS document_number" if "document_number" in documentColumns else "NULL::text AS document_number", f"{persistedOrDerivedAuditStatusExpr} AS audit_status", "d.is_test_document AS is_test_document" if "is_test_document" in documentColumns else "FALSE AS is_test_document", "dt.name AS type_name", f"{resolvedGroupNameExpr} AS group_name", "COALESCE(NULLIF(BTRIM(d.tenant_code), ''), NULL) AS tenant_code", """ CASE WHEN NULLIF(BTRIM(d.tenant_code), '') = 'PUBLIC' THEN '公共' WHEN NULLIF(BTRIM(d.tenant_code), '') = 'PROVINCIAL' THEN '省级' ELSE COALESCE(NULLIF(BTRIM(d.region), ''), '') END AS tenant_name """.strip(), ] count_sql = text( f""" SELECT COUNT(*) FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id LEFT JOIN leaudit_document_types dt ON dt.id = d.type_id LEFT JOIN leaudit_evaluation_point_groups eg ON eg.id = d.group_id LEFT JOIN leaudit_evaluation_point_groups eg_parent ON eg_parent.id = eg.pid LEFT JOIN ( SELECT document_type_id, CASE WHEN COUNT(*) = 1 THEN MIN(id) END AS inferred_group_id, CASE WHEN COUNT(*) = 1 THEN MIN(name) END AS inferred_group_name FROM leaudit_evaluation_point_groups WHERE pid <> 0 AND deleted_at IS NULL AND is_enabled = true AND document_type_id IS NOT NULL GROUP BY document_type_id ) dg ON dg.document_type_id = d.type_id LEFT JOIN leaudit_audit_runs ar ON ar.id = d.current_run_id WHERE {where_clause} """ ) list_sql = text( f""" SELECT d.id AS document_id, d.biz_document_id AS internal_document_no, d.version_group_key, d.version_no, d.root_version_id, d.previous_version_id, d.type_id, dt.code AS type_code, d.region, d.normalized_name, d.processing_status, d.current_run_id, d.updated_at, f.id AS file_id, f.file_name, f.file_ext, f.mime_type, f.file_size, f.oss_url, ar.status AS run_status, ar.result_status, ar.total_score, ar.passed_count, ar.failed_count, ar.skipped_count, {', '.join(optionalSelects)}, vc.total_versions, COALESCE(vc.total_versions, 1) > 1 AS has_history FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id LEFT JOIN leaudit_document_types dt ON dt.id = d.type_id LEFT JOIN leaudit_evaluation_point_groups eg ON eg.id = d.group_id LEFT JOIN leaudit_evaluation_point_groups eg_parent ON eg_parent.id = eg.pid LEFT JOIN ( SELECT document_type_id, CASE WHEN COUNT(*) = 1 THEN MIN(id) END AS inferred_group_id, CASE WHEN COUNT(*) = 1 THEN MIN(name) END AS inferred_group_name FROM leaudit_evaluation_point_groups WHERE pid <> 0 AND deleted_at IS NULL AND is_enabled = true AND document_type_id IS NOT NULL GROUP BY document_type_id ) dg ON dg.document_type_id = d.type_id LEFT JOIN leaudit_audit_runs ar ON ar.id = d.current_run_id LEFT JOIN ( SELECT version_group_key, COUNT(*) AS total_versions FROM leaudit_documents WHERE deleted_at IS NULL GROUP BY version_group_key ) vc ON vc.version_group_key = d.version_group_key WHERE {where_clause} ORDER BY d.updated_at DESC, d.id DESC LIMIT :limit OFFSET :offset """ ) history_sql = text( """ SELECT d.version_group_key, d.id AS document_id, d.version_no, d.processing_status, d.updated_at, f.id AS file_id, f.file_name, f.file_ext, f.file_size, f.oss_url, ar.status AS run_status, ar.result_status, ar.total_score, ar.passed_count, ar.failed_count, ar.skipped_count FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id AND f.is_active = true AND f.file_role = 'primary' LEFT JOIN leaudit_audit_runs ar ON ar.id = d.current_run_id WHERE d.version_group_key = ANY(:group_keys) AND d.is_latest_version = false AND d.deleted_at IS NULL ORDER BY d.version_group_key, d.version_no DESC, d.id DESC """ ) async with GetAsyncSession() as Session: total = int((await Session.execute(count_sql, params)).scalar_one()) rows = (await Session.execute(list_sql, params)).mappings().all() history_by_group: dict[str, list[DocumentHistoryVersionVO]] = {} page_quality_map = await self._loadLatestPageQualitySummaryMap( Session, [int(row["document_id"]) for row in rows], ) group_keys = [str(row["version_group_key"]) for row in rows if row["version_group_key"]] if group_keys: history_rows = ( await Session.execute(history_sql, {"group_keys": group_keys}) ).mappings().all() for row in history_rows: history_by_group.setdefault(str(row["version_group_key"]), []).append( DocumentHistoryVersionVO( documentId=int(row["document_id"]), fileId=int(row["file_id"]) if row["file_id"] is not None else None, versionNo=int(row["version_no"]), fileName=row["file_name"], fileExt=row["file_ext"], fileSize=int(row["file_size"]) if row["file_size"] is not None else None, ossUrl=row["oss_url"], processingStatus=row["processing_status"], runStatus=row["run_status"], resultStatus=row["result_status"], totalScore=int(row["total_score"]) if row["total_score"] is not None else None, passedCount=int(row["passed_count"]) if row["passed_count"] is not None else None, failedCount=int(row["failed_count"]) if row["failed_count"] is not None else None, skippedCount=int(row["skipped_count"]) if row["skipped_count"] is not None else None, updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None, ) ) documents: list[DocumentListItemVO] = [] for row in rows: quality = page_quality_map.get(int(row["document_id"]), {}) group_key = str(row["version_group_key"] or "") documents.append( DocumentListItemVO( documentId=int(row["document_id"]), internalDocumentNo=int(row["internal_document_no"]), versionGroupKey=group_key, versionNo=int(row["version_no"] or 1), rootVersionId=int(row["root_version_id"] or row["document_id"]), previousVersionId=int(row["previous_version_id"]) if row["previous_version_id"] is not None else None, typeId=int(row["type_id"]) if row["type_id"] is not None else None, typeCode=row["type_code"], typeName=row["type_name"], groupId=int(row["group_id"]) if row["group_id"] is not None else None, groupName=row["group_name"], region=row["region"], tenantCode=row["tenant_code"], tenantName=row["tenant_name"], normalizedName=row["normalized_name"], fileId=int(row["file_id"]) if row["file_id"] is not None else None, fileName=row["file_name"], fileExt=row["file_ext"], mimeType=row["mime_type"], fileSize=int(row["file_size"]) if row["file_size"] is not None else None, ossUrl=row["oss_url"], processingStatus=row["processing_status"], currentRunId=int(row["current_run_id"]) if row["current_run_id"] is not None else None, runStatus=row["run_status"], resultStatus=row["result_status"], totalScore=float(row["total_score"]) if row["total_score"] is not None else None, passedCount=int(row["passed_count"]) if row["passed_count"] is not None else None, failedCount=int(row["failed_count"]) if row["failed_count"] is not None else None, skippedCount=int(row["skipped_count"]) if row["skipped_count"] is not None else None, documentNumber=row["document_number"], auditStatus=int(row["audit_status"]) if row["audit_status"] is not None else None, isTestDocument=bool(row["is_test_document"]), pageQualityRunId=quality.get("pageQualityRunId"), pageQualityRunStatus=quality.get("pageQualityRunStatus"), pageQualitySummaryStatus=quality.get("pageQualitySummaryStatus"), pageQualityIssueCount=int(quality.get("pageQualityIssueCount") or 0), pageQualityWarningText=quality.get("pageQualityWarningText"), updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None, hasHistory=bool(row["has_history"]), totalVersions=int(row["total_versions"] or 1), historyVersions=history_by_group.get(group_key, []), ) ) total_pages = (total + page_size - 1) // page_size if total else 0 return DocumentListPageVO( total=total, page=page, pageSize=page_size, totalPages=total_pages, documents=documents, ) async def GetDocument(self, CurrentUserId: int, Id: int) -> DocumentDetailVO: """获取单个文档详情,并执行数据隔离校验。""" async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) currentUser = await self._getCurrentUserContext(CurrentUserId) documentColumns = await self._loadDocumentColumns(Session) detail = await self._getDocumentDetail(Session, Id, CurrentUserId, currentUser, documentColumns) if not detail: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") return detail async def GetReviewPoints(self, CurrentUserId: int, DocumentId: int) -> ReviewPointsAggregateVO: """获取评查详情页聚合数据。""" async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) await self._ensureReviewPointAuditTable(Session) await self._ensureCrossReviewProposalSchema(Session) currentUser = await self._getCurrentUserContext(CurrentUserId) documentColumns = await self._loadDocumentColumns(Session) detail = await self._getDocumentDetail(Session, DocumentId, CurrentUserId, currentUser, documentColumns) if not detail and await self._hasCrossReviewDocumentAccess(Session, DocumentId, CurrentUserId): detail = await self._getDocumentDetail( Session, DocumentId, CurrentUserId, currentUser, documentColumns, BypassScopeCheck=True, ) if not detail: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") runRow = await self._loadReviewRunRow(Session, detail) reviewPoints: list[ReviewPointResultVO] = [] stats = ReviewPointStatsVO() if runRow: reviewPoints = await self._loadReviewPointResults(Session, detail, int(runRow["id"])) stats = self._buildReviewPointStats(reviewPoints) approvedSupplementDelta = await self._loadApprovedSupplementScoreDelta(Session, detail.documentId) if approvedSupplementDelta: stats.score += approvedSupplementDelta documentPayload = await self._buildReviewDocumentPayload(Session, detail, runRow) reviewInfo = self._buildReviewInfo(runRow, reviewPoints, stats) comparisonDocument = await self._loadComparisonDocument(Session, detail.documentId) scoringProposals = await self._loadScoringProposals(Session, detail.documentId) return ReviewPointsAggregateVO( data=reviewPoints, stats=stats, reviewInfo=reviewInfo, document=documentPayload, comparison_document=comparisonDocument, scoring_proposals=scoringProposals, ) async def AuditReviewPoint( self, CurrentUserId: int, ReviewPointResultId: int, Result: str, Message: str, ) -> ReviewPointAuditVO: """更新单个评查点的人工审核状态。""" normalizedResult = (Result or "").strip().lower() if normalizedResult not in {"true", "false", "review"}: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "result 仅支持 true、false、review") async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) await self._ensureReviewPointAuditTable(Session) row = ( await Session.execute( text( """ SELECT document_id FROM leaudit_rule_results WHERE id = :result_id LIMIT 1 """ ), {"result_id": ReviewPointResultId}, ) ).mappings().first() if not row: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "评查点结果不存在") documentId = int(row["document_id"]) currentUser = await self._getCurrentUserContext(CurrentUserId) documentColumns = await self._loadDocumentColumns(Session) detail = await self._getDocumentDetail(Session, documentId, CurrentUserId, currentUser, documentColumns) if not detail and await self._hasCrossReviewDocumentAccess(Session, documentId, CurrentUserId): detail = await self._getDocumentDetail( Session, documentId, CurrentUserId, currentUser, documentColumns, BypassScopeCheck=True, ) if not detail: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") payload = { "rule_result_id": ReviewPointResultId, "document_id": documentId, "edit_audit_status": 0 if normalizedResult == "review" else 1, "override_result": None if normalizedResult == "review" else (normalizedResult == "true"), "message": "" if normalizedResult == "review" else (Message or ""), "reviewed_by": CurrentUserId, } auditRow = ( await Session.execute( text( """ INSERT INTO leaudit_review_point_audits ( rule_result_id, document_id, edit_audit_status, override_result, message, reviewed_by, created_at, updated_at ) VALUES ( :rule_result_id, :document_id, :edit_audit_status, :override_result, :message, :reviewed_by, NOW(), NOW() ) ON CONFLICT (rule_result_id) DO UPDATE SET edit_audit_status = EXCLUDED.edit_audit_status, override_result = EXCLUDED.override_result, message = EXCLUDED.message, reviewed_by = EXCLUDED.reviewed_by, updated_at = NOW() RETURNING id, edit_audit_status, override_result, message """ ), payload, ) ).mappings().first() await Session.commit() return ReviewPointAuditVO( reviewPointResultId=ReviewPointResultId, editAuditStatusId=int(auditRow["id"]), editAuditStatus=int(auditRow["edit_audit_status"] or 0), overrideResult=bool(auditRow["override_result"]) if auditRow["override_result"] is not None else None, message=str(auditRow["message"] or ""), ) async def ConfirmReviewResults(self, CurrentUserId: int, DocumentId: int) -> DocumentConfirmVO: """确认文档评查结果。""" async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) currentUser = await self._getCurrentUserContext(CurrentUserId) documentColumns = await self._loadDocumentColumns(Session) detail = await self._getDocumentDetail(Session, DocumentId, CurrentUserId, currentUser, documentColumns) if not detail and await self._hasCrossReviewDocumentAccess(Session, DocumentId, CurrentUserId): detail = await self._getDocumentDetail( Session, DocumentId, CurrentUserId, currentUser, documentColumns, BypassScopeCheck=True, ) if not detail: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") if "audit_status" in documentColumns: await Session.execute( text( """ UPDATE leaudit_documents SET audit_status = 1, updated_at = NOW() WHERE id = :document_id AND deleted_at IS NULL """ ), {"document_id": DocumentId}, ) await Session.commit() return DocumentConfirmVO(documentId=DocumentId, auditStatus=1) async def GetDocumentsStatus(self, CurrentUserId: int, Ids: list[int]) -> list[DocumentStatusItemVO]: """批量获取文档状态,并执行数据隔离校验。""" normalizedIds = [int(docId) for docId in Ids if int(docId) > 0] if not normalizedIds: return [] async with GetAsyncSession() as Session: currentUser = await self._getCurrentUserContext(CurrentUserId) params: dict[str, object] = {"ids": normalizedIds} filters = [ "d.id = ANY(:ids)", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'", ] filters.extend( self._buildDocumentScopeFilters( CurrentUserId=CurrentUserId, CurrentUser=currentUser, Params=params, DocumentAlias="d", FileAlias="f", ) ) rows = ( await Session.execute( text( f""" SELECT d.id AS document_id, d.processing_status, ar.status AS run_status, ar.phase, ar.result_status, d.updated_at FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id LEFT JOIN leaudit_audit_runs ar ON ar.id = d.current_run_id WHERE {' AND '.join(filters)} ORDER BY d.updated_at DESC, d.id DESC """ ), params, ) ).mappings().all() page_quality_map = await self._loadLatestPageQualitySummaryMap( Session, [int(row["document_id"]) for row in rows], ) return [ DocumentStatusItemVO( documentId=int(row["document_id"]), processingStatus=row["processing_status"], runStatus=row["run_status"], phase=row["phase"], resultStatus=row["result_status"], pageQualityRunId=page_quality_map.get(int(row["document_id"]), {}).get("pageQualityRunId"), pageQualityRunStatus=page_quality_map.get(int(row["document_id"]), {}).get("pageQualityRunStatus"), pageQualitySummaryStatus=page_quality_map.get(int(row["document_id"]), {}).get("pageQualitySummaryStatus"), pageQualityReviewPageCount=int( page_quality_map.get(int(row["document_id"]), {}).get("pageQualityReviewPageCount") or 0 ), pageQualityRejectPageCount=int( page_quality_map.get(int(row["document_id"]), {}).get("pageQualityRejectPageCount") or 0 ), updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None, ) for row in rows ] async def UploadContractTemplate( self, CurrentUserId: int, DocumentId: int, FileName: str, FileContent: bytes, ContentType: str | None, ComparisonId: int | None = None, ) -> ContractTemplateUploadVO: """为现有合同文档上传结构对比模板,并写入 contract_structure_comparison。""" if DocumentId <= 0: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "文档ID不能为空") if not FileName: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "模板文件名不能为空") if not FileContent: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "模板文件内容不能为空") fileExt = Path(FileName).suffix.lstrip(".").lower() if fileExt not in {"pdf", "doc", "docx"}: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "模板文件仅支持 pdf/doc/docx") mimeType = ContentType or mimetypes.guess_type(FileName)[0] or "application/octet-stream" fileSize = len(FileContent) uploadedAt = datetime.now() async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) await self._ensureContractStructureComparisonTable(Session) currentUser = await self._getCurrentUserContext(CurrentUserId) documentColumns = await self._loadDocumentColumns(Session) detail = await self._getDocumentDetail(Session, DocumentId, CurrentUserId, currentUser, documentColumns) if not detail and await self._hasCrossReviewDocumentAccess(Session, DocumentId, CurrentUserId): detail = await self._getDocumentDetail( Session, DocumentId, CurrentUserId, currentUser, documentColumns, BypassScopeCheck=True, ) if not detail: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "目标文档不存在或无权限访问") versionLabel = f"v{int(detail.versionNo or 1)}" objectKey = OssPathUtils.BuildBusinessDocKey( Region=detail.region or "公共", TypeCode=detail.typeCode or "contract", DocumentId=DocumentId, Version=versionLabel, FileRole="template", FileName=FileName, Year=uploadedAt.year, Month=uploadedAt.month, ) ossUrl = await self.OssService.UploadBytes( ObjectKey=objectKey, Content=FileContent, ContentType=mimeType, ) comparisonId = await self._upsertContractStructureComparison( Session=Session, DocumentId=DocumentId, ComparisonId=ComparisonId, TemplateName=FileName, TemplatePath=ossUrl, FileSize=fileSize, ) await Session.commit() return ContractTemplateUploadVO( documentId=DocumentId, comparisonId=comparisonId, templateName=FileName, templateContractPath=ossUrl, fileSize=fileSize, status="uploaded", ) async def UpdateDocument(self, CurrentUserId: int, Id: int, Body: DocumentUpdateDTO) -> DocumentDetailVO: """更新文档元数据,并执行数据隔离校验。""" async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) currentUser = await self._getCurrentUserContext(CurrentUserId) documentColumns = await self._loadDocumentColumns(Session) detail = await self._getDocumentDetail(Session, Id, CurrentUserId, currentUser, documentColumns) if not detail: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") assignments: list[str] = [] params: dict[str, object] = {"id": Id} if Body.documentNumber is not None and "document_number" in documentColumns: assignments.append("document_number = :document_number") params["document_number"] = Body.documentNumber.strip() or None if Body.remark is not None and "remark" in documentColumns: assignments.append("remark = :remark") params["remark"] = Body.remark.strip() or None if Body.isTestDocument is not None and "is_test_document" in documentColumns: assignments.append("is_test_document = :is_test_document") params["is_test_document"] = Body.isTestDocument if Body.auditStatus is not None and "audit_status" in documentColumns: assignments.append("audit_status = :audit_status") params["audit_status"] = Body.auditStatus if assignments: assignments.append("updated_at = NOW()") await Session.execute( text(f"UPDATE leaudit_documents SET {', '.join(assignments)} WHERE id = :id AND deleted_at IS NULL"), params, ) await Session.commit() refreshed = await self._getDocumentDetail(Session, Id, CurrentUserId, currentUser, documentColumns) if not refreshed: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") return refreshed async def _appendAttachmentFiles( self, Session, *, DocumentId: int, TypeCode: str, Region: str, VersionNo: int, Files: list[tuple[str, bytes, str | None]], CreatedBy: int | None, ) -> None: """为文档追加附件文件记录。""" versionLabel = f"v{VersionNo}" uploadedAt = datetime.now() for fileName, fileContent, contentType in Files: if not fileName: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "附件文件名不能为空") if not fileContent: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, f"附件 {fileName} 内容不能为空") fileExt = Path(fileName).suffix.lstrip(".").lower() or None mimeType = contentType or mimetypes.guess_type(fileName)[0] or "application/octet-stream" fileSha256 = hashlib.sha256(fileContent).hexdigest() fileSize = len(fileContent) objectKey = OssPathUtils.BuildBusinessDocKey( Region=Region, TypeCode=TypeCode, DocumentId=DocumentId, Version=versionLabel, FileRole="attachment", FileName=fileName, Year=uploadedAt.year, Month=uploadedAt.month, ) ossUrl = await self.OssService.UploadBytes( ObjectKey=objectKey, Content=fileContent, ContentType=mimeType, ) attachment = LeauditDocumentFile( documentId=DocumentId, fileRole="attachment", fileName=fileName, fileExt=fileExt, mimeType=mimeType, fileSize=fileSize, sha256=fileSha256, localPath=None, ossUrl=ossUrl, storageProvider="minio", isActive=True, createdBy=CreatedBy, ) Session.add(attachment) await Session.commit() async def _load_active_primary_file_row( self, Session, *, DocumentId: int, ) -> dict[str, Any]: row = ( await Session.execute( text( """ SELECT id, file_name, file_ext, mime_type, file_role, oss_url, local_path FROM leaudit_document_files WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role = 'primary' ORDER BY id DESC LIMIT 1 """ ), {"document_id": DocumentId}, ) ).mappings().first() if not row: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少主文件") return dict(row) def _normalize_document_source_kind(self, file_name: str, file_ext: str | None, mime_type: str | None) -> str: suffix = (f".{str(file_ext).lstrip('.')}" if file_ext else Path(file_name).suffix).lower() normalizedMime = (mime_type or "").lower() if suffix == ".pdf" or normalizedMime == "application/pdf": return "pdf" if suffix == ".docx" or normalizedMime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document": return "docx" raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前仅支持 PDF 或 DOCX 主文档追加附件") def _normalize_attachment_source_kind(self, file_name: str, file_ext: str | None, mime_type: str | None) -> str: suffix = (f".{str(file_ext).lstrip('.')}" if file_ext else Path(file_name).suffix).lower() normalizedMime = (mime_type or "").lower() if suffix == ".pdf" or normalizedMime == "application/pdf": return "pdf" if suffix == ".docx" or normalizedMime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document": return "docx" raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, f"附件 {file_name} 仅支持 PDF 或 DOCX 格式") def _validate_attachment_matrix( self, *, MainSourceKind: str, Files: list[tuple[str, bytes, str | None]], ) -> None: for fileName, _, contentType in Files: attachmentKind = self._normalize_attachment_source_kind(fileName, Path(fileName).suffix.lstrip(".").lower() or None, contentType) if MainSourceKind == "docx" and attachmentKind != "docx": raise LeauditException( StatusCodeEnum.HTTP_400_BAD_REQUEST, "源文档为 DOCX 时,仅允许追加 DOCX 附件,且合并结果仍为 DOCX", ) def _merge_docx_paths_for_merge( self, *, DocxPaths: list[str], TempPaths: list[str], ) -> str: if not DocxPaths: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "缺少可合并的 DOCX 文件") mergedTemp = tempfile.NamedTemporaryFile(suffix=".docx", delete=False) mergedTemp.close() TempPaths.append(mergedTemp.name) baseDoc = DocxDocument(DocxPaths[0]) for attachmentPath in DocxPaths[1:]: attachmentDoc = DocxDocument(attachmentPath) if baseDoc.paragraphs: baseDoc.add_page_break() for element in attachmentDoc.element.body: baseDoc.element.body.append(deepcopy(element)) baseDoc.save(mergedTemp.name) return mergedTemp.name async def _cloneActiveFilesToNewDocument( self, Session, *, SourceDocumentId: int, TargetDocumentId: int, CreatedBy: int | None, IncludeAttachments: bool = True, ) -> None: """复制当前文档的主文件与现有附件到新版本文档。""" fileRoles = ["primary", "attachment"] if IncludeAttachments else ["primary"] sourceFiles = ( await Session.execute( text( """ SELECT id, file_role, file_name, file_ext, mime_type, file_size, sha256, local_path, oss_url, storage_provider FROM leaudit_document_files WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role = ANY(:file_roles) ORDER BY CASE WHEN file_role = 'primary' THEN 0 ELSE 1 END, id ASC """ ).bindparams(bindparam("file_roles", expanding=False)), {"document_id": SourceDocumentId, "file_roles": fileRoles}, ) ).mappings().all() if not sourceFiles: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "源文档缺少可复制的有效文件") for row in sourceFiles: clonedFile = LeauditDocumentFile( documentId=TargetDocumentId, fileRole=str(row["file_role"] or "attachment"), fileName=str(row["file_name"] or ""), fileExt=row["file_ext"], mimeType=row["mime_type"], fileSize=int(row["file_size"]) if row["file_size"] is not None else None, sha256=row["sha256"], localPath=row["local_path"], ossUrl=row["oss_url"], storageProvider=row["storage_provider"], isActive=True, createdBy=CreatedBy, ) Session.add(clonedFile) await Session.flush() async def _buildMergedPdfBytesForDocument( self, Session, *, DocumentId: int, ) -> tuple[bytes, str]: """读取当前主文件与附件,生成新的主 PDF 内容。""" primaryFile = ( await Session.execute( text( """ SELECT id FROM leaudit_document_files WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role = 'primary' ORDER BY id DESC LIMIT 1 """ ), {"document_id": DocumentId}, ) ).scalar_one_or_none() if primaryFile is None: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少主文件,无法生成合并文件") attachmentRows = ( await Session.execute( text( """ SELECT id FROM leaudit_document_files WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role = 'attachment' ORDER BY id ASC """ ), {"document_id": DocumentId}, ) ).scalars().all() if not attachmentRows: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档没有附件,无法生成合并文件") resolver = FileSourceResolver() primaryModel = await Session.get(LeauditDocumentFile, int(primaryFile)) if primaryModel is None: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "主文件记录不存在,无法生成合并文件") primaryPayload = await resolver.ResolvePayload(primaryModel) attachmentModels: list[LeauditDocumentFile] = [] for attachmentId in attachmentRows: attachmentModel = await Session.get(LeauditDocumentFile, int(attachmentId)) if attachmentModel is not None: attachmentModels.append(attachmentModel) attachmentPayloads = await resolver.ResolvePayloads(attachmentModels) tempPaths: list[str] = [] try: mainLocalPath = self._write_temp_file_for_merge( FileName=primaryPayload.fileName, Content=primaryPayload.fileContent, TempPaths=tempPaths, ) mainPdfPath = self._convert_source_to_pdf( SourcePath=mainLocalPath, TempPaths=tempPaths, ) pdfPaths = [mainPdfPath] for attachmentPayload in attachmentPayloads: attachmentLocalPath = self._write_temp_file_for_merge( FileName=attachmentPayload.fileName, Content=attachmentPayload.fileContent, TempPaths=tempPaths, ) attachmentPdfPath = self._convert_source_to_pdf( SourcePath=attachmentLocalPath, TempPaths=tempPaths, ) pdfPaths.append(attachmentPdfPath) mergedPdfPath = self._merge_pdf_paths_for_merge( PdfPaths=pdfPaths, TempPaths=tempPaths, ) mergedBytes = Path(mergedPdfPath).read_bytes() mergedName = f"{Path(primaryPayload.fileName).stem}.pdf" return mergedBytes, mergedName except LeauditException: raise except Exception as error: raise LeauditException( StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, f"生成合并PDF失败: {error}", ) from error finally: for tempPath in reversed(tempPaths): try: pathObj = Path(tempPath) if pathObj.is_file(): pathObj.unlink(missing_ok=True) except Exception: pass async def _buildMergedDocxBytesForDocument( self, Session, *, DocumentId: int, ) -> tuple[bytes, str]: """读取当前主文件与附件,生成新的主 DOCX 内容。""" primaryFile = await self._load_active_primary_file_row(Session, DocumentId=DocumentId) if self._normalize_document_source_kind( str(primaryFile["file_name"] or ""), primaryFile.get("file_ext"), primaryFile.get("mime_type"), ) != "docx": raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "仅 DOCX 主文档支持生成 DOCX 合并结果") attachmentRows = ( await Session.execute( text( """ SELECT id FROM leaudit_document_files WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role = 'attachment' ORDER BY id ASC """ ), {"document_id": DocumentId}, ) ).scalars().all() if not attachmentRows: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档没有附件,无法生成合并文件") resolver = FileSourceResolver() primaryModel = await Session.get(LeauditDocumentFile, int(primaryFile["id"])) if primaryModel is None: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "主文件记录不存在,无法生成合并文件") primaryPayload = await resolver.ResolvePayload(primaryModel) attachmentModels: list[LeauditDocumentFile] = [] for attachmentId in attachmentRows: attachmentModel = await Session.get(LeauditDocumentFile, int(attachmentId)) if attachmentModel is not None: attachmentModels.append(attachmentModel) attachmentPayloads = await resolver.ResolvePayloads(attachmentModels) tempPaths: list[str] = [] try: docxPaths = [ self._write_temp_file_for_merge( FileName=primaryPayload.fileName, Content=primaryPayload.fileContent, TempPaths=tempPaths, ) ] for attachmentPayload in attachmentPayloads: attachmentKind = self._normalize_attachment_source_kind( attachmentPayload.fileName, Path(attachmentPayload.fileName).suffix.lstrip(".").lower() or None, None, ) if attachmentKind != "docx": raise LeauditException( StatusCodeEnum.HTTP_400_BAD_REQUEST, f"DOCX 主文档仅允许追加 DOCX 附件,当前附件 {attachmentPayload.fileName} 不符合要求", ) docxPaths.append( self._write_temp_file_for_merge( FileName=attachmentPayload.fileName, Content=attachmentPayload.fileContent, TempPaths=tempPaths, ) ) mergedDocxPath = self._merge_docx_paths_for_merge( DocxPaths=docxPaths, TempPaths=tempPaths, ) mergedBytes = Path(mergedDocxPath).read_bytes() mergedName = f"{Path(primaryPayload.fileName).stem}.docx" return mergedBytes, mergedName finally: for tempPath in reversed(tempPaths): try: pathObj = Path(tempPath) if pathObj.is_file(): pathObj.unlink(missing_ok=True) except Exception: pass def _write_temp_file_for_merge( self, *, FileName: str, Content: bytes, TempPaths: list[str], ) -> str: """为持久化合并流程写入临时文件。""" suffix = Path(FileName).suffix or ".bin" with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tempFile: tempFile.write(Content) tempPath = tempFile.name TempPaths.append(tempPath) return tempPath def _convert_source_to_pdf( self, *, SourcePath: str, TempPaths: list[str], ) -> str: """将源文件转换为 PDF。""" source = Path(SourcePath) if source.suffix.lower() == ".pdf": return str(source) pdfTemp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) pdfTemp.close() TempPaths.append(pdfTemp.name) doc2pdf.convert(source, pdfTemp.name, soffice="auto", pdfa=False, force=True, verify=False) return pdfTemp.name def _merge_pdf_paths_for_merge( self, *, PdfPaths: list[str], TempPaths: list[str], ) -> str: """合并多个 PDF 为一个临时 PDF。""" mergedTemp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) mergedTemp.close() TempPaths.append(mergedTemp.name) output = fitz.open() try: for pdfPath in PdfPaths: source = fitz.open(pdfPath) try: output.insert_pdf(source) finally: source.close() output.save(mergedTemp.name) finally: output.close() return mergedTemp.name async def _persistMergedPdfForDocument( self, Session, *, DocumentId: int, TypeCode: str, Region: str, VersionNo: int, CreatedBy: int | None, FileRole: str = "primary", ) -> None: """为当前文档生成并替换主 PDF 文件。""" mergedBytes, mergedName = await self._buildMergedPdfBytesForDocument( Session, DocumentId=DocumentId, ) mergedSha256 = hashlib.sha256(mergedBytes).hexdigest() mergedSize = len(mergedBytes) uploadedAt = datetime.now() versionLabel = f"v{VersionNo}" objectKey = OssPathUtils.BuildBusinessDocKey( Region=Region, TypeCode=TypeCode, DocumentId=DocumentId, Version=versionLabel, FileRole=FileRole, FileName=mergedName, Year=uploadedAt.year, Month=uploadedAt.month, ) ossUrl = await self.OssService.UploadBytes( ObjectKey=objectKey, Content=mergedBytes, ContentType="application/pdf", ) if FileRole == "primary": await Session.execute( text( """ UPDATE leaudit_document_files SET is_active = false WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role IN ('primary', 'merged_pdf', 'merged_docx') """ ), {"document_id": DocumentId}, ) else: await Session.execute( text( """ UPDATE leaudit_document_files SET is_active = false WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role = :file_role """ ), {"document_id": DocumentId, "file_role": FileRole}, ) Session.add( LeauditDocumentFile( documentId=DocumentId, fileRole=FileRole, fileName=mergedName, fileExt="pdf", mimeType="application/pdf", fileSize=mergedSize, sha256=mergedSha256, localPath=None, ossUrl=ossUrl, storageProvider="minio", isActive=True, createdBy=CreatedBy, ) ) await Session.flush() async def _persistMergedDocxForDocument( self, Session, *, DocumentId: int, TypeCode: str, Region: str, VersionNo: int, CreatedBy: int | None, ) -> None: """为当前文档生成并替换主 DOCX 文件。""" mergedBytes, mergedName = await self._buildMergedDocxBytesForDocument( Session, DocumentId=DocumentId, ) mergedSha256 = hashlib.sha256(mergedBytes).hexdigest() mergedSize = len(mergedBytes) uploadedAt = datetime.now() versionLabel = f"v{VersionNo}" objectKey = OssPathUtils.BuildBusinessDocKey( Region=Region, TypeCode=TypeCode, DocumentId=DocumentId, Version=versionLabel, FileRole="primary", FileName=mergedName, Year=uploadedAt.year, Month=uploadedAt.month, ) ossUrl = await self.OssService.UploadBytes( ObjectKey=objectKey, Content=mergedBytes, ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document", ) await Session.execute( text( """ UPDATE leaudit_document_files SET is_active = false WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role IN ('primary', 'merged_pdf', 'merged_docx') """ ), {"document_id": DocumentId}, ) Session.add( LeauditDocumentFile( documentId=DocumentId, fileRole="primary", fileName=mergedName, fileExt="docx", mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document", fileSize=mergedSize, sha256=mergedSha256, localPath=None, ossUrl=ossUrl, storageProvider="minio", isActive=True, createdBy=CreatedBy, ) ) await Session.flush() async def _createNextVersionFromExistingDocument( self, Session, *, SourceDocumentId: int, CreatedBy: int, Remark: str | None = None, ) -> tuple[LeauditDocument, str, str]: """基于现有文档创建一个新版本,并复制当前主文件/附件。""" documentColumns = await self._loadDocumentColumns(Session) optionalSelects = [ "d.document_number AS document_number" if "document_number" in documentColumns else "NULL::text AS document_number", "d.remark AS remark" if "remark" in documentColumns else "NULL::text AS remark", "d.is_test_document AS is_test_document" if "is_test_document" in documentColumns else "FALSE AS is_test_document", "d.audit_status AS audit_status" if "audit_status" in documentColumns else "NULL::integer AS audit_status", ] sourceRow = ( await Session.execute( text( """ SELECT d.id, d.biz_document_id, d.type_id, d.group_id, COALESCE(NULLIF(BTRIM(d.tenant_code), ''), NULL) AS tenant_code, d.region, d.processing_status, d.version_group_key, d.version_no, d.previous_version_id, d.root_version_id, d.is_latest_version, d.normalized_name, d.review_scope, """ + ",\n ".join(optionalSelects) + """ , dt.code AS type_code FROM leaudit_documents d LEFT JOIN leaudit_document_types dt ON dt.id = d.type_id WHERE d.id = :document_id AND d.deleted_at IS NULL LIMIT 1 """ ), {"document_id": SourceDocumentId}, ) ).mappings().first() if not sourceRow: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") resolvedTypeCode = str(sourceRow["type_code"] or "").strip() if not resolvedTypeCode: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少文档类型编码,无法创建新版本") previousDocument = await Session.get(LeauditDocument, SourceDocumentId) if previousDocument is None: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") previousDocument.isLatestVersion = False documentFields: dict[str, object] = { "bizDocumentId": time.time_ns(), "typeId": int(sourceRow["type_id"]) if sourceRow["type_id"] is not None else None, "groupId": int(sourceRow["group_id"]) if sourceRow["group_id"] is not None else None, "tenantCode": sourceRow["tenant_code"], "region": str(sourceRow["region"] or "公共").strip() or "公共", "processingStatus": "waiting", "versionGroupKey": str(sourceRow["version_group_key"] or uuid.uuid4().hex), "versionNo": int(sourceRow["version_no"] or 1) + 1, "previousVersionId": SourceDocumentId, "rootVersionId": int(sourceRow["root_version_id"] or SourceDocumentId), "isLatestVersion": True, "normalizedName": sourceRow["normalized_name"], "reviewScope": str(sourceRow["review_scope"] or "standard"), } newDocument = await LeauditDocument.create_new(Session, **documentFields) if newDocument.rootVersionId is None: newDocument.rootVersionId = newDocument.Id assignments: list[str] = [] params: dict[str, object] = {"id": int(newDocument.Id)} if "document_number" in documentColumns: assignments.append("document_number = :document_number") params["document_number"] = sourceRow["document_number"] if "remark" in documentColumns: assignments.append("remark = :remark") params["remark"] = Remark.strip() if Remark and Remark.strip() else sourceRow["remark"] if "is_test_document" in documentColumns: assignments.append("is_test_document = :is_test_document") params["is_test_document"] = bool(sourceRow["is_test_document"]) if "audit_status" in documentColumns: assignments.append("audit_status = :audit_status") params["audit_status"] = int(sourceRow["audit_status"]) if sourceRow["audit_status"] is not None else None if assignments: assignments.append("updated_at = NOW()") await Session.execute( text(f"UPDATE leaudit_documents SET {', '.join(assignments)} WHERE id = :id"), params, ) await self._cloneActiveFilesToNewDocument( Session, SourceDocumentId=SourceDocumentId, TargetDocumentId=newDocument.Id, CreatedBy=CreatedBy, IncludeAttachments=False, ) await Session.flush() return newDocument, resolvedTypeCode, str(sourceRow["region"] or "公共").strip() or "公共" def _normalizeAttachmentMergeMode(self, MergeMode: str | None) -> str: """标准化附件合并模式。""" normalizedMode = (MergeMode or "new").strip().lower() if normalizedMode not in {"overwrite", "new"}: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "mergeMode 仅支持 overwrite 或 new") return normalizedMode async def DeleteDocument(self, CurrentUserId: int, Id: int) -> None: """软删除文档,并执行数据隔离校验。""" async with GetAsyncSession() as Session: currentUser = await self._getCurrentUserContext(CurrentUserId) filters = [ "d.id = :id", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'", ] params: dict[str, object] = {"id": Id} filters.extend( self._buildDocumentScopeFilters( CurrentUserId=CurrentUserId, CurrentUser=currentUser, Params=params, DocumentAlias="d", FileAlias="f", ) ) row = ( await Session.execute( text( f""" SELECT d.id, d.version_group_key, d.is_latest_version FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id WHERE {' AND '.join(filters)} LIMIT 1 """ ), params, ) ).mappings().first() if not row: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") versionGroupKey = str(row["version_group_key"] or "") isLatestVersion = bool(row["is_latest_version"]) await Session.execute( text( """ UPDATE leaudit_documents SET deleted_at = NOW(), is_latest_version = false, updated_at = NOW() WHERE id = :id AND deleted_at IS NULL """ ), {"id": Id}, ) await Session.execute( text( """ UPDATE leaudit_document_files SET is_active = false WHERE document_id = :id AND is_active = true """ ), {"id": Id}, ) if isLatestVersion and versionGroupKey: nextLatest = ( await Session.execute( text( """ SELECT id FROM leaudit_documents WHERE version_group_key = :group_key AND deleted_at IS NULL ORDER BY version_no DESC, id DESC LIMIT 1 """ ), {"group_key": versionGroupKey}, ) ).scalar_one_or_none() if nextLatest is not None: await Session.execute( text( """ UPDATE leaudit_documents SET is_latest_version = true, updated_at = NOW() WHERE id = :id """ ), {"id": int(nextLatest)}, ) await Session.commit() async def AppendAttachments( self, CurrentUserId: int, Id: int, Files: list[tuple[str, bytes, str | None]], MergeMode: str = "overwrite", Remark: str | None = None, ) -> DocumentDetailVO: """为现有文档追加附件,并执行数据隔离校验。""" if not Files: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "至少上传一个附件文件") normalizedMergeMode = self._normalizeAttachmentMergeMode(MergeMode) async with GetAsyncSession() as Session: await self._ensureDocumentGroupColumn(Session) currentUser = await self._getCurrentUserContext(CurrentUserId) documentColumns = await self._loadDocumentColumns(Session) detail = await self._getDocumentDetail(Session, Id, CurrentUserId, currentUser, documentColumns) if not detail: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") documentMeta = ( await Session.execute( text( """ SELECT d.id AS document_id, d.type_id, d.version_no, d.region, dt.code AS type_code FROM leaudit_documents d LEFT JOIN leaudit_document_types dt ON dt.id = d.type_id WHERE d.id = :document_id AND d.deleted_at IS NULL LIMIT 1 """ ), {"document_id": Id}, ) ).mappings().first() if not documentMeta: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") resolvedTypeCode = str(documentMeta["type_code"] or "").strip() if not resolvedTypeCode: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少文档类型编码,无法追加附件") primaryFileRow = await self._load_active_primary_file_row(Session, DocumentId=Id) mainSourceKind = self._normalize_document_source_kind( str(primaryFileRow["file_name"] or ""), primaryFileRow.get("file_ext"), primaryFileRow.get("mime_type"), ) self._validate_attachment_matrix( MainSourceKind=mainSourceKind, Files=Files, ) targetDocumentId = int(documentMeta["document_id"]) targetVersionNo = int(documentMeta["version_no"] or 1) normalizedRegion = str(documentMeta["region"] or "公共").strip() or "公共" if normalizedMergeMode == "new": newDocument, resolvedTypeCode, normalizedRegion = await self._createNextVersionFromExistingDocument( Session, SourceDocumentId=Id, CreatedBy=CurrentUserId, Remark=Remark, ) targetDocumentId = int(newDocument.Id) targetVersionNo = int(newDocument.versionNo or (int(documentMeta["version_no"] or 1) + 1)) await self._appendAttachmentFiles( Session=Session, DocumentId=targetDocumentId, TypeCode=resolvedTypeCode, Region=normalizedRegion, VersionNo=targetVersionNo, Files=Files, CreatedBy=CurrentUserId, ) if mainSourceKind == "docx": await self._persistMergedDocxForDocument( Session, DocumentId=targetDocumentId, TypeCode=resolvedTypeCode, Region=normalizedRegion, VersionNo=targetVersionNo, CreatedBy=CurrentUserId, ) await self._persistMergedPdfForDocument( Session, DocumentId=targetDocumentId, TypeCode=resolvedTypeCode, Region=normalizedRegion, VersionNo=targetVersionNo, CreatedBy=CurrentUserId, FileRole="merged_pdf", ) else: await self._persistMergedPdfForDocument( Session, DocumentId=targetDocumentId, TypeCode=resolvedTypeCode, Region=normalizedRegion, VersionNo=targetVersionNo, CreatedBy=CurrentUserId, ) await Session.commit() try: await self.PageQualityService.DispatchForDocument( DocumentId=targetDocumentId, TriggerUserId=CurrentUserId, Force=True, Speed="normal", ) except Exception as exc: logger.warning("dispatch page quality after append attachments failed for document_id=%s: %s", targetDocumentId, exc) refreshed = await self.GetDocument(CurrentUserId=CurrentUserId, Id=targetDocumentId) if not refreshed: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问") return refreshed async def ListDocumentTypes(self, Ids: list[int] | None = None, EntryModuleId: int | None = None) -> list[DocumentTypeItemVO]: """获取文档类型列表。""" async with GetAsyncSession() as Session: rows, bindingsMap = await self._queryDocumentTypes(Session, Ids, EntryModuleId) return [ DocumentTypeItemVO( id=int(r["id"]), name=str(r["name"] or ""), code=str(r["code"] or ""), description=r.get("description"), entryModuleId=r.get("entry_module_id"), isEnabled=bool(r.get("is_enabled", True)), ruleSetIds=bindingsMap.get(int(r["id"]), []), ) for r in rows ] async def GetDocumentType(self, Id: int) -> DocumentTypeItemVO: """获取文档类型详情。""" async with GetAsyncSession() as Session: rows, bindingsMap = await self._queryDocumentTypes(Session, [Id]) if not rows: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档类型不存在") r = rows[0] return DocumentTypeItemVO( id=int(r["id"]), name=str(r["name"] or ""), code=str(r["code"] or ""), description=r.get("description"), entryModuleId=r.get("entry_module_id"), isEnabled=bool(r.get("is_enabled", True)), ruleSetIds=bindingsMap.get(int(r["id"]), []), ) async def CreateDocumentType(self, Body: DocumentTypeCreateDTO) -> DocumentTypeItemVO: """创建文档类型。""" async with GetAsyncSession() as Session: existing = ( await Session.execute( text("SELECT 1 FROM leaudit_document_types WHERE code = :code AND deleted_at IS NULL LIMIT 1"), {"code": Body.code.strip()}, ) ).first() if existing: raise LeauditException(StatusCodeEnum.HTTP_409_CONFLICT, f"文档类型编码 {Body.code} 已存在") row = ( await Session.execute( text( """ INSERT INTO leaudit_document_types (code, name, description, entry_module_id, is_enabled, sort_order, created_at, updated_at) VALUES (:code, :name, :description, :entry_module_id, :is_enabled, :sort_order, NOW(), NOW()) RETURNING id """ ), { "code": Body.code.strip(), "name": Body.name.strip(), "description": Body.description.strip() or None, "entry_module_id": Body.entryModuleId, "is_enabled": Body.isEnabled, "sort_order": Body.sortOrder, }, ) ).scalar_one() await self._syncRuleBindings(Session, int(row), Body.ruleSetIds, self._PUBLIC_REGION) await sync_group_bindings_from_doc_type(Session, int(row), Body.ruleSetIds) await Session.commit() return await self.GetDocumentType(int(row)) async def UpdateDocumentType(self, Id: int, Body: DocumentTypeUpdateDTO) -> DocumentTypeItemVO: """更新文档类型。""" async with GetAsyncSession() as Session: current = ( await Session.execute( text("SELECT id FROM leaudit_document_types WHERE id = :id AND deleted_at IS NULL"), {"id": Id}, ) ).first() if not current: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档类型不存在") providedFields = set(getattr(Body, "model_fields_set", set())) sets: list[str] = [] params: dict[str, object] = {"id": Id} if "name" in providedFields and Body.name is not None: sets.append("name = :name") params["name"] = Body.name.strip() if "description" in providedFields: sets.append("description = :description") params["description"] = Body.description.strip() if Body.description is not None else None if params["description"] == "": params["description"] = None if "entryModuleId" in providedFields: sets.append("entry_module_id = :entry_module_id") params["entry_module_id"] = Body.entryModuleId if "isEnabled" in providedFields and Body.isEnabled is not None: sets.append("is_enabled = :is_enabled") params["is_enabled"] = Body.isEnabled if "sortOrder" in providedFields and Body.sortOrder is not None: sets.append("sort_order = :sort_order") params["sort_order"] = Body.sortOrder if sets: sets.append("updated_at = NOW()") await Session.execute(text(f"UPDATE leaudit_document_types SET {', '.join(sets)} WHERE id = :id"), params) if "ruleSetIds" in providedFields and Body.ruleSetIds is not None: await self._syncRuleBindings(Session, Id, Body.ruleSetIds, self._PUBLIC_REGION) await sync_group_bindings_from_doc_type(Session, Id, Body.ruleSetIds) await Session.commit() return await self.GetDocumentType(Id) async def DeleteDocumentType(self, Id: int) -> None: """软删除文档类型。""" async with GetAsyncSession() as Session: current = ( await Session.execute( text("SELECT 1 FROM leaudit_document_types WHERE id = :id AND deleted_at IS NULL"), {"id": Id}, ) ).first() if not current: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档类型不存在") await Session.execute(text("UPDATE leaudit_document_types SET deleted_at = NOW() WHERE id = :id"), {"id": Id}) await Session.execute(text("UPDATE leaudit_rule_type_bindings SET deleted_at = NOW() WHERE doc_type_id = :id AND deleted_at IS NULL"), {"id": Id}) await Session.commit() async def ListDocumentTypeRoots(self, EntryModuleId: int | None = None) -> list[DocumentTypeRootItemVO]: """获取一级文档类型(业务大类)列表。""" async with GetAsyncSession() as Session: rows = await self._queryDocumentTypeRoots(Session, EntryModuleId=EntryModuleId) return [self._toDocumentTypeRootVo(row) for row in rows] async def GetDocumentTypeRoot(self, Id: int) -> DocumentTypeRootItemVO: """获取一级文档类型(业务大类)详情。""" async with GetAsyncSession() as Session: rows = await self._queryDocumentTypeRoots(Session, Ids=[Id]) if not rows: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "一级文档类型不存在") return self._toDocumentTypeRootVo(rows[0]) async def CreateDocumentTypeRoot(self, Body: DocumentTypeRootCreateDTO) -> DocumentTypeRootItemVO: """创建一级文档类型(业务大类)。""" async with GetAsyncSession() as Session: await self._ensureDocumentTypeRootCodeUnique(Session, Body.code.strip(), None) root_id = ( await Session.execute( text( """ INSERT INTO leaudit_evaluation_point_groups (pid, name, code, description, document_type_id, entry_module_id, sort_order, is_enabled, created_at, updated_at) VALUES (0, :name, :code, :description, NULL, :entry_module_id, :sort_order, :is_enabled, NOW(), NOW()) RETURNING id """ ), { "name": Body.name.strip(), "code": Body.code.strip(), "description": Body.description.strip() or None, "entry_module_id": Body.entryModuleId, "sort_order": Body.sortOrder, "is_enabled": Body.isEnabled, }, ) ).scalar_one() await Session.commit() return await self.GetDocumentTypeRoot(int(root_id)) async def UpdateDocumentTypeRoot(self, Id: int, Body: DocumentTypeRootUpdateDTO) -> DocumentTypeRootItemVO: """更新一级文档类型(业务大类)。""" async with GetAsyncSession() as Session: current = ( await Session.execute( text( """ SELECT 1 FROM leaudit_evaluation_point_groups WHERE id = :id AND deleted_at IS NULL AND COALESCE(pid, 0) = 0 """ ), {"id": Id}, ) ).first() if not current: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "一级文档类型不存在") providedFields = set(getattr(Body, "model_fields_set", set())) sets: list[str] = [] params: dict[str, object] = {"id": Id} if "name" in providedFields and Body.name is not None: sets.append("name = :name") params["name"] = Body.name.strip() if "description" in providedFields: sets.append("description = :description") params["description"] = Body.description.strip() if Body.description is not None else None if params["description"] == "": params["description"] = None if "entryModuleId" in providedFields: sets.append("entry_module_id = :entry_module_id") params["entry_module_id"] = Body.entryModuleId if "isEnabled" in providedFields and Body.isEnabled is not None: sets.append("is_enabled = :is_enabled") params["is_enabled"] = Body.isEnabled if "sortOrder" in providedFields and Body.sortOrder is not None: sets.append("sort_order = :sort_order") params["sort_order"] = Body.sortOrder if sets: sets.append("updated_at = NOW()") await Session.execute( text(f"UPDATE leaudit_evaluation_point_groups SET {', '.join(sets)} WHERE id = :id"), params, ) await Session.commit() return await self.GetDocumentTypeRoot(Id) async def _queryDocumentTypes(self, Session, Ids: list[int] | None = None, EntryModuleId: int | None = None): """查询文档类型及其规则绑定。""" if Ids: rows = ( await Session.execute( text( """ SELECT id, code, name, description, entry_module_id, is_enabled, sort_order FROM leaudit_document_types WHERE deleted_at IS NULL AND id = ANY(:ids) ORDER BY sort_order ASC, id ASC """ ), {"ids": Ids}, ) ).mappings().all() elif EntryModuleId is not None: rows = ( await Session.execute( text( """ SELECT id, code, name, description, entry_module_id, is_enabled, sort_order FROM leaudit_document_types WHERE deleted_at IS NULL AND entry_module_id = :entry_module_id ORDER BY sort_order ASC, id ASC """ ), {"entry_module_id": EntryModuleId}, ) ).mappings().all() else: rows = ( await Session.execute( text( """ SELECT id, code, name, description, entry_module_id, is_enabled, sort_order FROM leaudit_document_types WHERE deleted_at IS NULL ORDER BY sort_order ASC, id ASC """ ) ) ).mappings().all() allIds = [int(r["id"]) for r in rows] bindingsMap: dict[int, list[int]] = {i: [] for i in allIds} if allIds: bindingRows = ( await Session.execute( text( """ SELECT doc_type_id, rule_set_id FROM leaudit_rule_type_bindings WHERE doc_type_id = ANY(:ids) AND deleted_at IS NULL AND is_active = true ORDER BY priority DESC """ ), {"ids": allIds}, ) ).fetchall() for b in bindingRows: bindingsMap.setdefault(int(b[0]), []).append(int(b[1])) return rows, bindingsMap async def _queryDocumentTypeRoots(self, Session, Ids: list[int] | None = None, EntryModuleId: int | None = None): filters = ["g.deleted_at IS NULL", "COALESCE(g.pid, 0) = 0"] params: dict[str, object] = {} if Ids: filters.append("g.id = ANY(:ids)") params["ids"] = Ids if EntryModuleId is not None: filters.append("g.entry_module_id = :entry_module_id") params["entry_module_id"] = EntryModuleId where_clause = " AND ".join(filters) rows = ( await Session.execute( text( f""" SELECT g.id, g.name, g.code, g.description, g.entry_module_id, em.name AS entry_module_name, g.is_enabled, COALESCE(child_stats.child_group_count, 0) AS child_group_count, COALESCE(rule_stats.rule_set_count, 0) AS rule_set_count, COALESCE(rule_stats.rule_set_ids, ARRAY[]::bigint[]) AS rule_set_ids FROM leaudit_evaluation_point_groups g LEFT JOIN leaudit_entry_modules em ON em.id = g.entry_module_id LEFT JOIN ( SELECT pid, COUNT(*)::int AS child_group_count FROM leaudit_evaluation_point_groups WHERE deleted_at IS NULL AND COALESCE(pid, 0) <> 0 GROUP BY pid ) child_stats ON child_stats.pid = g.id LEFT JOIN ( SELECT child.pid, COUNT(DISTINCT rgb.rule_set_id)::int AS rule_set_count, ARRAY_AGG(DISTINCT rgb.rule_set_id) FILTER (WHERE rgb.rule_set_id IS NOT NULL) AS rule_set_ids FROM leaudit_evaluation_point_groups child LEFT JOIN leaudit_rule_group_bindings rgb ON rgb.group_id = child.id AND rgb.deleted_at IS NULL WHERE child.deleted_at IS NULL AND COALESCE(child.pid, 0) <> 0 GROUP BY child.pid ) rule_stats ON rule_stats.pid = g.id WHERE {where_clause} ORDER BY COALESCE(g.sort_order, 0) ASC, g.id ASC """ ), params, ) ).mappings().all() return rows def _toDocumentTypeRootVo(self, row) -> DocumentTypeRootItemVO: rule_set_ids = [int(item) for item in (row.get("rule_set_ids") or []) if item is not None] return DocumentTypeRootItemVO( id=int(row["id"]), name=str(row["name"] or ""), code=str(row["code"] or ""), description=row.get("description"), entryModuleId=int(row["entry_module_id"]) if row.get("entry_module_id") is not None else None, entryModuleName=row.get("entry_module_name"), isEnabled=bool(row.get("is_enabled", True)), childGroupCount=int(row.get("child_group_count") or 0), ruleSetCount=int(row.get("rule_set_count") or 0), ruleSetIds=rule_set_ids, ) async def _ensureDocumentTypeRootCodeUnique(self, Session, Code: str, CurrentId: int | None) -> None: sql = """ SELECT 1 FROM leaudit_evaluation_point_groups WHERE deleted_at IS NULL AND COALESCE(pid, 0) = 0 AND code = :code """ params: dict[str, object] = {"code": Code} if CurrentId is not None: sql += " AND id <> :current_id" params["current_id"] = CurrentId exists = (await Session.execute(text(sql), params)).first() if exists: raise LeauditException(StatusCodeEnum.HTTP_409_CONFLICT, f"一级文档类型编码 {Code} 已存在") async def _loadDocumentColumns(self, Session) -> set[str]: """读取 leaudit_documents 当前真实列,兼容不同环境的渐进式字段上线。""" rows = ( await Session.execute( text( """ SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = 'leaudit_documents' """ ) ) ).fetchall() return {str(row[0]) for row in rows} async def _ensureReviewPointAuditTable(self, Session) -> None: """补齐详情页人工审核表。""" await Session.execute( text( """ CREATE TABLE IF NOT EXISTS leaudit_review_point_audits ( id BIGSERIAL PRIMARY KEY, rule_result_id BIGINT NOT NULL UNIQUE, document_id BIGINT NOT NULL, edit_audit_status INTEGER NOT NULL DEFAULT 0, override_result BOOLEAN NULL, message TEXT NOT NULL DEFAULT '', reviewed_by BIGINT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ) """ ) ) await Session.execute( text( "CREATE INDEX IF NOT EXISTS idx_leaudit_review_point_audits_document_id ON leaudit_review_point_audits(document_id)" ) ) async def _ensureCrossReviewProposalSchema(self, Session) -> None: """补齐交叉评查提案表的补充意见字段,兼容旧环境。""" if not await self._tableExists(Session, "leaudit_cross_review_proposals"): return proposalColumns = await self._loadTableColumns(Session, "leaudit_cross_review_proposals") touched = False if "proposal_type" not in proposalColumns: await Session.execute( text( "ALTER TABLE leaudit_cross_review_proposals ADD COLUMN proposal_type VARCHAR(32) NOT NULL DEFAULT 'review_point'" ) ) touched = True if "evaluation_point_name" not in proposalColumns: await Session.execute( text( "ALTER TABLE leaudit_cross_review_proposals ADD COLUMN evaluation_point_name VARCHAR(255)" ) ) touched = True if "extraction_result_text" not in proposalColumns: await Session.execute( text( "ALTER TABLE leaudit_cross_review_proposals ADD COLUMN extraction_result_text TEXT" ) ) touched = True isRuleResultNotNull = bool( await Session.scalar( text( """ SELECT 1 FROM information_schema.columns WHERE table_schema = current_schema() AND table_name = 'leaudit_cross_review_proposals' AND column_name = 'rule_result_id' AND is_nullable = 'NO' LIMIT 1 """ ) ) ) if isRuleResultNotNull: await Session.execute( text( "ALTER TABLE leaudit_cross_review_proposals ALTER COLUMN rule_result_id DROP NOT NULL" ) ) touched = True if touched: await Session.commit() async def _ensureContractStructureComparisonTable(self, Session) -> None: """补齐合同结构比对表,兼容旧前端模板上传与详情页读取。""" await Session.execute( text( """ CREATE TABLE IF NOT EXISTS contract_structure_comparison ( id BIGSERIAL PRIMARY KEY, document_id BIGINT NOT NULL, comparison_id BIGINT NULL, template_contract_name VARCHAR(512) NULL, template_contract_path TEXT NOT NULL DEFAULT '', file_size BIGINT NOT NULL DEFAULT 0, comparison_results JSONB NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ) """ ) ) await Session.execute( text( "CREATE INDEX IF NOT EXISTS idx_contract_structure_comparison_document_id ON contract_structure_comparison(document_id)" ) ) existingColumns = await self._loadTableColumns(Session, "contract_structure_comparison") if "comparison_id" not in existingColumns: await Session.execute(text("ALTER TABLE contract_structure_comparison ADD COLUMN comparison_id BIGINT NULL")) if "template_contract_name" not in existingColumns: await Session.execute(text("ALTER TABLE contract_structure_comparison ADD COLUMN template_contract_name VARCHAR(512) NULL")) if "template_contract_path" not in existingColumns: await Session.execute(text("ALTER TABLE contract_structure_comparison ADD COLUMN template_contract_path TEXT NOT NULL DEFAULT ''")) if "file_size" not in existingColumns: await Session.execute(text("ALTER TABLE contract_structure_comparison ADD COLUMN file_size BIGINT NOT NULL DEFAULT 0")) if "comparison_results" not in existingColumns: await Session.execute(text("ALTER TABLE contract_structure_comparison ADD COLUMN comparison_results JSONB NULL")) async def _upsertContractStructureComparison( self, Session, DocumentId: int, ComparisonId: int | None, TemplateName: str, TemplatePath: str, FileSize: int, ) -> int: """新增或更新合同结构比对模板记录。""" targetRow = None if ComparisonId is not None: targetRow = ( await Session.execute( text( """ SELECT id FROM contract_structure_comparison WHERE id = :comparison_id AND document_id = :document_id LIMIT 1 """ ), {"comparison_id": ComparisonId, "document_id": DocumentId}, ) ).mappings().first() if targetRow is None: targetRow = ( await Session.execute( text( """ SELECT id FROM contract_structure_comparison WHERE document_id = :document_id ORDER BY id DESC LIMIT 1 """ ), {"document_id": DocumentId}, ) ).mappings().first() if targetRow: comparisonId = int(targetRow["id"]) await Session.execute( text( """ UPDATE contract_structure_comparison SET comparison_id = COALESCE(comparison_id, :comparison_id), template_contract_name = :template_name, template_contract_path = :template_path, file_size = :file_size, updated_at = NOW() WHERE id = :comparison_id """ ), { "comparison_id": comparisonId, "template_name": TemplateName, "template_path": TemplatePath, "file_size": FileSize, }, ) return comparisonId inserted = ( await Session.execute( text( """ INSERT INTO contract_structure_comparison ( document_id, comparison_id, template_contract_name, template_contract_path, file_size, created_at, updated_at ) VALUES ( :document_id, NULL, :template_name, :template_path, :file_size, NOW(), NOW() ) RETURNING id """ ), { "document_id": DocumentId, "template_name": TemplateName, "template_path": TemplatePath, "file_size": FileSize, }, ) ).mappings().first() if not inserted: raise LeauditException(StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR, "合同模板记录写入失败") comparisonId = int(inserted["id"]) await Session.execute( text( """ UPDATE contract_structure_comparison SET comparison_id = COALESCE(comparison_id, :comparison_id), updated_at = NOW() WHERE id = :comparison_id """ ), {"comparison_id": comparisonId}, ) return comparisonId async def _tableExists(self, Session, TableName: str) -> bool: """检查表是否存在。""" row = ( await Session.execute( text( """ SELECT EXISTS ( SELECT 1 FROM information_schema.tables WHERE table_schema = 'public' AND table_name = :table_name ) AS exists_flag """ ), {"table_name": TableName}, ) ).mappings().first() return bool(row["exists_flag"]) if row else False async def _loadTableColumns(self, Session, TableName: str) -> set[str]: """读取任意 public 表的真实列。""" rows = ( await Session.execute( text( """ SELECT column_name FROM information_schema.columns WHERE table_schema = 'public' AND table_name = :table_name """ ), {"table_name": TableName}, ) ).fetchall() return {str(row[0]) for row in rows} async def _ensureDocumentGroupColumn(self, Session) -> None: """渐进式补齐文档二级分组字段,避免旧环境缺列。""" await Session.execute( text( """ ALTER TABLE leaudit_documents ADD COLUMN IF NOT EXISTS tenant_code VARCHAR(64) """ ) ) await Session.execute( text( """ ALTER TABLE leaudit_documents ADD COLUMN IF NOT EXISTS group_id BIGINT NULL REFERENCES leaudit_evaluation_point_groups(id) """ ) ) await Session.execute( text("CREATE INDEX IF NOT EXISTS idx_leaudit_documents_group_id ON leaudit_documents(group_id)") ) await Session.execute( text("CREATE INDEX IF NOT EXISTS idx_leaudit_documents_tenant_code ON leaudit_documents(tenant_code)") ) async def _resolveDocumentGroupId(self, Session, TypeId: int, GroupId: int | None) -> int | None: """校验上传时选择的二级分组是否属于当前文档类型。""" if GroupId is None: return None row = ( await Session.execute( text( """ SELECT id, document_type_id, name FROM leaudit_evaluation_point_groups WHERE id = :group_id AND deleted_at IS NULL AND COALESCE(pid, 0) <> 0 LIMIT 1 """ ), {"group_id": GroupId}, ) ).mappings().first() if not row: raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, f"子类型 (id={GroupId}) 不存在或不是有效的二级分组,无法上传") if int(row["document_type_id"] or 0) != TypeId: logger.warning( "_resolveDocumentGroupId mismatch: TypeId=%s GroupId=%s group_doc_type_id=%s group_name=%s", TypeId, GroupId, row["document_type_id"], row["name"], ) raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, f"当前子类型「{row['name']}」(id={GroupId}) 属于文档类型 {row['document_type_id']},与所选文档类型 {TypeId} 不匹配,无法上传") return int(row["id"]) async def _resolveDocumentRootGroupId(self, Session, TypeId: int, GroupId: int | None) -> int | None: """解析上传命中的一级分组,用于跨二级类型做版本归档。""" if GroupId is not None: row = ( await Session.execute( text( """ SELECT CASE WHEN COALESCE(pid, 0) = 0 THEN id ELSE pid END AS root_group_id FROM leaudit_evaluation_point_groups WHERE id = :group_id AND deleted_at IS NULL LIMIT 1 """ ), {"group_id": GroupId}, ) ).mappings().first() if row and row["root_group_id"] is not None: return int(row["root_group_id"]) row = ( await Session.execute( text( """ SELECT CASE WHEN COUNT(DISTINCT pid) = 1 THEN MIN(pid) END AS root_group_id FROM leaudit_evaluation_point_groups WHERE document_type_id = :doc_type_id AND COALESCE(pid, 0) <> 0 AND deleted_at IS NULL AND is_enabled = true """ ), {"doc_type_id": TypeId}, ) ).mappings().first() if row and row["root_group_id"] is not None: return int(row["root_group_id"]) return None async def _loadLatestPageQualitySummaryMap( self, Session, DocumentIds: list[int], ) -> dict[int, dict[str, Any]]: """批量读取文档最新页级模糊摘要。""" normalized_ids = [int(document_id) for document_id in DocumentIds if int(document_id) > 0] if not normalized_ids: return {} if not await self._tableExists(Session, "leaudit_page_quality_runs"): return {} rows = ( await Session.execute( text( """ SELECT DISTINCT ON (document_id) document_id, id AS run_id, status AS run_status, summary_status, total_pages, review_page_count, reject_page_count FROM leaudit_page_quality_runs WHERE document_id = ANY(:document_ids) AND deleted_at IS NULL ORDER BY document_id, id DESC """ ), {"document_ids": normalized_ids}, ) ).mappings().all() result: dict[int, dict[str, Any]] = {} for row in rows: document_id = int(row["document_id"]) review_count = int(row["review_page_count"] or 0) reject_count = int(row["reject_page_count"] or 0) issue_count = review_count + reject_count summary_status = str(row["summary_status"] or "") or None warning_text = None if issue_count > 0 and summary_status: pages = await self._loadPageQualityIssuePages(Session, int(row["run_id"])) warning_text = self._buildPageQualityWarningText(pages, summary_status) result[document_id] = { "pageQualityRunId": int(row["run_id"]), "pageQualityRunStatus": str(row["run_status"] or "") or None, "pageQualitySummaryStatus": summary_status, "pageQualityTotalPages": int(row["total_pages"] or 0), "pageQualityReviewPageCount": review_count, "pageQualityRejectPageCount": reject_count, "pageQualityIssueCount": issue_count, "pageQualityWarningText": warning_text, } return result async def _loadPageQualityIssuePages(self, Session, RunId: int) -> list[int]: """读取单次运行的异常页码列表。""" if not await self._tableExists(Session, "leaudit_page_quality_results"): return [] rows = ( await Session.execute( text( """ SELECT DISTINCT page_num FROM leaudit_page_quality_results WHERE run_id = :run_id AND quality_status IN ('review', 'reject') ORDER BY page_num ASC """ ), {"run_id": RunId}, ) ).mappings().all() return [int(row["page_num"]) for row in rows if row["page_num"] is not None] def _buildPageQualityWarningText(self, Pages: list[int], SummaryStatus: str | None) -> str | None: """组装页级模糊预警文案。""" if not Pages or not SummaryStatus: return None pages_text = "、".join(f"第{page}页" for page in Pages[:10]) suffix = "建议重拍" if SummaryStatus == "reject" else "疑似模糊" if len(Pages) > 10: pages_text = f"{pages_text}等" return f"{pages_text}{suffix}" async def _getDocumentDetail( self, Session, DocumentId: int, CurrentUserId: int, CurrentUser: dict[str, Any], DocumentColumns: set[str], BypassScopeCheck: bool = False, ) -> DocumentDetailVO | None: """查询单文档详情,并附带历史版本。""" params: dict[str, object] = {"id": DocumentId} filters = [ "d.id = :id", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'", ] if not BypassScopeCheck: filters.extend( self._buildDocumentScopeFilters( CurrentUserId=CurrentUserId, CurrentUser=CurrentUser, Params=params, DocumentAlias="d", FileAlias="f", ) ) whereClause = " AND ".join(filters) groupIdSelectExpr = "d.group_id" if "group_id" in DocumentColumns else "NULL::bigint" resolvedGroupIdExpr = f"COALESCE({groupIdSelectExpr}, dg.inferred_group_id)" resolvedGroupNameExpr = ( f"CASE WHEN {groupIdSelectExpr} IS NOT NULL THEN eg.name ELSE dg.inferred_group_name END" ) optionalSelects = [ f"{resolvedGroupIdExpr} AS group_id", "d.document_number AS document_number" if "document_number" in DocumentColumns else "NULL::text AS document_number", "d.remark AS remark" if "remark" in DocumentColumns else "NULL::text AS remark", "d.is_test_document AS is_test_document" if "is_test_document" in DocumentColumns else "FALSE AS is_test_document", "d.audit_status AS audit_status" if "audit_status" in DocumentColumns else "NULL::integer AS audit_status", "COALESCE(NULLIF(BTRIM(d.tenant_code), ''), NULL) AS tenant_code", """ CASE WHEN NULLIF(BTRIM(d.tenant_code), '') = 'PUBLIC' THEN '公共' WHEN NULLIF(BTRIM(d.tenant_code), '') = 'PROVINCIAL' THEN '省级' ELSE COALESCE(NULLIF(BTRIM(d.region), ''), '') END AS tenant_name """.strip(), ] detailRow = ( await Session.execute( text( f""" SELECT d.id AS document_id, d.biz_document_id AS internal_document_no, d.version_group_key, d.version_no, d.root_version_id, d.previous_version_id, d.type_id, dt.code AS type_code, dt.name AS type_name, {resolvedGroupNameExpr} AS group_name, d.region, d.normalized_name, d.processing_status, d.current_run_id, d.updated_at, f.id AS file_id, f.file_name, f.file_ext, f.mime_type, f.file_size, f.oss_url, ar.status AS run_status, ar.result_status, ar.total_score, ar.passed_count, ar.failed_count, ar.skipped_count, vc.total_versions, COALESCE(vc.total_versions, 1) > 1 AS has_history, {', '.join(optionalSelects)} FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id LEFT JOIN leaudit_document_types dt ON dt.id = d.type_id LEFT JOIN leaudit_evaluation_point_groups eg ON eg.id = d.group_id LEFT JOIN ( SELECT document_type_id, CASE WHEN COUNT(*) = 1 THEN MIN(id) END AS inferred_group_id, CASE WHEN COUNT(*) = 1 THEN MIN(name) END AS inferred_group_name FROM leaudit_evaluation_point_groups WHERE pid <> 0 AND deleted_at IS NULL AND is_enabled = true AND document_type_id IS NOT NULL GROUP BY document_type_id ) dg ON dg.document_type_id = d.type_id LEFT JOIN leaudit_audit_runs ar ON ar.id = d.current_run_id LEFT JOIN ( SELECT version_group_key, COUNT(*) AS total_versions FROM leaudit_documents WHERE deleted_at IS NULL GROUP BY version_group_key ) vc ON vc.version_group_key = d.version_group_key WHERE {whereClause} LIMIT 1 """ ), params, ) ).mappings().first() if not detailRow: return None groupKey = str(detailRow["version_group_key"] or "") historyVersions: list[DocumentHistoryVersionVO] = [] if groupKey: historyRows = ( await Session.execute( text( """ SELECT d.id AS document_id, d.version_no, d.processing_status, d.updated_at, f.id AS file_id, f.file_name, f.file_ext, f.oss_url, ar.status AS run_status, ar.result_status FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id AND f.is_active = true AND f.file_role = 'primary' LEFT JOIN leaudit_audit_runs ar ON ar.id = d.current_run_id WHERE d.version_group_key = :group_key AND d.id <> :document_id AND d.deleted_at IS NULL ORDER BY d.version_no DESC, d.id DESC """ ), {"group_key": groupKey, "document_id": int(detailRow["document_id"])}, ) ).mappings().all() historyVersions = [ DocumentHistoryVersionVO( documentId=int(row["document_id"]), fileId=int(row["file_id"]) if row["file_id"] is not None else None, versionNo=int(row["version_no"]), fileName=row["file_name"], fileExt=row["file_ext"], ossUrl=row["oss_url"], processingStatus=row["processing_status"], runStatus=row["run_status"], resultStatus=row["result_status"], updatedAt=row["updated_at"].isoformat() if row["updated_at"] else None, ) for row in historyRows ] attachmentRows = ( await Session.execute( text( """ SELECT id, file_name, file_ext, mime_type, file_size, file_role, oss_url, created_by, created_at FROM leaudit_document_files WHERE document_id = :document_id AND deleted_at IS NULL AND is_active = true AND file_role = 'attachment' ORDER BY id ASC """ ), {"document_id": int(detailRow["document_id"])}, ) ).mappings().all() attachments = [ DocumentAttachmentVO( fileId=int(row["id"]), fileName=str(row["file_name"] or ""), fileExt=row["file_ext"], mimeType=row["mime_type"], fileSize=int(row["file_size"]) if row["file_size"] is not None else None, fileRole=str(row["file_role"] or "attachment"), ossUrl=row["oss_url"], createdBy=int(row["created_by"]) if row["created_by"] is not None else None, createdAt=row["created_at"].isoformat() if row["created_at"] else None, ) for row in attachmentRows ] page_quality_map = await self._loadLatestPageQualitySummaryMap(Session, [int(detailRow["document_id"])]) quality = page_quality_map.get(int(detailRow["document_id"]), {}) page_quality_pages = [] if quality.get("pageQualityRunId") is not None: page_quality_pages = await self._loadPageQualityIssuePages(Session, int(quality["pageQualityRunId"])) page_quality_summary = None if quality: page_quality_summary = PageQualitySummaryVO( runId=quality.get("pageQualityRunId"), runStatus=quality.get("pageQualityRunStatus"), summaryStatus=quality.get("pageQualitySummaryStatus"), totalPages=int(quality.get("pageQualityTotalPages") or 0), reviewPageCount=int(quality.get("pageQualityReviewPageCount") or 0), rejectPageCount=int(quality.get("pageQualityRejectPageCount") or 0), warningText=quality.get("pageQualityWarningText"), pages=page_quality_pages, ) return DocumentDetailVO( documentId=int(detailRow["document_id"]), internalDocumentNo=int(detailRow["internal_document_no"]), versionGroupKey=groupKey, versionNo=int(detailRow["version_no"] or 1), rootVersionId=int(detailRow["root_version_id"] or detailRow["document_id"]), previousVersionId=int(detailRow["previous_version_id"]) if detailRow["previous_version_id"] is not None else None, typeId=int(detailRow["type_id"]) if detailRow["type_id"] is not None else None, typeCode=detailRow["type_code"], typeName=detailRow["type_name"], groupId=int(detailRow["group_id"]) if detailRow["group_id"] is not None else None, groupName=detailRow["group_name"], region=str(detailRow["region"] or ""), tenantCode=detailRow["tenant_code"], tenantName=detailRow["tenant_name"], normalizedName=detailRow["normalized_name"], fileId=int(detailRow["file_id"]) if detailRow["file_id"] is not None else None, fileName=detailRow["file_name"], fileExt=detailRow["file_ext"], mimeType=detailRow["mime_type"], fileSize=int(detailRow["file_size"]) if detailRow["file_size"] is not None else None, ossUrl=detailRow["oss_url"], processingStatus=detailRow["processing_status"], currentRunId=int(detailRow["current_run_id"]) if detailRow["current_run_id"] is not None else None, runStatus=detailRow["run_status"], resultStatus=detailRow["result_status"], totalScore=float(detailRow["total_score"]) if detailRow["total_score"] is not None else None, passedCount=int(detailRow["passed_count"]) if detailRow["passed_count"] is not None else None, failedCount=int(detailRow["failed_count"]) if detailRow["failed_count"] is not None else None, skippedCount=int(detailRow["skipped_count"]) if detailRow["skipped_count"] is not None else None, updatedAt=detailRow["updated_at"].isoformat() if detailRow["updated_at"] else None, hasHistory=bool(detailRow["has_history"]), totalVersions=int(detailRow["total_versions"] or 1), historyVersions=historyVersions, documentNumber=detailRow["document_number"], remark=detailRow["remark"], isTestDocument=bool(detailRow["is_test_document"]), auditStatus=int(detailRow["audit_status"]) if detailRow["audit_status"] is not None else None, pageQualityRunId=quality.get("pageQualityRunId"), pageQualityRunStatus=quality.get("pageQualityRunStatus"), pageQualitySummaryStatus=quality.get("pageQualitySummaryStatus"), pageQualityIssueCount=int(quality.get("pageQualityIssueCount") or 0), pageQualityWarningText=quality.get("pageQualityWarningText"), pageCount=None, pageQualitySummary=page_quality_summary, attachments=attachments, ) async def _hasCrossReviewDocumentAccess(self, Session, DocumentId: int, CurrentUserId: int) -> bool: """判断当前用户是否作为交叉评查任务成员拥有文档访问权。""" if not await self._tableExists(Session, "leaudit_cross_review_task_documents"): return False if not await self._tableExists(Session, "leaudit_cross_review_task_members"): return False if not await self._tableExists(Session, "leaudit_cross_review_tasks"): return False row = ( await Session.execute( text( """ SELECT 1 FROM leaudit_cross_review_task_documents td JOIN leaudit_cross_review_task_members tm ON tm.task_id = td.task_id JOIN leaudit_cross_review_tasks t ON t.id = td.task_id WHERE td.document_id = :document_id AND tm.user_id = :user_id AND td.delete_time IS NULL AND tm.delete_time IS NULL AND t.delete_time IS NULL LIMIT 1 """ ), {"document_id": DocumentId, "user_id": CurrentUserId}, ) ).first() return bool(row) def _buildDocumentScopeFilters( self, CurrentUserId: int, CurrentUser: dict[str, Any], Params: dict[str, object], DocumentAlias: str, FileAlias: str, RequestedRegion: str | None = None, RequestedTenantCode: str | None = None, RequestedUserId: int | None = None, ) -> list[str]: """根据当前用户角色与租户/地区构建文档数据范围过滤。""" filters: list[str] = [] requested_tenant_code, requested_region = self._normalize_scope_value(RequestedRegion, RequestedTenantCode, CurrentUser) current_tenant_code = str(CurrentUser.get("tenant_code") or "").strip() current_region = str(CurrentUser.get("tenant_scope_value") or CurrentUser.get("area") or "").strip() if CurrentUser["is_global"]: if requested_tenant_code: filters.extend( self._document_tenant_filter_sql( Params, DocumentAlias=DocumentAlias, tenant_code=requested_tenant_code, region=requested_region, prefix="requested", ) ) elif requested_region: Params["requested_region"] = requested_region filters.append(f"{DocumentAlias}.region = :requested_region") if RequestedUserId is not None: filters.append(f"{FileAlias}.created_by = :requested_user_id") Params["requested_user_id"] = RequestedUserId return filters if CurrentUser["can_manage"]: if not current_tenant_code and not current_region: return ["1 = 0"] effective_tenant_code = current_tenant_code or requested_tenant_code effective_region = current_region or requested_region if current_tenant_code and requested_tenant_code and requested_tenant_code != current_tenant_code: return ["1 = 0"] if requested_region and current_region and requested_region != current_region: return ["1 = 0"] filters.extend( self._document_tenant_filter_sql( Params, DocumentAlias=DocumentAlias, tenant_code=effective_tenant_code or None, region=effective_region or None, prefix="scope", ) ) if RequestedUserId is not None: filters.append(f"{FileAlias}.created_by = :requested_user_id") Params["requested_user_id"] = RequestedUserId return filters filters.append(f"{FileAlias}.created_by = :scope_user_id") Params["scope_user_id"] = CurrentUserId if requested_tenant_code: if current_tenant_code and requested_tenant_code != current_tenant_code: filters.append("1 = 0") elif not current_tenant_code and current_region and requested_region != current_region: filters.append("1 = 0") else: filters.extend( self._document_tenant_filter_sql( Params, DocumentAlias=DocumentAlias, tenant_code=(current_tenant_code or requested_tenant_code) or None, region=(current_region or requested_region) or None, prefix="requested", ) ) elif requested_region: if current_region and requested_region != current_region: filters.append("1 = 0") elif current_tenant_code: filters.extend( self._document_tenant_filter_sql( Params, DocumentAlias=DocumentAlias, tenant_code=current_tenant_code or None, region=current_region or requested_region, prefix="requested", ) ) else: Params["requested_region"] = requested_region filters.append(f"{DocumentAlias}.region = :requested_region") if RequestedUserId is not None and RequestedUserId != CurrentUserId: filters.append("1 = 0") return filters async def _getCurrentUserContext(self, CurrentUserId: int) -> dict[str, Any]: """加载当前用户上下文,用于文档数据隔离。""" async with GetAsyncSession() as Session: sso_user_columns = await SsoUserCompat.get_columns(Session) tenant_code_select = SsoUserCompat.optional_coalesce_as( sso_user_columns, alias="u", column="tenant_code", fallback_sql="''", ) row = ( await Session.execute( text( f""" SELECT u.id, COALESCE(u.area, '') AS area, {tenant_code_select}, COALESCE(bool_or(r.role_key IN ('super_admin', 'provincial_admin')), FALSE) AS is_global, COALESCE(bool_or(r.role_key IN ('super_admin', 'provincial_admin', 'admin')), FALSE) AS can_manage, COALESCE(bool_or(r.role_key = 'super_admin'), FALSE) AS is_super_admin FROM sso_users u LEFT JOIN user_role ur ON ur.user_id = u.id LEFT JOIN roles r ON r.id = ur.role_id WHERE u.id = :user_id GROUP BY u.id, u.area """ ), {"user_id": CurrentUserId}, ) ).mappings().first() if not row: raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "当前用户不存在") tenant_resolution = await self.TenantResolver.ResolveUserContext( Area=str(row["area"] or ""), TenantCode=str(row["tenant_code"] or ""), TenantName=None, Source="document_user", ) return { "area": tenant_resolution.tenant_name or tenant_resolution.normalized_value or str(row["area"] or ""), "tenant_code": tenant_resolution.tenant_code or "", "tenant_name": tenant_resolution.tenant_name or "", "tenant_scope_value": tenant_resolution.tenant_name or tenant_resolution.normalized_value or str(row["area"] or ""), "is_global": bool(row["is_global"]), "can_manage": bool(row["can_manage"]), "is_super_admin": bool(row["is_super_admin"]), } async def _resolve_document_region( self, *, Region: str | None, TenantCode: str | None, TenantName: str | None, ) -> str: resolution = await self.TenantResolver.Resolve( RawValue=Region, Source="document_region", PreferredTenantCode=str(TenantCode or "").strip() or None, FallbackTenantName=TenantName, ) normalized = resolution.tenant_name or resolution.normalized_value or "" return normalized.strip() or "公共" @staticmethod def _normalize_region_value(value: str | None) -> str: if value is None: return "" trimmed = str(value).strip() return trimmed def _normalize_scope_value( self, requestedRegion: str | None, requestedTenantCode: str | None, currentUser: dict[str, Any], ) -> tuple[str | None, str]: tenant_code = str(requestedTenantCode or "").strip() if tenant_code: if tenant_code == str(currentUser.get("tenant_code") or "").strip(): return ( str(currentUser.get("tenant_code") or "").strip() or None, str(currentUser.get("tenant_scope_value") or currentUser.get("tenant_name") or currentUser.get("area") or "").strip(), ) if tenant_code == "PUBLIC": return "PUBLIC", self._PUBLIC_REGION if tenant_code == "PROVINCIAL": return "PROVINCIAL", "省级" return tenant_code or None, str(requestedRegion or "").strip() return None, self._normalize_region_value(requestedRegion) def _resolve_upload_region( self, currentUser: dict[str, Any], requestedRegion: str | None, requestedTenantCode: str | None = None, ) -> str: requestedTenantCodeNormalized, normalizedRequestedRegion = self._normalize_scope_value( requestedRegion, requestedTenantCode, currentUser, ) currentTenantCode = str(currentUser.get("tenant_code") or "").strip() currentRegion = str(currentUser.get("tenant_scope_value") or currentUser.get("area") or "").strip() if currentUser["is_global"]: return normalizedRequestedRegion or currentRegion or self._PUBLIC_REGION if requestedTenantCodeNormalized: if currentTenantCode and requestedTenantCodeNormalized != currentTenantCode: raise LeauditException(StatusCodeEnum.HTTP_403_FORBIDDEN, "不能上传到非本人所属租户") if not currentTenantCode and currentRegion and normalizedRequestedRegion != currentRegion: raise LeauditException(StatusCodeEnum.HTTP_403_FORBIDDEN, "不能上传到非本人所属租户") elif normalizedRequestedRegion and currentRegion and normalizedRequestedRegion != currentRegion: raise LeauditException(StatusCodeEnum.HTTP_403_FORBIDDEN, "不能上传到非本人所属租户") return currentRegion or normalizedRequestedRegion or self._PUBLIC_REGION def _document_tenant_filter_sql( self, params: dict[str, Any], *, DocumentAlias: str, tenant_code: str | None, region: str | None, prefix: str, ) -> list[str]: normalized_region = str(region or "").strip() normalized_tenant_code = str(tenant_code or "").strip() if normalized_tenant_code: params[f"{prefix}_tenant_code"] = normalized_tenant_code return [f"{DocumentAlias}.tenant_code = :{prefix}_tenant_code"] if normalized_region: params[f"{prefix}_region"] = normalized_region return [f"{DocumentAlias}.region = :{prefix}_region"] return ["1 = 0"] async def _loadReviewRunRow(self, Session, Detail: DocumentDetailVO) -> dict[str, Any] | None: """定位当前文档可展示的最新评查运行。""" params: dict[str, object] = {"document_id": Detail.documentId} if Detail.currentRunId is not None: params["run_id"] = Detail.currentRunId row = ( await Session.execute( text( """ SELECT id, status, result_status, total_score, passed_count, failed_count, skipped_count, llm_model, finished_at, updated_at FROM leaudit_audit_runs WHERE id = :run_id AND document_id = :document_id LIMIT 1 """ ), params, ) ).mappings().first() if row: return dict(row) row = ( await Session.execute( text( """ SELECT id, status, result_status, total_score, passed_count, failed_count, skipped_count, llm_model, finished_at, updated_at FROM leaudit_audit_runs WHERE document_id = :document_id ORDER BY id DESC LIMIT 1 """ ), params, ) ).mappings().first() return dict(row) if row else None async def _buildReviewDocumentPayload( self, Session, Detail: DocumentDetailVO, RunRow: dict[str, Any] | None, ) -> dict[str, Any]: """把新文档详情转换成旧详情页可直接消费的文档对象。""" metaRow = ( await Session.execute( text( """ SELECT d.created_at, f.created_by, COALESCE(u.nick_name, u.username, '') AS upload_user FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id AND f.is_active = true AND f.file_role = 'primary' LEFT JOIN sso_users u ON u.id = f.created_by WHERE d.id = :document_id LIMIT 1 """ ), {"document_id": Detail.documentId}, ) ).mappings().first() metricRow = None if RunRow: metricRow = ( await Session.execute( text( """ SELECT page_count FROM leaudit_run_metrics WHERE run_id = :run_id ORDER BY id DESC LIMIT 1 """ ), {"run_id": int(RunRow["id"])}, ) ).mappings().first() createdAt = metaRow["created_at"] if metaRow else None uploadUser = str(metaRow["upload_user"] or "") if metaRow else "" pageCount = int(metricRow["page_count"]) if metricRow and metricRow["page_count"] is not None else 0 ocrResultPayload = await self._buildReviewOcrPayload(Session, Detail.documentId, RunRow, pageCount) return { "id": Detail.documentId, "documentId": Detail.documentId, "name": Detail.fileName or Detail.normalizedName or f"文档{Detail.documentId}", "path": Detail.ossUrl or "", "fileName": Detail.fileName or "", "fileType": (Detail.fileExt or "").lower(), "size": Detail.fileSize, "file_size": Detail.fileSize, "documentNumber": Detail.documentNumber, "document_number": Detail.documentNumber, "type": str(Detail.typeId or ""), "type_id": Detail.typeId, "typeName": Detail.typeName, "type_name": Detail.typeName, "groupId": Detail.groupId, "groupName": Detail.groupName, "region": Detail.region, "tenantCode": Detail.tenantCode, "tenantName": Detail.tenantName, "auditStatus": Detail.auditStatus or 0, "audit_status": Detail.auditStatus or 0, "uploadTime": _format_iso_datetime(createdAt) or Detail.updatedAt or "", "created_at": _format_iso_datetime(createdAt) or Detail.updatedAt or "", "updated_at": Detail.updatedAt or "", "uploadUser": uploadUser, "pageCount": pageCount, "page_count": pageCount, "ocrResult": ocrResultPayload, "attachments": [item.model_dump() for item in Detail.attachments], } async def _buildReviewOcrPayload( self, Session, DocumentId: int, RunRow: dict[str, Any] | None, PageCount: int, ) -> dict[str, Any]: """构建旧详情页仍在读取的最小 ocrResult 结构。""" pageMap: dict[str, set[int]] = {} def add_page(field_name: str | None, page_value: Any) -> None: if not field_name: return page = _coerce_positive_int(page_value) if page is None: return section = str(field_name).split("-", 1)[0].strip() or "未分组" pageMap.setdefault(section, set()).add(page) if RunRow: fieldRows = ( await Session.execute( text( """ SELECT field_name, meta_json, raw_value_json FROM leaudit_field_results WHERE run_id = :run_id AND document_id = :document_id ORDER BY id ASC """ ), {"run_id": int(RunRow["id"]), "document_id": DocumentId}, ) ).mappings().all() for row in fieldRows: metaJson = row["meta_json"] if isinstance(row["meta_json"], dict) else {} rawValueJson = row["raw_value_json"] if isinstance(row["raw_value_json"], dict) else {} position = metaJson.get("position") if isinstance(metaJson.get("position"), dict) else {} add_page(row["field_name"], position.get("pageNum") or position.get("page")) rawPosition = rawValueJson.get("position") if isinstance(rawValueJson.get("position"), dict) else {} add_page(row["field_name"], rawPosition.get("pageNum") or rawPosition.get("page")) ruleRows = ( await Session.execute( text( """ SELECT extracted_fields, field_positions FROM leaudit_rule_results WHERE run_id = :run_id AND document_id = :document_id ORDER BY id ASC """ ), {"run_id": int(RunRow["id"]), "document_id": DocumentId}, ) ).mappings().all() for row in ruleRows: extractedFields = row["extracted_fields"] if isinstance(row["extracted_fields"], dict) else {} fieldPositions = row["field_positions"] if isinstance(row["field_positions"], dict) else {} for field_name, field_payload in extractedFields.items(): if isinstance(field_payload, dict): add_page(field_name, field_payload.get("page")) add_page(field_name, field_payload.get("pageNum")) position = fieldPositions.get(field_name) if isinstance(fieldPositions, dict) else None add_page(field_name, _extract_review_page(position)) normalizedMap = { key: {"pages": sorted(value)} for key, value in pageMap.items() if value } return { "__meta": {"page_offset": 0, "page_count": PageCount}, "ocr_result": normalizedMap, } async def _loadComparisonDocument(self, Session, DocumentId: int) -> dict[str, Any]: """读取合同结构比对结果;缺表或空数据时降级。""" if not await self._tableExists(Session, "contract_structure_comparison"): return {"template_contract_path": ""} columns = await self._loadTableColumns(Session, "contract_structure_comparison") selectColumns: list[str] = ["id", "document_id"] optionalColumns = [ "template_contract_path", "comparison_results", "created_at", "updated_at", "comparison_id", ] for column in optionalColumns: if column in columns: selectColumns.append(column) row = ( await Session.execute( text( f""" SELECT {', '.join(selectColumns)} FROM contract_structure_comparison WHERE document_id = :document_id ORDER BY id DESC LIMIT 1 """ ), {"document_id": DocumentId}, ) ).mappings().first() if not row: return {"template_contract_path": ""} payload = dict(row) rawResults = payload.get("comparison_results") if isinstance(rawResults, str): import json try: payload["comparison_results"] = json.loads(rawResults) except Exception: payload["comparison_results"] = None if "comparisonId" not in payload: payload["comparisonId"] = payload.get("comparison_id") or payload.get("id") if "template_contract_path" not in payload: payload["template_contract_path"] = "" return payload async def _loadScoringProposals(self, Session, DocumentId: int) -> list[dict[str, Any]]: """读取交叉评分提案;缺表时降级为空。""" if await self._tableExists(Session, "leaudit_cross_review_proposals"): columns = await self._loadTableColumns(Session, "leaudit_cross_review_proposals") selectColumns = [ "id", "'review_point' AS proposal_type", "rule_result_id AS evaluation_result_id", "proposer_id", "NULL::varchar AS evaluation_point_name", "NULL::text AS extraction_result_text", "proposed_score_delta AS proposed_score", "reason", "status", "create_time AS created_at", "update_time AS updated_at", "document_id", ] if "proposal_type" in columns: selectColumns[1] = "proposal_type" if "evaluation_point_name" in columns: selectColumns[4] = "evaluation_point_name" if "extraction_result_text" in columns: selectColumns[5] = "extraction_result_text" rows = ( await Session.execute( text( f""" SELECT {', '.join(selectColumns)} FROM leaudit_cross_review_proposals WHERE document_id = :document_id AND delete_time IS NULL ORDER BY id DESC """ ), {"document_id": DocumentId}, ) ).mappings().all() if rows: return [dict(row) for row in rows] if not await self._tableExists(Session, "cross_scoring_proposals"): return [] columns = await self._loadTableColumns(Session, "cross_scoring_proposals") selectColumns = [ column for column in ( "id", "evaluation_result_id", "proposer_id", "proposed_score", "reason", "status", "created_at", "updated_at", "document_id", "deleted_at", ) if column in columns ] if not selectColumns: return [] filterClauses = ["document_id = :document_id"] if "deleted_at" in columns: filterClauses.append("deleted_at IS NULL") rows = ( await Session.execute( text( f""" SELECT {', '.join(selectColumns)} FROM cross_scoring_proposals WHERE {' AND '.join(filterClauses)} ORDER BY id DESC """ ), {"document_id": DocumentId}, ) ).mappings().all() return [dict(row) for row in rows] async def _loadApprovedSupplementScoreDelta(self, Session, DocumentId: int) -> float: """读取已通过的补充意见分值调整,计入详情页文档总分。""" if not await self._tableExists(Session, "leaudit_cross_review_proposals"): return 0.0 columns = await self._loadTableColumns(Session, "leaudit_cross_review_proposals") if "proposal_type" not in columns: return 0.0 delta = await Session.scalar( text( """ SELECT COALESCE(SUM(proposed_score_delta), 0) FROM leaudit_cross_review_proposals WHERE document_id = :document_id AND COALESCE(proposal_type, 'review_point') = 'supplement' AND status = 'approved' AND delete_time IS NULL """ ), {"document_id": DocumentId}, ) return float(delta or 0.0) async def _loadReviewPointResults( self, Session, Detail: DocumentDetailVO, RunId: int, ) -> list[ReviewPointResultVO]: """读取评查运行下的规则结果并转换成前端详情页结构。""" rows = ( await Session.execute( text( """ SELECT rr.id, rr.rule_id, rr.rule_name, rr.risk, rr.score, rr.passed, rr.status, rr.skip_reason, rr.pass_message, rr.fail_message, rr.stages, rr.extracted_fields, rr.field_positions, rr.remediation, rr.rule_meta, rr.updated_at, a.id AS audit_id, a.edit_audit_status, a.override_result, a.message AS audit_message, COALESCE(ad.approved_delta, 0) AS approved_delta FROM leaudit_rule_results rr LEFT JOIN leaudit_review_point_audits a ON a.rule_result_id = rr.id LEFT JOIN LATERAL ( SELECT COALESCE(SUM(proposed_score_delta), 0) AS approved_delta FROM leaudit_cross_review_proposals WHERE rule_result_id = rr.id AND document_id = :document_id AND COALESCE(proposal_type, 'review_point') = 'review_point' AND status = 'approved' AND delete_time IS NULL ) ad ON TRUE WHERE rr.run_id = :run_id AND rr.document_id = :document_id ORDER BY id ASC """ ), {"run_id": RunId, "document_id": Detail.documentId}, ) ).mappings().all() rows = await self._backfillMissingReviewFieldPositions( Session, Detail.documentId, RunId, rows, ) result: list[ReviewPointResultVO] = [] for row in rows: extractedFields = row["extracted_fields"] if isinstance(row["extracted_fields"], dict) else {} fieldPositions = row["field_positions"] if isinstance(row["field_positions"], dict) else {} ruleMeta = row["rule_meta"] if isinstance(row["rule_meta"], dict) else {} remediation = row["remediation"] if isinstance(row["remediation"], dict) else {} rawStages = row["stages"] if isinstance(row["stages"], list) else [] stages = _enrich_stage_payload_with_positions(rawStages, fieldPositions) score = float(row["score"]) if row["score"] is not None else 0.0 auditedResult = row["override_result"] if row["edit_audit_status"] == 1 else None effectivePassed = auditedResult if auditedResult is not None else row["passed"] effectiveStatus = row["status"] if auditedResult is not None: effectiveStatus = "passed" if auditedResult else ("failed" if row["status"] != "error" else "error") resultFlag = None if effectiveStatus not in {"skipped", "not_applicable"}: resultFlag = bool(effectivePassed) if effectivePassed is not None else None title = self._buildReviewPointTitle(row) if row["edit_audit_status"] == 1 and row["audit_message"]: title = str(row["audit_message"]) result.append( ReviewPointResultVO( id=int(row["id"]), documentId=str(Detail.documentId), pointId=int(row["id"]), editAuditStatusId=int(row["audit_id"]) if row["audit_id"] is not None else "", editAuditStatus=int(row["edit_audit_status"] or 0), editAuditStatusMessage=str(row["audit_message"] or ""), title=title, pointName=str(row["rule_name"] or row["rule_id"] or ""), pointCode=str(row["rule_id"] or ""), groupName=str(ruleMeta.get("group") or Detail.groupName or Detail.typeName or "未分组"), status=_map_rule_status(effectiveStatus, effectivePassed, row["risk"]), content=_normalize_review_content(extractedFields, fieldPositions), contentPage=_normalize_review_content_pages(extractedFields, fieldPositions), suggestion=_extract_review_suggestion(remediation, row["fail_message"], row["pass_message"]), postAction="manual" if effectivePassed is not True else "", actionContent=remediation or {}, legalBasis=ruleMeta.get("references_laws") or {}, evaluationConfig={"risk": row["risk"], "status": effectiveStatus, "ruleMeta": ruleMeta}, score=score, finalScore=score if resultFlag is True else (0.0 if resultFlag is False else None), machineScore=score, currentScore=( (score if resultFlag is True else 0.0) + float(row["approved_delta"] or 0) ), result=resultFlag, failMessage=str(row["fail_message"] or ""), passMessage=str(row["pass_message"] or ""), evaluatedPointResultsLog={"rules": _normalize_review_stage_logs(stages)}, ) ) return result async def _backfillMissingReviewFieldPositions( self, Session, DocumentId: int, RunId: int, rows: list[dict[str, Any]], ) -> list[dict[str, Any]]: """详情页兜底补齐缺失页码,避免长文本字段只能按文本定位。""" missingFields: dict[str, str] = {} touchedRuleRows: dict[int, dict[str, Any]] = {} for rawRow in rows: row = dict(rawRow) extractedFields = row["extracted_fields"] if isinstance(row.get("extracted_fields"), dict) else {} fieldPositions = dict(row["field_positions"]) if isinstance(row.get("field_positions"), dict) else {} row["field_positions"] = fieldPositions rowTouched = False for fieldName, rawValue in extractedFields.items(): if _extract_review_page(fieldPositions.get(fieldName)) is not None: continue valueText = _stringify_char_position_value(_normalize_review_value(rawValue)) if not valueText.strip(): continue existing = missingFields.get(str(fieldName)) if existing is None or len(valueText) > len(existing): missingFields[str(fieldName)] = valueText rowTouched = True if rowTouched and row.get("id") is not None: touchedRuleRows[int(row["id"])] = row if not missingFields: return rows inferredPositions = await self._inferFieldPositionsFromDocumentSource( Session, DocumentId, missingFields, ) if not inferredPositions: return rows for ruleRowId, row in touchedRuleRows.items(): fieldPositions = row["field_positions"] if isinstance(row.get("field_positions"), dict) else {} changed = False extractedFields = row["extracted_fields"] if isinstance(row.get("extracted_fields"), dict) else {} for fieldName in extractedFields.keys(): key = str(fieldName) if _extract_review_page(fieldPositions.get(key)) is not None: continue inferred = inferredPositions.get(key) if inferred is None: continue fieldPositions[key] = inferred changed = True if not changed: continue await Session.execute( text( """ UPDATE leaudit_rule_results SET field_positions = CAST(:field_positions AS JSONB), updated_at = NOW() WHERE id = :rule_result_id """ ), { "rule_result_id": ruleRowId, "field_positions": json.dumps(fieldPositions, ensure_ascii=False), }, ) for fieldName, position in inferredPositions.items(): await Session.execute( text( """ UPDATE leaudit_field_results SET meta_json = jsonb_set( COALESCE(meta_json, '{}'::jsonb), '{position}', CAST(:position AS JSONB), true ), updated_at = NOW() WHERE run_id = :run_id AND document_id = :document_id AND field_name = :field_name AND ( meta_json IS NULL OR meta_json->'position' IS NULL OR meta_json->'position' = 'null'::jsonb ) """ ), { "run_id": RunId, "document_id": DocumentId, "field_name": fieldName, "position": json.dumps(position, ensure_ascii=False), }, ) await Session.commit() return rows async def _inferFieldPositionsFromDocumentSource( self, Session, DocumentId: int, FieldTexts: dict[str, str], ) -> dict[str, dict[str, Any]]: """按当前文档源文件逐页匹配字段文本,推导最小可用 pageNum。""" fileRow = ( await Session.execute( text( """ SELECT id, file_name, file_ext, local_path, oss_url, file_role FROM leaudit_document_files WHERE document_id = :document_id AND is_active = true ORDER BY CASE file_role WHEN 'converted_pdf' THEN 0 WHEN 'primary' THEN 1 WHEN 'merged_docx' THEN 2 WHEN 'merged_pdf' THEN 3 ELSE 9 END, id DESC LIMIT 1 """ ), {"document_id": DocumentId}, ) ).mappings().first() if not fileRow: return {} fileName = str(fileRow["file_name"] or "") fileExt = f".{str(fileRow['file_ext']).lstrip('.')}" if fileRow.get("file_ext") else Path(fileName).suffix tempPath: str | None = None localPath = str(fileRow["local_path"] or "").strip() try: if localPath and Path(localPath).is_file(): sourcePath = Path(localPath) else: ossUrl = str(fileRow["oss_url"] or "").strip() if not ossUrl: return {} downloaded = await self.OssService.DownloadToTempFile( ossUrl, Suffix=fileExt or Path(fileName).suffix or "", Prefix=f"review-page-{DocumentId}-", ) tempPath = downloaded sourcePath = Path(downloaded) suffix = sourcePath.suffix.lower() if suffix == ".pdf": pageTexts = _extract_page_texts_from_pdf(sourcePath) elif suffix == ".docx": pageTexts = _extract_page_texts_from_docx(sourcePath) else: return {} if not pageTexts: return {} inferred: dict[str, dict[str, Any]] = {} for fieldName, fieldText in FieldTexts.items(): pageNum = _infer_page_num_from_page_texts(fieldText, pageTexts) if pageNum is None: continue inferred[fieldName] = {"pageNum": pageNum, "matchMethod": "detail_page_fallback"} return inferred except Exception: return {} finally: if tempPath: try: Path(tempPath).unlink(missing_ok=True) except OSError: pass def _buildReviewPointStats(self, ReviewPoints: list[ReviewPointResultVO]) -> ReviewPointStatsVO: """按旧详情页口径汇总统计。""" stats = ReviewPointStatsVO(total=len(ReviewPoints), success=0, warning=0, error=0, score=0) for item in ReviewPoints: if item.status == "success": stats.success += 1 elif item.status == "warning": stats.warning += 1 elif item.status == "error": stats.error += 1 stats.score += float(item.currentScore if item.currentScore is not None else (item.score or 0)) return stats def _buildReviewInfo( self, RunRow: dict[str, Any] | None, ReviewPoints: list[ReviewPointResultVO], Stats: ReviewPointStatsVO, ) -> ReviewPointInfoVO: """构建详情页右侧的评查摘要。""" groupNames = [item.groupName for item in ReviewPoints if item.groupName] uniqueGroups = list(dict.fromkeys(groupNames)) reviewTime = "" reviewModel = "DeepSeek" if RunRow: reviewTime = _format_display_datetime(RunRow.get("finished_at") or RunRow.get("updated_at")) reviewModel = str(RunRow.get("llm_model") or "DeepSeek") issueCount = Stats.warning + Stats.error return ReviewPointInfoVO( reviewTime=reviewTime, reviewModel=reviewModel, ruleGroup="、".join(uniqueGroups), result="warning" if issueCount > 0 else "success", issueCount=issueCount, ) def _buildReviewPointTitle(self, Row: dict[str, Any]) -> str: """生成详情页展示标题。""" if Row.get("passed") is True: return str(Row.get("pass_message") or Row.get("rule_name") or Row.get("rule_id") or "") if Row.get("fail_message"): return str(Row["fail_message"]) if Row.get("skip_reason"): return str(Row["skip_reason"]) return str(Row.get("rule_name") or Row.get("rule_id") or "") async def _syncRuleBindings(self, Session, DocTypeId: int, RuleSetIds: list[int], Region: str | None = None) -> None: """全量替换规则绑定。""" normalizedRegion = self._normalize_binding_region(Region) await Session.execute( text("UPDATE leaudit_rule_type_bindings SET deleted_at = NOW() WHERE doc_type_id = :id AND deleted_at IS NULL"), {"id": DocTypeId}, ) for idx, ruleSetId in enumerate(RuleSetIds): await Session.execute( text( """ INSERT INTO leaudit_rule_type_bindings (doc_type_id, rule_set_id, binding_mode, priority, region, is_active, created_at, updated_at) VALUES (:doc_type_id, :rule_set_id, 'explicit', :priority, :region, true, NOW(), NOW()) """ ), {"doc_type_id": DocTypeId, "rule_set_id": ruleSetId, "priority": 100 - idx, "region": normalizedRegion}, ) def _normalize_binding_region(self, region: str | None) -> str: normalized = str(region or "").strip() if normalized in {"", "default", "省级"}: return self._PUBLIC_REGION return normalized async def _find_latest_version_candidate( session, *, type_id: int, root_group_id: int | None, tenant_code: str | None, region: str, normalized_name: str, file_ext: str | None = None, ) -> dict | None: """Find the latest primary document version candidate by normalized name + extension.""" ext_clause = "" ext_params: dict[str, object] = {} if file_ext: ext_clause = " AND f.file_ext = :file_ext" ext_params["file_ext"] = file_ext if root_group_id is not None: params: dict[str, object] = { "root_group_id": root_group_id, "tenant_code": str(tenant_code or "").strip(), "region": region, "normalized_name": normalized_name, **ext_params, } result = await session.execute( text( f""" SELECT d.id AS document_id, d.version_group_key, d.version_no, d.root_version_id, f.id AS file_id, f.sha256 FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id AND f.is_active = true AND f.file_role = 'primary' LEFT JOIN leaudit_evaluation_point_groups eg ON eg.id = d.group_id LEFT JOIN ( SELECT document_type_id, CASE WHEN COUNT(DISTINCT pid) = 1 THEN MIN(pid) END AS inferred_root_group_id FROM leaudit_evaluation_point_groups WHERE COALESCE(pid, 0) <> 0 AND deleted_at IS NULL AND is_enabled = true AND document_type_id IS NOT NULL GROUP BY document_type_id ) dg ON dg.document_type_id = d.type_id WHERE ( d.tenant_code = :tenant_code OR ( (:tenant_code = '') AND (d.tenant_code IS NULL OR BTRIM(d.tenant_code) = '') AND d.region = :region ) ) AND d.normalized_name = :normalized_name AND d.is_latest_version = true AND d.deleted_at IS NULL{ext_clause} AND COALESCE( CASE WHEN eg.id IS NULL THEN NULL WHEN COALESCE(eg.pid, 0) = 0 THEN eg.id ELSE eg.pid END, dg.inferred_root_group_id ) = :root_group_id ORDER BY d.version_no DESC, d.id DESC LIMIT 1 """ ), params, ) row = result.mappings().first() if row: return dict(row) params = { "type_id": type_id, "tenant_code": str(tenant_code or "").strip(), "region": region, "normalized_name": normalized_name, **ext_params, } result = await session.execute( text( f""" SELECT d.id AS document_id, d.version_group_key, d.version_no, d.root_version_id, f.id AS file_id, f.sha256 FROM leaudit_documents d JOIN leaudit_document_files f ON f.document_id = d.id AND f.is_active = true AND f.file_role = 'primary' WHERE d.type_id = :type_id AND ( d.tenant_code = :tenant_code OR ( (:tenant_code = '') AND (d.tenant_code IS NULL OR BTRIM(d.tenant_code) = '') AND d.region = :region ) ) AND d.normalized_name = :normalized_name{ext_clause} AND d.is_latest_version = true AND d.deleted_at IS NULL ORDER BY d.version_no DESC, d.id DESC LIMIT 1 """ ), params, ) row = result.mappings().first() return dict(row) if row else None def _map_rule_status(status: str | None, passed: bool | None, risk: str | None) -> str: """把新规则结果状态转换成旧详情页口径。""" normalized = (status or "").strip().lower() if normalized in {"skipped", "not_applicable"}: return "notApplicable" if passed is True or normalized in {"passed", "success"}: return "success" if normalized in {"error", "failed", "fail"}: return "error" if (risk or "").strip().lower() == "high" else "warning" return "warning" if passed is False else "success" def _normalize_review_content( extracted_fields: dict[str, Any], field_positions: dict[str, Any], ) -> dict[str, dict[str, Any]]: """归一化字段内容,兼容旧详情页结构。""" content: dict[str, dict[str, Any]] = {} for field_name, raw_value in extracted_fields.items(): position = field_positions.get(field_name) if isinstance(field_positions, dict) else None page = _extract_review_page(position) char_positions = _build_char_positions(position, raw_value) content[str(field_name)] = { "page": page if page is not None else "", "value": _normalize_review_value(raw_value), "char_positions": char_positions, } return content def _normalize_review_content_pages( extracted_fields: dict[str, Any], field_positions: dict[str, Any], ) -> dict[str, str]: """提取字段页码映射。""" pages: dict[str, str] = {} for field_name in extracted_fields.keys(): position = field_positions.get(field_name) if isinstance(field_positions, dict) else None page = _extract_review_page(position) pages[str(field_name)] = str(page) if page is not None else "" return pages def _normalize_review_value(raw_value: Any) -> Any: """尽量保留原值,同时把复杂对象压到旧前端能识别的结构。""" if isinstance(raw_value, dict): if "value" in raw_value: return raw_value["value"] if "text" in raw_value: return raw_value["text"] if "value_text" in raw_value: return raw_value["value_text"] return raw_value return raw_value def _extract_review_page(position: Any) -> int | None: """从字段位置信息中提取页码。""" if not isinstance(position, dict): return None page_num = position.get("pageNum") if isinstance(page_num, int): return page_num if isinstance(page_num, str) and page_num.isdigit(): return int(page_num) page_nums = position.get("pageNums") if isinstance(page_nums, list) and page_nums: first = page_nums[0] if isinstance(first, int): return first if isinstance(first, str) and first.isdigit(): return int(first) match_position = position.get("matchPosition") if isinstance(match_position, dict): for key in ("page", "pageNum"): value = match_position.get(key) if isinstance(value, int): return value if isinstance(value, str) and value.isdigit(): return int(value) return None def _extract_review_suggestion(remediation: dict[str, Any], fail_message: Any, pass_message: Any) -> str: """从 remediation 中提取适合旧详情页展示的一句建议。""" suggestions = remediation.get("suggestions") if isinstance(suggestions, list) and suggestions: first = suggestions[0] if isinstance(first, str): return first if isinstance(first, dict): return str(first.get("text") or first.get("message") or first.get("suggestion") or "") actions = remediation.get("actions") if isinstance(actions, list) and actions: first = actions[0] if isinstance(first, str): return first if isinstance(first, dict): return str(first.get("text") or first.get("message") or first.get("action") or "") if fail_message: return str(fail_message) if pass_message: return str(pass_message) return "" def _normalize_review_stage_logs(stages: list[Any]) -> list[dict[str, Any]]: """把 stages 压成旧前端的 evaluatedPointResultsLog.rules。""" result: list[dict[str, Any]] = [] for index, stage in enumerate(stages): if not isinstance(stage, dict): continue result.append( { "id": str(stage.get("id") or index), "type": str(stage.get("type") or stage.get("stage") or stage.get("name") or "rule"), "res": stage.get("passed"), "config": stage, } ) return result def _enrich_stage_payload_with_positions(payload: Any, field_positions: dict[str, Any]) -> Any: """递归给 stage payload 注入最小可用 char_positions。""" if isinstance(payload, list): return [_enrich_stage_payload_with_positions(item, field_positions) for item in payload] if not isinstance(payload, dict): return payload enriched: dict[str, Any] = {} for key, value in payload.items(): enriched[key] = _enrich_stage_payload_with_positions(value, field_positions) if isinstance(value, dict): position = field_positions.get(key) if isinstance(field_positions, dict) else None if position: if ("page" not in enriched[key] or not enriched[key].get("page")): page = _extract_review_page(position) if page is not None: enriched[key]["page"] = page if "char_positions" not in enriched[key]: char_positions = _build_char_positions(position, value.get("value")) if char_positions: enriched[key]["char_positions"] = char_positions return enriched def _build_char_positions(position: Any, raw_value: Any) -> list[dict[str, Any]] | None: """把后端已有 bbox / matchPosition 压成前端高亮所需的 char_positions。""" if not isinstance(position, dict): return None bbox = position.get("bbox") if bbox is None and isinstance(position.get("matchPosition"), dict): bbox = position["matchPosition"].get("bbox") normalized_box = _normalize_bbox_to_points(bbox) if not normalized_box: return None value_text = _stringify_char_position_value(raw_value) return [ { "box": normalized_box, "char": value_text, "score": 1, } ] def _normalize_bbox_to_points(bbox: Any) -> list[list[float]] | None: """兼容多种 bbox 形态,统一转成四点坐标。""" if not bbox: return None if ( isinstance(bbox, list) and len(bbox) == 4 and all(isinstance(item, (list, tuple)) and len(item) >= 2 for item in bbox) ): return [[float(point[0]), float(point[1])] for point in bbox] if isinstance(bbox, (list, tuple)) and len(bbox) >= 4 and all(isinstance(item, (int, float)) for item in bbox[:4]): x1, y1, x2, y2 = [float(item) for item in bbox[:4]] return [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] if isinstance(bbox, dict): if all(key in bbox for key in ("x1", "y1", "x2", "y2")): x1 = float(bbox["x1"]) y1 = float(bbox["y1"]) x2 = float(bbox["x2"]) y2 = float(bbox["y2"]) return [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] if all(key in bbox for key in ("left", "top", "right", "bottom")): x1 = float(bbox["left"]) y1 = float(bbox["top"]) x2 = float(bbox["right"]) y2 = float(bbox["bottom"]) return [[x1, y1], [x2, y1], [x2, y2], [x1, y2]] return None def _stringify_char_position_value(raw_value: Any) -> str: """将字段值压成前端高亮文本。""" normalized = _normalize_review_value(raw_value) if normalized is None: return "" if isinstance(normalized, str): return normalized if isinstance(normalized, (int, float, bool)): return str(normalized) return "" _REVIEW_TEXT_PUNCT_TABLE = str.maketrans({ ",": ",", "。": ".", ";": ";", ":": ":", "!": "!", "?": "?", "(": "(", ")": ")", "【": "[", "】": "]", "「": '"', "」": '"', "‘": "'", "’": "'", "“": '"', "”": '"', }) def _normalize_review_match_text(text: str) -> str: """统一比较文本,尽量消除 OCR/文档解析带来的空白和标点差异。""" if not text: return "" normalized = re.sub(r"<[^>]+>", "", str(text)) normalized = re.sub(r"\s+", "", normalized) return normalized.translate(_REVIEW_TEXT_PUNCT_TABLE) def _infer_page_num_from_page_texts(field_text: str, page_texts: list[tuple[int, str]]) -> int | None: """根据字段文本在逐页正文中的命中情况推导 1-based 页码。""" normalizedField = _normalize_review_match_text(field_text) if not normalizedField: return None bestPage: int | None = None bestScore = 0.0 fieldLength = len(normalizedField) for pageNum, pageText in page_texts: normalizedPage = _normalize_review_match_text(pageText) if not normalizedPage: continue if normalizedField in normalizedPage: return pageNum if fieldLength < 8: continue ratio = _partial_similarity(normalizedField, normalizedPage) if ratio > bestScore: bestScore = ratio bestPage = pageNum if bestScore >= 0.92: return bestPage return None def _partial_similarity(needle: str, haystack: str) -> float: """简化版 partial similarity,避免详情页兜底依赖额外包。""" if not needle or not haystack: return 0.0 if len(needle) > len(haystack): needle, haystack = haystack, needle window = len(needle) if window <= 0: return 0.0 if needle == haystack: return 1.0 best = 0.0 step = max(1, window // 6) stop = max(len(haystack) - window + 1, 1) for start in range(0, stop, step): chunk = haystack[start : start + window] if not chunk: continue matches = sum(1 for left, right in zip(needle, chunk) if left == right) best = max(best, matches / window) if best >= 0.999: return 1.0 return best def _extract_page_texts_from_pdf(path: Path) -> list[tuple[int, str]]: """平台侧直接从 PDF 提取逐页文本,避免依赖 leaudit 内核。""" import fitz doc = fitz.open(path) try: page_texts: list[tuple[int, str]] = [] for index in range(len(doc)): text = doc[index].get_text("text") or "" if text.strip(): page_texts.append((index + 1, text)) return page_texts finally: doc.close() def _extract_page_texts_from_docx(path: Path) -> list[tuple[int, str]]: """DOCX 无稳定真实分页时,平台侧仅退化为整文第 1 页定位。""" from docx import Document as DocxDocument doc = DocxDocument(str(path)) parts: list[str] = [] for para in doc.paragraphs: text = (para.text or "").strip() if text: parts.append(text) for table in doc.tables: for row in table.rows: cells = [cell.text.strip() for cell in row.cells if cell.text and cell.text.strip()] if cells: parts.append(" | ".join(cells)) text = "\n".join(parts).strip() return [(1, text)] if text else [] def _format_display_datetime(value: Any) -> str: """格式化详情页展示时间。""" if isinstance(value, datetime): return value.strftime("%Y-%m-%d %H:%M:%S") return "" def _format_iso_datetime(value: Any) -> str | None: """格式化 ISO 时间字符串。""" if isinstance(value, datetime): return value.isoformat() return None def _coerce_positive_int(value: Any) -> int | None: """把各种页码值安全转成正整数。""" if isinstance(value, int): return value if value > 0 else None if isinstance(value, float): int_value = int(value) return int_value if int_value > 0 else None if isinstance(value, str): match = re.search(r"\d+", value) if match: int_value = int(match.group(0)) return int_value if int_value > 0 else None return None def _normalize_speed(speed: str | None) -> str: """Normalize front-end speed selection to urgent/normal.""" normalized = (speed or "").strip().lower() if normalized in {"urgent", "high", "fast", "emergency", "紧急"}: return "urgent" return "normal" def _normalize_document_name(file_name: str) -> str: """Build a stable name key for same-name version matching.""" stem = Path(file_name).stem name = unicodedata.normalize("NFKC", stem).strip().lower() name = re.sub(r"[\s_\-]+", " ", name) name = re.sub(r"(?:\(|()\d+(?:\)|))$", "", name).strip() name = re.sub(r"(?:[-_\s]*副本|[-_\s]*copy)$", "", name).strip() name = re.sub(r"\s+", " ", name).strip() return name or "untitled"