feat: 完善模板对比持久化与附件版本处理

2026-05-20 10:55:28 +08:00
parent 7c6f134808
commit a2c2bf1969
14 changed files with 1701 additions and 77 deletions
@@ -11,12 +11,17 @@ from datetime import date as date_type, datetime
 import hashlib
 import mimetypes
 import re
+import tempfile
 import time
 import unicodedata
 import uuid
+from copy import deepcopy
 from pathlib import Path

-from sqlalchemy import text
+import fitz
+from leaudit.converters import doc2pdf
+from sqlalchemy import bindparam, text
+from docx import Document as DocxDocument

 from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
 from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
@@ -49,6 +54,7 @@ from fastapi_modules.fastapi_leaudit.domian.vo.reviewPointVo import (
    ReviewPointsAggregateVO,
 )
 from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile
+from fastapi_modules.fastapi_leaudit.leaudit_bridge.fileSourceResolver import FileSourceResolver
 from fastapi_modules.fastapi_leaudit.services import IAuditService, IDocumentService, IOssService
 from fastapi_modules.fastapi_leaudit.services.impl.auditServiceImpl import AuditServiceImpl
 from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl
@@ -313,7 +319,12 @@ class DocumentServiceImpl(IDocumentService):
            documentColumns = await self._loadDocumentColumns(Session)

        currentUser = await self._getCurrentUserContext(CurrentUserId)
-        filters = ["d.is_latest_version = true", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'"]
+        filters = [
+            "d.is_latest_version = true",
+            "d.deleted_at IS NULL",
+            "f.is_active = true",
+            "f.file_role = 'primary'",
+        ]
        params: dict[str, object] = {"limit": page_size, "offset": offset}
        filters.extend(
            self._buildDocumentScopeFilters(
@@ -516,6 +527,7 @@ class DocumentServiceImpl(IDocumentService):
                f.file_name,
                f.file_ext,
                f.file_size,
+                f.oss_url,
                ar.status AS run_status,
                ar.result_status,
                ar.total_score,
@@ -555,6 +567,7 @@ class DocumentServiceImpl(IDocumentService):
                            fileName=row["file_name"],
                            fileExt=row["file_ext"],
                            fileSize=int(row["file_size"]) if row["file_size"] is not None else None,
+                            ossUrl=row["oss_url"],
                            processingStatus=row["processing_status"],
                            runStatus=row["run_status"],
                            resultStatus=row["result_status"],
@@ -1049,11 +1062,687 @@ class DocumentServiceImpl(IDocumentService):

        await Session.commit()

+    async def _load_active_primary_file_row(
+        self,
+        Session,
+        *,
+        DocumentId: int,
+    ) -> dict[str, Any]:
+        row = (
+            await Session.execute(
+                text(
+                    """
+                    SELECT
+                        id,
+                        file_name,
+                        file_ext,
+                        mime_type,
+                        file_role,
+                        oss_url,
+                        local_path
+                    FROM leaudit_document_files
+                    WHERE document_id = :document_id
+                      AND deleted_at IS NULL
+                      AND is_active = true
+                      AND file_role = 'primary'
+                    ORDER BY id DESC
+                    LIMIT 1
+                    """
+                ),
+                {"document_id": DocumentId},
+            )
+        ).mappings().first()
+        if not row:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少主文件")
+        return dict(row)
+
+    def _normalize_document_source_kind(self, file_name: str, file_ext: str | None, mime_type: str | None) -> str:
+        suffix = (f".{str(file_ext).lstrip('.')}" if file_ext else Path(file_name).suffix).lower()
+        normalizedMime = (mime_type or "").lower()
+        if suffix == ".pdf" or normalizedMime == "application/pdf":
+            return "pdf"
+        if suffix == ".docx" or normalizedMime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+            return "docx"
+        raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前仅支持 PDF 或 DOCX 主文档追加附件")
+
+    def _normalize_attachment_source_kind(self, file_name: str, file_ext: str | None, mime_type: str | None) -> str:
+        suffix = (f".{str(file_ext).lstrip('.')}" if file_ext else Path(file_name).suffix).lower()
+        normalizedMime = (mime_type or "").lower()
+        if suffix == ".pdf" or normalizedMime == "application/pdf":
+            return "pdf"
+        if suffix == ".docx" or normalizedMime == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
+            return "docx"
+        raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, f"附件 {file_name} 仅支持 PDF 或 DOCX 格式")
+
+    def _validate_attachment_matrix(
+        self,
+        *,
+        MainSourceKind: str,
+        Files: list[tuple[str, bytes, str | None]],
+    ) -> None:
+        for fileName, _, contentType in Files:
+            attachmentKind = self._normalize_attachment_source_kind(fileName, Path(fileName).suffix.lstrip(".").lower() or None, contentType)
+            if MainSourceKind == "docx" and attachmentKind != "docx":
+                raise LeauditException(
+                    StatusCodeEnum.HTTP_400_BAD_REQUEST,
+                    "源文档为 DOCX 时，仅允许追加 DOCX 附件，且合并结果仍为 DOCX",
+                )
+
+    def _merge_docx_paths_for_merge(
+        self,
+        *,
+        DocxPaths: list[str],
+        TempPaths: list[str],
+    ) -> str:
+        if not DocxPaths:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "缺少可合并的 DOCX 文件")
+
+        mergedTemp = tempfile.NamedTemporaryFile(suffix=".docx", delete=False)
+        mergedTemp.close()
+        TempPaths.append(mergedTemp.name)
+
+        baseDoc = DocxDocument(DocxPaths[0])
+        for attachmentPath in DocxPaths[1:]:
+            attachmentDoc = DocxDocument(attachmentPath)
+            if baseDoc.paragraphs:
+                baseDoc.add_page_break()
+            for element in attachmentDoc.element.body:
+                baseDoc.element.body.append(deepcopy(element))
+        baseDoc.save(mergedTemp.name)
+        return mergedTemp.name
+
+    async def _cloneActiveFilesToNewDocument(
+        self,
+        Session,
+        *,
+        SourceDocumentId: int,
+        TargetDocumentId: int,
+        CreatedBy: int | None,
+        IncludeAttachments: bool = True,
+    ) -> None:
+        """复制当前文档的主文件与现有附件到新版本文档。"""
+        fileRoles = ["primary", "attachment"] if IncludeAttachments else ["primary"]
+        sourceFiles = (
+            await Session.execute(
+                text(
+                    """
+                    SELECT
+                        id,
+                        file_role,
+                        file_name,
+                        file_ext,
+                        mime_type,
+                        file_size,
+                        sha256,
+                        local_path,
+                        oss_url,
+                        storage_provider
+                    FROM leaudit_document_files
+                    WHERE document_id = :document_id
+                      AND deleted_at IS NULL
+                      AND is_active = true
+                      AND file_role = ANY(:file_roles)
+                    ORDER BY
+                      CASE WHEN file_role = 'primary' THEN 0 ELSE 1 END,
+                      id ASC
+                    """
+                ).bindparams(bindparam("file_roles", expanding=False)),
+                {"document_id": SourceDocumentId, "file_roles": fileRoles},
+            )
+        ).mappings().all()
+        if not sourceFiles:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "源文档缺少可复制的有效文件")
+
+        for row in sourceFiles:
+            clonedFile = LeauditDocumentFile(
+                documentId=TargetDocumentId,
+                fileRole=str(row["file_role"] or "attachment"),
+                fileName=str(row["file_name"] or ""),
+                fileExt=row["file_ext"],
+                mimeType=row["mime_type"],
+                fileSize=int(row["file_size"]) if row["file_size"] is not None else None,
+                sha256=row["sha256"],
+                localPath=row["local_path"],
+                ossUrl=row["oss_url"],
+                storageProvider=row["storage_provider"],
+                isActive=True,
+                createdBy=CreatedBy,
+            )
+            Session.add(clonedFile)
+
+        await Session.flush()
+
+    async def _buildMergedPdfBytesForDocument(
+        self,
+        Session,
+        *,
+        DocumentId: int,
+    ) -> tuple[bytes, str]:
+        """读取当前主文件与附件，生成新的主 PDF 内容。"""
+        primaryFile = (
+            await Session.execute(
+                text(
+                    """
+                    SELECT id
+                    FROM leaudit_document_files
+                    WHERE document_id = :document_id
+                      AND deleted_at IS NULL
+                      AND is_active = true
+                      AND file_role = 'primary'
+                    ORDER BY id DESC
+                    LIMIT 1
+                    """
+                ),
+                {"document_id": DocumentId},
+            )
+        ).scalar_one_or_none()
+        if primaryFile is None:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少主文件，无法生成合并文件")
+
+        attachmentRows = (
+            await Session.execute(
+                text(
+                    """
+                    SELECT id
+                    FROM leaudit_document_files
+                    WHERE document_id = :document_id
+                      AND deleted_at IS NULL
+                      AND is_active = true
+                      AND file_role = 'attachment'
+                    ORDER BY id ASC
+                    """
+                ),
+                {"document_id": DocumentId},
+            )
+        ).scalars().all()
+        if not attachmentRows:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档没有附件，无法生成合并文件")
+
+        resolver = FileSourceResolver()
+        primaryModel = await Session.get(LeauditDocumentFile, int(primaryFile))
+        if primaryModel is None:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "主文件记录不存在，无法生成合并文件")
+        primaryPayload = await resolver.ResolvePayload(primaryModel)
+
+        attachmentModels: list[LeauditDocumentFile] = []
+        for attachmentId in attachmentRows:
+            attachmentModel = await Session.get(LeauditDocumentFile, int(attachmentId))
+            if attachmentModel is not None:
+                attachmentModels.append(attachmentModel)
+        attachmentPayloads = await resolver.ResolvePayloads(attachmentModels)
+
+        tempPaths: list[str] = []
+        try:
+            mainLocalPath = self._write_temp_file_for_merge(
+                FileName=primaryPayload.fileName,
+                Content=primaryPayload.fileContent,
+                TempPaths=tempPaths,
+            )
+            mainPdfPath = self._convert_source_to_pdf(
+                SourcePath=mainLocalPath,
+                TempPaths=tempPaths,
+            )
+            pdfPaths = [mainPdfPath]
+
+            for attachmentPayload in attachmentPayloads:
+                attachmentLocalPath = self._write_temp_file_for_merge(
+                    FileName=attachmentPayload.fileName,
+                    Content=attachmentPayload.fileContent,
+                    TempPaths=tempPaths,
+                )
+                attachmentPdfPath = self._convert_source_to_pdf(
+                    SourcePath=attachmentLocalPath,
+                    TempPaths=tempPaths,
+                )
+                pdfPaths.append(attachmentPdfPath)
+
+            mergedPdfPath = self._merge_pdf_paths_for_merge(
+                PdfPaths=pdfPaths,
+                TempPaths=tempPaths,
+            )
+            mergedBytes = Path(mergedPdfPath).read_bytes()
+            mergedName = f"{Path(primaryPayload.fileName).stem}.pdf"
+            return mergedBytes, mergedName
+        except LeauditException:
+            raise
+        except Exception as error:
+            raise LeauditException(
+                StatusCodeEnum.HTTP_500_INTERNAL_SERVER_ERROR,
+                f"生成合并PDF失败: {error}",
+            ) from error
+        finally:
+            for tempPath in reversed(tempPaths):
+                try:
+                    pathObj = Path(tempPath)
+                    if pathObj.is_file():
+                        pathObj.unlink(missing_ok=True)
+                except Exception:
+                    pass
+
+    async def _buildMergedDocxBytesForDocument(
+        self,
+        Session,
+        *,
+        DocumentId: int,
+    ) -> tuple[bytes, str]:
+        """读取当前主文件与附件，生成新的主 DOCX 内容。"""
+        primaryFile = await self._load_active_primary_file_row(Session, DocumentId=DocumentId)
+        if self._normalize_document_source_kind(
+            str(primaryFile["file_name"] or ""),
+            primaryFile.get("file_ext"),
+            primaryFile.get("mime_type"),
+        ) != "docx":
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "仅 DOCX 主文档支持生成 DOCX 合并结果")
+
+        attachmentRows = (
+            await Session.execute(
+                text(
+                    """
+                    SELECT id
+                    FROM leaudit_document_files
+                    WHERE document_id = :document_id
+                      AND deleted_at IS NULL
+                      AND is_active = true
+                      AND file_role = 'attachment'
+                    ORDER BY id ASC
+                    """
+                ),
+                {"document_id": DocumentId},
+            )
+        ).scalars().all()
+        if not attachmentRows:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档没有附件，无法生成合并文件")
+
+        resolver = FileSourceResolver()
+        primaryModel = await Session.get(LeauditDocumentFile, int(primaryFile["id"]))
+        if primaryModel is None:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "主文件记录不存在，无法生成合并文件")
+        primaryPayload = await resolver.ResolvePayload(primaryModel)
+
+        attachmentModels: list[LeauditDocumentFile] = []
+        for attachmentId in attachmentRows:
+            attachmentModel = await Session.get(LeauditDocumentFile, int(attachmentId))
+            if attachmentModel is not None:
+                attachmentModels.append(attachmentModel)
+        attachmentPayloads = await resolver.ResolvePayloads(attachmentModels)
+
+        tempPaths: list[str] = []
+        try:
+            docxPaths = [
+                self._write_temp_file_for_merge(
+                    FileName=primaryPayload.fileName,
+                    Content=primaryPayload.fileContent,
+                    TempPaths=tempPaths,
+                )
+            ]
+            for attachmentPayload in attachmentPayloads:
+                attachmentKind = self._normalize_attachment_source_kind(
+                    attachmentPayload.fileName,
+                    Path(attachmentPayload.fileName).suffix.lstrip(".").lower() or None,
+                    None,
+                )
+                if attachmentKind != "docx":
+                    raise LeauditException(
+                        StatusCodeEnum.HTTP_400_BAD_REQUEST,
+                        f"DOCX 主文档仅允许追加 DOCX 附件，当前附件 {attachmentPayload.fileName} 不符合要求",
+                    )
+                docxPaths.append(
+                    self._write_temp_file_for_merge(
+                        FileName=attachmentPayload.fileName,
+                        Content=attachmentPayload.fileContent,
+                        TempPaths=tempPaths,
+                    )
+                )
+
+            mergedDocxPath = self._merge_docx_paths_for_merge(
+                DocxPaths=docxPaths,
+                TempPaths=tempPaths,
+            )
+            mergedBytes = Path(mergedDocxPath).read_bytes()
+            mergedName = f"{Path(primaryPayload.fileName).stem}.docx"
+            return mergedBytes, mergedName
+        finally:
+            for tempPath in reversed(tempPaths):
+                try:
+                    pathObj = Path(tempPath)
+                    if pathObj.is_file():
+                        pathObj.unlink(missing_ok=True)
+                except Exception:
+                    pass
+
+    def _write_temp_file_for_merge(
+        self,
+        *,
+        FileName: str,
+        Content: bytes,
+        TempPaths: list[str],
+    ) -> str:
+        """为持久化合并流程写入临时文件。"""
+        suffix = Path(FileName).suffix or ".bin"
+        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tempFile:
+            tempFile.write(Content)
+            tempPath = tempFile.name
+        TempPaths.append(tempPath)
+        return tempPath
+
+    def _convert_source_to_pdf(
+        self,
+        *,
+        SourcePath: str,
+        TempPaths: list[str],
+    ) -> str:
+        """将源文件转换为 PDF。"""
+        source = Path(SourcePath)
+        if source.suffix.lower() == ".pdf":
+            return str(source)
+        pdfTemp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
+        pdfTemp.close()
+        TempPaths.append(pdfTemp.name)
+        doc2pdf.convert(source, pdfTemp.name, soffice="auto", pdfa=False, force=True, verify=False)
+        return pdfTemp.name
+
+    def _merge_pdf_paths_for_merge(
+        self,
+        *,
+        PdfPaths: list[str],
+        TempPaths: list[str],
+    ) -> str:
+        """合并多个 PDF 为一个临时 PDF。"""
+        mergedTemp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
+        mergedTemp.close()
+        TempPaths.append(mergedTemp.name)
+        output = fitz.open()
+        try:
+            for pdfPath in PdfPaths:
+                source = fitz.open(pdfPath)
+                try:
+                    output.insert_pdf(source)
+                finally:
+                    source.close()
+            output.save(mergedTemp.name)
+        finally:
+            output.close()
+        return mergedTemp.name
+
+    async def _persistMergedPdfForDocument(
+        self,
+        Session,
+        *,
+        DocumentId: int,
+        TypeCode: str,
+        Region: str,
+        VersionNo: int,
+        CreatedBy: int | None,
+        FileRole: str = "primary",
+    ) -> None:
+        """为当前文档生成并替换主 PDF 文件。"""
+        mergedBytes, mergedName = await self._buildMergedPdfBytesForDocument(
+            Session,
+            DocumentId=DocumentId,
+        )
+        mergedSha256 = hashlib.sha256(mergedBytes).hexdigest()
+        mergedSize = len(mergedBytes)
+        uploadedAt = datetime.now()
+        versionLabel = f"v{VersionNo}"
+        objectKey = OssPathUtils.BuildBusinessDocKey(
+            Region=Region,
+            TypeCode=TypeCode,
+            DocumentId=DocumentId,
+            Version=versionLabel,
+            FileRole=FileRole,
+            FileName=mergedName,
+            Year=uploadedAt.year,
+            Month=uploadedAt.month,
+        )
+        ossUrl = await self.OssService.UploadBytes(
+            ObjectKey=objectKey,
+            Content=mergedBytes,
+            ContentType="application/pdf",
+        )
+
+        if FileRole == "primary":
+            await Session.execute(
+                text(
+                    """
+                    UPDATE leaudit_document_files
+                    SET is_active = false
+                    WHERE document_id = :document_id
+                      AND deleted_at IS NULL
+                      AND is_active = true
+                      AND file_role IN ('primary', 'merged_pdf', 'merged_docx')
+                    """
+                ),
+                {"document_id": DocumentId},
+            )
+        else:
+            await Session.execute(
+                text(
+                    """
+                    UPDATE leaudit_document_files
+                    SET is_active = false
+                    WHERE document_id = :document_id
+                      AND deleted_at IS NULL
+                      AND is_active = true
+                      AND file_role = :file_role
+                    """
+                ),
+                {"document_id": DocumentId, "file_role": FileRole},
+            )
+
+        Session.add(
+            LeauditDocumentFile(
+                documentId=DocumentId,
+                fileRole=FileRole,
+                fileName=mergedName,
+                fileExt="pdf",
+                mimeType="application/pdf",
+                fileSize=mergedSize,
+                sha256=mergedSha256,
+                localPath=None,
+                ossUrl=ossUrl,
+                storageProvider="minio",
+                isActive=True,
+                createdBy=CreatedBy,
+            )
+        )
+        await Session.flush()
+
+    async def _persistMergedDocxForDocument(
+        self,
+        Session,
+        *,
+        DocumentId: int,
+        TypeCode: str,
+        Region: str,
+        VersionNo: int,
+        CreatedBy: int | None,
+    ) -> None:
+        """为当前文档生成并替换主 DOCX 文件。"""
+        mergedBytes, mergedName = await self._buildMergedDocxBytesForDocument(
+            Session,
+            DocumentId=DocumentId,
+        )
+        mergedSha256 = hashlib.sha256(mergedBytes).hexdigest()
+        mergedSize = len(mergedBytes)
+        uploadedAt = datetime.now()
+        versionLabel = f"v{VersionNo}"
+        objectKey = OssPathUtils.BuildBusinessDocKey(
+            Region=Region,
+            TypeCode=TypeCode,
+            DocumentId=DocumentId,
+            Version=versionLabel,
+            FileRole="primary",
+            FileName=mergedName,
+            Year=uploadedAt.year,
+            Month=uploadedAt.month,
+        )
+        ossUrl = await self.OssService.UploadBytes(
+            ObjectKey=objectKey,
+            Content=mergedBytes,
+            ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        )
+
+        await Session.execute(
+            text(
+                """
+                UPDATE leaudit_document_files
+                SET is_active = false
+                WHERE document_id = :document_id
+                  AND deleted_at IS NULL
+                  AND is_active = true
+                  AND file_role IN ('primary', 'merged_pdf', 'merged_docx')
+                """
+            ),
+            {"document_id": DocumentId},
+        )
+
+        Session.add(
+            LeauditDocumentFile(
+                documentId=DocumentId,
+                fileRole="primary",
+                fileName=mergedName,
+                fileExt="docx",
+                mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+                fileSize=mergedSize,
+                sha256=mergedSha256,
+                localPath=None,
+                ossUrl=ossUrl,
+                storageProvider="minio",
+                isActive=True,
+                createdBy=CreatedBy,
+            )
+        )
+        await Session.flush()
+
+    async def _createNextVersionFromExistingDocument(
+        self,
+        Session,
+        *,
+        SourceDocumentId: int,
+        CreatedBy: int,
+        Remark: str | None = None,
+    ) -> tuple[LeauditDocument, str, str]:
+        """基于现有文档创建一个新版本，并复制当前主文件/附件。"""
+        documentColumns = await self._loadDocumentColumns(Session)
+        optionalSelects = [
+            "d.document_number AS document_number" if "document_number" in documentColumns else "NULL::text AS document_number",
+            "d.remark AS remark" if "remark" in documentColumns else "NULL::text AS remark",
+            "d.is_test_document AS is_test_document" if "is_test_document" in documentColumns else "FALSE AS is_test_document",
+            "d.audit_status AS audit_status" if "audit_status" in documentColumns else "NULL::integer AS audit_status",
+        ]
+        sourceRow = (
+            await Session.execute(
+                text(
+                    """
+                    SELECT
+                        d.id,
+                        d.biz_document_id,
+                        d.type_id,
+                        d.group_id,
+                        d.region,
+                        d.processing_status,
+                        d.version_group_key,
+                        d.version_no,
+                        d.previous_version_id,
+                        d.root_version_id,
+                        d.is_latest_version,
+                        d.normalized_name,
+                        d.review_scope,
+                        """
+                    + ",\n                        ".join(optionalSelects)
+                    + """
+                        ,
+                        dt.code AS type_code
+                    FROM leaudit_documents d
+                    LEFT JOIN leaudit_document_types dt
+                      ON dt.id = d.type_id
+                    WHERE d.id = :document_id
+                      AND d.deleted_at IS NULL
+                    LIMIT 1
+                    """
+                ),
+                {"document_id": SourceDocumentId},
+            )
+        ).mappings().first()
+        if not sourceRow:
+            raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问")
+
+        resolvedTypeCode = str(sourceRow["type_code"] or "").strip()
+        if not resolvedTypeCode:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少文档类型编码，无法创建新版本")
+
+        previousDocument = await Session.get(LeauditDocument, SourceDocumentId)
+        if previousDocument is None:
+            raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问")
+        previousDocument.isLatestVersion = False
+
+        documentFields: dict[str, object] = {
+            "bizDocumentId": time.time_ns(),
+            "typeId": int(sourceRow["type_id"]) if sourceRow["type_id"] is not None else None,
+            "groupId": int(sourceRow["group_id"]) if sourceRow["group_id"] is not None else None,
+            "region": str(sourceRow["region"] or "default").strip() or "default",
+            "processingStatus": "waiting",
+            "versionGroupKey": str(sourceRow["version_group_key"] or uuid.uuid4().hex),
+            "versionNo": int(sourceRow["version_no"] or 1) + 1,
+            "previousVersionId": SourceDocumentId,
+            "rootVersionId": int(sourceRow["root_version_id"] or SourceDocumentId),
+            "isLatestVersion": True,
+            "normalizedName": sourceRow["normalized_name"],
+            "reviewScope": str(sourceRow["review_scope"] or "standard"),
+        }
+
+        newDocument = await LeauditDocument.create_new(Session, **documentFields)
+        if newDocument.rootVersionId is None:
+            newDocument.rootVersionId = newDocument.Id
+
+        assignments: list[str] = []
+        params: dict[str, object] = {"id": int(newDocument.Id)}
+        if "document_number" in documentColumns:
+            assignments.append("document_number = :document_number")
+            params["document_number"] = sourceRow["document_number"]
+        if "remark" in documentColumns:
+            assignments.append("remark = :remark")
+            params["remark"] = Remark.strip() if Remark and Remark.strip() else sourceRow["remark"]
+        if "is_test_document" in documentColumns:
+            assignments.append("is_test_document = :is_test_document")
+            params["is_test_document"] = bool(sourceRow["is_test_document"])
+        if "audit_status" in documentColumns:
+            assignments.append("audit_status = :audit_status")
+            params["audit_status"] = int(sourceRow["audit_status"]) if sourceRow["audit_status"] is not None else None
+        if assignments:
+            assignments.append("updated_at = NOW()")
+            await Session.execute(
+                text(f"UPDATE leaudit_documents SET {', '.join(assignments)} WHERE id = :id"),
+                params,
+            )
+
+        await self._cloneActiveFilesToNewDocument(
+            Session,
+            SourceDocumentId=SourceDocumentId,
+            TargetDocumentId=newDocument.Id,
+            CreatedBy=CreatedBy,
+            IncludeAttachments=False,
+        )
+        await Session.flush()
+        return newDocument, resolvedTypeCode, str(sourceRow["region"] or "default").strip() or "default"
+
+    def _normalizeAttachmentMergeMode(self, MergeMode: str | None) -> str:
+        """标准化附件合并模式。"""
+        normalizedMode = (MergeMode or "new").strip().lower()
+        if normalizedMode not in {"overwrite", "new"}:
+            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "mergeMode 仅支持 overwrite 或 new")
+        return normalizedMode
+
    async def DeleteDocument(self, CurrentUserId: int, Id: int) -> None:
        """软删除文档，并执行数据隔离校验。"""
        async with GetAsyncSession() as Session:
            currentUser = await self._getCurrentUserContext(CurrentUserId)
-            filters = ["d.id = :id", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'"]
+            filters = [
+                "d.id = :id",
+                "d.deleted_at IS NULL",
+                "f.is_active = true",
+                "f.file_role = 'primary'",
+            ]
            params: dict[str, object] = {"id": Id}
            filters.extend(
                self._buildDocumentScopeFilters(
@@ -1141,10 +1830,13 @@ class DocumentServiceImpl(IDocumentService):
        CurrentUserId: int,
        Id: int,
        Files: list[tuple[str, bytes, str | None]],
+        MergeMode: str = "overwrite",
+        Remark: str | None = None,
    ) -> DocumentDetailVO:
        """为现有文档追加附件，并执行数据隔离校验。"""
        if not Files:
            raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "至少上传一个附件文件")
+        normalizedMergeMode = self._normalizeAttachmentMergeMode(MergeMode)

        async with GetAsyncSession() as Session:
            await self._ensureDocumentGroupColumn(Session)
@@ -1182,18 +1874,72 @@ class DocumentServiceImpl(IDocumentService):
            if not resolvedTypeCode:
                raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "当前文档缺少文档类型编码，无法追加附件")

+            primaryFileRow = await self._load_active_primary_file_row(Session, DocumentId=Id)
+            mainSourceKind = self._normalize_document_source_kind(
+                str(primaryFileRow["file_name"] or ""),
+                primaryFileRow.get("file_ext"),
+                primaryFileRow.get("mime_type"),
+            )
+            self._validate_attachment_matrix(
+                MainSourceKind=mainSourceKind,
+                Files=Files,
+            )
+
+            targetDocumentId = int(documentMeta["document_id"])
+            targetVersionNo = int(documentMeta["version_no"] or 1)
            normalizedRegion = str(documentMeta["region"] or "default").strip() or "default"
+
+            if normalizedMergeMode == "new":
+                newDocument, resolvedTypeCode, normalizedRegion = await self._createNextVersionFromExistingDocument(
+                    Session,
+                    SourceDocumentId=Id,
+                    CreatedBy=CurrentUserId,
+                    Remark=Remark,
+                )
+                targetDocumentId = int(newDocument.Id)
+                targetVersionNo = int(newDocument.versionNo or (int(documentMeta["version_no"] or 1) + 1))
+
            await self._appendAttachmentFiles(
                Session=Session,
-                DocumentId=int(documentMeta["document_id"]),
+                DocumentId=targetDocumentId,
                TypeCode=resolvedTypeCode,
                Region=normalizedRegion,
-                VersionNo=int(documentMeta["version_no"] or 1),
+                VersionNo=targetVersionNo,
                Files=Files,
                CreatedBy=CurrentUserId,
            )

-            refreshed = await self._getDocumentDetail(Session, Id, CurrentUserId, currentUser, documentColumns)
+            if mainSourceKind == "docx":
+                await self._persistMergedDocxForDocument(
+                    Session,
+                    DocumentId=targetDocumentId,
+                    TypeCode=resolvedTypeCode,
+                    Region=normalizedRegion,
+                    VersionNo=targetVersionNo,
+                    CreatedBy=CurrentUserId,
+                )
+                await self._persistMergedPdfForDocument(
+                    Session,
+                    DocumentId=targetDocumentId,
+                    TypeCode=resolvedTypeCode,
+                    Region=normalizedRegion,
+                    VersionNo=targetVersionNo,
+                    CreatedBy=CurrentUserId,
+                    FileRole="merged_pdf",
+                )
+            else:
+                await self._persistMergedPdfForDocument(
+                    Session,
+                    DocumentId=targetDocumentId,
+                    TypeCode=resolvedTypeCode,
+                    Region=normalizedRegion,
+                    VersionNo=targetVersionNo,
+                    CreatedBy=CurrentUserId,
+                )
+
+            await Session.commit()
+
+            refreshed = await self._getDocumentDetail(Session, targetDocumentId, CurrentUserId, currentUser, documentColumns)
            if not refreshed:
                raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "文档不存在或无权访问")
            return refreshed
@@ -1949,7 +2695,12 @@ class DocumentServiceImpl(IDocumentService):
    ) -> DocumentDetailVO | None:
        """查询单文档详情，并附带历史版本。"""
        params: dict[str, object] = {"id": DocumentId}
-        filters = ["d.id = :id", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'primary'"]
+        filters = [
+            "d.id = :id",
+            "d.deleted_at IS NULL",
+            "f.is_active = true",
+            "f.file_role = 'primary'",
+        ]
        if not BypassScopeCheck:
            filters.extend(
                self._buildDocumentScopeFilters(
@@ -2065,6 +2816,7 @@ class DocumentServiceImpl(IDocumentService):
                            f.id AS file_id,
                            f.file_name,
                            f.file_ext,
+                            f.oss_url,
                            ar.status AS run_status,
                            ar.result_status
                        FROM leaudit_documents d
@@ -2090,6 +2842,7 @@ class DocumentServiceImpl(IDocumentService):
                    versionNo=int(row["version_no"]),
                    fileName=row["file_name"],
                    fileExt=row["file_ext"],
+                    ossUrl=row["oss_url"],
                    processingStatus=row["processing_status"],
                    runStatus=row["run_status"],
                    resultStatus=row["result_status"],
@@ -2913,8 +3666,9 @@ class DocumentServiceImpl(IDocumentService):
                    ORDER BY
                      CASE file_role
                        WHEN 'converted_pdf' THEN 0
-                        WHEN 'merged_pdf' THEN 1
-                        WHEN 'primary' THEN 2
+                        WHEN 'primary' THEN 1
+                        WHEN 'merged_docx' THEN 2
+                        WHEN 'merged_pdf' THEN 3
                        ELSE 9
                      END,
                      id DESC