# leaudit-platform-backend/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py

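"""Govdoc (official document) module service implementation — phase skeleton.

This file is the Phase 1 skeleton: most methods still return placeholder
results. GetRunResult, ListRules and GetRuleDetail already read real data
(the govdoc_runs / govdoc_rule_results tables and the rules YAML file).
Later steps will gradually wire in:
  - govdoc_bridge execution bridge
  - govdoc_engine engine core
  - reuse of the document master record
  - OSS / Celery integration
"""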

from __future__ import annotations

from typing import Any

from fastapi import UploadFile

from fastapi_common.fastapi_common_logger import logger
from fastapi_modules.fastapi_leaudit.services import IGovdocService


class GovdocServiceImpl(IGovdocService):
    """公文处理与格式审查服务实现。"""

    # ── 文档 ──────────────────────────────────────────────

    async def UploadDocument(
        self,
        file: UploadFile,
        typeId: int | None = None,
        region: str = "default",
        autoRun: bool = False,
        speed: str = "normal",
        ruleVersionId: int | None = None,
        createdBy: int | None = None,
    ) -> dict[str, Any]:
        logger.info("[Govdoc] UploadDocument placeholder — file=%s region=%s", file.filename, region)
        return {
            "documentId": 0,
            "fileId": 0,
            "fileName": file.filename,
            "region": region,
            "engineType": "govdoc",
            "autoRunTriggered": autoRun,
        }

    async def ListDocuments(
        self,
        page: int = 1,
        pageSize: int = 20,
        keyword: str | None = None,
        region: str | None = None,
        status: str | None = None,
        resultStatus: str | None = None,
        createdBy: int | None = None,
        dateFrom: str | None = None,
        dateTo: str | None = None,
        userId: int | None = None,
    ) -> dict[str, Any]:
        logger.info("[Govdoc] ListDocuments placeholder — page=%s pageSize=%s", page, pageSize)
        return {"items": [], "total": 0, "page": page, "pageSize": pageSize}

    async def GetDocumentDetail(self, documentId: int, userId: int | None = None) -> dict[str, Any]:
        logger.info("[Govdoc] GetDocumentDetail placeholder — id=%s", documentId)
        return {"documentId": documentId}

    async def UpdateDocument(self, documentId: int, body: dict[str, Any], userId: int | None = None) -> dict[str, Any]:
        logger.info("[Govdoc] UpdateDocument placeholder — id=%s", documentId)
        return {"documentId": documentId, **body}

    async def DeleteDocument(self, documentId: int, userId: int | None = None) -> dict[str, Any]:
        logger.info("[Govdoc] DeleteDocument placeholder — id=%s", documentId)
        return {"documentId": documentId, "deleted": True}

    # ── 审查运行 ──────────────────────────────────────────

    async def CreateRun(
        self,
        documentId: int,
        ruleVersionId: int | None = None,
        speed: str = "normal",
        force: bool = False,
        triggerUserId: int | None = None,
    ) -> dict[str, Any]:
        logger.info("[Govdoc] CreateRun placeholder — documentId=%s", documentId)
        return {
            "runId": 0,
            "documentId": documentId,
            "status": "queued",
            "phase": "dispatch",
        }

    async def GetRunStatus(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetRunStatus placeholder — runId=%s", runId)
        return {"runId": runId, "status": "pending"}

    # ── 结果与报告 ────────────────────────────────────────

    async def GetRunResult(self, runId: int) -> dict[str, Any]:
        """Read the stored review result of one run, including structure/outline.

        Loads the run row from ``govdoc_runs`` and its per-rule rows from
        ``govdoc_rule_results``, then assembles the response payload from the
        fetched data.

        Args:
            runId: Value matched against ``govdoc_runs.id`` and
                ``govdoc_rule_results.run_id``.

        Returns:
            A dict with ``runId``, ``summary``, ``checkedRules``, ``findings``,
            ``structure``, ``outline`` and ``entities``. When no run row is
            found, the same keys are returned with empty values. ``entities``
            and the ``by_severity`` / ``by_category`` summary fields are
            always empty in this implementation.
        """
        import json as _json
        from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
        from sqlalchemy import text

        async with GetAsyncSession() as session:
            run_row = await session.execute(
                text(
                    """SELECT id, document_id, status, phase, total_score, passed_count,
                       failed_count, skipped_count, result_status, result_summary_json,
                       rules_path, started_at, finished_at
                       FROM govdoc_runs WHERE id = :rid"""
                ),
                {"rid": runId},
            )
            run_data = run_row.mappings().first()
            if not run_data:
                return {"runId": runId, "summary": {}, "checkedRules": [], "findings": [],
                        "structure": [], "outline": [], "entities": {}}

            rules_rows = await session.execute(
                text(
                    """SELECT rule_id, rule_name, severity, category, result, skip_reason,
                       message, suggestion, actual, expected, evidence,
                       paragraph_index, paragraph_text, location_path, score
                       FROM govdoc_rule_results WHERE run_id = :rid"""
                ),
                {"rid": runId},
            )
            rule_results = [dict(r._mapping) for r in rules_rows.fetchall()]

        # Everything below works on already-fetched rows, so the session is
        # released before the payload is assembled.

        # result_summary_json may arrive as JSON text or as an already-decoded
        # value; only a JSON object is usable for the lookups further down.
        aux: dict[str, Any] = {}
        aux_raw = run_data.get("result_summary_json")
        if aux_raw:
            try:
                parsed = _json.loads(aux_raw) if isinstance(aux_raw, (str, bytes, bytearray)) else aux_raw
            except (TypeError, ValueError) as exc:
                # JSONDecodeError is a ValueError; keep the empty fallback but
                # leave a trace instead of swallowing the problem silently.
                logger.warning("[Govdoc] GetRunResult: invalid result_summary_json for runId=%s: %s", runId, exc)
            else:
                if isinstance(parsed, dict):
                    aux = parsed

        # NOTE(review): every rule row becomes a finding, whatever its
        # ``result`` value is — confirm whether passed/skipped rows should be
        # filtered out here.
        findings = []
        for rr in rule_results:
            loc: dict[str, Any] = {}
            if rr.get("paragraph_index") is not None:
                loc["paragraph_index"] = rr["paragraph_index"]
            if rr.get("paragraph_text"):
                loc["context"] = rr["paragraph_text"]
            if rr.get("location_path"):
                loc["role"] = rr["location_path"]
            findings.append({
                "finding_id": f"{rr['rule_id']}-{rr['paragraph_index'] or 0}",
                "rule_id": rr["rule_id"],
                "rule_name": rr["rule_name"],
                "severity": rr["severity"],
                "category": rr["category"],
                "location": loc if loc else None,
                "actual": rr.get("actual") or {},
                "expected": rr.get("expected") or {},
                "message": rr.get("message") or "",
                "suggestion": rr.get("suggestion") or "",
                "evidence": rr.get("evidence") or "",
                "confidence": 1.0,
            })

        # One entry per rule id; the first row seen for a rule supplies its
        # metadata and status.
        checked_rules = []
        seen = set()
        for rr in rule_results:
            rid = rr["rule_id"]
            if rid in seen:
                continue
            seen.add(rid)
            status = rr.get("result")
            checked_rules.append({
                "rule_id": rid,
                "name": rr["rule_name"],
                "severity": rr["severity"],
                "category": rr["category"],
                # Unknown or NULL results are reported as "pass".
                "status": status if status in ("pass", "fail", "skipped") else "pass",
                "skip_reason": rr.get("skip_reason"),
            })

        def _column_or(column: str, default: Any) -> Any:
            # Selected columns always exist as keys, so ``.get(col, default)``
            # never falls back; a NULL column must be mapped explicitly.
            value = run_data.get(column)
            return default if value is None else value

        return {
            "runId": runId,
            "summary": {
                "score": _column_or("total_score", 100),
                "total_findings": len(findings),
                "by_severity": {},
                "by_category": {},
                "passed_count": _column_or("passed_count", 0),
                "failed_count": _column_or("failed_count", 0),
                "skipped_count": _column_or("skipped_count", 0),
            },
            "checkedRules": checked_rules,
            "findings": findings,
            # ``or []`` also covers a JSON ``null`` stored under the key.
            "structure": aux.get("structure") or [],
            "outline": aux.get("outline") or [],
            "entities": {},
        }

    async def GetRunFindings(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetRunFindings placeholder — runId=%s", runId)
        return {"runId": runId, "findings": []}

    async def GetRunEntities(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetRunEntities placeholder — runId=%s", runId)
        return {"runId": runId, "entities": []}

    async def GetRunParagraphs(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetRunParagraphs placeholder — runId=%s", runId)
        return {"runId": runId, "paragraphs": []}

    async def GetRunStructure(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetRunStructure placeholder — runId=%s", runId)
        return {"runId": runId, "structure": []}

    async def GetRunOutline(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetRunOutline placeholder — runId=%s", runId)
        return {"runId": runId, "outline": []}

    async def GetReportHtml(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetReportHtml placeholder — runId=%s", runId)
        return {"runId": runId, "htmlUrl": ""}

    async def GetReportDocx(self, runId: int) -> dict[str, Any]:
        logger.info("[Govdoc] GetReportDocx placeholder — runId=%s", runId)
        return {"runId": runId, "docxUrl": ""}

    async def DownloadOriginal(self, documentId: int) -> dict[str, Any]:
        logger.info("[Govdoc] DownloadOriginal placeholder — documentId=%s", documentId)
        return {"documentId": documentId, "downloadUrl": ""}

    # ── 规则 ──────────────────────────────────────────────

    async def ListRules(self, rulesPath: str | None = None) -> dict[str, Any]:
        """从 govdoc 规则 YAML 文件加载规则清单。"""
        rules = await self._load_rules_list(rulesPath)
        return {"rules": rules, "total_rules": len(rules)}

    async def GetRuleDetail(self, ruleId: str, rulesPath: str | None = None) -> dict[str, Any]:
        """获取单条规则完整详情（名称、严重度、stages、消息等）。"""
        ruleset = await self._load_ruleset(rulesPath)
        if ruleset is None:
            return {"rule_id": ruleId, "name": ruleId, "severity": "info", "category": "", "group": ""}
        for rule in ruleset.all_rules():
            if rule.rule_id == ruleId:
                return {
                    "rule_id": rule.rule_id,
                    "name": rule.name,
                    "severity": rule.severity,
                    "category": rule.category,
                    "group": "",
                    "applies_to": rule.applies_to.model_dump() if rule.applies_to else None,
                    "target": rule.target,
                    "on_missing": rule.on_missing,
                    "stages": [s.model_dump(exclude_none=True) for s in (rule.stages or [])],
                    "messages": rule.messages.model_dump() if rule.messages else {},
                }
        return {"rule_id": ruleId, "name": ruleId, "severity": "info", "category": "", "group": ""}

    # ── 规则加载助手 ────────────────────────────────────

    async def _resolve_rules_path(self, rulesPath: str | None = None) -> str | None:
        """解析规则 YAML 文件路径。

        优先级：传入参数 > govdoc_runs 表记录 > None
        """
        if rulesPath:
            return rulesPath
        # 尝试从最近的 completed run 中获取 rules_path
        try:
            from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
            from sqlalchemy import text
            async with GetAsyncSession() as session:
                row = await session.execute(
                    text(
                        """SELECT rules_path FROM govdoc_runs
                           WHERE rules_path IS NOT NULL AND status = 'completed'
                           ORDER BY id DESC LIMIT 1"""
                    )
                )
                result = row.mappings().first()
                if result and result.get("rules_path"):
                    return result["rules_path"]
        except Exception:
            pass
        return None

    async def _load_ruleset(self, rulesPath: str | None = None):
        """加载 rules.yaml 为 RuleSet 对象。"""
        resolved = await self._resolve_rules_path(rulesPath)
        if not resolved:
            logger.warning("[Govdoc] Cannot resolve rules path for GetRuleDetail/ListRules")
            return None
        from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.loader import load_rules
        return load_rules(resolved)

    async def _load_rules_list(self, rulesPath: str | None = None) -> list[dict[str, Any]]:
        """加载规则列表（简要信息）。"""
        ruleset = await self._load_ruleset(rulesPath)
        if ruleset is None:
            return []
        result = []
        for rule in ruleset.all_rules():
            result.append({
                "rule_id": rule.rule_id,
                "name": rule.name,
                "severity": rule.severity,
                "category": rule.category,
                "group": "",
            })
        return result