Files
leaudit-platform-backend/fastapi_modules/fastapi_leaudit/services/impl/govdocServiceImpl.py
T

1738 lines
72 KiB
Python

"""Govdoc 公文模块服务实现。"""
from __future__ import annotations
import hashlib
import json
import mimetypes
import time
import uuid
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
from fastapi import UploadFile
from sqlalchemy import text
from fastapi_common.fastapi_common_logger import logger
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils
from fastapi_common.fastapi_common_web.domain.responses import StatusCodeEnum
from fastapi_common.fastapi_common_web.exception.LeauditException import LeauditException
from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter
from fastapi_modules.fastapi_leaudit.govdoc_bridge.tasks import dispatch_govdoc_task
from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.docx_parser import parse_docx
from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_paragraph import paragraphs_to_html
from fastapi_modules.fastapi_leaudit.models import LeauditDocument, LeauditDocumentFile
from fastapi_modules.fastapi_leaudit.services import IGovdocService, IOssService
from fastapi_modules.fastapi_leaudit.services.impl.documentServiceImpl import _find_latest_version_candidate
from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl
@dataclass(frozen=True)
class _GovdocDocumentRow:
documentId: int
region: str
processingStatus: str
currentRunId: int | None
versionGroupKey: str | None
versionNo: int
rootVersionId: int | None
previousVersionId: int | None
isLatestVersion: bool
createdAt: Any
updatedAt: Any
fileId: int
fileName: str
fileExt: str | None
mimeType: str | None
fileSize: int | None
ossUrl: str | None
createdBy: int | None
resultStatus: str | None
totalScore: float | None
passedCount: int | None
failedCount: int | None
skippedCount: int | None
resultSummaryJson: Any
hasHtmlReport: bool
hasDocxReport: bool
class GovdocServiceImpl(IGovdocService):
"""公文处理与格式审查服务实现。"""
def __init__(self, OssService: IOssService | None = None) -> None:
self.OssService = OssService or OssServiceImpl()
self.Storage = StorageAdapter()
# ── 文档 ──────────────────────────────────────────────
async def UploadDocument(
self,
file: UploadFile,
typeId: int | None = None,
region: str = "default",
autoRun: bool = True,
speed: str = "normal",
ruleVersionId: int | None = None,
createdBy: int | None = None,
) -> dict[str, Any]:
if createdBy is None:
raise LeauditException(StatusCodeEnum.HTTP_401_UNAUTHORIZED, "当前用户未登录")
if file is None or not file.filename:
raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件不能为空")
content = await file.read()
if not content:
raise LeauditException(StatusCodeEnum.HTTP_400_BAD_REQUEST, "上传文件内容不能为空")
normalizedRegion = (region or "default").strip() or "default"
fileName = file.filename
fileExt = Path(fileName).suffix.lstrip(".").lower() or None
if fileExt != "docx":
raise LeauditException(
StatusCodeEnum.HTTP_400_BAD_REQUEST,
"当前内部公文模块仅支持上传 DOCX 文件",
)
mimeType = file.content_type or mimetypes.guess_type(fileName)[0] or "application/octet-stream"
fileSha256 = hashlib.sha256(content).hexdigest()
uploadedAt = datetime.now()
normalizedName = self._normalize_document_name(fileName)
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
currentUser = await self._getCurrentUserContext(createdBy)
resolvedRegion = self._resolve_upload_region(currentUser, normalizedRegion)
previousVersionId: int | None = None
rootVersionId: int | None = None
versionGroupKey: str | None = None
versionNo = 1
latestCandidate = await self._find_govdoc_latest_version_candidate(
session,
typeId=typeId,
region=resolvedRegion,
normalizedName=normalizedName,
fileExt=fileExt,
)
if latestCandidate:
previousVersionId = int(latestCandidate["document_id"])
rootVersionId = int(latestCandidate["root_version_id"] or latestCandidate["document_id"])
versionGroupKey = str(latestCandidate["version_group_key"] or "")
versionNo = int(latestCandidate["version_no"] or 1) + 1
previousDocument = await session.get(LeauditDocument, previousVersionId)
if previousDocument is not None:
previousDocument.isLatestVersion = False
else:
versionGroupKey = uuid.uuid4().hex
document = await LeauditDocument.create_new(
session,
bizDocumentId=time.time_ns(),
typeId=typeId,
groupId=None,
region=resolvedRegion,
processingStatus="waiting",
currentRunId=None,
versionGroupKey=versionGroupKey,
versionNo=versionNo,
previousVersionId=previousVersionId,
rootVersionId=rootVersionId,
isLatestVersion=True,
normalizedName=normalizedName,
reviewScope="govdoc",
)
if document.rootVersionId is None:
document.rootVersionId = document.Id
await session.flush()
objectKey = OssPathUtils.BuildBusinessDocKey(
Region=resolvedRegion,
TypeCode="govdoc",
DocumentId=document.Id,
Version=f"v{document.versionNo or 1}",
FileRole="original",
FileName=fileName,
Year=uploadedAt.year,
Month=uploadedAt.month,
)
ossUrl = await self.OssService.UploadBytes(
ObjectKey=objectKey,
Content=content,
ContentType=mimeType,
)
documentFile = LeauditDocumentFile(
documentId=document.Id,
fileRole="original",
fileName=fileName,
fileExt=fileExt,
mimeType=mimeType,
fileSize=len(content),
sha256=fileSha256,
localPath=None,
ossUrl=ossUrl,
storageProvider="minio",
isActive=True,
createdBy=createdBy,
)
session.add(documentFile)
await session.flush()
await session.execute(
text(
"""
UPDATE leaudit_documents
SET engine_type = 'govdoc'
WHERE id = :document_id
"""
),
{"document_id": document.Id},
)
await session.commit()
await session.refresh(document)
await session.refresh(documentFile)
runPayload: dict[str, Any] | None = None
shouldAutoRun = bool(autoRun)
if shouldAutoRun:
runPayload = await self.CreateRun(
documentId=document.Id,
ruleVersionId=ruleVersionId,
speed=speed,
force=False,
triggerUserId=createdBy,
)
return {
"documentId": document.Id,
"fileId": documentFile.Id,
"fileName": documentFile.fileName,
"region": resolvedRegion,
"engineType": "govdoc",
"processingStatus": "processing" if runPayload else (document.processingStatus or "waiting"),
"autoRunTriggered": shouldAutoRun,
"latestRunId": runPayload["runId"] if runPayload else None,
"run": runPayload,
}
async def ListDocuments(
self,
page: int = 1,
pageSize: int = 20,
keyword: str | None = None,
fileExt: str | None = None,
region: str | None = None,
status: str | None = None,
resultStatus: str | None = None,
createdBy: int | None = None,
dateFrom: str | None = None,
dateTo: str | None = None,
userId: int | None = None,
) -> dict[str, Any]:
if userId is None:
raise LeauditException(StatusCodeEnum.HTTP_401_UNAUTHORIZED, "当前用户未登录")
currentUser = await self._getCurrentUserContext(userId)
page = max(1, int(page))
pageSize = max(1, min(int(pageSize), 100))
offset = (page - 1) * pageSize
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
params: dict[str, Any] = {
"limit": pageSize,
"offset": offset,
}
filters = [
"d.deleted_at IS NULL",
"f.deleted_at IS NULL",
"f.is_active = true",
"f.file_role = 'original'",
"COALESCE(d.engine_type, 'leaudit') = 'govdoc'",
"COALESCE(d.is_latest_version, true) = true",
]
filters.extend(
self._buildDocumentScopeFilters(
CurrentUserId=userId,
CurrentUser=currentUser,
Params=params,
DocumentAlias="d",
FileAlias="f",
RequestedRegion=region,
RequestedUserId=createdBy,
)
)
if keyword:
filters.append("(f.file_name ILIKE :keyword OR COALESCE(d.normalized_name, '') ILIKE :keyword)")
params["keyword"] = f"%{keyword.strip()}%"
if fileExt:
normalizedExt = fileExt.strip().lstrip(".").lower()
if normalizedExt:
filters.append("LOWER(COALESCE(f.file_ext, '')) = :file_ext")
params["file_ext"] = normalizedExt
if status:
filters.append("COALESCE(d.processing_status, '') = :status")
params["status"] = status.strip()
if resultStatus:
filters.append("COALESCE(gr.result_status, '') = :result_status")
params["result_status"] = resultStatus.strip()
if dateFrom:
filters.append("d.created_at >= CAST(:date_from AS date)")
params["date_from"] = dateFrom.strip()
if dateTo:
filters.append("d.created_at < (CAST(:date_to AS date) + INTERVAL '1 day')")
params["date_to"] = dateTo.strip()
whereClause = " AND ".join(filters)
async with GetAsyncSession() as session:
rows = (
await session.execute(
text(
f"""
SELECT
d.id AS document_id,
COALESCE(d.region, 'default') AS region,
COALESCE(d.processing_status, 'waiting') AS processing_status,
d.current_run_id,
d.version_group_key,
COALESCE(d.version_no, 1) AS version_no,
d.root_version_id,
d.previous_version_id,
COALESCE(d.is_latest_version, true) AS is_latest_version,
d.created_at,
d.updated_at,
f.id AS file_id,
f.file_name,
f.file_ext,
f.mime_type,
f.file_size,
f.oss_url,
f.created_by,
gr.result_status,
gr.total_score,
gr.passed_count,
gr.failed_count,
gr.skipped_count,
gr.result_summary_json,
COALESCE(vc.total_versions, 1) AS total_versions,
EXISTS(
SELECT 1
FROM govdoc_report_artifacts gra
WHERE gra.run_id = d.current_run_id
AND gra.artifact_type = 'html_report'
AND gra.deleted_at IS NULL
) AS has_html_report,
EXISTS(
SELECT 1
FROM govdoc_report_artifacts gra
WHERE gra.run_id = d.current_run_id
AND gra.artifact_type = 'annotated_docx'
AND gra.deleted_at IS NULL
) AS has_docx_report
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
LEFT JOIN govdoc_runs gr
ON gr.id = d.current_run_id
LEFT JOIN (
SELECT version_group_key, COUNT(*) AS total_versions
FROM leaudit_documents
WHERE deleted_at IS NULL
AND COALESCE(engine_type, 'leaudit') = 'govdoc'
AND COALESCE(version_group_key, '') <> ''
GROUP BY version_group_key
) vc
ON vc.version_group_key = d.version_group_key
WHERE {whereClause}
ORDER BY d.created_at DESC
LIMIT :limit OFFSET :offset
"""
),
params,
)
).mappings().all()
total = int(
(
await session.execute(
text(
f"""
SELECT COUNT(1)
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
LEFT JOIN govdoc_runs gr
ON gr.id = d.current_run_id
WHERE {whereClause}
"""
),
params,
)
).scalar_one()
)
history_by_group: dict[str, list[dict[str, Any]]] = {}
total_versions_by_group = {
str(row["version_group_key"]): int(row.get("total_versions") or 1)
for row in rows
if row.get("version_group_key")
}
group_keys = [str(row["version_group_key"]) for row in rows if row.get("version_group_key")]
if group_keys:
history_rows = (
await session.execute(
text(
"""
SELECT
d.id AS document_id,
COALESCE(d.region, 'default') AS region,
COALESCE(d.processing_status, 'waiting') AS processing_status,
d.current_run_id,
d.version_group_key,
COALESCE(d.version_no, 1) AS version_no,
d.root_version_id,
d.previous_version_id,
COALESCE(d.is_latest_version, false) AS is_latest_version,
d.created_at,
d.updated_at,
f.id AS file_id,
f.file_name,
f.file_ext,
f.mime_type,
f.file_size,
f.oss_url,
f.created_by,
gr.result_status,
gr.total_score,
gr.passed_count,
gr.failed_count,
gr.skipped_count,
gr.result_summary_json,
EXISTS(
SELECT 1
FROM govdoc_report_artifacts gra
WHERE gra.run_id = d.current_run_id
AND gra.artifact_type = 'html_report'
AND gra.deleted_at IS NULL
) AS has_html_report,
EXISTS(
SELECT 1
FROM govdoc_report_artifacts gra
WHERE gra.run_id = d.current_run_id
AND gra.artifact_type = 'annotated_docx'
AND gra.deleted_at IS NULL
) AS has_docx_report
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
LEFT JOIN govdoc_runs gr
ON gr.id = d.current_run_id
WHERE d.deleted_at IS NULL
AND COALESCE(d.engine_type, 'leaudit') = 'govdoc'
AND d.version_group_key = ANY(:group_keys)
AND COALESCE(d.is_latest_version, false) = false
ORDER BY d.version_group_key, COALESCE(d.version_no, 1) DESC, d.id DESC
"""
),
{"group_keys": group_keys},
)
).mappings().all()
for history_row in history_rows:
group_key = str(history_row["version_group_key"] or "")
history_by_group.setdefault(group_key, []).append(
self._serialize_list_item_row(
self._map_document_row(history_row),
totalVersions=total_versions_by_group.get(group_key, 1),
historyVersions=[],
)
)
items = []
for row in rows:
mapped = self._map_document_row(row)
group_key = str(mapped.versionGroupKey or "")
total_versions = int(row.get("total_versions") or 1)
history_versions = history_by_group.get(group_key, []) if group_key else []
items.append(
self._serialize_list_item_row(
mapped,
totalVersions=total_versions,
historyVersions=history_versions,
)
)
return {"items": items, "total": total, "page": page, "pageSize": pageSize}
async def GetDocumentDetail(self, documentId: int, userId: int | None = None) -> dict[str, Any]:
if userId is None:
raise LeauditException(StatusCodeEnum.HTTP_401_UNAUTHORIZED, "当前用户未登录")
currentUser = await self._getCurrentUserContext(userId)
params: dict[str, Any] = {"document_id": documentId, "limit": 20}
filters = [
"d.id = :document_id",
"d.deleted_at IS NULL",
"f.deleted_at IS NULL",
"f.is_active = true",
"f.file_role = 'original'",
"COALESCE(d.engine_type, 'leaudit') = 'govdoc'",
]
filters.extend(
self._buildDocumentScopeFilters(
CurrentUserId=userId,
CurrentUser=currentUser,
Params=params,
DocumentAlias="d",
FileAlias="f",
)
)
whereClause = " AND ".join(filters)
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
row = (
await session.execute(
text(
f"""
SELECT
d.id AS document_id,
COALESCE(d.region, 'default') AS region,
COALESCE(d.processing_status, 'waiting') AS processing_status,
d.current_run_id,
d.created_at,
d.updated_at,
f.id AS file_id,
f.file_name,
f.file_ext,
f.mime_type,
f.file_size,
f.oss_url,
f.created_by,
gr.result_status,
gr.total_score,
gr.passed_count,
gr.failed_count,
gr.skipped_count
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
LEFT JOIN govdoc_runs gr
ON gr.id = d.current_run_id
WHERE {whereClause}
LIMIT 1
"""
),
params,
)
).mappings().first()
if not row:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "公文文档不存在或无权访问")
runs = (
await session.execute(
text(
"""
SELECT
id,
document_id,
status,
phase,
result_status,
total_score,
passed_count,
failed_count,
skipped_count,
error_message,
created_at,
updated_at,
started_at,
finished_at
FROM govdoc_runs
WHERE document_id = :document_id
AND deleted_at IS NULL
ORDER BY id DESC
LIMIT :limit
"""
),
params,
)
).mappings().all()
artifactRows = (
await session.execute(
text(
"""
SELECT run_id, artifact_type, file_name, file_ext, mime_type, oss_url, description
FROM govdoc_report_artifacts
WHERE run_id = ANY(
SELECT id
FROM govdoc_runs
WHERE document_id = :document_id
AND deleted_at IS NULL
)
AND deleted_at IS NULL
ORDER BY id DESC
"""
),
{"document_id": documentId},
)
).mappings().all()
mapped = self._map_document_row(row)
runItems = [self._build_run_summary(item) for item in runs]
latestRunId = mapped.currentRunId or (runItems[0]["runId"] if runItems else None)
latestRun = next((item for item in runItems if item["runId"] == latestRunId), runItems[0] if runItems else None)
artifactsByRun = self._group_artifacts_by_run(artifactRows)
return {
"documentId": mapped.documentId,
"latestRunId": latestRunId,
"document": {
"documentId": mapped.documentId,
"fileId": mapped.fileId,
"filename": mapped.fileName,
"fileExt": mapped.fileExt,
"mimeType": mapped.mimeType,
"fileSize": mapped.fileSize,
"region": mapped.region,
"processingStatus": mapped.processingStatus,
"createdAt": self._iso(mapped.createdAt),
"updatedAt": self._iso(mapped.updatedAt),
},
"latestRun": latestRun,
"currentRun": latestRun,
"runs": runItems,
"reports": artifactsByRun.get(int(latestRunId), {}) if latestRunId else {},
}
async def UpdateDocument(self, documentId: int, body: dict[str, Any], userId: int | None = None) -> dict[str, Any]:
raise LeauditException(StatusCodeEnum.HTTP_403_FORBIDDEN, "当前阶段暂不开放修改公文信息")
async def DeleteDocument(self, documentId: int, userId: int | None = None) -> dict[str, Any]:
if userId is None:
raise LeauditException(StatusCodeEnum.HTTP_401_UNAUTHORIZED, "当前用户未登录")
currentUser = await self._getCurrentUserContext(userId)
params: dict[str, Any] = {"document_id": documentId}
filters = ["d.id = :document_id", "d.deleted_at IS NULL", "f.is_active = true", "f.file_role = 'original'"]
filters.extend(
self._buildDocumentScopeFilters(
CurrentUserId=userId,
CurrentUser=currentUser,
Params=params,
DocumentAlias="d",
FileAlias="f",
)
)
whereClause = " AND ".join(filters)
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
row = (
await session.execute(
text(
f"""
SELECT d.id
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
WHERE {whereClause}
LIMIT 1
"""
),
params,
)
).first()
if not row:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "公文文档不存在或无权访问")
await session.execute(
text("UPDATE leaudit_documents SET deleted_at = now(), updated_at = now() WHERE id = :document_id"),
{"document_id": documentId},
)
await session.commit()
return {"documentId": documentId, "deleted": True}
# ── 审查运行 ──────────────────────────────────────────
async def CreateRun(
self,
documentId: int,
ruleVersionId: int | None = None,
speed: str = "normal",
force: bool = False,
triggerUserId: int | None = None,
) -> dict[str, Any]:
if triggerUserId is None:
raise LeauditException(StatusCodeEnum.HTTP_401_UNAUTHORIZED, "当前用户未登录")
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
currentUser = await self._getCurrentUserContext(triggerUserId)
documentMeta = await self._get_document_for_run(documentId, triggerUserId, currentUser)
if documentMeta.currentRunId and not force:
currentRun = await self.GetRunStatus(documentMeta.currentRunId)
if currentRun["status"] in {"pending", "processing"}:
return {
"runId": documentMeta.currentRunId,
"documentId": documentId,
"status": currentRun["status"],
"phase": currentRun.get("phase"),
"reused": True,
}
runId = await self.Storage.CreateRun(
{
"documentId": documentId,
"documentFileId": documentMeta.fileId,
"runNo": await self._next_run_no(documentId),
"triggerSource": "upload" if not documentMeta.currentRunId else "manual",
"triggerUserId": triggerUserId,
}
)
rulesPath = await self._resolve_rules_path()
await self.Storage.UpdateDocumentStatus(documentId, "processing", runId)
task = dispatch_govdoc_task(
documentId=documentId,
runId=runId,
rulesPath=rulesPath,
triggerUserId=triggerUserId,
speed=speed,
)
async with GetAsyncSession() as session:
await session.execute(
text("UPDATE govdoc_runs SET task_id = :task_id, started_at = now(), updated_at = now() WHERE id = :run_id"),
{"task_id": str(getattr(task, "id", "") or ""), "run_id": runId},
)
await session.commit()
return {
"runId": runId,
"documentId": documentId,
"status": "pending",
"phase": "dispatch",
"taskId": str(getattr(task, "id", "") or ""),
}
async def GetRunStatus(self, runId: int) -> dict[str, Any]:
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
row = (
await session.execute(
text(
"""
SELECT
id,
document_id,
status,
phase,
result_status,
total_score,
passed_count,
failed_count,
skipped_count,
error_message,
task_id,
created_at,
updated_at,
started_at,
finished_at
FROM govdoc_runs
WHERE id = :run_id
AND deleted_at IS NULL
LIMIT 1
"""
),
{"run_id": runId},
)
).mappings().first()
if not row:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "审查运行不存在")
return self._build_run_summary(row)
# ── 结果与报告 ────────────────────────────────────────
async def GetRunResult(self, runId: int) -> dict[str, Any]:
"""从 govdoc_runs + govdoc_rule_results 读取审查结果,含 structure/outline。"""
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
runRow = (
await session.execute(
text(
"""
SELECT
id,
document_id,
status,
phase,
total_score,
passed_count,
failed_count,
skipped_count,
result_status,
result_summary_json,
started_at,
finished_at
FROM govdoc_runs
WHERE id = :run_id
AND deleted_at IS NULL
LIMIT 1
"""
),
{"run_id": runId},
)
).mappings().first()
if not runRow:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "审查运行不存在")
documentRow = (
await session.execute(
text(
"""
SELECT
d.id AS document_id,
f.file_name
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
WHERE d.id = :document_id
AND d.deleted_at IS NULL
LIMIT 1
"""
),
{"document_id": int(runRow["document_id"])},
)
).mappings().first()
rulesRows = (
await session.execute(
text(
"""
SELECT
rule_id,
rule_name,
severity,
category,
result,
skip_reason,
message,
suggestion,
actual,
expected,
evidence,
paragraph_index,
paragraph_text,
location_path,
score
FROM govdoc_rule_results
WHERE run_id = :run_id
AND deleted_at IS NULL
ORDER BY id ASC
"""
),
{"run_id": runId},
)
).mappings().all()
aux = self._parse_json(runRow.get("result_summary_json"))
findings = []
checkedRules = []
severityStats: dict[str, int] = {}
categoryStats: dict[str, int] = {}
seenRuleIds: set[str] = set()
for rr in rulesRows:
location = {
"paragraph_index": rr["paragraph_index"] or 0,
"role": rr.get("location_path"),
"char_start": 0,
"char_end": 0,
"context": rr.get("paragraph_text") or "",
}
if rr.get("result") == "fail":
severity = str(rr.get("severity") or "info")
category = str(rr.get("category") or "")
severityStats[severity] = severityStats.get(severity, 0) + 1
if category:
categoryStats[category] = categoryStats.get(category, 0) + 1
findings.append(
{
"finding_id": f"{rr['rule_id']}-{rr.get('paragraph_index') or len(findings)}",
"rule_id": rr["rule_id"],
"rule_name": rr.get("rule_name") or rr["rule_id"],
"severity": severity,
"category": category,
"location": location,
"actual": self._parse_json(rr.get("actual")) or {},
"expected": self._parse_json(rr.get("expected")) or {},
"message": rr.get("message") or "",
"suggestion": rr.get("suggestion") or "",
"evidence": rr.get("evidence") or "",
"confidence": 1.0,
}
)
ruleId = str(rr["rule_id"])
if ruleId in seenRuleIds:
continue
seenRuleIds.add(ruleId)
status = str(rr.get("result") or "pass")
checkedRules.append(
{
"rule_id": ruleId,
"name": rr.get("rule_name") or ruleId,
"severity": rr.get("severity") or "info",
"category": rr.get("category") or "",
"status": status if status in {"pass", "fail", "skipped"} else "pass",
"skip_reason": rr.get("skip_reason") or "",
}
)
totalScore = float(runRow.get("total_score") or 0)
return {
"runId": runId,
"documentId": int(runRow["document_id"]),
"document": {
"documentId": int(runRow["document_id"]),
"filename": str(documentRow["file_name"] or "") if documentRow else "",
},
"summary": {
"score": totalScore,
"total_findings": len(findings),
"by_severity": severityStats,
"by_category": categoryStats,
"passed_count": int(runRow.get("passed_count") or 0),
"failed_count": int(runRow.get("failed_count") or 0),
"skipped_count": int(runRow.get("skipped_count") or 0),
},
"checkedRules": checkedRules,
"findings": findings,
"structure": aux.get("structure", []),
"outline": aux.get("outline", []),
"entities": aux.get("entities", {}),
}
async def GetRunFindings(self, runId: int) -> dict[str, Any]:
result = await self.GetRunResult(runId)
return {"runId": runId, "findings": result["findings"]}
async def GetRunEntities(self, runId: int) -> dict[str, Any]:
result = await self.GetRunResult(runId)
return {"runId": runId, "entities": result.get("entities", {})}
async def GetRunParagraphs(self, runId: int) -> str:
runStatus = await self.GetRunStatus(runId)
if runStatus["status"] != "completed":
raise LeauditException(
StatusCodeEnum.HTTP_409_CONFLICT,
"当前审查尚未完成,暂时无法加载文档视图",
)
paragraphArtifact = await self._get_report_artifact(runId, "paragraph_html")
if paragraphArtifact:
content = await self.OssService.DownloadBytes(str(paragraphArtifact["oss_url"]))
return content.decode("utf-8")
documentMeta = await self._get_document_for_read(int(runStatus["documentId"]))
fileRow = await self._get_active_original_file(documentMeta.documentId)
ossUrl = getattr(fileRow, "ossUrl", None) or fileRow.get("oss_url")
fileExt = getattr(fileRow, "fileExt", None) or fileRow.get("file_ext")
if not ossUrl:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "原始文档未找到可用存储地址")
if (fileExt or "").strip().lower() != "docx":
raise LeauditException(
StatusCodeEnum.HTTP_400_BAD_REQUEST,
"当前文档视图仅支持 DOCX 原文预览,请上传 DOCX 文件",
)
tempPath = await self.OssService.DownloadToTempFile(
Source=ossUrl,
Suffix=f".{fileExt or 'docx'}",
Prefix="govdoc-run-",
)
try:
doc = parse_docx(tempPath)
findingsResult = await self.GetRunFindings(runId)
findingMap: dict[int, list[str]] = {}
for finding in findingsResult["findings"]:
pi = int(finding.get("location", {}).get("paragraph_index") or 0)
findingMap.setdefault(pi, []).append(str(finding["finding_id"]))
return paragraphs_to_html(doc, findingMap)
finally:
try:
Path(tempPath).unlink(missing_ok=True)
except Exception:
pass
async def GetRunStructure(self, runId: int) -> dict[str, Any]:
result = await self.GetRunResult(runId)
return {"runId": runId, "structure": result.get("structure", [])}
async def GetRunOutline(self, runId: int) -> dict[str, Any]:
result = await self.GetRunResult(runId)
return {"runId": runId, "outline": result.get("outline", [])}
async def GetReportHtml(self, runId: int) -> dict[str, Any]:
artifact = await self._get_report_artifact(runId, "html_report")
if not artifact:
return {"runId": runId, "htmlUrl": ""}
return {
"runId": runId,
"htmlUrl": await self.OssService.PresignGetUrl(str(artifact["oss_url"])),
}
async def GetReportDocx(self, runId: int) -> dict[str, Any]:
artifact = await self._get_report_artifact(runId, "annotated_docx")
if not artifact:
return {"runId": runId, "docxUrl": ""}
return {
"runId": runId,
"docxUrl": await self.OssService.PresignGetUrl(str(artifact["oss_url"])),
}
async def DownloadOriginal(self, documentId: int) -> dict[str, Any]:
fileRow = await self._get_active_original_file(documentId)
ossUrl = getattr(fileRow, "ossUrl", None) or fileRow.get("oss_url")
if not ossUrl:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "原始文档不存在")
return {
"documentId": documentId,
"downloadUrl": await self.OssService.PresignGetUrl(str(ossUrl)),
}
# ── 规则 ──────────────────────────────────────────────
async def ListRules(self, rulesPath: str | None = None) -> dict[str, Any]:
"""从 govdoc 规则 YAML 文件加载规则清单。"""
rules = await self._load_rules_list(rulesPath)
return {"metadata": {}, "rules": rules, "total_rules": len(rules)}
async def GetRuleDetail(self, ruleId: str, rulesPath: str | None = None) -> dict[str, Any]:
"""获取单条规则完整详情(名称、严重度、stages、消息等)。"""
ruleset = await self._load_ruleset(rulesPath)
if ruleset is None:
return {"rule_id": ruleId, "name": ruleId, "severity": "info", "category": "", "group": ""}
for rule in ruleset.all_rules():
if rule.rule_id == ruleId:
return {
"rule_id": rule.rule_id,
"name": rule.name,
"severity": rule.severity,
"category": rule.category,
"group": "",
"applies_to": rule.applies_to.model_dump() if rule.applies_to else None,
"target": rule.target,
"on_missing": rule.on_missing,
"stages": [s.model_dump(exclude_none=True) for s in (rule.stages or [])],
"messages": rule.messages.model_dump() if rule.messages else {},
}
return {"rule_id": ruleId, "name": ruleId, "severity": "info", "category": "", "group": ""}
# ── 助手 ──────────────────────────────────────────────
async def _resolve_rules_path(self, rulesPath: str | None = None) -> str | None:
"""解析规则 YAML 文件路径。"""
if rulesPath:
return rulesPath
candidates = [
Path("/home/wren-dev/Porject/leaudit-platform/rules/govdoc/govdoc_general/rules.yaml"),
Path("/home/wren-dev/Porject/leaudit-platform/rules/govdoc_general/rules.yaml"),
]
for candidate in candidates:
if candidate.is_file():
return str(candidate)
return None
async def _load_ruleset(self, rulesPath: str | None = None):
resolved = await self._resolve_rules_path(rulesPath)
if not resolved:
logger.warning("[Govdoc] Cannot resolve rules path for GetRuleDetail/ListRules")
return None
from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.loader import load_rules
return load_rules(resolved)
async def _load_rules_list(self, rulesPath: str | None = None) -> list[dict[str, Any]]:
ruleset = await self._load_ruleset(rulesPath)
if ruleset is None:
return []
result = []
for rule in ruleset.all_rules():
result.append(
{
"rule_id": rule.rule_id,
"name": rule.name,
"severity": rule.severity,
"category": rule.category,
"group": "",
}
)
return result
async def _ensureGovdocSchema(self, session) -> None:
statements = [
"""
ALTER TABLE leaudit_documents
ADD COLUMN IF NOT EXISTS engine_type VARCHAR(32) NOT NULL DEFAULT 'leaudit'
""",
"""
CREATE TABLE IF NOT EXISTS public.govdoc_runs (
id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
document_id BIGINT NOT NULL,
document_file_id BIGINT,
run_no INTEGER NOT NULL DEFAULT 1,
trigger_source VARCHAR(64) NOT NULL DEFAULT 'upload',
trigger_user_id BIGINT,
task_id VARCHAR(128),
status VARCHAR(64) NOT NULL DEFAULT 'pending',
phase VARCHAR(32),
engine_version VARCHAR(64),
llm_provider VARCHAR(64),
llm_model VARCHAR(128),
rules_path VARCHAR(1024),
total_score NUMERIC(10, 2),
passed_count INTEGER,
failed_count INTEGER,
skipped_count INTEGER,
result_status VARCHAR(32),
result_summary_json TEXT,
error_message TEXT,
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMPTZ DEFAULT NULL
)
""",
"""
CREATE TABLE IF NOT EXISTS public.govdoc_rule_results (
id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
run_id BIGINT NOT NULL,
rule_id VARCHAR(128) NOT NULL,
rule_name VARCHAR(256),
severity VARCHAR(32),
category VARCHAR(128),
message TEXT,
suggestion TEXT,
actual TEXT,
expected TEXT,
evidence TEXT,
paragraph_index INTEGER,
paragraph_text TEXT,
location_path VARCHAR(512),
result VARCHAR(32) NOT NULL DEFAULT 'pass',
skip_reason TEXT,
score NUMERIC(10, 2),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMPTZ DEFAULT NULL
)
""",
"""
CREATE TABLE IF NOT EXISTS public.govdoc_report_artifacts (
id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
run_id BIGINT NOT NULL,
artifact_type VARCHAR(64) NOT NULL,
file_name VARCHAR(512) NOT NULL,
file_ext VARCHAR(32),
mime_type VARCHAR(128),
file_size BIGINT,
sha256 VARCHAR(64),
oss_url VARCHAR(2048),
storage_provider VARCHAR(32),
description VARCHAR(512),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
deleted_at TIMESTAMPTZ DEFAULT NULL
)
""",
"""
CREATE INDEX IF NOT EXISTS idx_leaudit_documents_engine_type
ON public.leaudit_documents(engine_type) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_runs_document_id
ON public.govdoc_runs(document_id) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_runs_status
ON public.govdoc_runs(status) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_runs_trigger_user_id
ON public.govdoc_runs(trigger_user_id)
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_rule_results_run_id
ON public.govdoc_rule_results(run_id) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_rule_results_rule_id
ON public.govdoc_rule_results(rule_id) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_rule_results_result
ON public.govdoc_rule_results(result) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_rule_results_paragraph
ON public.govdoc_rule_results(run_id, paragraph_index) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_report_artifacts_run_id
ON public.govdoc_report_artifacts(run_id) WHERE deleted_at IS NULL
""",
"""
CREATE INDEX IF NOT EXISTS idx_govdoc_report_artifacts_type
ON public.govdoc_report_artifacts(run_id, artifact_type) WHERE deleted_at IS NULL
""",
]
for statement in statements:
await session.execute(text(statement))
await session.commit()
async def _getCurrentUserContext(self, CurrentUserId: int) -> dict[str, Any]:
async with GetAsyncSession() as session:
row = (
await session.execute(
text(
"""
SELECT
u.id,
COALESCE(u.area, '') AS area,
COALESCE(bool_or(r.role_key IN ('super_admin', 'provincial_admin')), FALSE) AS is_global,
COALESCE(bool_or(r.role_key IN ('super_admin', 'provincial_admin', 'admin')), FALSE) AS can_manage,
COALESCE(bool_or(r.role_key = 'super_admin'), FALSE) AS is_super_admin
FROM sso_users u
LEFT JOIN user_role ur ON ur.user_id = u.id
LEFT JOIN roles r ON r.id = ur.role_id
WHERE u.id = :user_id
GROUP BY u.id, u.area
"""
),
{"user_id": CurrentUserId},
)
).mappings().first()
if not row:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "当前用户不存在")
return {
"area": str(row["area"] or ""),
"is_global": bool(row["is_global"]),
"can_manage": bool(row["can_manage"]),
"is_super_admin": bool(row["is_super_admin"]),
}
def _buildDocumentScopeFilters(
self,
CurrentUserId: int,
CurrentUser: dict[str, Any],
Params: dict[str, object],
DocumentAlias: str,
FileAlias: str,
RequestedRegion: str | None = None,
RequestedUserId: int | None = None,
) -> list[str]:
filters: list[str] = []
requestedRegion = (RequestedRegion or "").strip()
area = str(CurrentUser["area"] or "").strip()
if CurrentUser["is_global"]:
if requestedRegion:
filters.append(f"{DocumentAlias}.region = :requested_region")
Params["requested_region"] = requestedRegion
if RequestedUserId is not None:
filters.append(f"{FileAlias}.created_by = :requested_user_id")
Params["requested_user_id"] = RequestedUserId
return filters
if CurrentUser["can_manage"]:
if not area:
filters.append("1 = 0")
return filters
if requestedRegion and requestedRegion != area:
filters.append("1 = 0")
return filters
filters.append(f"{DocumentAlias}.region = :scope_region")
Params["scope_region"] = area
if RequestedUserId is not None:
filters.append(f"{FileAlias}.created_by = :requested_user_id")
Params["requested_user_id"] = RequestedUserId
return filters
filters.append(f"{FileAlias}.created_by = :scope_user_id")
Params["scope_user_id"] = CurrentUserId
if requestedRegion:
filters.append(f"{DocumentAlias}.region = :requested_region")
Params["requested_region"] = requestedRegion
if RequestedUserId is not None and RequestedUserId != CurrentUserId:
filters.append("1 = 0")
return filters
def _resolve_upload_region(self, currentUser: dict[str, Any], requestedRegion: str) -> str:
area = str(currentUser["area"] or "").strip()
if currentUser["is_global"]:
return requestedRegion or area or "default"
if currentUser["can_manage"]:
if area and requestedRegion and requestedRegion != area:
raise LeauditException(StatusCodeEnum.HTTP_403_FORBIDDEN, "不能上传到非本地区")
return area or requestedRegion or "default"
if area and requestedRegion and requestedRegion != area:
raise LeauditException(StatusCodeEnum.HTTP_403_FORBIDDEN, "不能上传到非本人地区")
return area or requestedRegion or "default"
def _normalize_document_name(self, fileName: str) -> str:
suffix = Path(fileName).suffix
return fileName[: -len(suffix)] if suffix else fileName
async def _get_document_for_run(
self,
documentId: int,
userId: int,
currentUser: dict[str, Any],
) -> _GovdocDocumentRow:
params: dict[str, Any] = {"document_id": documentId}
filters = [
"d.id = :document_id",
"d.deleted_at IS NULL",
"f.deleted_at IS NULL",
"f.is_active = true",
"f.file_role = 'original'",
"COALESCE(d.engine_type, 'leaudit') = 'govdoc'",
]
filters.extend(
self._buildDocumentScopeFilters(
CurrentUserId=userId,
CurrentUser=currentUser,
Params=params,
DocumentAlias="d",
FileAlias="f",
)
)
whereClause = " AND ".join(filters)
async with GetAsyncSession() as session:
row = (
await session.execute(
text(
f"""
SELECT
d.id AS document_id,
COALESCE(d.region, 'default') AS region,
COALESCE(d.processing_status, 'waiting') AS processing_status,
d.current_run_id,
d.created_at,
d.updated_at,
f.id AS file_id,
f.file_name,
f.file_ext,
f.mime_type,
f.file_size,
f.oss_url,
f.created_by,
gr.result_status,
gr.total_score,
gr.passed_count,
gr.failed_count,
gr.skipped_count
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
LEFT JOIN govdoc_runs gr
ON gr.id = d.current_run_id
WHERE {whereClause}
LIMIT 1
"""
),
params,
)
).mappings().first()
if not row:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "公文文档不存在或无权访问")
return self._map_document_row(row)
async def _get_document_for_read(self, documentId: int) -> _GovdocDocumentRow:
async with GetAsyncSession() as session:
row = (
await session.execute(
text(
"""
SELECT
d.id AS document_id,
COALESCE(d.region, 'default') AS region,
COALESCE(d.processing_status, 'waiting') AS processing_status,
d.current_run_id,
d.created_at,
d.updated_at,
f.id AS file_id,
f.file_name,
f.file_ext,
f.mime_type,
f.file_size,
f.oss_url,
f.created_by,
gr.result_status,
gr.total_score,
gr.passed_count,
gr.failed_count,
gr.skipped_count
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
LEFT JOIN govdoc_runs gr
ON gr.id = d.current_run_id
WHERE d.id = :document_id
AND d.deleted_at IS NULL
AND COALESCE(d.engine_type, 'leaudit') = 'govdoc'
LIMIT 1
"""
),
{"document_id": documentId},
)
).mappings().first()
if not row:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "公文文档不存在")
return self._map_document_row(row)
async def _get_active_original_file(self, documentId: int):
async with GetAsyncSession() as session:
row = (
await session.execute(
text(
"""
SELECT id, document_id, file_name, file_ext, mime_type, file_size, oss_url, created_by
FROM leaudit_document_files
WHERE document_id = :document_id
AND is_active = true
AND file_role = 'original'
AND deleted_at IS NULL
ORDER BY id DESC
LIMIT 1
"""
),
{"document_id": documentId},
)
).mappings().first()
if not row:
raise LeauditException(StatusCodeEnum.HTTP_404_NOT_FOUND, "原始文档不存在")
return row
async def _next_run_no(self, documentId: int) -> int:
async with GetAsyncSession() as session:
current = (
await session.execute(
text(
"""
SELECT COALESCE(MAX(run_no), 0)
FROM govdoc_runs
WHERE document_id = :document_id
AND deleted_at IS NULL
"""
),
{"document_id": documentId},
)
).scalar_one()
return int(current or 0) + 1
def _build_run_summary(self, row: Any) -> dict[str, Any]:
summary = self._build_summary_payload(
row.get("total_score"),
row.get("passed_count"),
row.get("failed_count"),
row.get("skipped_count"),
)
return {
"runId": int(row["id"]),
"documentId": int(row["document_id"]),
"status": str(row.get("status") or "pending"),
"phase": row.get("phase"),
"resultStatus": row.get("result_status"),
"score": float(row.get("total_score") or 0),
"passedCount": int(row.get("passed_count") or 0),
"failedCount": int(row.get("failed_count") or 0),
"skippedCount": int(row.get("skipped_count") or 0),
"summary": summary,
"errorMessage": row.get("error_message"),
"taskId": row.get("task_id"),
"createdAt": self._iso(row.get("created_at")),
"updatedAt": self._iso(row.get("updated_at")),
"startedAt": self._iso(row.get("started_at")),
"finishedAt": self._iso(row.get("finished_at")),
}
def _build_summary_payload(
self,
totalScore: Any,
passedCount: Any,
failedCount: Any,
skippedCount: Any,
) -> dict[str, Any]:
return {
"score": float(totalScore or 0),
"total_findings": int(failedCount or 0),
"by_severity": {},
"by_category": {},
"passed_count": int(passedCount or 0),
"failed_count": int(failedCount or 0),
"skipped_count": int(skippedCount or 0),
}
async def _find_govdoc_latest_version_candidate(
self,
session,
*,
typeId: int | None,
region: str,
normalizedName: str,
fileExt: str | None,
) -> dict[str, Any] | None:
if typeId is not None:
candidate = await _find_latest_version_candidate(
session,
type_id=int(typeId),
root_group_id=None,
region=region,
normalized_name=normalizedName,
file_ext=fileExt,
)
if candidate:
return candidate
ext_clause = ""
params: dict[str, Any] = {
"region": region,
"normalized_name": normalizedName,
}
if fileExt:
ext_clause = " AND LOWER(COALESCE(f.file_ext, '')) = :file_ext"
params["file_ext"] = fileExt.lower()
row = (
await session.execute(
text(
f"""
SELECT
d.id AS document_id,
d.version_group_key,
d.version_no,
d.root_version_id,
f.id AS file_id,
f.sha256
FROM leaudit_documents d
JOIN leaudit_document_files f
ON f.document_id = d.id
AND f.is_active = true
AND f.file_role = 'original'
AND f.deleted_at IS NULL
WHERE d.region = :region
AND d.normalized_name = :normalized_name
AND COALESCE(d.engine_type, 'leaudit') = 'govdoc'
AND COALESCE(d.is_latest_version, true) = true
AND d.deleted_at IS NULL{ext_clause}
ORDER BY COALESCE(d.version_no, 1) DESC, d.id DESC
LIMIT 1
"""
),
params,
)
).mappings().first()
return dict(row) if row else None
def _group_artifacts_by_run(self, rows: list[Any]) -> dict[int, dict[str, Any]]:
grouped: dict[int, dict[str, Any]] = {}
artifactTypeMap = {
"html_report": "htmlUrl",
"annotated_docx": "docxUrl",
"paragraph_html": "paragraphHtmlUrl",
}
for row in rows:
runId = int(row["run_id"])
payload = grouped.setdefault(
runId,
{
"artifacts": [],
"hasHtmlReport": False,
"hasDocxReport": False,
"hasParagraphHtml": False,
},
)
artifactType = str(row.get("artifact_type") or "")
if artifactType in artifactTypeMap and row.get("oss_url") and artifactTypeMap[artifactType] not in payload:
payload[artifactTypeMap[artifactType]] = row["oss_url"]
if artifactType == "html_report":
payload["hasHtmlReport"] = True
elif artifactType == "annotated_docx":
payload["hasDocxReport"] = True
elif artifactType == "paragraph_html":
payload["hasParagraphHtml"] = True
payload["artifacts"].append(
{
"artifactType": artifactType,
"fileName": row.get("file_name") or "",
"fileExt": row.get("file_ext") or "",
"mimeType": row.get("mime_type") or "",
"ossUrl": row.get("oss_url") or "",
"description": row.get("description") or "",
}
)
return grouped
def _build_list_summary_payload(self, row: _GovdocDocumentRow) -> dict[str, Any]:
summary = self._parse_json(row.resultSummaryJson) or {}
if not isinstance(summary, dict):
summary = {}
bySeverity = summary.get("by_severity")
byCategory = summary.get("by_category")
return {
"score": float(summary.get("score") or row.totalScore or 0),
"total_findings": int(summary.get("total_findings") or row.failedCount or 0),
"by_severity": bySeverity if isinstance(bySeverity, dict) else {},
"by_category": byCategory if isinstance(byCategory, dict) else {},
"passed_count": int(summary.get("passed_count") or row.passedCount or 0),
"failed_count": int(summary.get("failed_count") or row.failedCount or 0),
"skipped_count": int(summary.get("skipped_count") or row.skippedCount or 0),
}
def _serialize_list_item_row(
self,
row: _GovdocDocumentRow,
*,
totalVersions: int | None,
historyVersions: list[dict[str, Any]],
) -> dict[str, Any]:
summary = self._build_list_summary_payload(row)
return {
"documentId": row.documentId,
"fileId": row.fileId,
"fileName": row.fileName,
"fileExt": row.fileExt,
"mimeType": row.mimeType,
"fileSize": row.fileSize,
"region": row.region,
"processingStatus": row.processingStatus,
"currentRunId": row.currentRunId,
"latestRunId": row.currentRunId,
"resultStatus": row.resultStatus,
"score": float(row.totalScore) if row.totalScore is not None else None,
"passedCount": row.passedCount or 0,
"failedCount": row.failedCount or 0,
"skippedCount": row.skippedCount or 0,
"versionGroupKey": row.versionGroupKey or "",
"versionNo": int(row.versionNo or 1),
"rootVersionId": int(row.rootVersionId or row.documentId),
"previousVersionId": int(row.previousVersionId) if row.previousVersionId is not None else None,
"totalVersions": int(totalVersions or max(1, len(historyVersions) + 1)),
"historyCount": len(historyVersions),
"historyVersions": historyVersions,
"latestRun": {
"runId": row.currentRunId,
"summary": summary,
} if row.currentRunId else None,
"reports": {
"hasHtmlReport": row.hasHtmlReport,
"hasDocxReport": row.hasDocxReport,
},
"createdAt": self._iso(row.createdAt),
"updatedAt": self._iso(row.updatedAt),
}
def _map_document_row(self, row: Any) -> _GovdocDocumentRow:
return _GovdocDocumentRow(
documentId=int(row["document_id"]),
region=str(row["region"] or "default"),
processingStatus=str(row["processing_status"] or "waiting"),
currentRunId=int(row["current_run_id"]) if row.get("current_run_id") is not None else None,
versionGroupKey=str(row["version_group_key"]) if row.get("version_group_key") else None,
versionNo=int(row.get("version_no") or 1),
rootVersionId=int(row["root_version_id"]) if row.get("root_version_id") is not None else None,
previousVersionId=int(row["previous_version_id"]) if row.get("previous_version_id") is not None else None,
isLatestVersion=bool(row.get("is_latest_version", True)),
createdAt=row.get("created_at"),
updatedAt=row.get("updated_at"),
fileId=int(row["file_id"]),
fileName=str(row["file_name"] or ""),
fileExt=str(row["file_ext"]) if row.get("file_ext") else None,
mimeType=str(row["mime_type"]) if row.get("mime_type") else None,
fileSize=int(row["file_size"]) if row.get("file_size") is not None else None,
ossUrl=str(row["oss_url"]) if row.get("oss_url") else None,
createdBy=int(row["created_by"]) if row.get("created_by") is not None else None,
resultStatus=str(row["result_status"]) if row.get("result_status") else None,
totalScore=float(row["total_score"]) if row.get("total_score") is not None else None,
passedCount=int(row["passed_count"]) if row.get("passed_count") is not None else None,
failedCount=int(row["failed_count"]) if row.get("failed_count") is not None else None,
skippedCount=int(row["skipped_count"]) if row.get("skipped_count") is not None else None,
resultSummaryJson=row.get("result_summary_json"),
hasHtmlReport=bool(row.get("has_html_report")),
hasDocxReport=bool(row.get("has_docx_report")),
)
async def _get_report_artifact(self, runId: int, artifactType: str) -> Any | None:
async with GetAsyncSession() as session:
await self._ensureGovdocSchema(session)
return (
await session.execute(
text(
"""
SELECT oss_url
FROM govdoc_report_artifacts
WHERE run_id = :run_id
AND artifact_type = :artifact_type
AND deleted_at IS NULL
AND COALESCE(oss_url, '') <> ''
ORDER BY id DESC
LIMIT 1
"""
),
{"run_id": runId, "artifact_type": artifactType},
)
).mappings().first()
def _parse_json(self, raw: Any) -> Any:
if raw is None or raw == "":
return None
if isinstance(raw, (dict, list)):
return raw
try:
return json.loads(raw)
except Exception:
return None
def _iso(self, value: Any) -> str | None:
if value is None:
return None
if isinstance(value, datetime):
return value.isoformat()
return str(value)