feat: integrate govdoc module into leaudit platform
This commit is contained in:
@@ -9,14 +9,26 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import shutil
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
|
||||
from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.input_resolver import InputResolver
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.result_adapter import ResultAdapter
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.docx_parser import parse_docx
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.docx_annotator import annotate_docx
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_paragraph import paragraphs_to_html
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_renderer import render_html
|
||||
from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl
|
||||
|
||||
log = logger
|
||||
|
||||
@@ -33,13 +45,30 @@ class GovdocRunner:
|
||||
|
||||
InputResolver: InputResolver = field(default_factory=InputResolver)
|
||||
Storage: StorageAdapter = field(default_factory=StorageAdapter)
|
||||
ResultAdapter: ResultAdapter = field(default_factory=ResultAdapter)
|
||||
OssService: OssServiceImpl = field(default_factory=OssServiceImpl)
|
||||
ResultAdapter: Any | None = None
|
||||
|
||||
def ResolveRulesPath(self, RulesPath: str | None) -> str:
|
||||
"""解析并校验执行所需规则文件路径。"""
|
||||
candidate = (RulesPath or "").strip()
|
||||
if not candidate:
|
||||
raise ValueError("未提供 govdoc rules_path,当前任务无法执行规则审查")
|
||||
|
||||
path = Path(candidate).expanduser()
|
||||
if not path.is_absolute():
|
||||
path = Path.cwd() / path
|
||||
path = path.resolve()
|
||||
|
||||
if not path.is_file():
|
||||
raise FileNotFoundError(f"govdoc 规则文件不存在: {path}")
|
||||
|
||||
return str(path)
|
||||
|
||||
async def Execute(
|
||||
self,
|
||||
DocumentId: int,
|
||||
RunId: int,
|
||||
RulesPath: str,
|
||||
RulesPath: str | None = None,
|
||||
TriggerUserId: int | None = None,
|
||||
Speed: str = "normal",
|
||||
) -> dict[str, Any]:
|
||||
@@ -54,57 +83,216 @@ class GovdocRunner:
|
||||
Returns:
|
||||
执行摘要 dict。
|
||||
"""
|
||||
resolvedRulesPath = self.ResolveRulesPath(RulesPath)
|
||||
log.info(f"[Govdoc] Starting execution: runId={RunId}, documentId={DocumentId}")
|
||||
artifactTempDir: str | None = None
|
||||
inputPayload = None
|
||||
try:
|
||||
# 1. 更新 run 状态 → processing
|
||||
await self.Storage.UpdateRunStatus(RunId, "processing", Phase="parsing")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "processing", RunId)
|
||||
|
||||
# 1. 更新 run 状态 → processing
|
||||
await self.Storage.UpdateRunStatus(RunId, "processing", phase="parsing")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "processing", RunId)
|
||||
# 2. 解析输入文件
|
||||
inputPayload = await self.InputResolver.ResolveForDocument(DocumentId)
|
||||
log.info(f"[Govdoc] Input resolved: {inputPayload.fileName} → {inputPayload.localPath}")
|
||||
|
||||
# 2. 解析输入文件
|
||||
inputPayload = await self.InputResolver.ResolveForDocument(DocumentId)
|
||||
log.info(f"[Govdoc] Input resolved: {inputPayload.fileName} → {inputPayload.localPath}")
|
||||
# 3. 调用 govdoc_engine 执行审查
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.result_adapter import ResultAdapter
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import run as engine_run
|
||||
|
||||
# 3. 调用 govdoc_engine 执行审查
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import run as engine_run
|
||||
if self.ResultAdapter is None:
|
||||
self.ResultAdapter = ResultAdapter()
|
||||
|
||||
engineResult = await engine_run(
|
||||
file_path=inputPayload.localPath,
|
||||
rules_path=RulesPath,
|
||||
llm_client=None, # 使用默认 LlmClient (从平台配置加载)
|
||||
engineResult = await engine_run(
|
||||
file_path=inputPayload.localPath,
|
||||
rules_path=resolvedRulesPath,
|
||||
llm_client=None, # 使用默认 LlmClient (从平台配置加载)
|
||||
)
|
||||
engineResult.document["filename"] = inputPayload.fileName
|
||||
|
||||
# 4. 适配引擎结果
|
||||
structure = self.ResultAdapter.AdaptStructure(engineResult)
|
||||
outline = self.ResultAdapter.AdaptOutline(engineResult)
|
||||
entities = self.ResultAdapter.AdaptEntities(engineResult)
|
||||
runSummary = self.ResultAdapter.AdaptRunSummary(
|
||||
engineResult,
|
||||
Structure=structure,
|
||||
Outline=outline,
|
||||
Entities=entities,
|
||||
)
|
||||
ruleResults = self.ResultAdapter.AdaptRuleResults(engineResult)
|
||||
checkedRuleResults = self.ResultAdapter.AdaptCheckedRules(engineResult)
|
||||
artifactTempDir, artifacts = await self._GenerateArtifacts(
|
||||
DocumentId=DocumentId,
|
||||
RunId=RunId,
|
||||
InputPath=inputPayload.localPath,
|
||||
InputFileName=inputPayload.fileName,
|
||||
EngineResult=engineResult,
|
||||
RuleResults=ruleResults,
|
||||
)
|
||||
|
||||
failedRuleIds = {str(row.get("ruleId") or "") for row in ruleResults}
|
||||
for checkedRule in checkedRuleResults:
|
||||
ruleId = str(checkedRule.get("ruleId") or "")
|
||||
if checkedRule.get("result") == "fail" and ruleId in failedRuleIds:
|
||||
continue
|
||||
ruleResults.append(checkedRule)
|
||||
|
||||
# 将 rules_path 附带到 runSummary 中,供 GetRuleDetail 后续解析
|
||||
runSummary["rulesPath"] = resolvedRulesPath
|
||||
|
||||
# 5. 持久化结果
|
||||
await self.Storage.UpdateRunResult(RunId, runSummary)
|
||||
await self.Storage.SaveRuleResults(RunId, ruleResults)
|
||||
await self.Storage.SaveArtifacts(RunId, artifacts)
|
||||
|
||||
# 6. 更新终态
|
||||
await self.Storage.UpdateRunStatus(RunId, "completed", Phase="reporting")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "completed", RunId)
|
||||
|
||||
log.info(f"[Govdoc] Execution completed: runId={RunId}")
|
||||
|
||||
return {
|
||||
"runId": RunId,
|
||||
"documentId": DocumentId,
|
||||
"status": "completed",
|
||||
"ruleResultsCount": len(ruleResults),
|
||||
"structureCount": len(structure),
|
||||
"outlineCount": len(outline),
|
||||
"artifactCount": len(artifacts),
|
||||
}
|
||||
finally:
|
||||
if artifactTempDir:
|
||||
shutil.rmtree(artifactTempDir, ignore_errors=True)
|
||||
if inputPayload and inputPayload.tempDir:
|
||||
shutil.rmtree(inputPayload.tempDir, ignore_errors=True)
|
||||
|
||||
async def _GenerateArtifacts(
|
||||
self,
|
||||
DocumentId: int,
|
||||
RunId: int,
|
||||
InputPath: str,
|
||||
InputFileName: str,
|
||||
EngineResult: Any,
|
||||
RuleResults: list[dict[str, Any]],
|
||||
) -> tuple[str, list[dict[str, Any]]]:
|
||||
"""生成报告产物并上传到 OSS。"""
|
||||
artifactDir = tempfile.mkdtemp(prefix=f"govdoc_artifacts_{RunId}_")
|
||||
sourcePath = Path(InputPath)
|
||||
baseName = OssPathUtils.BuildSafeFileStem(InputFileName)
|
||||
region = await self._GetDocumentRegion(DocumentId)
|
||||
|
||||
annotatedPath = Path(artifactDir) / f"{baseName}.annotated.docx"
|
||||
annotate_docx(sourcePath, annotatedPath, EngineResult)
|
||||
|
||||
htmlReport = render_html(EngineResult)
|
||||
|
||||
doc = parse_docx(sourcePath)
|
||||
findingMap: dict[int, list[str]] = {}
|
||||
for row in RuleResults:
|
||||
if row.get("result") != "fail":
|
||||
continue
|
||||
paragraphIndex = row.get("paragraphIndex")
|
||||
if paragraphIndex is None:
|
||||
continue
|
||||
findingId = f"{row.get('ruleId') or 'finding'}-{paragraphIndex}"
|
||||
findingMap.setdefault(int(paragraphIndex), []).append(findingId)
|
||||
paragraphsHtml = paragraphs_to_html(doc, findingMap)
|
||||
|
||||
annotatedUrl = await self.OssService.UploadBytes(
|
||||
ObjectKey=OssPathUtils.BuildArtifactKey(
|
||||
Region=region,
|
||||
RunId=RunId,
|
||||
ArtifactType="annotated_docx",
|
||||
Detail=f"{baseName}.annotated.docx",
|
||||
),
|
||||
Content=annotatedPath.read_bytes(),
|
||||
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
)
|
||||
htmlUrl = await self.OssService.UploadText(
|
||||
ObjectKey=OssPathUtils.BuildArtifactKey(
|
||||
Region=region,
|
||||
RunId=RunId,
|
||||
ArtifactType="html_report",
|
||||
Detail=f"{baseName}.report.html",
|
||||
),
|
||||
Content=htmlReport,
|
||||
ContentType="text/html; charset=utf-8",
|
||||
)
|
||||
paragraphUrl = await self.OssService.UploadText(
|
||||
ObjectKey=OssPathUtils.BuildArtifactKey(
|
||||
Region=region,
|
||||
RunId=RunId,
|
||||
ArtifactType="paragraph_html",
|
||||
Detail=f"{baseName}.paragraphs.html",
|
||||
),
|
||||
Content=paragraphsHtml,
|
||||
ContentType="text/html; charset=utf-8",
|
||||
)
|
||||
|
||||
# 4. 适配引擎结果
|
||||
structure = self.ResultAdapter.AdaptStructure(engineResult)
|
||||
outline = self.ResultAdapter.AdaptOutline(engineResult)
|
||||
runSummary = self.ResultAdapter.AdaptRunSummary(
|
||||
engineResult,
|
||||
Structure=structure,
|
||||
Outline=outline,
|
||||
)
|
||||
ruleResults = self.ResultAdapter.AdaptRuleResults(engineResult)
|
||||
entities = self.ResultAdapter.AdaptEntities(engineResult)
|
||||
artifacts = self.ResultAdapter.AdaptArtifacts(engineResult, RunId)
|
||||
return artifactDir, [
|
||||
self._BuildArtifactRow(
|
||||
artifactType="annotated_docx",
|
||||
fileName=f"{baseName}.annotated.docx",
|
||||
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
content=annotatedPath.read_bytes(),
|
||||
ossUrl=annotatedUrl,
|
||||
description="批注 DOCX",
|
||||
),
|
||||
self._BuildArtifactRow(
|
||||
artifactType="html_report",
|
||||
fileName=f"{baseName}.report.html",
|
||||
mimeType="text/html; charset=utf-8",
|
||||
content=htmlReport.encode("utf-8"),
|
||||
ossUrl=htmlUrl,
|
||||
description="HTML 审查报告",
|
||||
),
|
||||
self._BuildArtifactRow(
|
||||
artifactType="paragraph_html",
|
||||
fileName=f"{baseName}.paragraphs.html",
|
||||
mimeType="text/html; charset=utf-8",
|
||||
content=paragraphsHtml.encode("utf-8"),
|
||||
ossUrl=paragraphUrl,
|
||||
description="段落联动视图",
|
||||
),
|
||||
]
|
||||
|
||||
# 将 rules_path 附带到 runSummary 中,供 GetRuleDetail 后续解析
|
||||
runSummary["rulesPath"] = RulesPath
|
||||
|
||||
# 5. 持久化结果
|
||||
await self.Storage.UpdateRunResult(RunId, runSummary)
|
||||
await self.Storage.SaveRuleResults(RunId, ruleResults)
|
||||
await self.Storage.SaveArtifacts(RunId, artifacts)
|
||||
|
||||
# 6. 更新终态
|
||||
await self.Storage.UpdateRunStatus(RunId, "completed", phase="reporting")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "completed", RunId)
|
||||
|
||||
log.info(f"[Govdoc] Execution completed: runId={RunId}")
|
||||
async def _GetDocumentRegion(self, DocumentId: int) -> str:
|
||||
async with GetAsyncSession() as session:
|
||||
row = (
|
||||
await session.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COALESCE(region, 'default') AS region
|
||||
FROM leaudit_documents
|
||||
WHERE id = :document_id
|
||||
LIMIT 1
|
||||
"""
|
||||
),
|
||||
{"document_id": DocumentId},
|
||||
)
|
||||
).mappings().first()
|
||||
return str(row["region"] or "default") if row else "default"
|
||||
|
||||
def _BuildArtifactRow(
|
||||
self,
|
||||
*,
|
||||
artifactType: str,
|
||||
fileName: str,
|
||||
mimeType: str,
|
||||
content: bytes,
|
||||
ossUrl: str,
|
||||
description: str,
|
||||
) -> dict[str, Any]:
|
||||
fileExt = Path(fileName).suffix.lstrip(".").lower()
|
||||
return {
|
||||
"runId": RunId,
|
||||
"documentId": DocumentId,
|
||||
"status": "completed",
|
||||
"ruleResultsCount": len(ruleResults),
|
||||
"structureCount": len(structure),
|
||||
"outlineCount": len(outline),
|
||||
"artifactCount": len(artifacts),
|
||||
"artifactType": artifactType,
|
||||
"fileName": fileName,
|
||||
"fileExt": fileExt,
|
||||
"mimeType": mimeType,
|
||||
"fileSize": len(content),
|
||||
"sha256": hashlib.sha256(content).hexdigest(),
|
||||
"ossUrl": ossUrl,
|
||||
"storageProvider": "minio",
|
||||
"description": description,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user