feat(govdoc): 新增内部公文模块全链路(后端58+前端11文件)
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
"""Govdoc 执行桥接层 —— govdoc_engine ↔ leaudit-platform 适配器。
|
||||
|
||||
本层负责把 govdoc_engine 接入当前平台的基础设施:
|
||||
- OSS/MinIO 文件下载与上传
|
||||
- 文档主档 (leaudit_documents / leaudit_document_files) 查询
|
||||
- GovdocRun / GovdocRuleResult / GovdocReportArtifact 持久化
|
||||
- Celery 异步任务调度
|
||||
- 临时文件管理与安全校验
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -0,0 +1,133 @@
|
||||
"""Govdoc Bridge — 输入文件解析器。
|
||||
|
||||
从 leaudit_document_files 中定位输入文件,从 OSS 下载到本地临时路径。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import tempfile
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
|
||||
from fastapi_common.fastapi_common_storage.oss_client import OssClient
|
||||
from sqlalchemy import select
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.models.leauditDocumentFile import LeauditDocumentFile
|
||||
|
||||
log = logger
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InputPayload:
|
||||
"""Govdoc 引擎执行所需的输入载荷。"""
|
||||
|
||||
fileName: str
|
||||
fileExt: str
|
||||
localPath: str
|
||||
sha256: str | None = None
|
||||
fileSize: int | None = None
|
||||
documentFileId: int | None = None
|
||||
tempDir: str | None = None # 需调用方在任务结束时清理
|
||||
|
||||
|
||||
class InputResolver:
|
||||
"""解析 govdoc 引擎输入文件。
|
||||
|
||||
从 leaudit_document_files 中定位输入文件 (file_role='original'),
|
||||
优先使用本地缓存路径,否则从 OSS 下载到临时目录。
|
||||
"""
|
||||
|
||||
def __init__(self, Oss: OssClient | None = None) -> None:
|
||||
self.Oss = Oss or OssClient()
|
||||
|
||||
async def ResolveForDocument(self, documentId: int) -> InputPayload:
|
||||
"""为指定文档解析输入文件载荷。
|
||||
|
||||
查找该文档最近一次激活的 original 文件记录。
|
||||
"""
|
||||
async with GetAsyncSession() as session:
|
||||
result = await session.execute(
|
||||
select(LeauditDocumentFile)
|
||||
.where(
|
||||
LeauditDocumentFile.documentId == documentId,
|
||||
LeauditDocumentFile.fileRole == "original",
|
||||
LeauditDocumentFile.isActive.is_(True),
|
||||
)
|
||||
.order_by(LeauditDocumentFile.Id.desc())
|
||||
.limit(1)
|
||||
)
|
||||
fileRow = result.scalar_one_or_none()
|
||||
|
||||
if fileRow is None:
|
||||
raise ValueError(f"未找到文档 {documentId} 的原始文件记录")
|
||||
|
||||
return await self.ResolveFromRow(fileRow)
|
||||
|
||||
async def ResolveFromRow(self, FileRow: LeauditDocumentFile) -> InputPayload:
|
||||
"""从文件记录解析输入载荷。"""
|
||||
# 优先本地路径
|
||||
if FileRow.localPath:
|
||||
LocalPath = Path(FileRow.localPath)
|
||||
if LocalPath.is_file():
|
||||
return InputPayload(
|
||||
fileName=FileRow.fileName,
|
||||
fileExt=FileRow.fileExt or _ext_from_name(FileRow.fileName),
|
||||
localPath=str(LocalPath),
|
||||
sha256=FileRow.sha256,
|
||||
fileSize=FileRow.fileSize,
|
||||
documentFileId=FileRow.Id,
|
||||
)
|
||||
|
||||
# 否则从 OSS 下载
|
||||
if FileRow.ossUrl:
|
||||
return await self._DownloadFromOss(FileRow)
|
||||
|
||||
raise ValueError(
|
||||
f"文件 {FileRow.Id} ({FileRow.fileName}) 既无可用 localPath 也无 ossUrl"
|
||||
)
|
||||
|
||||
async def _DownloadFromOss(self, FileRow: LeauditDocumentFile) -> InputPayload:
|
||||
"""从 OSS 下载文件到临时目录。"""
|
||||
try:
|
||||
content = self.Oss.DownloadBytes(FileRow.ossUrl)
|
||||
except Exception as e:
|
||||
log.error(f"从 OSS 下载文件失败: url={FileRow.ossUrl}, error={e}")
|
||||
raise
|
||||
|
||||
tempDir = tempfile.mkdtemp(prefix="govdoc_input_")
|
||||
ext = FileRow.fileExt or _ext_from_name(FileRow.fileName)
|
||||
safeName = f"input_{FileRow.Id}{ext}"
|
||||
localPath = os.path.join(tempDir, safeName)
|
||||
|
||||
with open(localPath, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
computedSha = hashlib.sha256(content).hexdigest()
|
||||
if FileRow.sha256 and computedSha != FileRow.sha256:
|
||||
log.warning(
|
||||
f"文件 SHA256 不匹配: expected={FileRow.sha256}, computed={computedSha}"
|
||||
)
|
||||
|
||||
log.info(
|
||||
f"从 OSS 下载文件: {FileRow.fileName} → {localPath} ({len(content)} bytes)"
|
||||
)
|
||||
|
||||
return InputPayload(
|
||||
fileName=FileRow.fileName,
|
||||
fileExt=ext,
|
||||
localPath=localPath,
|
||||
sha256=computedSha,
|
||||
fileSize=len(content),
|
||||
documentFileId=FileRow.Id,
|
||||
tempDir=tempDir,
|
||||
)
|
||||
|
||||
|
||||
def _ext_from_name(fileName: str) -> str:
|
||||
"""从文件名提取扩展名。"""
|
||||
_, ext = os.path.splitext(fileName)
|
||||
return ext if ext else ".docx"
|
||||
@@ -0,0 +1,112 @@
|
||||
"""Govdoc Bridge — 结果适配器。
|
||||
|
||||
将 govdoc_engine 原始结果对象 (AuditResult / Finding / SemanticEntity)
|
||||
映射为 ORM 模型字段和前端 VO 字典。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import AuditResult
|
||||
|
||||
|
||||
class ResultAdapter:
|
||||
"""Govdoc 引擎结果 → 平台数据模型适配器。
|
||||
|
||||
负责将 govdoc_engine 的原始执行结果转换为:
|
||||
- GovdocRun 状态字段更新
|
||||
- GovdocRuleResult 列表
|
||||
- GovdocReportArtifact 清单
|
||||
- 前端 VO 字典
|
||||
"""
|
||||
|
||||
def AdaptRunSummary(self, EngineResult: AuditResult) -> dict[str, Any]:
|
||||
"""从 AuditResult.summary 提取 run 汇总字段。"""
|
||||
s = EngineResult.summary
|
||||
return {
|
||||
"totalScore": s.score,
|
||||
"passedCount": s.passed_count,
|
||||
"failedCount": s.failed_count,
|
||||
"skippedCount": s.skipped_count,
|
||||
"resultStatus": "pass" if s.failed_count == 0 else "fail" if s.passed_count == 0 else "partial",
|
||||
"resultSummaryJson": None, # 可为后续扩展预留
|
||||
}
|
||||
|
||||
def AdaptRuleResults(self, EngineResult: AuditResult) -> list[dict[str, Any]]:
|
||||
"""从 AuditResult.findings 提取规则执行明细列表。"""
|
||||
results: list[dict[str, Any]] = []
|
||||
for f in EngineResult.findings:
|
||||
results.append({
|
||||
"ruleId": f.rule_id,
|
||||
"ruleName": f.rule_name,
|
||||
"severity": f.severity,
|
||||
"category": f.category,
|
||||
"message": f.message,
|
||||
"suggestion": f.suggestion,
|
||||
"actual": f.actual,
|
||||
"expected": f.expected,
|
||||
"evidence": f.evidence,
|
||||
"paragraphIndex": f.location.paragraph_index if f.location else None,
|
||||
"paragraphText": f.location.context if f.location else None,
|
||||
"locationPath": f.location.role if f.location else None,
|
||||
"result": "fail",
|
||||
"score": None,
|
||||
})
|
||||
return results
|
||||
|
||||
def AdaptCheckedRules(self, EngineResult: AuditResult) -> list[dict[str, Any]]:
|
||||
"""从 AuditResult.checked_rules 提取规则检查状态列表。"""
|
||||
results: list[dict[str, Any]] = []
|
||||
for cr in EngineResult.checked_rules:
|
||||
results.append({
|
||||
"ruleId": cr.rule_id,
|
||||
"ruleName": cr.name,
|
||||
"severity": cr.severity,
|
||||
"category": cr.category,
|
||||
"result": cr.status, # pass/fail/skipped
|
||||
"skipReason": cr.skip_reason,
|
||||
"score": None,
|
||||
})
|
||||
return results
|
||||
|
||||
def AdaptEntities(self, EngineResult: AuditResult) -> list[dict[str, Any]]:
|
||||
"""从 AuditResult.entities 提取实体识别结果。"""
|
||||
entities: list[dict[str, Any]] = []
|
||||
for name, entity in EngineResult.entities.items():
|
||||
if entity is None:
|
||||
continue
|
||||
entities.append({
|
||||
"name": entity.name,
|
||||
"text": entity.text,
|
||||
"paragraphIndices": entity.paragraph_indices,
|
||||
"primaryRole": entity.primary_role,
|
||||
"source": entity.source,
|
||||
"confidence": entity.confidence,
|
||||
})
|
||||
return entities
|
||||
|
||||
def AdaptArtifacts(self, _EngineResult: AuditResult, _RunId: int) -> list[dict[str, Any]]:
|
||||
"""从引擎结果提取报告产物清单。
|
||||
|
||||
报告文件由 reporter 模块生成后上传 OSS。
|
||||
当前返回空列表,待 report_adapter 实现后补齐。
|
||||
"""
|
||||
return []
|
||||
|
||||
def BuildDetailVO(
|
||||
self,
|
||||
Document: Any,
|
||||
Run: Any,
|
||||
RuleResults: list[dict[str, Any]],
|
||||
Entities: list[dict[str, Any]],
|
||||
Artifacts: list[dict[str, Any]],
|
||||
) -> dict[str, Any]:
|
||||
"""构建前端详情页 VO。"""
|
||||
return {
|
||||
"document": Document,
|
||||
"run": Run,
|
||||
"findings": RuleResults,
|
||||
"entities": Entities,
|
||||
"reports": Artifacts,
|
||||
}
|
||||
@@ -0,0 +1,99 @@
|
||||
"""Govdoc Bridge — 执行编排器。
|
||||
|
||||
负责组织一次完整的 govdoc 审查执行链路:
|
||||
1. 解析输入文件 (input_resolver)
|
||||
2. 调用 govdoc_engine.pipeline 执行审查
|
||||
3. 收集并适配结果 (result_adapter)
|
||||
4. 持久化结果 (storage_adapter)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.input_resolver import InputResolver
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.result_adapter import ResultAdapter
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter
|
||||
|
||||
log = logger
|
||||
|
||||
|
||||
@dataclass
|
||||
class GovdocRunner:
|
||||
"""Govdoc 引擎一次完整审查的执行编排器。
|
||||
|
||||
当前为 Phase 1 骨架:
|
||||
- engine 集成点已标注
|
||||
- 可先跑通 run 创建 → 状态更新 → 结果持久化 的完整生命周期
|
||||
- 引擎执行部分待 govdoc_engine 迁入后接入
|
||||
"""
|
||||
|
||||
InputResolver: InputResolver = field(default_factory=InputResolver)
|
||||
Storage: StorageAdapter = field(default_factory=StorageAdapter)
|
||||
ResultAdapter: ResultAdapter = field(default_factory=ResultAdapter)
|
||||
|
||||
async def Execute(
|
||||
self,
|
||||
DocumentId: int,
|
||||
RunId: int,
|
||||
RulesPath: str,
|
||||
TriggerUserId: int | None = None,
|
||||
Speed: str = "normal",
|
||||
) -> dict[str, Any]:
|
||||
"""执行一次完整的 govdoc 审查。
|
||||
|
||||
Args:
|
||||
DocumentId: 文档 ID。
|
||||
RunId: 已创建的 govdoc_runs.id。
|
||||
TriggerUserId: 触发人。
|
||||
Speed: 执行速度 ('normal' / 'urgent')。
|
||||
|
||||
Returns:
|
||||
执行摘要 dict。
|
||||
"""
|
||||
log.info(f"[Govdoc] Starting execution: runId={RunId}, documentId={DocumentId}")
|
||||
|
||||
# 1. 更新 run 状态 → processing
|
||||
await self.Storage.UpdateRunStatus(RunId, "processing", phase="parsing")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "processing", RunId)
|
||||
|
||||
# 2. 解析输入文件
|
||||
inputPayload = await self.InputResolver.ResolveForDocument(DocumentId)
|
||||
log.info(f"[Govdoc] Input resolved: {inputPayload.fileName} → {inputPayload.localPath}")
|
||||
|
||||
# 3. 调用 govdoc_engine 执行审查
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import run as engine_run
|
||||
|
||||
engineResult = await engine_run(
|
||||
file_path=inputPayload.localPath,
|
||||
rules_path=RulesPath,
|
||||
llm_client=None, # 使用默认 LlmClient (从平台配置加载)
|
||||
)
|
||||
|
||||
# 4. 适配引擎结果
|
||||
runSummary = self.ResultAdapter.AdaptRunSummary(engineResult)
|
||||
ruleResults = self.ResultAdapter.AdaptRuleResults(engineResult)
|
||||
entities = self.ResultAdapter.AdaptEntities(engineResult)
|
||||
artifacts = self.ResultAdapter.AdaptArtifacts(engineResult, RunId)
|
||||
|
||||
# 5. 持久化结果
|
||||
await self.Storage.UpdateRunResult(RunId, runSummary)
|
||||
await self.Storage.SaveRuleResults(RunId, ruleResults)
|
||||
await self.Storage.SaveArtifacts(RunId, artifacts)
|
||||
|
||||
# 6. 更新终态
|
||||
await self.Storage.UpdateRunStatus(RunId, "completed", phase="reporting")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "completed", RunId)
|
||||
|
||||
log.info(f"[Govdoc] Execution completed: runId={RunId}")
|
||||
|
||||
return {
|
||||
"runId": RunId,
|
||||
"documentId": DocumentId,
|
||||
"status": "completed",
|
||||
"ruleResultsCount": len(ruleResults),
|
||||
"artifactCount": len(artifacts),
|
||||
}
|
||||
@@ -0,0 +1,221 @@
|
||||
"""Govdoc Bridge — 存储适配器。
|
||||
|
||||
将 govdoc_engine 执行结果写入 govdoc_runs / govdoc_rule_results /
|
||||
govdoc_report_artifacts 表,并更新 leaudit_documents 状态。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
|
||||
from sqlalchemy import text
|
||||
|
||||
log = logger
|
||||
|
||||
|
||||
class StorageAdapter:
|
||||
"""Govdoc 结果持久化适配器。
|
||||
|
||||
将 bridge 层产生的结构化结果写入 PostgreSQL。
|
||||
使用原生 SQL (text) 以保持与现有 leaudit_bridge/storage_adapter 风格一致。
|
||||
"""
|
||||
|
||||
# ── Run 状态 ─────────────────────────────────────────
|
||||
|
||||
async def CreateRun(self, RunData: dict[str, Any]) -> int:
|
||||
"""创建 govdoc_runs 记录,返回 run_id。"""
|
||||
async with GetAsyncSession() as session:
|
||||
result = await session.execute(
|
||||
text(
|
||||
"""INSERT INTO govdoc_runs
|
||||
(document_id, document_file_id, run_no, trigger_source,
|
||||
trigger_user_id, status, phase, created_at, updated_at)
|
||||
VALUES (:document_id, :document_file_id, :run_no, :trigger_source,
|
||||
:trigger_user_id, 'pending', 'parsing', now(), now())
|
||||
RETURNING id"""
|
||||
),
|
||||
{
|
||||
"document_id": RunData["documentId"],
|
||||
"document_file_id": RunData.get("documentFileId"),
|
||||
"run_no": RunData.get("runNo", 1),
|
||||
"trigger_source": RunData.get("triggerSource", "manual"),
|
||||
"trigger_user_id": RunData.get("triggerUserId"),
|
||||
},
|
||||
)
|
||||
row = result.fetchone()
|
||||
run_id = row[0] if row else 0
|
||||
await session.commit()
|
||||
log.info(f"[Govdoc] Run created: runId={run_id}, documentId={RunData['documentId']}")
|
||||
return run_id
|
||||
|
||||
async def UpdateRunStatus(self, RunId: int, Status: str, Phase: str | None = None, **Extra: Any) -> None:
|
||||
"""更新 run 状态和阶段。"""
|
||||
set_clauses = ["status = :status", "updated_at = now()"]
|
||||
params: dict[str, Any] = {"rid": RunId, "status": Status}
|
||||
|
||||
if Phase is not None:
|
||||
set_clauses.append("phase = :phase")
|
||||
params["phase"] = Phase
|
||||
|
||||
if Status == "completed" or Status == "failed":
|
||||
set_clauses.append("finished_at = :finished_at")
|
||||
params["finished_at"] = datetime.now(timezone.utc)
|
||||
|
||||
async with GetAsyncSession() as session:
|
||||
await session.execute(
|
||||
text(f"UPDATE govdoc_runs SET {', '.join(set_clauses)} WHERE id = :rid"),
|
||||
params,
|
||||
)
|
||||
await session.commit()
|
||||
log.info(f"[Govdoc] Run status updated: runId={RunId}, status={Status}")
|
||||
|
||||
async def UpdateRunResult(self, RunId: int, Summary: dict[str, Any]) -> None:
|
||||
"""写入 run 结果汇总字段。"""
|
||||
async with GetAsyncSession() as session:
|
||||
await session.execute(
|
||||
text(
|
||||
"""UPDATE govdoc_runs SET
|
||||
total_score = :total_score,
|
||||
passed_count = :passed_count,
|
||||
failed_count = :failed_count,
|
||||
skipped_count = :skipped_count,
|
||||
result_status = :result_status,
|
||||
result_summary_json = :result_summary_json,
|
||||
updated_at = now()
|
||||
WHERE id = :rid"""
|
||||
),
|
||||
{
|
||||
"rid": RunId,
|
||||
"total_score": Summary.get("totalScore"),
|
||||
"passed_count": Summary.get("passedCount", 0),
|
||||
"failed_count": Summary.get("failedCount", 0),
|
||||
"skipped_count": Summary.get("skippedCount", 0),
|
||||
"result_status": Summary.get("resultStatus"),
|
||||
"result_summary_json": Summary.get("resultSummaryJson"),
|
||||
},
|
||||
)
|
||||
await session.commit()
|
||||
log.info(f"[Govdoc] Run result saved: runId={RunId}")
|
||||
|
||||
async def UpdateRunError(self, RunId: int, ErrorMessage: str) -> None:
|
||||
"""记录运行失败的错误信息。"""
|
||||
async with GetAsyncSession() as session:
|
||||
await session.execute(
|
||||
text(
|
||||
"""UPDATE govdoc_runs SET
|
||||
status = 'failed',
|
||||
error_message = :error_message,
|
||||
finished_at = now(),
|
||||
updated_at = now()
|
||||
WHERE id = :rid"""
|
||||
),
|
||||
{"rid": RunId, "error_message": ErrorMessage},
|
||||
)
|
||||
await session.commit()
|
||||
log.error(f"[Govdoc] Run failed: runId={RunId}, error={ErrorMessage[:200]}")
|
||||
|
||||
# ── 规则结果 ─────────────────────────────────────────
|
||||
|
||||
async def SaveRuleResults(self, RunId: int, Results: list[dict[str, Any]]) -> None:
|
||||
"""批量写入 govdoc_rule_results。"""
|
||||
if not Results:
|
||||
return
|
||||
|
||||
async with GetAsyncSession() as session:
|
||||
for row in Results:
|
||||
await session.execute(
|
||||
text(
|
||||
"""INSERT INTO govdoc_rule_results
|
||||
(run_id, rule_id, rule_name, severity, category,
|
||||
message, suggestion, actual, expected, evidence,
|
||||
paragraph_index, paragraph_text, location_path,
|
||||
result, score, created_at, updated_at)
|
||||
VALUES (:run_id, :rule_id, :rule_name, :severity, :category,
|
||||
:message, :suggestion, :actual, :expected, :evidence,
|
||||
:paragraph_index, :paragraph_text, :location_path,
|
||||
:result, :score, now(), now())"""
|
||||
),
|
||||
{
|
||||
"run_id": RunId,
|
||||
"rule_id": row.get("ruleId"),
|
||||
"rule_name": row.get("ruleName"),
|
||||
"severity": row.get("severity"),
|
||||
"category": row.get("category"),
|
||||
"message": row.get("message"),
|
||||
"suggestion": row.get("suggestion"),
|
||||
"actual": row.get("actual"),
|
||||
"expected": row.get("expected"),
|
||||
"evidence": row.get("evidence"),
|
||||
"paragraph_index": row.get("paragraphIndex"),
|
||||
"paragraph_text": row.get("paragraphText"),
|
||||
"location_path": row.get("locationPath"),
|
||||
"result": row.get("result", "pass"),
|
||||
"score": row.get("score"),
|
||||
},
|
||||
)
|
||||
await session.commit()
|
||||
log.info(f"[Govdoc] Rule results saved: runId={RunId}, count={len(Results)}")
|
||||
|
||||
# ── 报告产物 ─────────────────────────────────────────
|
||||
|
||||
async def SaveArtifacts(self, RunId: int, Artifacts: list[dict[str, Any]]) -> None:
|
||||
"""批量写入 govdoc_report_artifacts。"""
|
||||
if not Artifacts:
|
||||
return
|
||||
|
||||
async with GetAsyncSession() as session:
|
||||
for row in Artifacts:
|
||||
await session.execute(
|
||||
text(
|
||||
"""INSERT INTO govdoc_report_artifacts
|
||||
(run_id, artifact_type, file_name, file_ext, mime_type,
|
||||
file_size, sha256, oss_url, storage_provider, description,
|
||||
created_at, updated_at)
|
||||
VALUES (:run_id, :artifact_type, :file_name, :file_ext, :mime_type,
|
||||
:file_size, :sha256, :oss_url, :storage_provider, :description,
|
||||
now(), now())"""
|
||||
),
|
||||
{
|
||||
"run_id": RunId,
|
||||
"artifact_type": row.get("artifactType"),
|
||||
"file_name": row.get("fileName"),
|
||||
"file_ext": row.get("fileExt"),
|
||||
"mime_type": row.get("mimeType"),
|
||||
"file_size": row.get("fileSize"),
|
||||
"sha256": row.get("sha256"),
|
||||
"oss_url": row.get("ossUrl"),
|
||||
"storage_provider": row.get("storageProvider"),
|
||||
"description": row.get("description"),
|
||||
},
|
||||
)
|
||||
await session.commit()
|
||||
log.info(f"[Govdoc] Artifacts saved: runId={RunId}, count={len(Artifacts)}")
|
||||
|
||||
# ── 文档状态 ─────────────────────────────────────────
|
||||
|
||||
async def UpdateDocumentStatus(self, DocumentId: int, ProcessingStatus: str, RunId: int | None = None) -> None:
|
||||
"""更新 leaudit_documents 的处理状态和当前 run_id。"""
|
||||
async with GetAsyncSession() as session:
|
||||
if RunId is not None:
|
||||
await session.execute(
|
||||
text(
|
||||
"""UPDATE leaudit_documents SET
|
||||
processing_status = :s, current_run_id = :rid, updated_at = now()
|
||||
WHERE id = :did"""
|
||||
),
|
||||
{"s": ProcessingStatus, "rid": RunId, "did": DocumentId},
|
||||
)
|
||||
else:
|
||||
await session.execute(
|
||||
text(
|
||||
"""UPDATE leaudit_documents SET
|
||||
processing_status = :s, updated_at = now()
|
||||
WHERE id = :did"""
|
||||
),
|
||||
{"s": ProcessingStatus, "did": DocumentId},
|
||||
)
|
||||
await session.commit()
|
||||
log.info(f"[Govdoc] Document status updated: documentId={DocumentId}, status={ProcessingStatus}")
|
||||
@@ -0,0 +1,115 @@
|
||||
"""Govdoc Bridge — Celery 任务入口。
|
||||
|
||||
将 govdoc 审查执行投递到 Celery worker 队列。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
|
||||
from fastapi_admin.celery_app import celery_app
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.runner import GovdocRunner
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter
|
||||
|
||||
log = logger
|
||||
|
||||
GOVDOC_WORKER_QUEUE = "govdoc"
|
||||
GOVDOC_WORKER_QUEUE_URGENT = "govdoc_urgent"
|
||||
|
||||
|
||||
def resolve_govdoc_queue(speed: str = "normal") -> str:
|
||||
"""根据优先级返回对应的 worker 队列名。"""
|
||||
if (speed or "").strip().lower() in {"urgent", "high", "fast", "紧急"}:
|
||||
return GOVDOC_WORKER_QUEUE_URGENT
|
||||
return GOVDOC_WORKER_QUEUE
|
||||
|
||||
|
||||
def dispatch_govdoc_task(
|
||||
documentId: int,
|
||||
runId: int,
|
||||
triggerUserId: int | None = None,
|
||||
speed: str = "normal",
|
||||
) -> Any:
|
||||
"""投递 govdoc 审查任务到 Celery 队列。
|
||||
|
||||
Args:
|
||||
documentId: 文档 ID。
|
||||
runId: 已创建的 govdoc_runs.id。
|
||||
triggerUserId: 触发人。
|
||||
speed: 优先级 ('normal' / 'urgent')。
|
||||
|
||||
Returns:
|
||||
Celery AsyncResult。
|
||||
"""
|
||||
queue = resolve_govdoc_queue(speed)
|
||||
log.info(
|
||||
f"[Govdoc] Dispatching task: runId={runId}, documentId={documentId}, queue={queue}"
|
||||
)
|
||||
return govdoc_execute_task.apply_async(
|
||||
kwargs={
|
||||
"documentId": documentId,
|
||||
"runId": runId,
|
||||
"triggerUserId": triggerUserId,
|
||||
"speed": speed,
|
||||
},
|
||||
queue=queue,
|
||||
)
|
||||
|
||||
|
||||
@celery_app.task(
|
||||
bind=True,
|
||||
name="govdoc_execute_task",
|
||||
max_retries=0,
|
||||
default_retry_delay=60,
|
||||
acks_late=True,
|
||||
reject_on_worker_lost=True,
|
||||
task_time_limit=30 * 60, # 单次执行 30 分钟超时
|
||||
task_soft_time_limit=25 * 60,
|
||||
)
|
||||
def govdoc_execute_task(
|
||||
self,
|
||||
documentId: int,
|
||||
runId: int,
|
||||
triggerUserId: int | None = None,
|
||||
speed: str = "normal",
|
||||
) -> dict[str, Any]:
|
||||
"""Celery 任务:执行一次 govdoc 公文格式审查。
|
||||
|
||||
此任务由 dispatch_govdoc_task 投递,worker 消费后执行完整审查链路。
|
||||
"""
|
||||
taskId = self.request.id or "unknown"
|
||||
log.info(f"[Govdoc] Task started: taskId={taskId}, runId={runId}, documentId={documentId}")
|
||||
|
||||
storage = StorageAdapter()
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
# 更新 run 状态 → running
|
||||
loop.run_until_complete(storage.UpdateRunStatus(runId, "processing", phase="parsing"))
|
||||
|
||||
# 执行完整审查链路
|
||||
runner = GovdocRunner()
|
||||
result = loop.run_until_complete(
|
||||
runner.Execute(
|
||||
DocumentId=documentId,
|
||||
RunId=runId,
|
||||
TriggerUserId=triggerUserId,
|
||||
Speed=speed,
|
||||
)
|
||||
)
|
||||
log.info(f"[Govdoc] Task completed: taskId={taskId}, runId={runId}")
|
||||
return result
|
||||
|
||||
except Exception as exc:
|
||||
errorMessage = str(exc)[:2000]
|
||||
log.exception(f"[Govdoc] Task failed: taskId={taskId}, runId={runId}, error={errorMessage[:200]}")
|
||||
loop.run_until_complete(storage.UpdateRunError(runId, errorMessage))
|
||||
loop.run_until_complete(storage.UpdateDocumentStatus(documentId, "failed", runId))
|
||||
raise
|
||||
|
||||
finally:
|
||||
loop.close()
|
||||
Reference in New Issue
Block a user