feat: integrate govdoc module into leaudit platform
This commit is contained in:
@@ -33,7 +33,10 @@ celery_app = Celery(
|
||||
|
||||
celery_app.conf.update(
|
||||
task_default_queue=LEAUDIT_WORKER_QUEUE_NORMAL,
|
||||
imports=("fastapi_modules.fastapi_leaudit.leaudit_bridge.tasks",),
|
||||
imports=(
|
||||
"fastapi_modules.fastapi_leaudit.leaudit_bridge.tasks",
|
||||
"fastapi_modules.fastapi_leaudit.govdoc_bridge.tasks",
|
||||
),
|
||||
task_queues=(
|
||||
Queue(LEAUDIT_WORKER_QUEUE_URGENT),
|
||||
Queue(LEAUDIT_WORKER_QUEUE_NORMAL),
|
||||
@@ -56,9 +59,11 @@ celery_app.conf.update(
|
||||
celery_app.autodiscover_tasks(
|
||||
[
|
||||
"fastapi_modules.fastapi_leaudit.leaudit_bridge",
|
||||
"fastapi_modules.fastapi_leaudit.govdoc_bridge",
|
||||
],
|
||||
force=True,
|
||||
)
|
||||
|
||||
# 显式导入任务模块,避免 worker 在某些启动方式下漏注册 bridge tasks。
|
||||
from fastapi_modules.fastapi_leaudit.leaudit_bridge import tasks as _leaudit_bridge_tasks # noqa: F401,E402
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge import tasks as _govdoc_bridge_tasks # noqa: F401,E402
|
||||
|
||||
@@ -31,7 +31,7 @@ class GovdocController(BaseController):
|
||||
file: UploadFile = File(...),
|
||||
typeId: int | None = Form(default=None),
|
||||
region: str = Form(default="default"),
|
||||
autoRun: bool = Form(default=False),
|
||||
autoRun: bool = Form(default=True),
|
||||
speed: str = Form(default="normal"),
|
||||
ruleVersionId: int | None = Form(default=None),
|
||||
payload: dict[str, Any] = Depends(verify_access_token),
|
||||
@@ -56,6 +56,7 @@ class GovdocController(BaseController):
|
||||
page: int = Query(default=1, ge=1),
|
||||
pageSize: int = Query(default=20, ge=1, le=100),
|
||||
keyword: str | None = Query(default=None),
|
||||
fileExt: str | None = Query(default=None),
|
||||
region: str | None = Query(default=None),
|
||||
status: str | None = Query(default=None),
|
||||
resultStatus: str | None = Query(default=None),
|
||||
@@ -72,6 +73,7 @@ class GovdocController(BaseController):
|
||||
page=page,
|
||||
pageSize=pageSize,
|
||||
keyword=keyword,
|
||||
fileExt=fileExt,
|
||||
region=region,
|
||||
status=status,
|
||||
resultStatus=resultStatus,
|
||||
|
||||
@@ -26,6 +26,7 @@ class ResultAdapter:
|
||||
EngineResult: AuditResult,
|
||||
Structure: list[dict[str, Any]] | None = None,
|
||||
Outline: list[dict[str, Any]] | None = None,
|
||||
Entities: list[dict[str, Any]] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""从 AuditResult.summary 提取 run 汇总字段。
|
||||
|
||||
@@ -40,6 +41,10 @@ class ResultAdapter:
|
||||
aux["structure"] = Structure
|
||||
if Outline is not None:
|
||||
aux["outline"] = Outline
|
||||
if Entities is not None:
|
||||
aux["entities"] = {
|
||||
entity["name"]: entity for entity in Entities if entity.get("name")
|
||||
}
|
||||
|
||||
return {
|
||||
"totalScore": s.score,
|
||||
@@ -100,6 +105,7 @@ class ResultAdapter:
|
||||
"primaryRole": entity.primary_role,
|
||||
"source": entity.source,
|
||||
"confidence": entity.confidence,
|
||||
"extra": entity.extra,
|
||||
})
|
||||
return entities
|
||||
|
||||
|
||||
@@ -9,14 +9,26 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import shutil
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
from fastapi_common.fastapi_common_sqlalchemy.database import GetAsyncSession
|
||||
from fastapi_common.fastapi_common_storage.oss_path_utils import OssPathUtils
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.input_resolver import InputResolver
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.result_adapter import ResultAdapter
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.docx_parser import parse_docx
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.docx_annotator import annotate_docx
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_paragraph import paragraphs_to_html
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.reporter.html_renderer import render_html
|
||||
from fastapi_modules.fastapi_leaudit.services.impl.ossServiceImpl import OssServiceImpl
|
||||
|
||||
log = logger
|
||||
|
||||
@@ -33,13 +45,30 @@ class GovdocRunner:
|
||||
|
||||
InputResolver: InputResolver = field(default_factory=InputResolver)
|
||||
Storage: StorageAdapter = field(default_factory=StorageAdapter)
|
||||
ResultAdapter: ResultAdapter = field(default_factory=ResultAdapter)
|
||||
OssService: OssServiceImpl = field(default_factory=OssServiceImpl)
|
||||
ResultAdapter: Any | None = None
|
||||
|
||||
def ResolveRulesPath(self, RulesPath: str | None) -> str:
|
||||
"""解析并校验执行所需规则文件路径。"""
|
||||
candidate = (RulesPath or "").strip()
|
||||
if not candidate:
|
||||
raise ValueError("未提供 govdoc rules_path,当前任务无法执行规则审查")
|
||||
|
||||
path = Path(candidate).expanduser()
|
||||
if not path.is_absolute():
|
||||
path = Path.cwd() / path
|
||||
path = path.resolve()
|
||||
|
||||
if not path.is_file():
|
||||
raise FileNotFoundError(f"govdoc 规则文件不存在: {path}")
|
||||
|
||||
return str(path)
|
||||
|
||||
async def Execute(
|
||||
self,
|
||||
DocumentId: int,
|
||||
RunId: int,
|
||||
RulesPath: str,
|
||||
RulesPath: str | None = None,
|
||||
TriggerUserId: int | None = None,
|
||||
Speed: str = "normal",
|
||||
) -> dict[str, Any]:
|
||||
@@ -54,57 +83,216 @@ class GovdocRunner:
|
||||
Returns:
|
||||
执行摘要 dict。
|
||||
"""
|
||||
resolvedRulesPath = self.ResolveRulesPath(RulesPath)
|
||||
log.info(f"[Govdoc] Starting execution: runId={RunId}, documentId={DocumentId}")
|
||||
artifactTempDir: str | None = None
|
||||
inputPayload = None
|
||||
try:
|
||||
# 1. 更新 run 状态 → processing
|
||||
await self.Storage.UpdateRunStatus(RunId, "processing", Phase="parsing")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "processing", RunId)
|
||||
|
||||
# 1. 更新 run 状态 → processing
|
||||
await self.Storage.UpdateRunStatus(RunId, "processing", phase="parsing")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "processing", RunId)
|
||||
# 2. 解析输入文件
|
||||
inputPayload = await self.InputResolver.ResolveForDocument(DocumentId)
|
||||
log.info(f"[Govdoc] Input resolved: {inputPayload.fileName} → {inputPayload.localPath}")
|
||||
|
||||
# 2. 解析输入文件
|
||||
inputPayload = await self.InputResolver.ResolveForDocument(DocumentId)
|
||||
log.info(f"[Govdoc] Input resolved: {inputPayload.fileName} → {inputPayload.localPath}")
|
||||
# 3. 调用 govdoc_engine 执行审查
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.result_adapter import ResultAdapter
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import run as engine_run
|
||||
|
||||
# 3. 调用 govdoc_engine 执行审查
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import run as engine_run
|
||||
if self.ResultAdapter is None:
|
||||
self.ResultAdapter = ResultAdapter()
|
||||
|
||||
engineResult = await engine_run(
|
||||
file_path=inputPayload.localPath,
|
||||
rules_path=RulesPath,
|
||||
llm_client=None, # 使用默认 LlmClient (从平台配置加载)
|
||||
engineResult = await engine_run(
|
||||
file_path=inputPayload.localPath,
|
||||
rules_path=resolvedRulesPath,
|
||||
llm_client=None, # 使用默认 LlmClient (从平台配置加载)
|
||||
)
|
||||
engineResult.document["filename"] = inputPayload.fileName
|
||||
|
||||
# 4. 适配引擎结果
|
||||
structure = self.ResultAdapter.AdaptStructure(engineResult)
|
||||
outline = self.ResultAdapter.AdaptOutline(engineResult)
|
||||
entities = self.ResultAdapter.AdaptEntities(engineResult)
|
||||
runSummary = self.ResultAdapter.AdaptRunSummary(
|
||||
engineResult,
|
||||
Structure=structure,
|
||||
Outline=outline,
|
||||
Entities=entities,
|
||||
)
|
||||
ruleResults = self.ResultAdapter.AdaptRuleResults(engineResult)
|
||||
checkedRuleResults = self.ResultAdapter.AdaptCheckedRules(engineResult)
|
||||
artifactTempDir, artifacts = await self._GenerateArtifacts(
|
||||
DocumentId=DocumentId,
|
||||
RunId=RunId,
|
||||
InputPath=inputPayload.localPath,
|
||||
InputFileName=inputPayload.fileName,
|
||||
EngineResult=engineResult,
|
||||
RuleResults=ruleResults,
|
||||
)
|
||||
|
||||
failedRuleIds = {str(row.get("ruleId") or "") for row in ruleResults}
|
||||
for checkedRule in checkedRuleResults:
|
||||
ruleId = str(checkedRule.get("ruleId") or "")
|
||||
if checkedRule.get("result") == "fail" and ruleId in failedRuleIds:
|
||||
continue
|
||||
ruleResults.append(checkedRule)
|
||||
|
||||
# 将 rules_path 附带到 runSummary 中,供 GetRuleDetail 后续解析
|
||||
runSummary["rulesPath"] = resolvedRulesPath
|
||||
|
||||
# 5. 持久化结果
|
||||
await self.Storage.UpdateRunResult(RunId, runSummary)
|
||||
await self.Storage.SaveRuleResults(RunId, ruleResults)
|
||||
await self.Storage.SaveArtifacts(RunId, artifacts)
|
||||
|
||||
# 6. 更新终态
|
||||
await self.Storage.UpdateRunStatus(RunId, "completed", Phase="reporting")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "completed", RunId)
|
||||
|
||||
log.info(f"[Govdoc] Execution completed: runId={RunId}")
|
||||
|
||||
return {
|
||||
"runId": RunId,
|
||||
"documentId": DocumentId,
|
||||
"status": "completed",
|
||||
"ruleResultsCount": len(ruleResults),
|
||||
"structureCount": len(structure),
|
||||
"outlineCount": len(outline),
|
||||
"artifactCount": len(artifacts),
|
||||
}
|
||||
finally:
|
||||
if artifactTempDir:
|
||||
shutil.rmtree(artifactTempDir, ignore_errors=True)
|
||||
if inputPayload and inputPayload.tempDir:
|
||||
shutil.rmtree(inputPayload.tempDir, ignore_errors=True)
|
||||
|
||||
async def _GenerateArtifacts(
|
||||
self,
|
||||
DocumentId: int,
|
||||
RunId: int,
|
||||
InputPath: str,
|
||||
InputFileName: str,
|
||||
EngineResult: Any,
|
||||
RuleResults: list[dict[str, Any]],
|
||||
) -> tuple[str, list[dict[str, Any]]]:
|
||||
"""生成报告产物并上传到 OSS。"""
|
||||
artifactDir = tempfile.mkdtemp(prefix=f"govdoc_artifacts_{RunId}_")
|
||||
sourcePath = Path(InputPath)
|
||||
baseName = OssPathUtils.BuildSafeFileStem(InputFileName)
|
||||
region = await self._GetDocumentRegion(DocumentId)
|
||||
|
||||
annotatedPath = Path(artifactDir) / f"{baseName}.annotated.docx"
|
||||
annotate_docx(sourcePath, annotatedPath, EngineResult)
|
||||
|
||||
htmlReport = render_html(EngineResult)
|
||||
|
||||
doc = parse_docx(sourcePath)
|
||||
findingMap: dict[int, list[str]] = {}
|
||||
for row in RuleResults:
|
||||
if row.get("result") != "fail":
|
||||
continue
|
||||
paragraphIndex = row.get("paragraphIndex")
|
||||
if paragraphIndex is None:
|
||||
continue
|
||||
findingId = f"{row.get('ruleId') or 'finding'}-{paragraphIndex}"
|
||||
findingMap.setdefault(int(paragraphIndex), []).append(findingId)
|
||||
paragraphsHtml = paragraphs_to_html(doc, findingMap)
|
||||
|
||||
annotatedUrl = await self.OssService.UploadBytes(
|
||||
ObjectKey=OssPathUtils.BuildArtifactKey(
|
||||
Region=region,
|
||||
RunId=RunId,
|
||||
ArtifactType="annotated_docx",
|
||||
Detail=f"{baseName}.annotated.docx",
|
||||
),
|
||||
Content=annotatedPath.read_bytes(),
|
||||
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
)
|
||||
htmlUrl = await self.OssService.UploadText(
|
||||
ObjectKey=OssPathUtils.BuildArtifactKey(
|
||||
Region=region,
|
||||
RunId=RunId,
|
||||
ArtifactType="html_report",
|
||||
Detail=f"{baseName}.report.html",
|
||||
),
|
||||
Content=htmlReport,
|
||||
ContentType="text/html; charset=utf-8",
|
||||
)
|
||||
paragraphUrl = await self.OssService.UploadText(
|
||||
ObjectKey=OssPathUtils.BuildArtifactKey(
|
||||
Region=region,
|
||||
RunId=RunId,
|
||||
ArtifactType="paragraph_html",
|
||||
Detail=f"{baseName}.paragraphs.html",
|
||||
),
|
||||
Content=paragraphsHtml,
|
||||
ContentType="text/html; charset=utf-8",
|
||||
)
|
||||
|
||||
# 4. 适配引擎结果
|
||||
structure = self.ResultAdapter.AdaptStructure(engineResult)
|
||||
outline = self.ResultAdapter.AdaptOutline(engineResult)
|
||||
runSummary = self.ResultAdapter.AdaptRunSummary(
|
||||
engineResult,
|
||||
Structure=structure,
|
||||
Outline=outline,
|
||||
)
|
||||
ruleResults = self.ResultAdapter.AdaptRuleResults(engineResult)
|
||||
entities = self.ResultAdapter.AdaptEntities(engineResult)
|
||||
artifacts = self.ResultAdapter.AdaptArtifacts(engineResult, RunId)
|
||||
return artifactDir, [
|
||||
self._BuildArtifactRow(
|
||||
artifactType="annotated_docx",
|
||||
fileName=f"{baseName}.annotated.docx",
|
||||
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
content=annotatedPath.read_bytes(),
|
||||
ossUrl=annotatedUrl,
|
||||
description="批注 DOCX",
|
||||
),
|
||||
self._BuildArtifactRow(
|
||||
artifactType="html_report",
|
||||
fileName=f"{baseName}.report.html",
|
||||
mimeType="text/html; charset=utf-8",
|
||||
content=htmlReport.encode("utf-8"),
|
||||
ossUrl=htmlUrl,
|
||||
description="HTML 审查报告",
|
||||
),
|
||||
self._BuildArtifactRow(
|
||||
artifactType="paragraph_html",
|
||||
fileName=f"{baseName}.paragraphs.html",
|
||||
mimeType="text/html; charset=utf-8",
|
||||
content=paragraphsHtml.encode("utf-8"),
|
||||
ossUrl=paragraphUrl,
|
||||
description="段落联动视图",
|
||||
),
|
||||
]
|
||||
|
||||
# 将 rules_path 附带到 runSummary 中,供 GetRuleDetail 后续解析
|
||||
runSummary["rulesPath"] = RulesPath
|
||||
|
||||
# 5. 持久化结果
|
||||
await self.Storage.UpdateRunResult(RunId, runSummary)
|
||||
await self.Storage.SaveRuleResults(RunId, ruleResults)
|
||||
await self.Storage.SaveArtifacts(RunId, artifacts)
|
||||
|
||||
# 6. 更新终态
|
||||
await self.Storage.UpdateRunStatus(RunId, "completed", phase="reporting")
|
||||
await self.Storage.UpdateDocumentStatus(DocumentId, "completed", RunId)
|
||||
|
||||
log.info(f"[Govdoc] Execution completed: runId={RunId}")
|
||||
async def _GetDocumentRegion(self, DocumentId: int) -> str:
|
||||
async with GetAsyncSession() as session:
|
||||
row = (
|
||||
await session.execute(
|
||||
text(
|
||||
"""
|
||||
SELECT COALESCE(region, 'default') AS region
|
||||
FROM leaudit_documents
|
||||
WHERE id = :document_id
|
||||
LIMIT 1
|
||||
"""
|
||||
),
|
||||
{"document_id": DocumentId},
|
||||
)
|
||||
).mappings().first()
|
||||
return str(row["region"] or "default") if row else "default"
|
||||
|
||||
def _BuildArtifactRow(
|
||||
self,
|
||||
*,
|
||||
artifactType: str,
|
||||
fileName: str,
|
||||
mimeType: str,
|
||||
content: bytes,
|
||||
ossUrl: str,
|
||||
description: str,
|
||||
) -> dict[str, Any]:
|
||||
fileExt = Path(fileName).suffix.lstrip(".").lower()
|
||||
return {
|
||||
"runId": RunId,
|
||||
"documentId": DocumentId,
|
||||
"status": "completed",
|
||||
"ruleResultsCount": len(ruleResults),
|
||||
"structureCount": len(structure),
|
||||
"outlineCount": len(outline),
|
||||
"artifactCount": len(artifacts),
|
||||
"artifactType": artifactType,
|
||||
"fileName": fileName,
|
||||
"fileExt": fileExt,
|
||||
"mimeType": mimeType,
|
||||
"fileSize": len(content),
|
||||
"sha256": hashlib.sha256(content).hexdigest(),
|
||||
"ossUrl": ossUrl,
|
||||
"storageProvider": "minio",
|
||||
"description": description,
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ govdoc_report_artifacts 表,并更新 leaudit_documents 状态。
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
@@ -16,6 +17,33 @@ from sqlalchemy import text
|
||||
log = logger
|
||||
|
||||
|
||||
_RUN_RESULT_FALLBACK_KEYS = {
|
||||
"total_score": ["totalScore", "score"],
|
||||
"passed_count": ["passedCount", "passed_count"],
|
||||
"failed_count": ["failedCount", "failed_count"],
|
||||
"skipped_count": ["skippedCount", "skipped_count"],
|
||||
"result_status": ["resultStatus", "result_status"],
|
||||
"result_summary_json": ["resultSummaryJson", "result_summary_json"],
|
||||
}
|
||||
|
||||
|
||||
def _pick_value(payload: dict[str, Any], *keys: str) -> Any:
|
||||
for key in keys:
|
||||
if key in payload:
|
||||
return payload[key]
|
||||
return None
|
||||
|
||||
|
||||
def _to_text_payload(value: Any) -> str | None:
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
if isinstance(value, (dict, list, tuple, bool, int, float)):
|
||||
return json.dumps(value, ensure_ascii=False)
|
||||
return str(value)
|
||||
|
||||
|
||||
class StorageAdapter:
|
||||
"""Govdoc 结果持久化适配器。
|
||||
|
||||
@@ -51,11 +79,23 @@ class StorageAdapter:
|
||||
log.info(f"[Govdoc] Run created: runId={run_id}, documentId={RunData['documentId']}")
|
||||
return run_id
|
||||
|
||||
async def UpdateRunStatus(self, RunId: int, Status: str, Phase: str | None = None, **Extra: Any) -> None:
|
||||
"""更新 run 状态和阶段。"""
|
||||
async def UpdateRunStatus(
|
||||
self,
|
||||
RunId: int,
|
||||
Status: str,
|
||||
Phase: str | None = None,
|
||||
**Extra: Any,
|
||||
) -> None:
|
||||
"""更新 run 状态和阶段。
|
||||
|
||||
兼容调用方传 ``phase=`` 或 ``Phase=``,避免因大小写不一致导致阶段不落库。
|
||||
"""
|
||||
set_clauses = ["status = :status", "updated_at = now()"]
|
||||
params: dict[str, Any] = {"rid": RunId, "status": Status}
|
||||
|
||||
if Phase is None:
|
||||
Phase = _pick_value(Extra, "phase", "Phase")
|
||||
|
||||
if Phase is not None:
|
||||
set_clauses.append("phase = :phase")
|
||||
params["phase"] = Phase
|
||||
@@ -63,6 +103,9 @@ class StorageAdapter:
|
||||
if Status == "completed" or Status == "failed":
|
||||
set_clauses.append("finished_at = :finished_at")
|
||||
params["finished_at"] = datetime.now(timezone.utc)
|
||||
elif Status == "processing":
|
||||
set_clauses.append("started_at = COALESCE(started_at, :started_at)")
|
||||
params["started_at"] = datetime.now(timezone.utc)
|
||||
|
||||
async with GetAsyncSession() as session:
|
||||
await session.execute(
|
||||
@@ -74,7 +117,7 @@ class StorageAdapter:
|
||||
|
||||
async def UpdateRunResult(self, RunId: int, Summary: dict[str, Any]) -> None:
|
||||
"""写入 run 结果汇总字段(含 rules_path / structure / outline)。"""
|
||||
rules_path = Summary.get("rulesPath")
|
||||
rules_path = _pick_value(Summary, "rulesPath", "rules_path")
|
||||
set_clauses = [
|
||||
"total_score = :total_score",
|
||||
"passed_count = :passed_count",
|
||||
@@ -86,12 +129,12 @@ class StorageAdapter:
|
||||
]
|
||||
params: dict[str, Any] = {
|
||||
"rid": RunId,
|
||||
"total_score": Summary.get("totalScore"),
|
||||
"passed_count": Summary.get("passedCount", 0),
|
||||
"failed_count": Summary.get("failedCount", 0),
|
||||
"skipped_count": Summary.get("skippedCount", 0),
|
||||
"result_status": Summary.get("resultStatus"),
|
||||
"result_summary_json": Summary.get("resultSummaryJson"),
|
||||
"total_score": _pick_value(Summary, *_RUN_RESULT_FALLBACK_KEYS["total_score"]),
|
||||
"passed_count": _pick_value(Summary, *_RUN_RESULT_FALLBACK_KEYS["passed_count"]) or 0,
|
||||
"failed_count": _pick_value(Summary, *_RUN_RESULT_FALLBACK_KEYS["failed_count"]) or 0,
|
||||
"skipped_count": _pick_value(Summary, *_RUN_RESULT_FALLBACK_KEYS["skipped_count"]) or 0,
|
||||
"result_status": _pick_value(Summary, *_RUN_RESULT_FALLBACK_KEYS["result_status"]),
|
||||
"result_summary_json": _pick_value(Summary, *_RUN_RESULT_FALLBACK_KEYS["result_summary_json"]),
|
||||
}
|
||||
if rules_path:
|
||||
set_clauses.append("rules_path = :rules_path")
|
||||
@@ -137,11 +180,11 @@ class StorageAdapter:
|
||||
(run_id, rule_id, rule_name, severity, category,
|
||||
message, suggestion, actual, expected, evidence,
|
||||
paragraph_index, paragraph_text, location_path,
|
||||
result, score, created_at, updated_at)
|
||||
result, skip_reason, score, created_at, updated_at)
|
||||
VALUES (:run_id, :rule_id, :rule_name, :severity, :category,
|
||||
:message, :suggestion, :actual, :expected, :evidence,
|
||||
:paragraph_index, :paragraph_text, :location_path,
|
||||
:result, :score, now(), now())"""
|
||||
:result, :skip_reason, :score, now(), now())"""
|
||||
),
|
||||
{
|
||||
"run_id": RunId,
|
||||
@@ -151,13 +194,14 @@ class StorageAdapter:
|
||||
"category": row.get("category"),
|
||||
"message": row.get("message"),
|
||||
"suggestion": row.get("suggestion"),
|
||||
"actual": row.get("actual"),
|
||||
"expected": row.get("expected"),
|
||||
"evidence": row.get("evidence"),
|
||||
"actual": _to_text_payload(row.get("actual")),
|
||||
"expected": _to_text_payload(row.get("expected")),
|
||||
"evidence": _to_text_payload(row.get("evidence")),
|
||||
"paragraph_index": row.get("paragraphIndex"),
|
||||
"paragraph_text": row.get("paragraphText"),
|
||||
"location_path": row.get("locationPath"),
|
||||
"result": row.get("result", "pass"),
|
||||
"skip_reason": _pick_value(row, "skipReason", "skip_reason"),
|
||||
"score": row.get("score"),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -11,13 +11,17 @@ from typing import Any
|
||||
from fastapi_common.fastapi_common_logger import logger
|
||||
|
||||
from fastapi_admin.celery_app import celery_app
|
||||
from fastapi_admin.config import (
|
||||
LEAUDIT_WORKER_QUEUE_NORMAL,
|
||||
LEAUDIT_WORKER_QUEUE_URGENT,
|
||||
)
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.runner import GovdocRunner
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_bridge.storage_adapter import StorageAdapter
|
||||
|
||||
log = logger
|
||||
|
||||
GOVDOC_WORKER_QUEUE = "govdoc"
|
||||
GOVDOC_WORKER_QUEUE_URGENT = "govdoc_urgent"
|
||||
GOVDOC_WORKER_QUEUE = LEAUDIT_WORKER_QUEUE_NORMAL
|
||||
GOVDOC_WORKER_QUEUE_URGENT = LEAUDIT_WORKER_QUEUE_URGENT
|
||||
|
||||
|
||||
def resolve_govdoc_queue(speed: str = "normal") -> str:
|
||||
@@ -30,6 +34,7 @@ def resolve_govdoc_queue(speed: str = "normal") -> str:
|
||||
def dispatch_govdoc_task(
|
||||
documentId: int,
|
||||
runId: int,
|
||||
rulesPath: str | None = None,
|
||||
triggerUserId: int | None = None,
|
||||
speed: str = "normal",
|
||||
) -> Any:
|
||||
@@ -52,6 +57,7 @@ def dispatch_govdoc_task(
|
||||
kwargs={
|
||||
"documentId": documentId,
|
||||
"runId": runId,
|
||||
"rulesPath": rulesPath,
|
||||
"triggerUserId": triggerUserId,
|
||||
"speed": speed,
|
||||
},
|
||||
@@ -73,6 +79,7 @@ def govdoc_execute_task(
|
||||
self,
|
||||
documentId: int,
|
||||
runId: int,
|
||||
rulesPath: str | None = None,
|
||||
triggerUserId: int | None = None,
|
||||
speed: str = "normal",
|
||||
) -> dict[str, Any]:
|
||||
@@ -89,7 +96,7 @@ def govdoc_execute_task(
|
||||
|
||||
try:
|
||||
# 更新 run 状态 → running
|
||||
loop.run_until_complete(storage.UpdateRunStatus(runId, "processing", phase="parsing"))
|
||||
loop.run_until_complete(storage.UpdateRunStatus(runId, "processing", Phase="parsing"))
|
||||
|
||||
# 执行完整审查链路
|
||||
runner = GovdocRunner()
|
||||
@@ -97,6 +104,7 @@ def govdoc_execute_task(
|
||||
runner.Execute(
|
||||
DocumentId=documentId,
|
||||
RunId=runId,
|
||||
RulesPath=rulesPath,
|
||||
TriggerUserId=triggerUserId,
|
||||
Speed=speed,
|
||||
)
|
||||
|
||||
@@ -1,30 +1,12 @@
|
||||
"""Govdoc 公文格式审查引擎内核。
|
||||
|
||||
从旧 govdoc-audit 项目裁剪迁入,去除独立 API 层、SQLite 存储层、
|
||||
本地运行记录器 (RunRecorder) 和旧配置系统。
|
||||
|
||||
导出:
|
||||
- pipeline.run() — 异步审查入口 (bridge 层主调用)
|
||||
- pipeline.audit_file() — 同步审查入口 (兼容)
|
||||
- models — 核心数据模型 (Pydantic)
|
||||
- parser — 文档解析与实体抽取
|
||||
- dsl — YAML 规则 DSL 定义与加载
|
||||
- engine — 规则执行引擎与结果模型
|
||||
- reporter — 报告生成 (HTML/DOCX/JSON)
|
||||
- llm — LLM 客户端 (OpenAI 兼容协议)
|
||||
保持包级导入轻量,避免在控制器注册阶段提前拉起 LLM/OpenAI 依赖。
|
||||
真正执行审查时再按需导入 pipeline / result 模块。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import (
|
||||
audit_file,
|
||||
run,
|
||||
)
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import (
|
||||
AuditResult,
|
||||
AuditSummary,
|
||||
CheckedRule,
|
||||
)
|
||||
from typing import Any
|
||||
|
||||
__all__ = [
|
||||
"audit_file",
|
||||
@@ -33,3 +15,31 @@ __all__ = [
|
||||
"AuditSummary",
|
||||
"CheckedRule",
|
||||
]
|
||||
|
||||
|
||||
def audit_file(*args: Any, **kwargs: Any):
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import audit_file as _audit_file
|
||||
|
||||
return _audit_file(*args, **kwargs)
|
||||
|
||||
|
||||
async def run(*args: Any, **kwargs: Any):
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import run as _run
|
||||
|
||||
return await _run(*args, **kwargs)
|
||||
|
||||
|
||||
def __getattr__(name: str):
|
||||
if name in {"AuditResult", "AuditSummary", "CheckedRule"}:
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import (
|
||||
AuditResult,
|
||||
AuditSummary,
|
||||
CheckedRule,
|
||||
)
|
||||
|
||||
return {
|
||||
"AuditResult": AuditResult,
|
||||
"AuditSummary": AuditSummary,
|
||||
"CheckedRule": CheckedRule,
|
||||
}[name]
|
||||
raise AttributeError(name)
|
||||
|
||||
@@ -11,7 +11,22 @@ import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from openai import AsyncOpenAI, OpenAI, APIError, APIConnectionError, RateLimitError
|
||||
try:
|
||||
from openai import AsyncOpenAI, OpenAI, APIError, APIConnectionError, RateLimitError
|
||||
_OPENAI_IMPORT_ERROR: Exception | None = None
|
||||
except ModuleNotFoundError as exc: # pragma: no cover - optional dependency
|
||||
AsyncOpenAI = None # type: ignore[assignment]
|
||||
OpenAI = None # type: ignore[assignment]
|
||||
_OPENAI_IMPORT_ERROR = exc
|
||||
|
||||
class APIError(Exception):
|
||||
status_code: int | None = None
|
||||
|
||||
class APIConnectionError(Exception):
|
||||
pass
|
||||
|
||||
class RateLimitError(Exception):
|
||||
pass
|
||||
|
||||
from fastapi_admin.config import (
|
||||
LLM_API_KEY,
|
||||
@@ -125,7 +140,13 @@ class LlmClient:
|
||||
):
|
||||
key = api_key or LLM_API_KEY
|
||||
self._misconfigured_error: LlmConfigError | None = None
|
||||
if not key:
|
||||
if OpenAI is None or AsyncOpenAI is None:
|
||||
self._client = None
|
||||
self._aclient = None
|
||||
self._misconfigured_error = LlmConfigError(
|
||||
"python package 'openai' is not installed; govdoc LLM features are unavailable."
|
||||
)
|
||||
elif not key:
|
||||
self._client = None
|
||||
self._aclient = None
|
||||
self._misconfigured_error = LlmConfigError(
|
||||
|
||||
@@ -130,12 +130,12 @@ def _merge_llm_into_entities(
|
||||
# ── 实体构建 (同步,供 sync 入口使用) ──────────────────
|
||||
|
||||
def _build_entities(
|
||||
doc, ruleset: RuleSet, llm: LlmClient,
|
||||
doc, ruleset: RuleSet, llm: LlmClient | None,
|
||||
) -> dict[str, SemanticEntity | None]:
|
||||
"""构建实体 + 差量 LLM 抽取(同步)。"""
|
||||
entities = EntityBuilder().build(doc)
|
||||
spec = _compute_missing_spec(entities, ruleset.extract.entities)
|
||||
if spec:
|
||||
if spec and llm is not None:
|
||||
llm_vals = FieldExtractor(llm).extract_missing(doc, spec)
|
||||
_merge_llm_into_entities(entities, llm_vals)
|
||||
return entities
|
||||
@@ -144,12 +144,12 @@ def _build_entities(
|
||||
# ── 实体构建 (异步,供 async 入口使用) ──────────────────
|
||||
|
||||
async def _build_entities_async(
|
||||
doc, ruleset: RuleSet, llm: LlmClient,
|
||||
doc, ruleset: RuleSet, llm: LlmClient | None,
|
||||
) -> dict[str, SemanticEntity | None]:
|
||||
"""构建实体 + 差量 LLM 抽取(异步)。"""
|
||||
entities = EntityBuilder().build(doc)
|
||||
spec = _compute_missing_spec(entities, ruleset.extract.entities)
|
||||
if spec:
|
||||
if spec and llm is not None:
|
||||
llm_vals = await FieldExtractor(llm).extract_missing_async(doc, spec)
|
||||
_merge_llm_into_entities(entities, llm_vals)
|
||||
return entities
|
||||
@@ -174,7 +174,7 @@ def audit_file(
|
||||
"""
|
||||
docx_path = Path(docx_path)
|
||||
rules_path = Path(rules_path)
|
||||
llm = llm_client or LlmClient()
|
||||
llm = llm_client
|
||||
|
||||
doc = parse_docx(docx_path)
|
||||
RoleTagger(llm_client=llm).tag(doc)
|
||||
@@ -210,7 +210,7 @@ async def run(
|
||||
"""
|
||||
file_path = Path(file_path)
|
||||
rules_path = Path(rules_path)
|
||||
llm = llm_client or LlmClient()
|
||||
llm = llm_client
|
||||
|
||||
_log.info("Govdoc pipeline start: %s", file_path.name)
|
||||
|
||||
@@ -219,18 +219,21 @@ async def run(
|
||||
_log.info(" parsed: %d paragraphs", len(doc.paragraphs))
|
||||
|
||||
# 2. 段落角色标注
|
||||
RoleTagger(llm_client=llm).tag(doc)
|
||||
if llm is not None:
|
||||
await RoleTagger(llm_client=llm).tag_async(doc)
|
||||
else:
|
||||
RoleTagger(llm_client=None).tag(doc)
|
||||
|
||||
# 3. 加载规则
|
||||
ruleset = load_rules(rules_path)
|
||||
_log.info(" rules: %d groups, %d rules", len(ruleset.groups), len(ruleset.all_rules()))
|
||||
_log.info(" rules: %d groups, %d rules", len(ruleset.rules), len(ruleset.all_rules()))
|
||||
|
||||
# 4. 实体抽取 (含差量 LLM)
|
||||
entities = await _build_entities_async(doc, ruleset, llm)
|
||||
_log.info(" entities: %d/%d resolved", sum(1 for v in entities.values() if v), len(entities))
|
||||
|
||||
# 5. 规则评估
|
||||
findings, outcomes = RuleRunner(llm_client=llm).evaluate(
|
||||
findings, outcomes = await RuleRunner(llm_client=llm).evaluate_async(
|
||||
ruleset.all_rules(), doc, entities
|
||||
)
|
||||
_log.info(" evaluated: %d findings from %d rules", len(findings), len(outcomes))
|
||||
|
||||
@@ -36,4 +36,5 @@ class GovdocRuleResult(BaseModel):
|
||||
|
||||
# 判定
|
||||
result: Mapped[str] = mapped_column("result", String(32), default="pass", comment="执行结果:pass/fail/skipped/error")
|
||||
skipReason: Mapped[str | None] = mapped_column("skip_reason", Text, comment="跳过原因,仅 skipped/error 时使用")
|
||||
score: Mapped[float | None] = mapped_column("score", Numeric(10, 2), comment="本条得分")
|
||||
|
||||
@@ -31,6 +31,7 @@ class GovdocRun(BaseModel):
|
||||
engineVersion: Mapped[str | None] = mapped_column("engine_version", String(64), comment="引擎版本号")
|
||||
llmProvider: Mapped[str | None] = mapped_column("llm_provider", String(64), comment="LLM 提供商")
|
||||
llmModel: Mapped[str | None] = mapped_column("llm_model", String(128), comment="LLM 模型名")
|
||||
rulesPath: Mapped[str | None] = mapped_column("rules_path", String(1024), comment="本次运行使用的规则文件路径")
|
||||
|
||||
# 结果汇总
|
||||
totalScore: Mapped[float | None] = mapped_column("total_score", Numeric(10, 2), comment="总分")
|
||||
|
||||
@@ -31,6 +31,7 @@ class IGovdocService(ABC):
|
||||
page: int = 1,
|
||||
pageSize: int = 20,
|
||||
keyword: str | None = None,
|
||||
fileExt: str | None = None,
|
||||
region: str | None = None,
|
||||
status: str | None = None,
|
||||
resultStatus: str | None = None,
|
||||
|
||||
@@ -163,6 +163,7 @@ class DocumentServiceImpl(IDocumentService):
|
||||
root_group_id=resolvedRootGroupId,
|
||||
region=normalizedRegion,
|
||||
normalized_name=normalizedName,
|
||||
file_ext=fileExt,
|
||||
)
|
||||
|
||||
internalDocumentNo = time.time_ns()
|
||||
@@ -2712,16 +2713,25 @@ async def _find_latest_version_candidate(
|
||||
root_group_id: int | None,
|
||||
region: str,
|
||||
normalized_name: str,
|
||||
file_ext: str | None = None,
|
||||
) -> dict | None:
|
||||
"""Find the latest primary document version candidate by normalized name.
|
||||
"""Find the latest primary document version candidate by normalized name + extension."""
|
||||
ext_clause = ""
|
||||
ext_params: dict[str, object] = {}
|
||||
if file_ext:
|
||||
ext_clause = " AND f.file_ext = :file_ext"
|
||||
ext_params["file_ext"] = file_ext
|
||||
|
||||
Preferred rule: same region + same root group + same normalized name.
|
||||
Fallback rule: when a root group cannot be resolved, keep the old same-type behavior.
|
||||
"""
|
||||
if root_group_id is not None:
|
||||
params: dict[str, object] = {
|
||||
"root_group_id": root_group_id,
|
||||
"region": region,
|
||||
"normalized_name": normalized_name,
|
||||
**ext_params,
|
||||
}
|
||||
result = await session.execute(
|
||||
text(
|
||||
"""
|
||||
f"""
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
d.version_group_key,
|
||||
@@ -2751,7 +2761,7 @@ async def _find_latest_version_candidate(
|
||||
WHERE d.region = :region
|
||||
AND d.normalized_name = :normalized_name
|
||||
AND d.is_latest_version = true
|
||||
AND d.deleted_at IS NULL
|
||||
AND d.deleted_at IS NULL{ext_clause}
|
||||
AND COALESCE(
|
||||
CASE
|
||||
WHEN eg.id IS NULL THEN NULL
|
||||
@@ -2764,19 +2774,21 @@ async def _find_latest_version_candidate(
|
||||
LIMIT 1
|
||||
"""
|
||||
),
|
||||
{
|
||||
"root_group_id": root_group_id,
|
||||
"region": region,
|
||||
"normalized_name": normalized_name,
|
||||
},
|
||||
params,
|
||||
)
|
||||
row = result.mappings().first()
|
||||
if row:
|
||||
return dict(row)
|
||||
|
||||
params = {
|
||||
"type_id": type_id,
|
||||
"region": region,
|
||||
"normalized_name": normalized_name,
|
||||
**ext_params,
|
||||
}
|
||||
result = await session.execute(
|
||||
text(
|
||||
"""
|
||||
f"""
|
||||
SELECT
|
||||
d.id AS document_id,
|
||||
d.version_group_key,
|
||||
@@ -2791,18 +2803,14 @@ async def _find_latest_version_candidate(
|
||||
AND f.file_role = 'primary'
|
||||
WHERE d.type_id = :type_id
|
||||
AND d.region = :region
|
||||
AND d.normalized_name = :normalized_name
|
||||
AND d.normalized_name = :normalized_name{ext_clause}
|
||||
AND d.is_latest_version = true
|
||||
AND d.deleted_at IS NULL
|
||||
ORDER BY d.version_no DESC, d.id DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
),
|
||||
{
|
||||
"type_id": type_id,
|
||||
"region": region,
|
||||
"normalized_name": normalized_name,
|
||||
},
|
||||
params,
|
||||
)
|
||||
row = result.mappings().first()
|
||||
return dict(row) if row else None
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,546 @@
|
||||
metadata:
|
||||
type_id: govdoc_general
|
||||
name: 内部公文通用规则
|
||||
version: "0.1.0"
|
||||
source: 公文文稿常见错误汇编(第一期)·2025-11
|
||||
description: 基于旧内部公文正式规则语义整理的当前平台规则集。
|
||||
|
||||
extract:
|
||||
# 8 个内置实体(title / doc_number / recipient / date /
|
||||
# signature / attachments / wenzhong / issuer)由代码自动产出。
|
||||
entities: []
|
||||
|
||||
rules:
|
||||
- group: 标题(错误汇编 一)
|
||||
rules:
|
||||
- rule_id: GW-T-001
|
||||
name: 标题文种合规性
|
||||
severity: error
|
||||
category: 标题
|
||||
target: title
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: ai
|
||||
prompt: |
|
||||
审查公文标题是否符合规范。
|
||||
标题:{{title.text}}
|
||||
|
||||
15 种合法文种:决议、决定、命令(令)、公报、公告、通告、意见、
|
||||
通知、通报、报告、请示、批复、议案、函、纪要
|
||||
|
||||
检查要点:
|
||||
1. 是否使用了合法文种
|
||||
2. 方案/规划/办法/细则等是否以"通知"形式下发(应为"关于印发〈xxx〉的通知")
|
||||
3. 标题中是否有"印发"等动词
|
||||
messages:
|
||||
pass: 标题文种合规
|
||||
fail: 标题文种不合规
|
||||
|
||||
- rule_id: GW-T-002
|
||||
name: 标题不可有"请求"+"请示"重复
|
||||
severity: error
|
||||
category: 标题
|
||||
target: title
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '关于请求.*的请示'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: '"请示"已包含"请求"之意,应删去"请求"'
|
||||
|
||||
- rule_id: GW-T-003
|
||||
name: 标题不可有"上报"+"报告"重复
|
||||
severity: error
|
||||
category: 标题
|
||||
target: title
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '关于上报.*的报告'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: '"报告"已包含"上报"之意,应删去"上报"'
|
||||
|
||||
- rule_id: GW-T-004
|
||||
name: 标题介词连用
|
||||
severity: warning
|
||||
category: 标题
|
||||
target: title
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '关于对.*的(批复|通知|通报)'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: '"关于"+"对" 介词连用不规范'
|
||||
|
||||
- rule_id: GW-T-005
|
||||
name: 标题文种白名单
|
||||
severity: error
|
||||
category: 文种
|
||||
target: wenzhong
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: wenzhong_whitelist
|
||||
messages:
|
||||
pass: 文种合规
|
||||
fail: 非法定文种(出现"工作情况""汇报""方案""办法"等当文种)
|
||||
|
||||
- rule_id: GW-T-006
|
||||
name: 标题回行词意完整
|
||||
severity: warning
|
||||
category: 标题
|
||||
target: title
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: ai
|
||||
prompt: |
|
||||
只在标题里**明确出现破词**时才报错。
|
||||
破词示例:「广东省烟草专卖局关于xx的通知」如果在"专"和"卖"之间有换行 → fail
|
||||
其它情况(单行标题、合理换行点、词意完整)→ **必须 pass**
|
||||
|
||||
判断准则:
|
||||
- 标题已经是单行字符串,没有明显断点 → pass
|
||||
- 不要凭直觉揣测,只判断是否能在原文中**逐字定位**破词位置
|
||||
- 找不到具体破词位置就 pass
|
||||
|
||||
标题原文:
|
||||
{{title.text}}
|
||||
messages:
|
||||
pass: 标题回行合规
|
||||
fail: 标题回行破词
|
||||
|
||||
- group: 发文字号(错误汇编 三、六.3)
|
||||
rules:
|
||||
- rule_id: GW-N-001
|
||||
name: 发文字号必须用六角括号
|
||||
severity: error
|
||||
category: 发文
|
||||
target: doc_number
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: forbid_chars
|
||||
chars: ["[", "]"]
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 发文字号年份应用六角括号「〔〕」,不得使用方括号
|
||||
|
||||
- rule_id: GW-N-002
|
||||
name: 发文字号不可加"第"字
|
||||
severity: error
|
||||
category: 发文
|
||||
target: doc_number
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '〔\d{4}〕第\d+号'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 发文字号顺序号前不应加"第"字
|
||||
|
||||
- rule_id: GW-N-003
|
||||
name: 发文字号顺序号不编虚位
|
||||
severity: error
|
||||
category: 发文
|
||||
target: doc_number
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '〔\d{4}〕0\d+号'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 发文字号顺序号不编虚位(如"02号"应为"2号")
|
||||
|
||||
- group: 格式(错误汇编 二)
|
||||
rules:
|
||||
- rule_id: GW-F-001
|
||||
name: 主标题用方正小标宋简体二号
|
||||
severity: error
|
||||
category: 格式
|
||||
target: title
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: font
|
||||
expect:
|
||||
eastasia: 方正小标宋简体
|
||||
size_pt: 22
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 主标题应使用方正小标宋简体二号
|
||||
|
||||
- rule_id: GW-F-002
|
||||
name: 一级标题用黑体三号
|
||||
severity: error
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: heading_1
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: font
|
||||
expect:
|
||||
eastasia: 黑体
|
||||
size_pt: 16
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 一级标题应使用黑体三号
|
||||
|
||||
- rule_id: GW-F-003
|
||||
name: 二级标题用楷体三号
|
||||
severity: error
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: heading_2
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: font
|
||||
expect:
|
||||
eastasia: 楷体
|
||||
size_pt: 16
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 二级标题应使用楷体三号
|
||||
|
||||
- rule_id: GW-F-004
|
||||
name: 正文用仿宋三号
|
||||
severity: warning
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: body
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: font
|
||||
expect:
|
||||
eastasia: 仿宋
|
||||
size_pt: 16
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 正文应使用仿宋(GB2312)三号
|
||||
|
||||
- rule_id: GW-F-005
|
||||
name: 附件后不加冒号
|
||||
severity: error
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: attachment_marker
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '^附件\d+:'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: '"附件1"等字样后不应加冒号'
|
||||
|
||||
- rule_id: GW-F-006
|
||||
name: 不使用"(此页无正文)"
|
||||
severity: warning
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: any
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: forbid_phrase
|
||||
phrases:
|
||||
- (此页无正文)
|
||||
- (此页无正文)
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 应通过编辑排版避免出现"(此页无正文)"
|
||||
|
||||
- rule_id: GW-F-007
|
||||
name: 附件项末尾不加标点
|
||||
severity: warning
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: any
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: cross_role
|
||||
rules:
|
||||
- type: attachment_item_no_trailing_punct
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 附件名称(内容)后不应使用标点符号
|
||||
|
||||
- rule_id: GW-F-008
|
||||
name: 三级标题用仿宋三号
|
||||
severity: warning
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: heading_3
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: font
|
||||
expect:
|
||||
eastasia: 仿宋
|
||||
size_pt: 16
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 三级标题应使用仿宋(GB2312)三号
|
||||
|
||||
- rule_id: GW-F-009
|
||||
name: 四级标题用仿宋三号
|
||||
severity: warning
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: heading_4
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: font
|
||||
expect:
|
||||
eastasia: 仿宋
|
||||
size_pt: 16
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 四级标题应使用仿宋(GB2312)三号
|
||||
|
||||
- rule_id: GW-F-010
|
||||
name: 附件标记用黑体三号不加粗
|
||||
severity: error
|
||||
category: 格式
|
||||
applies_to:
|
||||
role: attachment_marker
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: attachment_marker_style
|
||||
expect:
|
||||
eastasia: 黑体
|
||||
size_pt: 16
|
||||
bold: false
|
||||
messages:
|
||||
pass: ok
|
||||
fail: '"附件:"或"附件1"等标记应使用黑体三号,且不加粗'
|
||||
|
||||
- group: 层级序号(错误汇编 四)
|
||||
rules:
|
||||
- rule_id: GW-H-001
|
||||
name: 层级序号格式
|
||||
severity: error
|
||||
category: 层级
|
||||
applies_to:
|
||||
role: any
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: hierarchy
|
||||
forbid_patterns:
|
||||
- '^[一二三四五六七八九十]+、.*[、。]$'
|
||||
- '^\d+、'
|
||||
- '^([一二三四五六七八九十]+)、'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 层级序号格式错误
|
||||
|
||||
- rule_id: GW-H-002
|
||||
name: 二级标题换行不带句号
|
||||
severity: warning
|
||||
category: 层级
|
||||
applies_to:
|
||||
role: heading_2
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: cross_role
|
||||
rules:
|
||||
- type: h2_no_period_then_break
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 二级标题在换行分段时不应使用句号
|
||||
|
||||
- group: 标点符号(错误汇编 六)
|
||||
rules:
|
||||
- rule_id: GW-P-001
|
||||
name: 多书名号/引号并列不加顿号
|
||||
severity: warning
|
||||
category: 标点
|
||||
applies_to:
|
||||
role: any
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: punctuation
|
||||
rules:
|
||||
- type: no_dunhao_between_quotes
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 多个书名号/引号并列时不应用顿号分隔
|
||||
|
||||
- rule_id: GW-P-002
|
||||
name: 句内括号末尾不加标点
|
||||
severity: warning
|
||||
category: 标点
|
||||
applies_to:
|
||||
role: any
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: punctuation
|
||||
rules:
|
||||
- type: no_punct_inside_inline_paren
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 句内括号行文末尾通常不应含标点
|
||||
|
||||
- rule_id: GW-P-003
|
||||
name: 引号嵌套不规范
|
||||
severity: warning
|
||||
category: 标点
|
||||
applies_to:
|
||||
role: any
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: punctuation
|
||||
rules:
|
||||
- type: no_outer_quote_when_inner_quote
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 双引号内已含单引号强调时,外层不应再加双引号(如"卓'粤'创一流"应为 卓"粤"创一流)
|
||||
|
||||
- group: 文字表述与提法(错误汇编 七、八、九)
|
||||
rules:
|
||||
- rule_id: GW-W-001
|
||||
name: 易混淆词使用
|
||||
severity: warning
|
||||
category: 文字
|
||||
applies_to:
|
||||
role: any
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: confused_pair
|
||||
pairs:
|
||||
- wrong: 截至到
|
||||
correct: 截止到
|
||||
reason: '"截至" 已含"到"之意'
|
||||
- wrong: 下称
|
||||
correct: 以下简称
|
||||
reason: 标注简称应用"以下简称"
|
||||
- wrong_pattern: '截止\d{4}年'
|
||||
suggest: 截至YYYY年
|
||||
reason: 用于到某时点应为"截至"
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 易混淆词使用不当
|
||||
|
||||
- rule_id: GW-W-002
|
||||
name: 简称使用规范
|
||||
severity: warning
|
||||
category: 简称
|
||||
applies_to:
|
||||
role: body
|
||||
on_missing: skip
|
||||
stages:
|
||||
- check: ai
|
||||
prompt: |
|
||||
只在文中出现以下两种省级职务简称错误时才报错,否则一律 pass:
|
||||
- "X省省委书记" 错误(应为 "X省委书记",省字不重复)
|
||||
- "X省长" 错误(应为 "X省省长",省字不可省略)
|
||||
|
||||
若文中没有"省委书记"或"省长"等省级职务字样,**必须 pass**。
|
||||
若不能在文中找到准确的错误原文,**必须 pass**。
|
||||
不要做语气、措辞、其它简称的检查。
|
||||
|
||||
全文片段:
|
||||
{{paragraphs[0]}}
|
||||
messages:
|
||||
pass: 简称规范
|
||||
fail: 简称使用不规范
|
||||
|
||||
- rule_id: GW-W-003
|
||||
name: 成文日期用阿拉伯数字
|
||||
severity: error
|
||||
category: 提法
|
||||
target: date
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '[一二三四五六七八九十○〇零]+年'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 成文日期应使用阿拉伯数字(如"2023年10月9日")
|
||||
|
||||
- rule_id: GW-W-004
|
||||
name: 成文日期不编虚位
|
||||
severity: warning
|
||||
category: 提法
|
||||
target: date
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: regex_forbid
|
||||
pattern: '\d{4}年0\d月|\d{4}年\d{1,2}月0\d日'
|
||||
messages:
|
||||
pass: ok
|
||||
fail: 成文日期月、日不编虚位
|
||||
|
||||
- group: 发文机关(错误汇编 十)
|
||||
rules:
|
||||
- rule_id: GW-S-001
|
||||
name: 发文机关署名不能用简称
|
||||
severity: error
|
||||
category: 机关
|
||||
target: signature
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: ai
|
||||
prompt: |
|
||||
判断署名是否含**明确的简称错误**。
|
||||
典型错误:
|
||||
- "广东省烟草专卖局(公司)" — 用括号缩短两个机关 → 错
|
||||
- "省局" / "粤烟" 等单独缩写 → 错
|
||||
典型正确:
|
||||
- "广东省烟草专卖局" 单独出现 → pass(即使可能存在配套总公司,但单独存在不算简称)
|
||||
- "广东省烟草专卖局 中国烟草总公司广东省公司" → pass
|
||||
|
||||
判断准则:
|
||||
- 若署名是一个完整、官方、可独立成立的机关名 → **必须 pass**
|
||||
- 若署名带"(公司)"、"省局"、明显缩写、行业内部代号 → fail
|
||||
|
||||
署名原文:
|
||||
{{signature.text}}
|
||||
messages:
|
||||
pass: 署名规范
|
||||
fail: 发文机关署名使用了简称
|
||||
|
||||
- rule_id: GW-S-002
|
||||
name: 发文机关确定严谨性
|
||||
severity: warning
|
||||
category: 机关
|
||||
target: signature
|
||||
on_missing: fail
|
||||
stages:
|
||||
- check: ai
|
||||
prompt: |
|
||||
只判断**这一个明确条件**:
|
||||
- 标题或正文里明确涉及"党组""党的xx工作""组织部""纪委"等党务事项,
|
||||
但署名是行政机关(局/公司/委员会等),未署"党组"或党务机构 → fail
|
||||
- 其它情况(行政事务、缺乏证据、性质模糊)→ **必须 pass**
|
||||
|
||||
判断时需要看到**明确的党务关键词**(党组/党委/党的xx会议/党风/反腐倡廉等),
|
||||
没有这些关键词就 pass。
|
||||
|
||||
署名原文:{{signature.text}}
|
||||
标题:{{title.text}}
|
||||
messages:
|
||||
pass: 发文机关一致
|
||||
fail: 发文机关与文稿性质不一致
|
||||
|
||||
- group: 标题字体(target 通道示例)
|
||||
rules:
|
||||
- rule_id: GW-T-008
|
||||
name: 标题字体(语义实体通道)
|
||||
severity: warning
|
||||
category: 标题
|
||||
target: title
|
||||
on_missing: warn
|
||||
stages:
|
||||
- check: ai
|
||||
prompt: |
|
||||
判断公文标题的字体与字号是否合规。
|
||||
要求:字体 = 方正小标宋简体;字号 = 22pt(或 22.0)。
|
||||
|
||||
实际:
|
||||
- 标题:{{title.text}}
|
||||
- 字体:{{title.style.font_eastasia}}
|
||||
- 字号:{{title.style.font_size_pt}}pt
|
||||
|
||||
若实际字体为空或与要求一致 → pass
|
||||
若字体明显不符(例如 仿宋/楷体/黑体)→ fail
|
||||
若仅字号轻微差异 → warn
|
||||
messages:
|
||||
pass: 标题字体字号合规
|
||||
fail: 标题字体或字号不符合 GB/T 9704
|
||||
@@ -33,6 +33,7 @@ CREATE TABLE IF NOT EXISTS public.govdoc_runs (
|
||||
engine_version VARCHAR(64),
|
||||
llm_provider VARCHAR(64),
|
||||
llm_model VARCHAR(128),
|
||||
rules_path VARCHAR(1024),
|
||||
|
||||
-- 结果汇总
|
||||
total_score NUMERIC(10, 2),
|
||||
@@ -52,6 +53,9 @@ CREATE TABLE IF NOT EXISTS public.govdoc_runs (
|
||||
deleted_at TIMESTAMPTZ DEFAULT NULL
|
||||
);
|
||||
|
||||
ALTER TABLE public.govdoc_runs
|
||||
ADD COLUMN IF NOT EXISTS rules_path VARCHAR(1024);
|
||||
|
||||
COMMENT ON TABLE public.govdoc_runs IS '公文审查运行主表';
|
||||
COMMENT ON COLUMN public.govdoc_runs.id IS '自增主键';
|
||||
COMMENT ON COLUMN public.govdoc_runs.document_id IS '关联 leaudit_documents.id';
|
||||
@@ -65,6 +69,7 @@ COMMENT ON COLUMN public.govdoc_runs.phase IS '当前阶段:parsing/executing/
|
||||
COMMENT ON COLUMN public.govdoc_runs.engine_version IS '引擎版本号';
|
||||
COMMENT ON COLUMN public.govdoc_runs.llm_provider IS 'LLM 提供商';
|
||||
COMMENT ON COLUMN public.govdoc_runs.llm_model IS 'LLM 模型名';
|
||||
COMMENT ON COLUMN public.govdoc_runs.rules_path IS '本次运行使用的规则文件路径';
|
||||
COMMENT ON COLUMN public.govdoc_runs.total_score IS '总分';
|
||||
COMMENT ON COLUMN public.govdoc_runs.passed_count IS '通过规则数';
|
||||
COMMENT ON COLUMN public.govdoc_runs.failed_count IS '未通过规则数';
|
||||
@@ -106,6 +111,7 @@ CREATE TABLE IF NOT EXISTS public.govdoc_rule_results (
|
||||
|
||||
-- 判定
|
||||
result VARCHAR(32) NOT NULL DEFAULT 'pass',
|
||||
skip_reason TEXT,
|
||||
score NUMERIC(10, 2),
|
||||
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||
@@ -113,6 +119,9 @@ CREATE TABLE IF NOT EXISTS public.govdoc_rule_results (
|
||||
deleted_at TIMESTAMPTZ DEFAULT NULL
|
||||
);
|
||||
|
||||
ALTER TABLE public.govdoc_rule_results
|
||||
ADD COLUMN IF NOT EXISTS skip_reason TEXT;
|
||||
|
||||
COMMENT ON TABLE public.govdoc_rule_results IS '公文规则执行结果明细表';
|
||||
COMMENT ON COLUMN public.govdoc_rule_results.id IS '自增主键';
|
||||
COMMENT ON COLUMN public.govdoc_rule_results.run_id IS '关联 govdoc_runs.id';
|
||||
@@ -129,6 +138,7 @@ COMMENT ON COLUMN public.govdoc_rule_results.paragraph_index IS '段落索引';
|
||||
COMMENT ON COLUMN public.govdoc_rule_results.paragraph_text IS '段落原文';
|
||||
COMMENT ON COLUMN public.govdoc_rule_results.location_path IS '文档结构位置路径';
|
||||
COMMENT ON COLUMN public.govdoc_rule_results.result IS '执行结果:pass/fail/skipped/error';
|
||||
COMMENT ON COLUMN public.govdoc_rule_results.skip_reason IS '跳过原因,仅 skipped/error 时使用';
|
||||
COMMENT ON COLUMN public.govdoc_rule_results.score IS '本条得分';
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_govdoc_rule_results_run_id ON public.govdoc_rule_results(run_id) WHERE deleted_at IS NULL;
|
||||
@@ -195,4 +205,4 @@ END $$;
|
||||
-- 为 engine_type 加索引,方便按模块过滤文档列表
|
||||
CREATE INDEX IF NOT EXISTS idx_leaudit_documents_engine_type ON public.leaudit_documents(engine_type) WHERE deleted_at IS NULL;
|
||||
|
||||
COMMIT;
|
||||
COMMIT;
|
||||
|
||||
Reference in New Issue
Block a user