feat: integrate govdoc module into leaudit platform
This commit is contained in:
@@ -1,30 +1,12 @@
|
||||
"""Govdoc 公文格式审查引擎内核。
|
||||
|
||||
从旧 govdoc-audit 项目裁剪迁入,去除独立 API 层、SQLite 存储层、
|
||||
本地运行记录器 (RunRecorder) 和旧配置系统。
|
||||
|
||||
导出:
|
||||
- pipeline.run() — 异步审查入口 (bridge 层主调用)
|
||||
- pipeline.audit_file() — 同步审查入口 (兼容)
|
||||
- models — 核心数据模型 (Pydantic)
|
||||
- parser — 文档解析与实体抽取
|
||||
- dsl — YAML 规则 DSL 定义与加载
|
||||
- engine — 规则执行引擎与结果模型
|
||||
- reporter — 报告生成 (HTML/DOCX/JSON)
|
||||
- llm — LLM 客户端 (OpenAI 兼容协议)
|
||||
保持包级导入轻量,避免在控制器注册阶段提前拉起 LLM/OpenAI 依赖。
|
||||
真正执行审查时再按需导入 pipeline / result 模块。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import (
|
||||
audit_file,
|
||||
run,
|
||||
)
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import (
|
||||
AuditResult,
|
||||
AuditSummary,
|
||||
CheckedRule,
|
||||
)
|
||||
from typing import Any
|
||||
|
||||
__all__ = [
|
||||
"audit_file",
|
||||
@@ -33,3 +15,31 @@ __all__ = [
|
||||
"AuditSummary",
|
||||
"CheckedRule",
|
||||
]
|
||||
|
||||
|
||||
def audit_file(*args: Any, **kwargs: Any):
    """Forward to the pipeline's synchronous ``audit_file`` entry point.

    The pipeline module is imported inside the call rather than at package
    import time, so importing this package does not pull in the pipeline
    (and whatever it imports) until an audit is actually requested.

    All positional and keyword arguments are passed through unchanged, and
    the pipeline function's return value is returned as-is.
    """
    # Deferred import: keeps package-level import lightweight.
    from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import (
        audit_file as _pipeline_audit_file,
    )

    outcome = _pipeline_audit_file(*args, **kwargs)
    return outcome
|
||||
|
||||
|
||||
async def run(*args: Any, **kwargs: Any):
    """Forward to the pipeline's asynchronous ``run`` entry point.

    The pipeline module is imported inside the call rather than at package
    import time, so importing this package does not pull in the pipeline
    (and whatever it imports) until an audit is actually requested.

    All positional and keyword arguments are passed through unchanged, and
    the awaited result of the pipeline coroutine is returned as-is.
    """
    # Deferred import: keeps package-level import lightweight.
    from fastapi_modules.fastapi_leaudit.govdoc_engine.pipeline import (
        run as _pipeline_run,
    )

    outcome = await _pipeline_run(*args, **kwargs)
    return outcome
|
||||
|
||||
|
||||
def __getattr__(name: str):
    """Lazily resolve the result-model exports on attribute access.

    Module-level ``__getattr__`` hook: it is consulted only when normal
    module attribute lookup fails. For ``AuditResult``, ``AuditSummary`` and
    ``CheckedRule`` the ``engine.result`` module is imported on demand and
    the requested class is returned.

    Args:
        name: The attribute name being looked up on this package.

    Returns:
        The class from ``engine.result`` that matches ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
            The message follows the standard "module ... has no attribute
            ..." wording so tracebacks identify the module involved.
    """
    if name not in {"AuditResult", "AuditSummary", "CheckedRule"}:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Deferred import: only pay for the result module when it is requested.
    from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import (
        AuditResult,
        AuditSummary,
        CheckedRule,
    )

    return {
        "AuditResult": AuditResult,
        "AuditSummary": AuditSummary,
        "CheckedRule": CheckedRule,
    }[name]
|
||||
|
||||
@@ -11,7 +11,22 @@ import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from openai import AsyncOpenAI, OpenAI, APIError, APIConnectionError, RateLimitError
|
||||
# Holds the import failure when the optional 'openai' package is missing,
# or None when the import succeeded.
_OPENAI_IMPORT_ERROR: Exception | None

try:
    from openai import AsyncOpenAI, OpenAI, APIError, APIConnectionError, RateLimitError
except ModuleNotFoundError as missing:  # pragma: no cover - optional dependency
    _OPENAI_IMPORT_ERROR = missing

    # Client classes are unavailable; code that needs them checks for None.
    OpenAI = None  # type: ignore[assignment]
    AsyncOpenAI = None  # type: ignore[assignment]

    # Local stand-ins so references to these exception names elsewhere in
    # the module still resolve when 'openai' is absent.
    class APIError(Exception):
        """Stand-in for ``openai.APIError`` when the package is missing."""

        # NOTE(review): presumably present so code reading ``.status_code``
        # keeps working against the stand-in - confirm against handlers.
        status_code: int | None = None

    class APIConnectionError(Exception):
        """Stand-in for ``openai.APIConnectionError`` when the package is missing."""

    class RateLimitError(Exception):
        """Stand-in for ``openai.RateLimitError`` when the package is missing."""
else:
    _OPENAI_IMPORT_ERROR = None
|
||||
|
||||
from fastapi_admin.config import (
|
||||
LLM_API_KEY,
|
||||
@@ -125,7 +140,13 @@ class LlmClient:
|
||||
):
|
||||
key = api_key or LLM_API_KEY
|
||||
self._misconfigured_error: LlmConfigError | None = None
|
||||
if not key:
|
||||
if OpenAI is None or AsyncOpenAI is None:
|
||||
self._client = None
|
||||
self._aclient = None
|
||||
self._misconfigured_error = LlmConfigError(
|
||||
"python package 'openai' is not installed; govdoc LLM features are unavailable."
|
||||
)
|
||||
elif not key:
|
||||
self._client = None
|
||||
self._aclient = None
|
||||
self._misconfigured_error = LlmConfigError(
|
||||
|
||||
@@ -130,12 +130,12 @@ def _merge_llm_into_entities(
|
||||
# ── 实体构建 (同步,供 sync 入口使用) ──────────────────
|
||||
|
||||
def _build_entities(
    doc, ruleset: RuleSet, llm: LlmClient | None,
) -> dict[str, SemanticEntity | None]:
    """Build entities, then fill gaps via LLM extraction (synchronous).

    Entities are first built from the document with ``EntityBuilder``. The
    entity definitions in ``ruleset.extract.entities`` are then compared
    against that result by ``_compute_missing_spec``. If it reports anything
    and an LLM client was supplied, ``FieldExtractor.extract_missing`` is
    called for that spec and its values are merged into the entity mapping.

    Args:
        doc: The parsed document to extract entities from.
        ruleset: Rule set whose ``extract.entities`` drives the missing-spec
            computation.
        llm: LLM client used for the extraction step, or ``None`` to skip
            LLM extraction entirely.

    Returns:
        The entity mapping produced by ``EntityBuilder``, updated with any
        LLM-extracted values.
    """
    entities = EntityBuilder().build(doc)
    spec = _compute_missing_spec(entities, ruleset.extract.entities)
    # Without an LLM client the builder's result is returned as-is.
    if spec and llm is not None:
        llm_vals = FieldExtractor(llm).extract_missing(doc, spec)
        _merge_llm_into_entities(entities, llm_vals)
    return entities
|
||||
@@ -144,12 +144,12 @@ def _build_entities(
|
||||
# ── 实体构建 (异步,供 async 入口使用) ──────────────────
|
||||
|
||||
async def _build_entities_async(
    doc, ruleset: RuleSet, llm: LlmClient | None,
) -> dict[str, SemanticEntity | None]:
    """Build entities, then fill gaps via LLM extraction (asynchronous).

    Entities are first built from the document with ``EntityBuilder``. The
    entity definitions in ``ruleset.extract.entities`` are then compared
    against that result by ``_compute_missing_spec``. If it reports anything
    and an LLM client was supplied, ``FieldExtractor.extract_missing_async``
    is awaited for that spec and its values are merged into the entity
    mapping.

    Args:
        doc: The parsed document to extract entities from.
        ruleset: Rule set whose ``extract.entities`` drives the missing-spec
            computation.
        llm: LLM client used for the extraction step, or ``None`` to skip
            LLM extraction entirely.

    Returns:
        The entity mapping produced by ``EntityBuilder``, updated with any
        LLM-extracted values.
    """
    entities = EntityBuilder().build(doc)
    spec = _compute_missing_spec(entities, ruleset.extract.entities)
    # Without an LLM client the builder's result is returned as-is.
    if spec and llm is not None:
        llm_vals = await FieldExtractor(llm).extract_missing_async(doc, spec)
        _merge_llm_into_entities(entities, llm_vals)
    return entities
|
||||
@@ -174,7 +174,7 @@ def audit_file(
|
||||
"""
|
||||
docx_path = Path(docx_path)
|
||||
rules_path = Path(rules_path)
|
||||
llm = llm_client or LlmClient()
|
||||
llm = llm_client
|
||||
|
||||
doc = parse_docx(docx_path)
|
||||
RoleTagger(llm_client=llm).tag(doc)
|
||||
@@ -210,7 +210,7 @@ async def run(
|
||||
"""
|
||||
file_path = Path(file_path)
|
||||
rules_path = Path(rules_path)
|
||||
llm = llm_client or LlmClient()
|
||||
llm = llm_client
|
||||
|
||||
_log.info("Govdoc pipeline start: %s", file_path.name)
|
||||
|
||||
@@ -219,18 +219,21 @@ async def run(
|
||||
_log.info(" parsed: %d paragraphs", len(doc.paragraphs))
|
||||
|
||||
# 2. 段落角色标注
|
||||
RoleTagger(llm_client=llm).tag(doc)
|
||||
if llm is not None:
|
||||
await RoleTagger(llm_client=llm).tag_async(doc)
|
||||
else:
|
||||
RoleTagger(llm_client=None).tag(doc)
|
||||
|
||||
# 3. 加载规则
|
||||
ruleset = load_rules(rules_path)
|
||||
_log.info(" rules: %d groups, %d rules", len(ruleset.groups), len(ruleset.all_rules()))
|
||||
_log.info(" rules: %d groups, %d rules", len(ruleset.rules), len(ruleset.all_rules()))
|
||||
|
||||
# 4. 实体抽取 (含差量 LLM)
|
||||
entities = await _build_entities_async(doc, ruleset, llm)
|
||||
_log.info(" entities: %d/%d resolved", sum(1 for v in entities.values() if v), len(entities))
|
||||
|
||||
# 5. 规则评估
|
||||
findings, outcomes = RuleRunner(llm_client=llm).evaluate(
|
||||
findings, outcomes = await RuleRunner(llm_client=llm).evaluate_async(
|
||||
ruleset.all_rules(), doc, entities
|
||||
)
|
||||
_log.info(" evaluated: %d findings from %d rules", len(findings), len(outcomes))
|
||||
|
||||
Reference in New Issue
Block a user