feat(govdoc): 新增内部公文模块全链路(后端58+前端11文件)
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
"""Check 原语注册中心:通过 register 装饰器收集,runner 通过 get 查找。"""
|
||||
|
||||
from __future__ import annotations
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase
|
||||
|
||||
# Maps each registered check name to the CheckBase subclass registered under it.
_REGISTRY: dict[str, type[CheckBase]] = {}
|
||||
|
||||
|
||||
def register(name: str):
    """Build a class decorator that files the decorated class under ``name``.

    The decorator sets the class's ``name`` attribute to ``name``, stores the
    class in ``_REGISTRY`` (replacing any entry already stored under that
    name) and returns the class itself.
    """

    def _bind(check_cls):
        setattr(check_cls, "name", name)
        _REGISTRY.update({name: check_cls})
        return check_cls

    return _bind
|
||||
|
||||
|
||||
def get_check(name: str) -> type[CheckBase]:
    """Return the check class registered under ``name``.

    Raises:
        KeyError: when nothing is registered under ``name``; the message
            lists every name currently present in the registry.
    """
    try:
        return _REGISTRY[name]
    except KeyError:
        raise KeyError(f"unknown check: {name}; known: {list(_REGISTRY)}") from None
|
||||
|
||||
|
||||
def all_checks() -> list[str]:
    """Return the registered check names as a new list, in the registry's insertion order."""
    return [check_name for check_name in _REGISTRY]
|
||||
@@ -0,0 +1,151 @@
|
||||
"""LLM 语义检查。三级输出:pass / warn / fail。"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Any
|
||||
from pydantic import BaseModel
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
|
||||
CheckBase, CheckContext, CheckHit, CheckResult,
|
||||
)
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.llm.client import LlmClient, LlmJsonError, _format_exc
|
||||
|
||||
# Module-level logger used for the "AI check skipped" warnings.
_log = logging.getLogger(__name__)


# Output-format instruction appended to every rendered prompt: asks the model
# for a JSON object with "result" (pass|warn|fail), "reason" and "suggestion".
_OUT_FORMAT = """
请以 JSON 输出:
{"result": "pass|warn|fail", "reason": "<简短理由>", "suggestion": "<改进建议;pass 时填空>"}
"""

# Matches a ``{{ key }}`` placeholder in a prompt template; group 1 is the key.
_VAR_RE = re.compile(r"\{\{\s*([^}]+?)\s*\}\}")
|
||||
|
||||
|
||||
def _resolve_dot_path(root: Any, path: str) -> str:
|
||||
"""点语法属性访问:title.style.font_eastasia → entities['title'].style.font_eastasia。"""
|
||||
cur: Any = root
|
||||
for seg in path.split("."):
|
||||
if cur is None:
|
||||
return ""
|
||||
if isinstance(cur, dict):
|
||||
cur = cur.get(seg)
|
||||
elif isinstance(cur, BaseModel):
|
||||
cur = getattr(cur, seg, None)
|
||||
else:
|
||||
cur = getattr(cur, seg, None)
|
||||
if cur is None:
|
||||
return ""
|
||||
if isinstance(cur, (dict, list)):
|
||||
return str(cur)
|
||||
return str(cur)
|
||||
|
||||
|
||||
def _interpolate(
|
||||
template: str,
|
||||
paragraphs: list,
|
||||
entities: dict | None = None,
|
||||
target: Any = None,
|
||||
) -> str:
|
||||
"""渲染顺序:① paragraphs[N] ② entities 点语法
|
||||
③ target 隐式(无前缀时视为 target.<key>)。"""
|
||||
entities = entities or {}
|
||||
|
||||
def repl(m):
|
||||
key = m.group(1).strip()
|
||||
# ① paragraphs[N] 索引
|
||||
if key.startswith("paragraphs["):
|
||||
try:
|
||||
idx = int(key[len("paragraphs["):].rstrip("]"))
|
||||
return paragraphs[idx].text
|
||||
except (ValueError, IndexError):
|
||||
return ""
|
||||
# ② entities 点语法:title.text / title.style.font_eastasia
|
||||
head, _, rest = key.partition(".")
|
||||
if head in entities:
|
||||
entity = entities[head]
|
||||
if entity is None:
|
||||
return ""
|
||||
return _resolve_dot_path(entity, rest) if rest else entity.text
|
||||
# ③ target 隐式:未带前缀且 target 存在
|
||||
if target is not None:
|
||||
v = _resolve_dot_path(target, key)
|
||||
if v:
|
||||
return v
|
||||
return ""
|
||||
|
||||
return _VAR_RE.sub(repl, template)
|
||||
|
||||
|
||||
@register("ai")
class AiCheck(CheckBase):
    """LLM-backed semantic check with three verdicts: pass / warn / fail.

    The stage prompt is rendered with ``_interpolate``, sent to the LLM as a
    single user message, and the JSON reply is turned into a ``CheckResult``.
    If building the prompt, calling the LLM or parsing its reply raises, the
    check is reported as skipped (``passed=True, skipped=True``) rather than
    propagating the error.
    """

    def __init__(self, llm_client: LlmClient | None = None):
        """Use ``llm_client`` when given, otherwise construct a default ``LlmClient``."""
        self.client = llm_client or LlmClient()

    def _build_prompt(self, ctx: CheckContext) -> str:
        """Render the stage prompt and append the JSON output-format instruction."""
        prompt = _interpolate(
            ctx.stage.prompt or "",
            ctx.paragraphs,
            ctx.entities,
            ctx.target,
        )
        return prompt + "\n\n" + _OUT_FORMAT

    def _messages(self, ctx: CheckContext) -> list:
        """Return the single-message chat payload shared by ``run`` and ``run_async``."""
        return [{"role": "user", "content": self._build_prompt(ctx)}]

    @staticmethod
    def _skip_result(exc: Exception) -> CheckResult:
        """Log ``exc`` and return the skipped result shared by ``run`` and ``run_async``."""
        if isinstance(exc, LlmJsonError):
            _log.warning("AI check skipped (LLM JSON error): %s", _format_exc(exc))
            reason = f"LLM 返回内容无法解析为 JSON:{exc}"
        else:
            _log.warning("AI check skipped (LLM error): %s", _format_exc(exc))
            reason = f"LLM 调用失败:{exc}"
        return CheckResult(passed=True, hits=[], skipped=True, skip_reason=reason)

    def _interpret(self, ctx: CheckContext, resp: dict) -> CheckResult:
        """Map the LLM's JSON reply to a ``CheckResult``.

        ``"pass"`` yields a passing result. Any other verdict yields a single
        hit anchored on the first paragraph (when there is one), with
        confidence 0.95 for ``"fail"`` and 0.7 for everything else.
        """
        # Models do not reliably keep the requested casing/spacing, so
        # normalise the verdict; otherwise "Pass" would be reported as a hit.
        result = str(resp.get("result", "fail")).strip().lower()
        reason = resp.get("reason", "")
        suggestion = resp.get("suggestion", "")
        if result == "pass":
            return CheckResult(passed=True, hits=[])
        target_p = ctx.paragraphs[0] if ctx.paragraphs else None
        confidence = 0.95 if result == "fail" else 0.7
        return CheckResult(passed=False, hits=[CheckHit(
            paragraph=target_p,
            char_start=0,
            char_end=len(target_p.text) if target_p else 0,
            actual={"llm_reason": reason, "llm_suggestion": suggestion},
            expected={},
            message=reason or "LLM 判定不通过",
            confidence=confidence,
        )])

    def run(self, ctx: CheckContext) -> CheckResult:
        """Run the check through the client's blocking ``chat_json`` call."""
        label = f"ai_{ctx.rule_id or 'unknown'}"
        try:
            resp = self.client.chat_json(self._messages(ctx), label=label)
        except Exception as exc:  # deliberate best-effort: a broken LLM must not fail the audit
            return self._skip_result(exc)
        return self._interpret(ctx, resp)

    async def run_async(self, ctx: CheckContext) -> CheckResult:
        """Run the check through the client's awaitable ``chat_json_async`` call."""
        label = f"ai_{ctx.rule_id or 'unknown'}"
        try:
            resp = await self.client.chat_json_async(self._messages(ctx), label=label)
        except Exception as exc:  # deliberate best-effort: a broken LLM must not fail the audit
            return self._skip_result(exc)
        return self._interpret(ctx, resp)
|
||||
@@ -0,0 +1,48 @@
|
||||
"""Check 原语基类与上下文。"""
|
||||
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, TYPE_CHECKING
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Paragraph
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.schema import RuleStage
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.entities import SemanticEntity
|
||||
|
||||
|
||||
@dataclass
class CheckContext:
    """Inputs handed to a check's ``run`` call."""

    # Document being checked.
    document: Document
    # Paragraphs this check invocation iterates over.
    paragraphs: list[Paragraph]
    # Rule stage that carries the check's parameters.
    stage: RuleStage
    # Semantic entities by name; a value may be None.
    entities: dict[str, "SemanticEntity | None"] = field(default_factory=dict)
    # Entity the rule is aimed at, if any.
    target: "SemanticEntity | None" = None
    # Identifier of the rule being run; empty string when not supplied.
    rule_id: str = ""
|
||||
|
||||
|
||||
@dataclass
class CheckHit:
    """One finding reported by a check."""

    # Paragraph the finding is attached to; None when there is no anchor paragraph.
    paragraph: Paragraph | None
    # Character span of the finding (start offset, end offset).
    char_start: int = 0
    char_end: int = 0
    # What was found / what was expected, as free-form dictionaries.
    actual: dict[str, Any] | None = None
    expected: dict[str, Any] | None = None
    # Human-readable description of the finding.
    message: str | None = None
    # Confidence of the finding; defaults to 1.0.
    confidence: float = 1.0
|
||||
|
||||
|
||||
@dataclass
class CheckResult:
    """Outcome of running one check."""

    # True when the check found nothing to report (or was skipped).
    passed: bool
    # Findings produced by the check; empty when it passed.
    hits: list[CheckHit] = field(default_factory=list)
    # True when the check could not be evaluated; skip_reason says why.
    skipped: bool = False
    skip_reason: str = ""
|
||||
|
||||
|
||||
class CheckBase:
    """Abstract base class of all check primitives."""

    # Name of the check; empty by default.
    name: str = ""

    def run(self, ctx: CheckContext) -> CheckResult:
        """Evaluate the check against ``ctx``; subclasses must override this."""
        raise NotImplementedError
|
||||
@@ -0,0 +1,34 @@
|
||||
"""易混淆词对(字面 + 正则)。"""
|
||||
|
||||
import re
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
|
||||
|
||||
|
||||
@register("confused_pair")
class ConfusedPairCheck(CheckBase):
    """Flag commonly confused words, given as literals or regular expressions.

    Each entry of ``stage.pairs`` is a dict with ``wrong`` (literal) and/or
    ``wrong_pattern`` (regex), plus ``correct`` (or ``suggest``) and an
    optional ``reason``.
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one hit per finding across ``ctx.paragraphs``.

        A literal reports only its first occurrence in a paragraph. The regex
        is tried only when the literal is absent or not found, and reports
        every match.
        """
        pairs = ctx.stage.pairs or []
        hits: list[CheckHit] = []
        for para in ctx.paragraphs:
            body = para.text
            for entry in pairs:
                literal = entry.get("wrong")
                pattern = entry.get("wrong_pattern")
                fix = entry.get("correct") or entry.get("suggest", "")
                why = entry.get("reason", "")

                pos = body.find(literal) if literal else -1
                if pos >= 0:
                    hits.append(CheckHit(
                        paragraph=para, char_start=pos, char_end=pos + len(literal),
                        actual={"text": literal}, expected={"text": fix},
                        message=f"\"{literal}\" 应为 \"{fix}\"。{why}",
                    ))
                    continue

                if not pattern:
                    continue
                hits.extend(
                    CheckHit(
                        paragraph=para, char_start=found.start(), char_end=found.end(),
                        actual={"text": found.group(0)},
                        expected={"text": fix},
                        message=f"\"{found.group(0)}\" 应为 \"{fix}\"。{why}",
                    )
                    for found in re.finditer(pattern, body)
                )
        return CheckResult(passed=not hits, hits=hits)
|
||||
@@ -0,0 +1,69 @@
|
||||
"""跨段关系 check:例如二级标题以句号结尾后又新起一段。"""
|
||||
|
||||
import re
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
|
||||
|
||||
|
||||
# One attachment item that ends in punctuation: number (digits + dot),
# then content, then the trailing punctuation mark.
# NOTE(review): the character classes list some ASCII characters twice
# (".", ";", ","); presumably one of each pair was meant to be the full-width
# form. Verify against the original file's encoding.
_ATTACH_ITEM_TRAIL_PUNCT = re.compile(r"\d+[\..][^\d;。,;,.]+?[;。,;,.]")
# A whole paragraph that is a single attachment item ending in punctuation.
_ATTACH_ITEM_LINE = re.compile(r"^\d+[\..].+[;。,;,.]\s*$")
|
||||
|
||||
|
||||
@register("cross_role")
class CrossRoleCheck(CheckBase):
    """Checks that relate paragraphs to one another across the whole document.

    Each entry of ``stage.rules`` is a dict whose ``type`` selects a rule:

    * ``h2_no_period_then_break``: a ``heading_2`` paragraph ending in a
      period that is followed by a non-empty paragraph.
    * ``attachment_item_no_trailing_punct``: attachment items ending in
      punctuation (delegated to ``_attachment_item_hits``).

    The rules scan ``ctx.document.paragraphs``, not ``ctx.paragraphs``.
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Apply every configured rule and collect the hits."""
        rules = ctx.stage.rules or []
        paras = ctx.document.paragraphs
        hits: list[CheckHit] = []
        for r in rules:
            t = r.get("type")
            if t == "h2_no_period_then_break":
                for i, p in enumerate(paras):
                    stripped = p.text.rstrip()
                    if p.role != "heading_2" or not stripped.endswith(("。", ".")):
                        continue
                    if i + 1 < len(paras) and paras[i + 1].text.strip():
                        # The period is the last character of the right-stripped
                        # text; using len(p.text) would mark trailing whitespace.
                        hits.append(CheckHit(
                            paragraph=p,
                            char_start=len(stripped) - 1, char_end=len(stripped),
                            actual={"text": p.text},
                            message="二级标题在换行分段时不应使用句号;如使用句号则应紧接正文",
                        ))
            elif t == "attachment_item_no_trailing_punct":
                hits.extend(_attachment_item_hits(paras))
        return CheckResult(passed=not hits, hits=hits)
|
||||
|
||||
|
||||
def _attachment_item_hits(paras):
    """Scan the attachment section and report items that end in punctuation.

    The section starts at a paragraph whose role is ``attachment_marker`` and
    ends at the next ``signature``, ``date`` or ``heading_1`` paragraph.
    Items written inline in the marker paragraph are matched individually;
    later paragraphs are matched as whole-line items. Blank paragraphs are
    ignored.

    Returns:
        A list of ``CheckHit`` whose character offsets index into ``p.text``.
    """
    hits: list[CheckHit] = []
    in_attachment = False
    for p in paras:
        raw = p.text
        text = raw.strip()
        if not text:
            continue
        # Patterns are applied to the stripped text; shift by the stripped
        # leading whitespace so spans refer to positions in p.text.
        lead = len(raw) - len(raw.lstrip())

        if p.role == "attachment_marker":
            in_attachment = True
            # The marker paragraph itself may list several items on one line.
            for m in _ATTACH_ITEM_TRAIL_PUNCT.finditer(text):
                hits.append(CheckHit(
                    paragraph=p,
                    char_start=lead + m.start(), char_end=lead + m.end(),
                    actual={"snippet": m.group(0)},
                    message=f'附件项末尾不应有标点:"{m.group(0)}"',
                ))
            continue

        if p.role in ("signature", "date", "heading_1"):
            in_attachment = False
            continue

        if in_attachment and _ATTACH_ITEM_LINE.match(text):
            # The offending mark is the last visible character of the paragraph.
            end = lead + len(text)
            hits.append(CheckHit(
                paragraph=p,
                char_start=end - 1, char_end=end,
                actual={"text": p.text},
                message=f'附件项末尾不应有标点:"{text}"',
            ))
    return hits
|
||||
@@ -0,0 +1,162 @@
|
||||
"""字体/字号/复合样式/行距 check。"""
|
||||
|
||||
import re
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Paragraph, ParagraphStyle
|
||||
|
||||
|
||||
def _font_match(actual: str | None, expect: str) -> bool:
|
||||
if not actual:
|
||||
return False
|
||||
return expect in actual or actual in expect
|
||||
|
||||
|
||||
def _size_match(actual: float | None, expect: float, tol: float = 0.5) -> bool:
|
||||
if actual is None:
|
||||
return False
|
||||
return abs(actual - expect) <= tol
|
||||
|
||||
|
||||
def _style_matches(style: ParagraphStyle, expect: dict) -> bool:
    """Tell whether ``style`` satisfies every constraint present in ``expect``.

    Recognised keys are ``eastasia`` (font name), ``size_pt`` (point size)
    and ``bold`` (compared by truthiness). Absent keys are not checked, and
    evaluation stops at the first failing constraint.
    """
    font_ok = "eastasia" not in expect or _font_match(
        style.font_eastasia, expect["eastasia"]
    )
    if not font_ok:
        return False

    size_ok = "size_pt" not in expect or _size_match(
        style.font_size_pt, float(expect["size_pt"])
    )
    if not size_ok:
        return False

    return "bold" not in expect or bool(style.bold) == bool(expect["bold"])
|
||||
|
||||
|
||||
@register("font")
class FontCheck(CheckBase):
    """Flag paragraphs whose East Asian font or font size differs from ``stage.expect``.

    Recognised ``expect`` keys: ``eastasia`` (compared with ``_font_match``)
    and ``size_pt`` (compared with ``_size_match``). Absent keys are not
    checked.
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one whole-paragraph hit for every non-conforming paragraph."""
        expect = ctx.stage.expect or {}
        hits: list[CheckHit] = []
        for para in ctx.paragraphs:
            style = para.style
            actual = {"font": style.font_eastasia, "size": style.font_size_pt}
            # Both constraints are always evaluated, mirroring a flag-based check.
            font_bad = "eastasia" in expect and not _font_match(
                style.font_eastasia, expect["eastasia"]
            )
            size_bad = "size_pt" in expect and not _size_match(
                style.font_size_pt, float(expect["size_pt"])
            )
            if not (font_bad or size_bad):
                continue
            hits.append(CheckHit(
                paragraph=para, char_start=0, char_end=len(para.text),
                actual=actual, expected=expect,
                message=f"字体或字号不符合(实际 {actual['font']} {actual['size']}pt,期望 {expect})",
            ))
        return CheckResult(passed=not hits, hits=hits)
|
||||
|
||||
|
||||
@register("style_match")
class StyleMatchCheck(CheckBase):
    """Flag paragraphs whose combined style differs from ``stage.expect``.

    Recognised ``expect`` keys: ``eastasia``, ``size_pt``, ``bold`` (compared
    by truthiness) and ``alignment`` (compared by equality). ``italic`` is
    reported in the hit's ``actual`` data but is not compared.
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one whole-paragraph hit for every non-conforming paragraph."""
        expect = ctx.stage.expect or {}
        hits: list[CheckHit] = []
        for para in ctx.paragraphs:
            style = para.style
            actual = {
                "font": style.font_eastasia,
                "size": style.font_size_pt,
                "bold": style.bold,
                "italic": style.italic,
                "alignment": style.alignment,
            }
            # Every constraint is evaluated before deciding, in this order.
            font_bad = "eastasia" in expect and not _font_match(
                style.font_eastasia, expect["eastasia"]
            )
            size_bad = "size_pt" in expect and not _size_match(
                style.font_size_pt, float(expect["size_pt"])
            )
            bold_bad = "bold" in expect and bool(style.bold) != bool(expect["bold"])
            align_bad = "alignment" in expect and style.alignment != expect["alignment"]
            if font_bad or size_bad or bold_bad or align_bad:
                hits.append(CheckHit(
                    paragraph=para, char_start=0, char_end=len(para.text),
                    actual=actual, expected=expect, message="样式不符合",
                ))
        return CheckResult(passed=not hits, hits=hits)
|
||||
|
||||
|
||||
# Matches an attachment marker at the start of a paragraph (optional leading
# whitespace); group 1 is the marker itself: "附件" followed by a colon or digits.
# NOTE(review): the class "[::]" lists the ASCII colon twice; presumably one
# was meant to be the full-width colon. Verify against the original file.
_ATTACHMENT_MARKER_RE = re.compile(r"^\s*(附件[::]|附件\d+)")
|
||||
|
||||
|
||||
@register("attachment_marker_style")
class AttachmentMarkerStyleCheck(CheckBase):
    """Check the style of the attachment marker only, not the attachment name after it."""

    DEFAULT_EXPECT = {"eastasia": "黑体", "size_pt": 16, "bold": False}

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one hit per paragraph whose marker has a non-conforming style.

        Paragraphs that do not start with a marker are ignored. The styles of
        the runs overlapping the marker are checked; when there are none, the
        paragraph style is checked instead. The hit reports the first
        offending style and spans the marker text.
        """
        expect = ctx.stage.expect or self.DEFAULT_EXPECT
        hits: list[CheckHit] = []
        for para in ctx.paragraphs:
            found = _ATTACHMENT_MARKER_RE.match(para.text)
            if found is None:
                continue
            begin, end = found.span(1)
            candidates = _marker_run_styles(para, end) or [para.style]
            for style in candidates:
                if _style_matches(style, expect):
                    continue
                hits.append(CheckHit(
                    paragraph=para,
                    char_start=begin,
                    char_end=end,
                    actual={
                        "font": style.font_eastasia,
                        "size": style.font_size_pt,
                        "bold": style.bold,
                    },
                    expected=expect,
                    message="附件标记样式不符合",
                ))
                break  # only the first offending style is reported
        return CheckResult(passed=not hits, hits=hits)
|
||||
|
||||
|
||||
def _marker_run_styles(p: Paragraph, marker_end: int) -> list[ParagraphStyle]:
    """Collect the styles of the non-blank runs that overlap ``[0, marker_end)``.

    Run positions are derived by accumulating the length of each run's text
    in order. Runs that are empty, whitespace-only, or that start at or after
    ``marker_end`` are left out.
    """
    collected: list[ParagraphStyle] = []
    offset = 0
    for run in p.runs:
        begin, offset = offset, offset + len(run.text)
        overlaps = offset > 0 and begin < marker_end
        if overlaps and run.text.strip():
            collected.append(run.style)
    return collected
|
||||
|
||||
|
||||
@register("line_spacing")
class LineSpacingCheck(CheckBase):
    """Flag paragraphs whose line spacing is unset or differs from the expected multiple.

    ``stage.expect`` may provide ``multiple`` (default 1.5) and ``tol``
    (default 0.05).
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one whole-paragraph hit for every non-conforming paragraph."""
        expect = ctx.stage.expect or {}
        target = float(expect.get("multiple", 1.5))
        tol = float(expect.get("tol", 0.05))
        hits: list[CheckHit] = []
        for para in ctx.paragraphs:
            actual = para.style.line_spacing
            within = actual is not None and abs(actual - target) <= tol
            if within:
                continue
            hits.append(CheckHit(
                paragraph=para, char_start=0, char_end=len(para.text),
                actual={"line_spacing": actual},
                expected={"line_spacing": target},
                message=f"行距应为 {target},实际 {actual}",
            ))
        return CheckResult(passed=not hits, hits=hits)
|
||||
@@ -0,0 +1,42 @@
|
||||
"""短语/字符黑名单。"""
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
|
||||
|
||||
|
||||
@register("forbid_phrase")
class ForbidPhraseCheck(CheckBase):
    """Flag paragraphs that contain any phrase from ``stage.phrases``."""

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one hit per (paragraph, phrase) pair, at the phrase's first occurrence.

        Empty entries in ``stage.phrases`` are ignored.
        """
        # An empty phrase is "found" at offset 0 of every paragraph, which
        # would report a zero-length hit everywhere; drop such entries.
        phrases = [phr for phr in (ctx.stage.phrases or []) if phr]
        hits: list[CheckHit] = []
        for p in ctx.paragraphs:
            for phr in phrases:
                start = p.text.find(phr)
                if start >= 0:
                    hits.append(CheckHit(
                        paragraph=p, char_start=start, char_end=start + len(phr),
                        actual={"text": phr}, expected={"forbid": phr},
                        message=f"出现禁用短语 \"{phr}\"",
                    ))
        return CheckResult(passed=not hits, hits=hits)
|
||||
|
||||
|
||||
@register("forbid_chars")
class ForbidCharsCheck(CheckBase):
    """Flag every occurrence of each entry of ``stage.chars`` in the paragraphs."""

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one hit per non-overlapping occurrence of each forbidden entry.

        Empty entries in ``stage.chars`` are ignored.
        """
        chars = ctx.stage.chars or []
        hits: list[CheckHit] = []
        for p in ctx.paragraphs:
            for c in chars:
                if not c:
                    # find("", start) returns start and the cursor would never
                    # advance, so an empty entry would loop forever.
                    continue
                start = 0
                while (idx := p.text.find(c, start)) >= 0:
                    hits.append(CheckHit(
                        paragraph=p, char_start=idx, char_end=idx + len(c),
                        actual={"char": c}, expected={"forbid": c},
                        message=f"禁用字符 \"{c}\" 出现在 idx {idx}",
                    ))
                    start = idx + len(c)
        return CheckResult(passed=not hits, hits=hits)
|
||||
@@ -0,0 +1,29 @@
|
||||
"""层级序号格式 check。"""
|
||||
|
||||
import re
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
|
||||
|
||||
|
||||
@register("hierarchy")
class HierarchyCheck(CheckBase):
    """Check hierarchical numbering against a blacklist of forbidden patterns.

    ``stage.forbid_patterns`` is a list of regular expressions. For each
    paragraph, the first match of each pattern in the stripped text produces
    one hit.

    NOTE(review): an ``expected_order`` whitelist was described for this
    check, but the implementation reads only ``forbid_patterns``.
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one hit per (paragraph, pattern) pair that matches."""
        forbid = [re.compile(p) for p in (ctx.stage.forbid_patterns or [])]
        hits: list[CheckHit] = []
        for p in ctx.paragraphs:
            raw = p.text
            text = raw.strip()
            # Patterns run on the stripped text (so "^" anchors on the first
            # visible character); shift by the stripped leading whitespace so
            # the reported span indexes into p.text.
            lead = len(raw) - len(raw.lstrip())
            for f in forbid:
                m = f.search(text)
                if m:
                    hits.append(CheckHit(
                        paragraph=p,
                        char_start=lead + m.start(), char_end=lead + m.end(),
                        actual={"text": m.group(0)},
                        expected={"forbid_pattern": f.pattern},
                        message=f"层级序号格式错误:命中禁用模式 {f.pattern}",
                    ))
        return CheckResult(passed=not hits, hits=hits)
|
||||
@@ -0,0 +1,46 @@
|
||||
"""标点符号专项规则。"""
|
||||
|
||||
import re
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
|
||||
|
||||
|
||||
# Adjacent quotations or book titles must not be separated by an enumeration
# comma: a closing quote/title mark, "、", then an opening quote/title mark.
_QUOTE_DUNHAO_RE = re.compile(r"([”》])、([“《])")
# An inline parenthesis should not end in punctuation (question marks,
# exclamation marks and ellipses are not in the flagged set).
# NOTE(review): the classes "[((]" and "[))]" list the ASCII parenthesis
# twice; presumably one of each pair was meant to be the full-width form.
# Verify against the original file.
_PAREN_PUNCT_RE = re.compile(r"[((][^))]*?[,。;:、][))]")
# Nested quotes: a double-quoted span containing a single-quoted phrase.
_NESTED_QUOTE_RE = re.compile(r"“[^“”]*?‘[^‘’]+’[^“”]*?”")
|
||||
|
||||
|
||||
@register("punctuation")
class PunctuationCheck(CheckBase):
    """Punctuation rules selected by the ``type`` of each entry in ``stage.rules``.

    Supported types: ``no_dunhao_between_quotes``,
    ``no_punct_inside_inline_paren`` and ``no_outer_quote_when_inner_quote``.
    Entries with any other type are ignored.
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one hit per regex match, ordered by paragraph, then rule, then position."""
        # (rule type, pattern, message, whether a suggested replacement is attached)
        table = (
            ("no_dunhao_between_quotes", _QUOTE_DUNHAO_RE,
             "多个引号/书名号并列不应用顿号分隔", True),
            ("no_punct_inside_inline_paren", _PAREN_PUNCT_RE,
             "句内括号末尾通常不应含标点", False),
            ("no_outer_quote_when_inner_quote", _NESTED_QUOTE_RE,
             "双引号内已含单引号强调时,外层不应再加双引号", False),
        )
        rules = ctx.stage.rules or []
        hits: list[CheckHit] = []
        for para in ctx.paragraphs:
            for rule in rules:
                kind = rule.get("type")
                spec = next((row for row in table if kind == row[0]), None)
                if spec is None:
                    continue
                _name, pattern, message, with_fix = spec
                for found in pattern.finditer(para.text):
                    fields = {"actual": {"text": found.group(0)}, "message": message}
                    if with_fix:
                        # Suggested text is the match with the enumeration comma removed.
                        fields["expected"] = {"text": found.group(0).replace("、", "")}
                    hits.append(CheckHit(
                        paragraph=para, char_start=found.start(), char_end=found.end(),
                        **fields,
                    ))
        return CheckResult(passed=not hits, hits=hits)
|
||||
@@ -0,0 +1,36 @@
|
||||
"""regex_require / regex_forbid。"""
|
||||
|
||||
import re
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
|
||||
|
||||
|
||||
@register("regex_require")
class RegexRequireCheck(CheckBase):
    """Require every paragraph to contain a match of ``stage.pattern``."""

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one whole-paragraph hit for each paragraph without a match.

        A missing pattern is compiled as the empty pattern, which matches
        every paragraph.
        """
        source = ctx.stage.pattern
        compiled = re.compile(source or "")
        hits: list[CheckHit] = [
            CheckHit(
                paragraph=para, char_start=0, char_end=len(para.text),
                actual={"text": para.text}, expected={"pattern": ctx.stage.pattern},
                message=f"未匹配模式 {ctx.stage.pattern}",
            )
            for para in ctx.paragraphs
            if compiled.search(para.text) is None
        ]
        return CheckResult(passed=not hits, hits=hits)
|
||||
|
||||
|
||||
@register("regex_forbid")
class RegexForbidCheck(CheckBase):
    """Flag every match of ``stage.pattern`` in the paragraphs."""

    def run(self, ctx: CheckContext) -> CheckResult:
        """Return one hit per match of the forbidden pattern.

        A missing or empty pattern forbids nothing and the check passes.
        """
        if not ctx.stage.pattern:
            # The empty pattern matches (with zero length) at every position,
            # which would flood each paragraph with meaningless hits.
            return CheckResult(passed=True, hits=[])
        pat = re.compile(ctx.stage.pattern)
        hits: list[CheckHit] = []
        for p in ctx.paragraphs:
            for m in pat.finditer(p.text):
                hits.append(CheckHit(
                    paragraph=p, char_start=m.start(), char_end=m.end(),
                    actual={"text": m.group(0)},
                    expected={"forbid_pattern": ctx.stage.pattern},
                    message=f"出现禁止模式 {ctx.stage.pattern}(命中 \"{m.group(0)}\")",
                ))
        return CheckResult(passed=not hits, hits=hits)
|
||||
@@ -0,0 +1,28 @@
|
||||
"""required check:目标实体或选中段落必须有非空文本。"""
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
|
||||
CheckBase, CheckContext, CheckHit, CheckResult,
|
||||
)
|
||||
|
||||
|
||||
@register("required")
class RequiredCheck(CheckBase):
    """Require non-empty text on the target entity or on every selected paragraph."""

    def run(self, ctx: CheckContext) -> CheckResult:
        """Check the target entity when one is set, otherwise the paragraphs.

        With a target: pass when its text is non-blank, else report one hit
        anchored on the first paragraph (or on no paragraph when there are
        none). Without a target: report one hit per blank paragraph.
        """
        entity = ctx.target
        if entity is None:
            # Paragraph channel: every selected paragraph must have text.
            blank_hits = [
                CheckHit(paragraph=para, message="段落为空")
                for para in ctx.paragraphs
                if not para.text.strip()
            ]
            return CheckResult(passed=not blank_hits, hits=blank_hits)

        # Target channel: the entity's text must be non-blank.
        if entity.text and entity.text.strip():
            return CheckResult(passed=True, hits=[])
        anchor = ctx.paragraphs[0] if ctx.paragraphs else None
        missing = CheckHit(paragraph=anchor, message=f"实体 {ctx.target.name} 缺失或为空")
        return CheckResult(passed=False, hits=[missing])
|
||||
@@ -0,0 +1,42 @@
|
||||
"""文种白名单(15 种法定公文文种)。"""
|
||||
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
|
||||
from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
|
||||
CheckBase, CheckContext, CheckHit, CheckResult,
|
||||
)
|
||||
|
||||
|
||||
# Whitelist of accepted document-type names.
# NOTE(review): the set holds 16 strings although the check's message speaks of
# 15 statutory types; presumably "命令" and "令" are two spellings of one type.
# Confirm against the governing regulation.
LEGAL_WENZHONG = {
    "决议", "决定", "命令", "令", "公报", "公告", "通告",
    "意见", "通知", "通报", "报告", "请示", "批复",
    "议案", "函", "纪要",
}
|
||||
|
||||
|
||||
@register("wenzhong_whitelist")
class WenzhongWhitelistCheck(CheckBase):
    """Check that the document type is one of the names in ``LEGAL_WENZHONG``.

    The document type is read from:

    1. ``ctx.entities["wenzhong"].text`` (preferred), or
    2. ``ctx.target.text`` when the target's name is ``wenzhong``.
    """

    def run(self, ctx: CheckContext) -> CheckResult:
        """Pass when no document type is available or it is whitelisted; otherwise report one hit."""
        entity = ctx.entities.get("wenzhong") if ctx.entities else None
        if entity is not None:
            source = entity
        elif ctx.target is not None and ctx.target.name == "wenzhong":
            source = ctx.target
        else:
            source = None

        wz = (source.text or "").strip() if source is not None else ""
        if not wz or wz in LEGAL_WENZHONG:
            return CheckResult(passed=True, hits=[])

        violation = CheckHit(
            paragraph=None,
            actual={"wenzhong": wz},
            expected={"wenzhong_whitelist": sorted(LEGAL_WENZHONG)},
            message=f"非法定文种 \"{wz}\",应为 15 种法定公文文种之一",
        )
        return CheckResult(passed=False, hits=[violation])
|
||||
Reference in New Issue
Block a user