feat(govdoc): 新增内部公文模块全链路（后端58+前端11文件）

2026-05-13 14:37:12 +08:00
parent 99699e20e1
commit 5d777599bf
63 changed files with 7608 additions and 0 deletions
@@ -0,0 +1,24 @@
+"""Check 原语注册中心：通过 register 装饰器收集，runner 通过 get 查找。"""
+
+from __future__ import annotations
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase
+
+# Maps a check name to the CheckBase subclass registered under it.
+# Written by the `register` decorator; read by `get_check` and `all_checks`.
+_REGISTRY: dict[str, type[CheckBase]] = {}
+
+
+def register(name: str):
+    """Build a class decorator that files a check class under ``name``.
+
+    The decorated class gets its ``name`` attribute set to ``name`` and is
+    stored in the module-level registry (a later registration under the same
+    name replaces the earlier entry). The class object itself is returned.
+    """
+    def _bind(check_cls):
+        setattr(check_cls, "name", name)
+        _REGISTRY[name] = check_cls
+        return check_cls
+
+    return _bind
+
+
+def get_check(name: str) -> type[CheckBase]:
+    """Return the check class registered under ``name``.
+
+    Raises:
+        KeyError: when nothing is registered under ``name``; the message
+            lists the names that are currently known.
+    """
+    if name in _REGISTRY:
+        return _REGISTRY[name]
+    known = list(_REGISTRY)
+    raise KeyError(f"unknown check: {name}; known: {known}")
+
+
+def all_checks() -> list[str]:
+    """Return the names of every registered check, in the registry's key order."""
+    return [*_REGISTRY]
@@ -0,0 +1,151 @@
+"""LLM 语义检查。三级输出：pass / warn / fail。"""
+
+import logging
+import re
+from typing import Any
+from pydantic import BaseModel
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
+    CheckBase, CheckContext, CheckHit, CheckResult,
+)
+from fastapi_modules.fastapi_leaudit.govdoc_engine.llm.client import LlmClient, LlmJsonError, _format_exc
+
+# Module-level logger used for "AI check skipped" warnings.
+_log = logging.getLogger(__name__)
+
+
+# Output instructions appended to every rendered prompt: the model is asked
+# for a JSON object with the keys "result", "reason" and "suggestion".
+_OUT_FORMAT = """
+请以 JSON 输出：
+{"result": "pass|warn|fail", "reason": "<简短理由>", "suggestion": "<改进建议；pass 时填空>"}
+"""
+
+# Matches a `{{ key }}` placeholder; group 1 is the key with the surrounding
+# whitespace excluded.
+_VAR_RE = re.compile(r"\{\{\s*([^}]+?)\s*\}\}")
+
+
+def _resolve_dot_path(root: Any, path: str) -> str:
+    """Walk the dot-separated ``path`` starting at ``root`` and stringify the result.
+
+    Each segment is looked up with ``dict.get`` when the current value is a
+    dict and with ``getattr`` otherwise (e.g. ``style.font_eastasia`` on an
+    entity). A missing key or attribute, or a ``None`` met anywhere along the
+    way, yields the empty string.
+    """
+    node: Any = root
+    for part in path.split("."):
+        if node is None:
+            return ""
+        node = node.get(part) if isinstance(node, dict) else getattr(node, part, None)
+    return "" if node is None else str(node)
+
+
+def _interpolate(
+    template: str,
+    paragraphs: list,
+    entities: dict | None = None,
+    target: Any = None,
+) -> str:
+    """Fill every ``{{ key }}`` placeholder in ``template``.
+
+    Resolution order for each key:
+      1. ``paragraphs[N]`` -> text of the N-th paragraph;
+      2. ``<entity>`` or ``<entity>.<dot.path>`` when the first segment is a
+         key of ``entities`` (a bare entity name renders its ``text``);
+      3. otherwise the key is read as a dot path on ``target``, if one is given.
+    Anything that cannot be resolved is replaced by the empty string.
+    """
+    known = entities or {}
+    prefix = "paragraphs["
+
+    def _render(match):
+        name = match.group(1).strip()
+
+        # 1) positional paragraph reference
+        if name.startswith(prefix):
+            try:
+                return paragraphs[int(name[len(prefix):].rstrip("]"))].text
+            except (ValueError, IndexError):
+                return ""
+
+        # 2) named entity, optionally followed by a dot path
+        first, _, tail = name.partition(".")
+        if first in known:
+            found = known[first]
+            if found is None:
+                return ""
+            if not tail:
+                return found.text
+            return _resolve_dot_path(found, tail)
+
+        # 3) implicit lookup on the target
+        if target is None:
+            return ""
+        return _resolve_dot_path(target, name) or ""
+
+    return _VAR_RE.sub(_render, template)
+
+
+@register("ai")
+class AiCheck(CheckBase):
+    """LLM-backed semantic check with a three-level verdict: pass / warn / fail.
+
+    The stage prompt is rendered through ``_interpolate``, sent to the LLM as a
+    single user message, and the JSON reply is turned into a ``CheckResult``.
+    LLM failures never propagate: they produce a *skipped* result instead.
+    """
+
+    def __init__(self, llm_client: LlmClient | None = None):
+        # Compare against None so a caller-supplied client is always kept,
+        # even if that object happens to evaluate as falsy.
+        self.client = llm_client if llm_client is not None else LlmClient()
+
+    def _build_prompt(self, ctx: CheckContext) -> str:
+        """Render the stage prompt and append the JSON output instructions."""
+        prompt = _interpolate(
+            ctx.stage.prompt or "",
+            ctx.paragraphs,
+            ctx.entities,
+            ctx.target,
+        )
+        return prompt + "\n\n" + _OUT_FORMAT
+
+    @staticmethod
+    def _label(ctx: CheckContext) -> str:
+        """Label passed to the LLM client for this call."""
+        return f"ai_{ctx.rule_id or 'unknown'}"
+
+    def _messages(self, ctx: CheckContext) -> list[dict]:
+        """Single-turn chat payload carrying the rendered prompt."""
+        return [{"role": "user", "content": self._build_prompt(ctx)}]
+
+    @staticmethod
+    def _skipped(skip_reason: str) -> CheckResult:
+        """Result used whenever no verdict could be obtained."""
+        return CheckResult(
+            passed=True, hits=[], skipped=True,
+            skip_reason=skip_reason,
+        )
+
+    def _on_llm_error(self, exc: Exception) -> CheckResult:
+        """Log an LLM failure and convert it into a skipped result."""
+        if isinstance(exc, LlmJsonError):
+            _log.warning("AI check skipped (LLM JSON error): %s", _format_exc(exc))
+            return self._skipped(f"LLM 返回内容无法解析为 JSON：{exc}")
+        _log.warning("AI check skipped (LLM error): %s", _format_exc(exc))
+        return self._skipped(f"LLM 调用失败：{exc}")
+
+    def _interpret(self, ctx: CheckContext, resp: dict) -> CheckResult:
+        """Map the LLM's JSON reply onto a ``CheckResult``.
+
+        ``result`` is compared case-insensitively with surrounding whitespace
+        removed; a missing or null ``result`` counts as ``fail``. ``pass``
+        yields a passing result. Anything else yields one hit anchored on the
+        first paragraph (whole text), with confidence 0.95 for ``fail`` and
+        0.7 for every other verdict. A reply that is not a JSON object is
+        reported as skipped.
+        """
+        if not isinstance(resp, dict):
+            return self._skipped(
+                f"LLM 返回内容不是 JSON 对象：{type(resp).__name__}"
+            )
+        result = str(resp.get("result") or "fail").strip().lower()
+        reason = resp.get("reason") or ""
+        suggestion = resp.get("suggestion") or ""
+        if result == "pass":
+            return CheckResult(passed=True, hits=[])
+        target_p = ctx.paragraphs[0] if ctx.paragraphs else None
+        confidence = 0.95 if result == "fail" else 0.7
+        return CheckResult(passed=False, hits=[CheckHit(
+            paragraph=target_p,
+            char_start=0,
+            char_end=len(target_p.text) if target_p else 0,
+            actual={"llm_reason": reason, "llm_suggestion": suggestion},
+            expected={},
+            message=reason or "LLM 判定不通过",
+            confidence=confidence,
+        )])
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Synchronous entry point; see ``run_async`` for the awaitable twin."""
+        label = self._label(ctx)
+        try:
+            # Prompt rendering stays inside the try so that its failures are
+            # reported as "skipped" as well.
+            resp = self.client.chat_json(self._messages(ctx), label=label)
+        except Exception as e:  # deliberate best-effort: never fail the audit
+            return self._on_llm_error(e)
+        return self._interpret(ctx, resp)
+
+    async def run_async(self, ctx: CheckContext) -> CheckResult:
+        """Same as ``run`` but awaits the client's ``chat_json_async``."""
+        label = self._label(ctx)
+        try:
+            resp = await self.client.chat_json_async(self._messages(ctx), label=label)
+        except Exception as e:  # deliberate best-effort: never fail the audit
+            return self._on_llm_error(e)
+        return self._interpret(ctx, resp)
@@ -0,0 +1,48 @@
+"""Check 原语基类与上下文。"""
+
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, TYPE_CHECKING
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Paragraph
+from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.schema import RuleStage
+
+if TYPE_CHECKING:
+    from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.entities import SemanticEntity
+
+
+@dataclass
+class CheckContext:
+    """Input bundle handed to a check's ``run``."""
+
+    # Document under review (presumably the one `paragraphs` was selected from).
+    document: Document
+    # Paragraphs the check is asked to inspect.
+    paragraphs: list[Paragraph]
+    # Rule stage being evaluated; checks read their parameters from it.
+    stage: RuleStage
+    # Semantic entities keyed by name; a value may be None.
+    entities: dict[str, "SemanticEntity | None"] = field(default_factory=dict)
+    # Entity the check is aimed at, when there is one.
+    target: "SemanticEntity | None" = None
+    # Identifier of the rule being evaluated; empty string by default.
+    rule_id: str = ""
+
+
+@dataclass
+class CheckHit:
+    """One problem reported by a check."""
+
+    # Paragraph the hit is anchored on; None when there is no anchor.
+    paragraph: Paragraph | None
+    # Start offset of the flagged span within the paragraph text.
+    char_start: int = 0
+    # End offset of the flagged span (exclusive).
+    char_end: int = 0
+    # What was found in the document.
+    actual: dict[str, Any] | None = None
+    # What the rule expected instead.
+    expected: dict[str, Any] | None = None
+    # Human-readable description of the problem.
+    message: str | None = None
+    # Confidence attached to the hit; defaults to 1.0.
+    confidence: float = 1.0
+
+
+@dataclass
+class CheckResult:
+    """Outcome of running one check."""
+
+    # True when the check found nothing to report.
+    passed: bool
+    # Problems found; empty for a passing or skipped result.
+    hits: list[CheckHit] = field(default_factory=list)
+    # True when the check could not be evaluated.
+    skipped: bool = False
+    # Explanation accompanying a skipped result.
+    skip_reason: str = ""
+
+
+class CheckBase:
+    """Abstract base class of all check primitives."""
+
+    # Check name; defaults to the empty string.
+    name: str = ""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Evaluate the check against ``ctx``; subclasses must override this."""
+        raise NotImplementedError
@@ -0,0 +1,34 @@
+"""易混淆词对（字面 + 正则）。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("confused_pair")
+class ConfusedPairCheck(CheckBase):
+    """Flag easily confused words, configured literally or as a regex."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Scan every paragraph against every entry of ``stage.pairs``.
+
+        A literal ``wrong`` is reported once per paragraph, at its first
+        occurrence. ``wrong_pattern`` is consulted only when the literal is
+        absent from the paragraph (or not configured) and reports every match.
+        """
+        found: list[CheckHit] = []
+        entries = ctx.stage.pairs or []
+        for para in ctx.paragraphs:
+            for entry in entries:
+                literal = entry.get("wrong")
+                pattern = entry.get("wrong_pattern")
+                fix = entry.get("correct") or entry.get("suggest", "")
+                why = entry.get("reason", "")
+
+                at = para.text.find(literal) if literal else -1
+                if at >= 0:
+                    found.append(CheckHit(
+                        paragraph=para, char_start=at, char_end=at + len(literal),
+                        actual={"text": literal}, expected={"text": fix},
+                        message=f"\"{literal}\" 应为 \"{fix}\"。{why}",
+                    ))
+                    continue
+                if not pattern:
+                    continue
+                for hit in re.finditer(pattern, para.text):
+                    seen = hit.group(0)
+                    found.append(CheckHit(
+                        paragraph=para, char_start=hit.start(), char_end=hit.end(),
+                        actual={"text": seen},
+                        expected={"text": fix},
+                        message=f"\"{seen}\" 应为 \"{fix}\"。{why}",
+                    ))
+        return CheckResult(passed=not found, hits=found)
@@ -0,0 +1,69 @@
+"""跨段关系 check：例如二级标题以句号结尾后又新起一段。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+# Trailing punctuation of a single attachment item: a number (digits + dot),
+# then the content, then a closing punctuation mark.
+_ATTACH_ITEM_TRAIL_PUNCT = re.compile(r"\d+[\.．][^\d；。，;,.]+?[；。，;,.]")
+# A whole paragraph that consists of one attachment item.
+_ATTACH_ITEM_LINE = re.compile(r"^\d+[\.．].+[；。，;,.]\s*$")
+
+
+@register("cross_role")
+class CrossRoleCheck(CheckBase):
+    """Cross-paragraph checks driven by ``stage.rules``.
+
+    Works on ``ctx.document.paragraphs`` (the whole document) rather than on
+    the selected paragraphs, because the rules relate neighbouring paragraphs.
+    Supported rule types:
+      - ``h2_no_period_then_break``: a level-2 heading ending in a period that
+        is followed by a non-empty paragraph;
+      - ``attachment_item_no_trailing_punct``: attachment items that end with
+        punctuation.
+    """
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Collect the hits of every configured rule, in rule order."""
+        rules = ctx.stage.rules or []
+        paras = ctx.document.paragraphs
+        hits: list[CheckHit] = []
+        for r in rules:
+            t = r.get("type")
+            if t == "h2_no_period_then_break":
+                hits.extend(self._h2_period_hits(paras))
+            elif t == "attachment_item_no_trailing_punct":
+                hits.extend(_attachment_item_hits(paras))
+        return CheckResult(passed=not hits, hits=hits)
+
+    @staticmethod
+    def _h2_period_hits(paras) -> list[CheckHit]:
+        """Flag the closing period of level-2 headings followed by more text."""
+        hits: list[CheckHit] = []
+        for i, p in enumerate(paras):
+            if p.role != "heading_2":
+                continue
+            trimmed = p.text.rstrip()
+            if not trimmed.endswith(("。", ".")):
+                continue
+            if i + 1 < len(paras) and paras[i + 1].text.strip():
+                # The period is the last character of the right-stripped text;
+                # using len(p.text) would point at trailing whitespace instead.
+                hits.append(CheckHit(
+                    paragraph=p,
+                    char_start=len(trimmed) - 1, char_end=len(trimmed),
+                    actual={"text": p.text},
+                    message="二级标题在换行分段时不应使用句号；如使用句号则应紧接正文",
+                ))
+        return hits
+
+
+def _attachment_item_hits(paras):
+    """Scan the attachment section and flag items that end with punctuation.
+
+    The section starts at a paragraph whose role is ``attachment_marker`` and
+    ends at the next ``signature``, ``date`` or ``heading_1`` paragraph. Blank
+    paragraphs are ignored. Reported offsets refer to ``p.text`` itself, not to
+    the stripped copy the patterns are matched against.
+    """
+    hits: list[CheckHit] = []
+    in_attachment = False
+    for p in paras:
+        text = p.text.strip()
+        if not text:
+            continue
+        # Width of the leading whitespace removed by strip(); added to match
+        # offsets so they index into the original paragraph text.
+        lead = len(p.text) - len(p.text.lstrip())
+
+        if p.role == "attachment_marker":
+            in_attachment = True
+            # Several items may share the marker paragraph,
+            # e.g. "附件：1.xxx；2.yyy。".
+            for m in _ATTACH_ITEM_TRAIL_PUNCT.finditer(text):
+                hits.append(CheckHit(
+                    paragraph=p,
+                    char_start=lead + m.start(), char_end=lead + m.end(),
+                    actual={"snippet": m.group(0)},
+                    message=f'附件项末尾不应有标点："{m.group(0)}"',
+                ))
+            continue
+
+        if p.role in ("signature", "date", "heading_1"):
+            in_attachment = False
+            continue
+
+        if in_attachment and _ATTACH_ITEM_LINE.match(text):
+            # The offending mark is the last character of the stripped text.
+            end = lead + len(text)
+            hits.append(CheckHit(
+                paragraph=p,
+                char_start=end - 1, char_end=end,
+                actual={"text": p.text},
+                message=f'附件项末尾不应有标点："{text}"',
+            ))
+    return hits
@@ -0,0 +1,162 @@
+"""字体/字号/复合样式/行距 check。"""
+
+import re
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Paragraph, ParagraphStyle
+
+
+def _font_match(actual: str | None, expect: str) -> bool:
+    """Loose font-name comparison: true when either name contains the other.
+
+    A missing or empty ``actual`` never matches.
+    """
+    if actual:
+        return (expect in actual) or (actual in expect)
+    return False
+
+
+def _size_match(actual: float | None, expect: float, tol: float = 0.5) -> bool:
+    """Return True when ``actual`` lies within ``tol`` of ``expect`` (inclusive).
+
+    An unknown size (``None``) never matches.
+    """
+    return actual is not None and abs(actual - expect) <= tol
+
+
+def _style_matches(style: ParagraphStyle, expect: dict) -> bool:
+    """Compare ``style`` with the keys present in ``expect``.
+
+    Recognised keys are ``eastasia`` (font name), ``size_pt`` and ``bold``;
+    keys that are absent impose no constraint. Checks run in that order and
+    stop at the first mismatch.
+    """
+    if "eastasia" in expect:
+        if not _font_match(style.font_eastasia, expect["eastasia"]):
+            return False
+    if "size_pt" in expect:
+        if not _size_match(style.font_size_pt, float(expect["size_pt"])):
+            return False
+    if "bold" in expect:
+        return bool(style.bold) == bool(expect["bold"])
+    return True
+
+
+@register("font")
+class FontCheck(CheckBase):
+    """Check each paragraph's East-Asian font and font size against ``stage.expect``."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Emit one whole-paragraph hit per paragraph whose font or size is off.
+
+        Only the ``eastasia`` and ``size_pt`` keys of ``stage.expect`` are
+        consulted, and each only when present.
+        """
+        wanted = ctx.stage.expect or {}
+        problems: list[CheckHit] = []
+        for para in ctx.paragraphs:
+            style = para.style
+            actual = {"font": style.font_eastasia, "size": style.font_size_pt}
+            # Both comparisons are always evaluated (no short-circuit between them).
+            font_bad = "eastasia" in wanted and not _font_match(
+                style.font_eastasia, wanted["eastasia"]
+            )
+            size_bad = "size_pt" in wanted and not _size_match(
+                style.font_size_pt, float(wanted["size_pt"])
+            )
+            if font_bad or size_bad:
+                problems.append(CheckHit(
+                    paragraph=para, char_start=0, char_end=len(para.text),
+                    actual=actual, expected=wanted,
+                    message=f"字体或字号不符合（实际 {actual['font']} {actual['size']}pt，期望 {wanted}）",
+                ))
+        return CheckResult(passed=not problems, hits=problems)
+
+
+@register("style_match")
+class StyleMatchCheck(CheckBase):
+    """Check font, size, bold and alignment of each paragraph against ``stage.expect``."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Emit one whole-paragraph hit per paragraph with any style mismatch.
+
+        The keys ``eastasia``, ``size_pt``, ``bold`` and ``alignment`` are each
+        checked only when present in ``stage.expect``.
+        """
+        wanted = ctx.stage.expect or {}
+        problems: list[CheckHit] = []
+        for para in ctx.paragraphs:
+            style = para.style
+            actual = {
+                "font": style.font_eastasia,
+                "size": style.font_size_pt,
+                "bold": style.bold,
+                "italic": style.italic,
+                "alignment": style.alignment,
+            }
+            # A list (not a chain of `or`) so that every comparison is evaluated.
+            mismatches = [
+                "eastasia" in wanted
+                and not _font_match(style.font_eastasia, wanted["eastasia"]),
+                "size_pt" in wanted
+                and not _size_match(style.font_size_pt, float(wanted["size_pt"])),
+                "bold" in wanted and bool(style.bold) != bool(wanted["bold"]),
+                "alignment" in wanted and style.alignment != wanted["alignment"],
+            ]
+            if any(mismatches):
+                problems.append(CheckHit(
+                    paragraph=para, char_start=0, char_end=len(para.text),
+                    actual=actual, expected=wanted, message="样式不符合",
+                ))
+        return CheckResult(passed=not problems, hits=problems)
+
+
+# Matches an attachment marker at the start of a paragraph (leading whitespace
+# allowed): "附件" followed by a colon (ASCII or full-width), or "附件" followed
+# by digits. Group 1 is the marker itself.
+_ATTACHMENT_MARKER_RE = re.compile(r"^\s*(附件[:：]|附件\d+)")
+
+
+@register("attachment_marker_style")
+class AttachmentMarkerStyleCheck(CheckBase):
+    """Validate only the marker itself ("附件：", "附件1", ...), not the attachment names after it."""
+
+    DEFAULT_EXPECT = {"eastasia": "黑体", "size_pt": 16, "bold": False}
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Flag attachment markers whose style differs from the expectation.
+
+        ``stage.expect`` is used when set, ``DEFAULT_EXPECT`` otherwise. The
+        styles of the runs covering the marker are checked; when no such run
+        is found the paragraph style is checked instead. The first offending
+        style is the one reported.
+        """
+        wanted = ctx.stage.expect or self.DEFAULT_EXPECT
+        flagged: list[CheckHit] = []
+        for para in ctx.paragraphs:
+            marker = _ATTACHMENT_MARKER_RE.match(para.text)
+            if marker is None:
+                continue
+            begin, end = marker.span(1)
+            candidates = _marker_run_styles(para, end) or [para.style]
+            offending = None
+            for candidate in candidates:
+                if not _style_matches(candidate, wanted):
+                    offending = candidate
+                    break
+            if offending is None:
+                continue
+            flagged.append(CheckHit(
+                paragraph=para,
+                char_start=begin,
+                char_end=end,
+                actual={
+                    "font": offending.font_eastasia,
+                    "size": offending.font_size_pt,
+                    "bold": offending.bold,
+                },
+                expected=wanted,
+                message="附件标记样式不符合",
+            ))
+        return CheckResult(passed=not flagged, hits=flagged)
+
+
+def _marker_run_styles(p: Paragraph, marker_end: int) -> list[ParagraphStyle]:
+    """Collect the styles of the non-blank runs that begin before ``marker_end``.
+
+    Run positions are derived by accumulating ``len(run.text)``; whitespace-only
+    runs and zero-length runs at the very start are ignored.
+    """
+    collected: list[ParagraphStyle] = []
+    offset = 0
+    for piece in p.runs:
+        begin, offset = offset, offset + len(piece.text)
+        overlaps = offset > 0 and begin < marker_end
+        if overlaps and piece.text.strip():
+            collected.append(piece.style)
+    return collected
+
+
+@register("line_spacing")
+class LineSpacingCheck(CheckBase):
+    """Check paragraph line spacing against an expected multiple."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Flag paragraphs whose line spacing is unknown or out of tolerance.
+
+        ``stage.expect`` may provide ``multiple`` (default 1.5) and ``tol``
+        (default 0.05).
+        """
+        cfg = ctx.stage.expect or {}
+        wanted = float(cfg.get("multiple", 1.5))
+        allowed = float(cfg.get("tol", 0.05))
+        problems: list[CheckHit] = []
+        for para in ctx.paragraphs:
+            spacing = para.style.line_spacing
+            if spacing is not None and abs(spacing - wanted) <= allowed:
+                continue
+            problems.append(CheckHit(
+                paragraph=para, char_start=0, char_end=len(para.text),
+                actual={"line_spacing": spacing},
+                expected={"line_spacing": wanted},
+                message=f"行距应为 {wanted}，实际 {spacing}",
+            ))
+        return CheckResult(passed=not problems, hits=problems)
@@ -0,0 +1,42 @@
+"""短语/字符黑名单。"""
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("forbid_phrase")
+class ForbidPhraseCheck(CheckBase):
+    """Flag forbidden phrases listed in ``stage.phrases``."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Report the first occurrence of each forbidden phrase per paragraph."""
+        # Empty entries are ignored: str.find("") is always 0, which would
+        # produce a zero-width false hit in every paragraph.
+        phrases = [phr for phr in (ctx.stage.phrases or []) if phr]
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for phr in phrases:
+                start = p.text.find(phr)
+                if start >= 0:
+                    hits.append(CheckHit(
+                        paragraph=p, char_start=start, char_end=start + len(phr),
+                        actual={"text": phr}, expected={"forbid": phr},
+                        message=f"出现禁用短语 \"{phr}\"",
+                    ))
+        return CheckResult(passed=not hits, hits=hits)
+
+
+@register("forbid_chars")
+class ForbidCharsCheck(CheckBase):
+    """Flag every occurrence of the forbidden characters in ``stage.chars``."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Report all (non-overlapping) occurrences of each entry per paragraph."""
+        # Empty entries are dropped up front: str.find("", start) returns
+        # `start` without ever advancing, so the scan below would never end.
+        chars = [c for c in (ctx.stage.chars or []) if c]
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for c in chars:
+                start = 0
+                while True:
+                    idx = p.text.find(c, start)
+                    if idx < 0:
+                        break
+                    hits.append(CheckHit(
+                        paragraph=p, char_start=idx, char_end=idx + len(c),
+                        actual={"char": c}, expected={"forbid": c},
+                        message=f"禁用字符 \"{c}\" 出现在 idx {idx}",
+                    ))
+                    # Resume right after this occurrence.
+                    start = idx + len(c)
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,29 @@
+"""层级序号格式 check。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("hierarchy")
+class HierarchyCheck(CheckBase):
+    """Check the format of hierarchical numbering.
+
+    Only ``stage.forbid_patterns`` (a blacklist of regexes; any match is an
+    error) is evaluated here. This implementation does not read an
+    ``expected_order`` whitelist.
+    """
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Report the first match of each forbidden pattern in each paragraph.
+
+        Patterns are searched in the whitespace-stripped paragraph text, so
+        ``^`` anchors at the first visible character; the reported offsets are
+        shifted back so that they index into the original ``p.text``.
+        """
+        forbid = [re.compile(p) for p in (ctx.stage.forbid_patterns or [])]
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            text = p.text.strip()
+            # Number of leading whitespace characters removed by strip().
+            lead = len(p.text) - len(p.text.lstrip())
+            for f in forbid:
+                m = f.search(text)
+                if m:
+                    hits.append(CheckHit(
+                        paragraph=p,
+                        char_start=lead + m.start(), char_end=lead + m.end(),
+                        actual={"text": m.group(0)},
+                        expected={"forbid_pattern": f.pattern},
+                        message=f"层级序号格式错误：命中禁用模式 {f.pattern}",
+                    ))
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,46 @@
+"""标点符号专项规则。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+# Adjacent book-title marks or quotations must not be separated by an
+# enumeration comma "、": matches a closing ” or 》, then 、, then an opening “ or 《.
+_QUOTE_DUNHAO_RE = re.compile(r"([”》])、([“《])")
+# Inside an in-sentence parenthesis no punctuation should precede the closing
+# bracket (question mark, exclamation mark and ellipsis are not matched).
+_PAREN_PUNCT_RE = re.compile(r"[（(][^）)]*?[，。；：、][）)]")
+# Nested quotes: a double-quoted span that contains a single-quoted emphasised
+# phrase (e.g. "卓'粤'创一流").
+_NESTED_QUOTE_RE = re.compile(r"“[^“”]*?‘[^‘’]+’[^“”]*?”")
+
+
+@register("punctuation")
+class PunctuationCheck(CheckBase):
+    """Punctuation rules selected through ``stage.rules``."""
+
+    # (rule type, pattern, message, whether an `expected` text is suggested)
+    _SPECS = (
+        ("no_dunhao_between_quotes", _QUOTE_DUNHAO_RE,
+         "多个引号/书名号并列不应用顿号分隔", True),
+        ("no_punct_inside_inline_paren", _PAREN_PUNCT_RE,
+         "句内括号末尾通常不应含标点", False),
+        ("no_outer_quote_when_inner_quote", _NESTED_QUOTE_RE,
+         "双引号内已含单引号强调时，外层不应再加双引号", False),
+    )
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Apply each configured rule to each paragraph and report every match.
+
+        Rules with an unrecognised ``type`` are ignored. For the enumeration
+        comma rule the hit also carries the matched text with "、" removed as
+        the expected value.
+        """
+        found: list[CheckHit] = []
+        active = ctx.stage.rules or []
+        for para in ctx.paragraphs:
+            for rule in active:
+                kind = rule.get("type")
+                for name, regex, text, suggest in self._SPECS:
+                    if kind != name:
+                        continue
+                    for hit in regex.finditer(para.text):
+                        seen = hit.group(0)
+                        found.append(CheckHit(
+                            paragraph=para, char_start=hit.start(), char_end=hit.end(),
+                            actual={"text": seen},
+                            expected={"text": seen.replace("、", "")} if suggest else None,
+                            message=text,
+                        ))
+                    break
+        return CheckResult(passed=not found, hits=found)
@@ -0,0 +1,36 @@
+"""regex_require / regex_forbid。"""
+
+import re
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckBase, CheckContext, CheckHit, CheckResult
+
+
+@register("regex_require")
+class RegexRequireCheck(CheckBase):
+    """Require every paragraph to contain a match of ``stage.pattern``."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Emit one whole-paragraph hit for each paragraph without a match.
+
+        A missing pattern is compiled as the empty pattern, which matches
+        every paragraph.
+        """
+        source = ctx.stage.pattern
+        matcher = re.compile(source or "")
+        misses = [
+            CheckHit(
+                paragraph=para, char_start=0, char_end=len(para.text),
+                actual={"text": para.text}, expected={"pattern": source},
+                message=f"未匹配模式 {source}",
+            )
+            for para in ctx.paragraphs
+            if matcher.search(para.text) is None
+        ]
+        return CheckResult(passed=not misses, hits=misses)
+
+
+@register("regex_forbid")
+class RegexForbidCheck(CheckBase):
+    """Forbid any match of ``stage.pattern`` in the selected paragraphs."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Report every match of the forbidden pattern.
+
+        When no pattern is configured the check is reported as skipped: an
+        empty pattern matches at every position and would otherwise flood the
+        result with zero-width hits.
+        """
+        if not ctx.stage.pattern:
+            return CheckResult(
+                passed=True, hits=[], skipped=True,
+                skip_reason="regex_forbid 未配置 pattern",
+            )
+        pat = re.compile(ctx.stage.pattern)
+        hits: list[CheckHit] = []
+        for p in ctx.paragraphs:
+            for m in pat.finditer(p.text):
+                hits.append(CheckHit(
+                    paragraph=p, char_start=m.start(), char_end=m.end(),
+                    actual={"text": m.group(0)},
+                    expected={"forbid_pattern": ctx.stage.pattern},
+                    message=f"出现禁止模式 {ctx.stage.pattern}（命中 \"{m.group(0)}\"）",
+                ))
+        return CheckResult(passed=not hits, hits=hits)
@@ -0,0 +1,28 @@
+"""required check：目标实体或选中段落必须有非空文本。"""
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
+    CheckBase, CheckContext, CheckHit, CheckResult,
+)
+
+
+@register("required")
+class RequiredCheck(CheckBase):
+    """Require non-empty text on the target entity or on the selected paragraphs."""
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Run the target branch when ``ctx.target`` is set, else the paragraph branch.
+
+        Target branch: passes when the entity text is non-blank; otherwise one
+        hit anchored on the first selected paragraph (or on nothing).
+        Paragraph branch: one hit per blank paragraph.
+        """
+        entity = ctx.target
+        if entity is None:
+            blanks = [
+                CheckHit(paragraph=para, message="段落为空")
+                for para in ctx.paragraphs
+                if not para.text.strip()
+            ]
+            return CheckResult(passed=not blanks, hits=blanks)
+
+        if entity.text and entity.text.strip():
+            return CheckResult(passed=True, hits=[])
+        anchor = ctx.paragraphs[0] if ctx.paragraphs else None
+        missing = CheckHit(paragraph=anchor, message=f"实体 {entity.name} 缺失或为空")
+        return CheckResult(passed=False, hits=[missing])
@@ -0,0 +1,42 @@
+"""文种白名单（15 种法定公文文种）。"""
+
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import (
+    CheckBase, CheckContext, CheckHit, CheckResult,
+)
+
+
+# Whitelist of accepted document-type names.
+# NOTE(review): the set holds 16 strings while the surrounding text speaks of
+# 15 statutory types - presumably "命令" and "令" name the same type; confirm.
+LEGAL_WENZHONG = {
+    "决议", "决定", "命令", "令", "公报", "公告", "通告",
+    "意见", "通知", "通报", "报告", "请示", "批复",
+    "议案", "函", "纪要",
+}
+
+
+@register("wenzhong_whitelist")
+class WenzhongWhitelistCheck(CheckBase):
+    """Check that the document type is one of the whitelisted statutory types.
+
+    Data sources, in order:
+    1. ``ctx.entities["wenzhong"].text``  (recommended)
+    2. ``ctx.target.text``  (when the target entity is named ``wenzhong``)
+    """
+
+    def run(self, ctx: CheckContext) -> CheckResult:
+        """Pass when no type is available or the type is whitelisted.
+
+        The target is consulted only when there is no ``wenzhong`` entity at
+        all; an entity with blank text is not replaced by the target.
+        """
+        entity = ctx.entities.get("wenzhong") if ctx.entities else None
+        if entity is not None:
+            kind = (entity.text or "").strip()
+        elif ctx.target is not None and ctx.target.name == "wenzhong":
+            kind = (ctx.target.text or "").strip()
+        else:
+            kind = ""
+
+        if not kind or kind in LEGAL_WENZHONG:
+            return CheckResult(passed=True, hits=[])
+        violation = CheckHit(
+            paragraph=None,
+            actual={"wenzhong": kind},
+            expected={"wenzhong_whitelist": sorted(LEGAL_WENZHONG)},
+            message=f"非法定文种 \"{kind}\"，应为 15 种法定公文文种之一",
+        )
+        return CheckResult(passed=False, hits=[violation])
@@ -0,0 +1,81 @@
+"""审查结果数据结构。"""
+
+from __future__ import annotations
+from collections import Counter
+from typing import Literal
+from pydantic import BaseModel, Field
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Finding
+from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.entities import SemanticEntity
+
+
+class CheckedRule(BaseModel):
+    """Per-rule entry of the audit result: the rule's metadata plus its status."""
+
+    rule_id: str
+    name: str
+    severity: str
+    category: str
+    # One of "pass", "fail" or "skipped".
+    status: Literal["pass", "fail", "skipped"]
+    # Explanation for a skipped rule; empty by default.
+    skip_reason: str = ""
+
+
+class StructureItem(BaseModel):
+    """Statistics for one role in the document structure."""
+
+    # Role identifier these statistics belong to.
+    role: str
+    # Label for the role (presumably for display; confirm against the producer).
+    label: str
+    # Presumably the number of paragraphs carrying this role - TODO confirm.
+    count: int
+    expected: bool
+    paragraph_indices: list[int] = Field(default_factory=list)
+    samples: list[str] = Field(default_factory=list)
+    char_total: int = 0
+    dominant_font: str | None = None
+    dominant_size_pt: float | None = None
+    style_uniform: bool = True
+
+
+class OutlineNode(BaseModel):
+    """Outline node (hierarchy tree built from heading_1 to heading_4)."""
+
+    paragraph_index: int
+    level: int
+    text: str
+    # Nested outline nodes; empty by default.
+    children: list["OutlineNode"] = Field(default_factory=list)
+
+
+class AuditSummary(BaseModel):
+    """Aggregate figures of an audit; filled in by ``AuditResult.compute_summary``."""
+
+    # Starts at 100 and is reduced per finding by compute_summary; never below 0.
+    score: int = 100
+    # Number of findings.
+    total_findings: int = 0
+    # Finding counts keyed by severity.
+    by_severity: dict[str, int] = Field(default_factory=dict)
+    # Finding counts keyed by category.
+    by_category: dict[str, int] = Field(default_factory=dict)
+    # Number of checked rules with status "pass" / "fail" / "skipped".
+    passed_count: int = 0
+    failed_count: int = 0
+    skipped_count: int = 0
+
+
+class AuditResult(BaseModel):
+    """Complete outcome of one audit run."""
+
+    audit_id: str
+    document: dict = Field(default_factory=dict)
+    summary: AuditSummary = Field(default_factory=AuditSummary)
+    findings: list[Finding] = Field(default_factory=list)
+    checked_rules: list[CheckedRule] = Field(default_factory=list)
+    structure: list[StructureItem] = Field(default_factory=list)
+    outline: list[OutlineNode] = Field(default_factory=list)
+    entities: dict[str, SemanticEntity | None] = Field(default_factory=dict)
+
+    def compute_summary(self) -> None:
+        """Recompute ``summary`` from ``findings`` and ``checked_rules``.
+
+        The score starts at 100, loses 10 per "error" finding and 3 per
+        "warning" finding, and is floored at 0.
+        """
+        severities = Counter(item.severity for item in self.findings)
+        categories = Counter(item.category for item in self.findings)
+        statuses = Counter(rule.status for rule in self.checked_rules)
+        # Counter lookups of absent keys return 0 without inserting the key.
+        penalty = 10 * severities["error"] + 3 * severities["warning"]
+        self.summary = AuditSummary(
+            score=max(0, 100 - penalty),
+            total_findings=len(self.findings),
+            by_severity=dict(severities),
+            by_category=dict(categories),
+            passed_count=statuses["pass"],
+            failed_count=statuses["fail"],
+            skipped_count=statuses["skipped"],
+        )
@@ -0,0 +1,242 @@
+"""规则评估引擎：跑一条规则的多 stage。"""
+
+from __future__ import annotations
+import asyncio
+import uuid
+from dataclasses import dataclass, field
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Finding, Location
+from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.schema import Rule
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import get_check  # noqa: F401  (确保注册)
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.base import CheckContext, CheckResult, CheckHit
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks.ai_check import AiCheck
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.selector import select_paragraphs
+from fastapi_modules.fastapi_leaudit.govdoc_engine.parser.entities import SemanticEntity
+from fastapi_modules.fastapi_leaudit.govdoc_engine.llm.client import LlmClient
+
+# 触发所有 check 类的 @register
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import required as _r  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import font as _f  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import regex_check as _rc  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import confused_pair as _cp  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import forbid as _fb  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import wenzhong as _wz  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import hierarchy as _h  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import punctuation as _p  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import cross_role as _cr  # noqa: F401
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.checks import ai_check as _ai  # noqa: F401
+
+
+@dataclass
+class RuleOutcome:
+    """Execution result of a single rule (including the skipped state)."""
+
+    rule: Rule  # the rule this outcome belongs to
+    findings: list[Finding] = field(default_factory=list)  # findings produced for the rule; empty by default
+    skipped: bool = False  # True when the rule is reported as skipped
+    skip_reason: str = ""  # explanation for the skip; empty when not skipped
+
+
+class RuleRunner:
+    def __init__(self, llm_client: LlmClient | None = None):
+        self.llm = llm_client
+
+    # -- 上下文装配 -----------------------------------------------------
+    def _resolve_target(
+        self,
+        rule: Rule,
+        doc: Document,
+        entities: dict[str, SemanticEntity | None],
+    ) -> tuple[list, SemanticEntity | None, RuleOutcome | None]:
+        """根据 rule.target 或 rule.applies_to 选段落。
+
+        返回 (paragraphs, target_entity, early_outcome)；
+        若 early_outcome 非 None，调用方应直接返回（命中 on_missing 提前结束）。
+        """
+        if rule.target:
+            target_entity = entities.get(rule.target)
+            if target_entity is None:
+                return [], None, self._handle_missing(rule)
+            paragraphs = [
+                doc.paragraphs[i]
+                for i in target_entity.paragraph_indices
+                if 0 <= i < len(doc.paragraphs)
+            ]
+            return paragraphs, target_entity, None
+        # applies_to 通道（多段扫描）
+        return select_paragraphs(doc, rule.applies_to), None, None
+
+    def _handle_missing(self, rule: Rule) -> RuleOutcome:
+        mode = rule.on_missing
+        if mode == "pass":
+            return RuleOutcome(rule=rule)
+        reason = f"目标实体「{rule.target}」未识别到"
+        if mode == "skip":
+            return RuleOutcome(rule=rule, skipped=True, skip_reason=reason)
+        severity = "error" if mode == "fail" else "warning"
+        finding = Finding(
+            finding_id=f"F-{uuid.uuid4().hex[:8]}",
+            rule_id=rule.rule_id,
+            rule_name=rule.name,
+            severity=severity,
+            category=rule.category,
+            location=Location(paragraph_index=-1),
+            message=reason,
+            suggestion=rule.messages.fail or "",
+            evidence="", confidence=0.9,
+        )
+        return RuleOutcome(rule=rule, findings=[finding])
+
+    @staticmethod
+    def _merge_skip(outcome: RuleOutcome, result: CheckResult) -> None:
+        if not outcome.skip_reason:
+            outcome.skip_reason = result.skip_reason or "stage skipped"
+        outcome.skipped = True
+
+    # -- 同步路径 -------------------------------------------------------
+    def run_rule(
+        self,
+        rule: Rule,
+        doc: Document,
+        entities: dict[str, SemanticEntity | None] | None = None,
+    ) -> RuleOutcome:
+        entities = entities or {}
+        paragraphs, target, early = self._resolve_target(rule, doc, entities)
+        if early is not None:
+            return early
+
+        outcome = RuleOutcome(rule=rule)
+        for stage in rule.stages:
+            if stage.check == "ai":
+                check = AiCheck(llm_client=self.llm)
+            else:
+                check_cls = get_check(stage.check)
+                check = check_cls()
+
+            ctx = CheckContext(
+                document=doc,
+                paragraphs=paragraphs,
+                stage=stage,
+                entities=entities,
+                target=target,
+                rule_id=rule.rule_id,
+            )
+            result: CheckResult = check.run(ctx)
+            if result.skipped:
+                self._merge_skip(outcome, result)
+                continue
+            if not result.passed:
+                outcome.findings = [self._hit_to_finding(rule, h) for h in result.hits]
+                outcome.skipped = False
+                outcome.skip_reason = ""
+                return outcome
+        return outcome
+
+    def run_all(
+        self,
+        rules: list[Rule],
+        doc: Document,
+        entities: dict[str, SemanticEntity | None] | None = None,
+    ) -> list[Finding]:
+        flat, _ = self.evaluate(rules, doc, entities)
+        return flat
+
+    def evaluate(
+        self,
+        rules: list[Rule],
+        doc: Document,
+        entities: dict[str, SemanticEntity | None] | None = None,
+    ) -> tuple[list[Finding], list[RuleOutcome]]:
+        flat: list[Finding] = []
+        outcomes: list[RuleOutcome] = []
+        for r in rules:
+            o = self.run_rule(r, doc, entities)
+            flat.extend(o.findings)
+            outcomes.append(o)
+        return flat, outcomes
+
+    # -- 异步路径 -------------------------------------------------------
+    async def run_rule_async(
+        self,
+        rule: Rule,
+        doc: Document,
+        entities: dict[str, SemanticEntity | None] | None = None,
+    ) -> RuleOutcome:
+        entities = entities or {}
+        paragraphs, target, early = self._resolve_target(rule, doc, entities)
+        if early is not None:
+            return early
+
+        outcome = RuleOutcome(rule=rule)
+        for stage in rule.stages:
+            ctx = CheckContext(
+                document=doc,
+                paragraphs=paragraphs,
+                stage=stage,
+                entities=entities,
+                target=target,
+                rule_id=rule.rule_id,
+            )
+            if stage.check == "ai":
+                result = await AiCheck(llm_client=self.llm).run_async(ctx)
+            else:
+                check_cls = get_check(stage.check)
+                result = check_cls().run(ctx)
+            if result.skipped:
+                self._merge_skip(outcome, result)
+                continue
+            if not result.passed:
+                outcome.findings = [self._hit_to_finding(rule, h) for h in result.hits]
+                outcome.skipped = False
+                outcome.skip_reason = ""
+                return outcome
+        return outcome
+
+    async def run_all_async(
+        self,
+        rules: list[Rule],
+        doc: Document,
+        entities: dict[str, SemanticEntity | None] | None = None,
+    ) -> list[Finding]:
+        flat, _ = await self.evaluate_async(rules, doc, entities)
+        return flat
+
+    async def evaluate_async(
+        self,
+        rules: list[Rule],
+        doc: Document,
+        entities: dict[str, SemanticEntity | None] | None = None,
+    ) -> tuple[list[Finding], list[RuleOutcome]]:
+        outcomes_list = await asyncio.gather(
+            *(self.run_rule_async(r, doc, entities) for r in rules)
+        )
+        flat: list[Finding] = []
+        outcomes: list[RuleOutcome] = []
+        for o in outcomes_list:
+            flat.extend(o.findings)
+            outcomes.append(o)
+        return flat, outcomes
+
+    def _hit_to_finding(self, rule: Rule, hit: CheckHit) -> Finding:
+        para = hit.paragraph
+        loc = Location(
+            paragraph_index=para.index if para else -1,
+            role=para.role if para else None,
+            char_start=hit.char_start,
+            char_end=hit.char_end,
+            context=para.text if para else "",
+        )
+        msg = hit.message or rule.messages.fail
+        return Finding(
+            finding_id=f"F-{uuid.uuid4().hex[:8]}",
+            rule_id=rule.rule_id,
+            rule_name=rule.name,
+            severity=rule.severity,
+            category=rule.category,
+            location=loc,
+            actual=hit.actual or {},
+            expected=hit.expected or {},
+            message=msg,
+            suggestion=rule.messages.fail or "",
+            evidence=rule.messages.fail or "",
+            confidence=hit.confidence,
+        )
@@ -0,0 +1,27 @@
+"""applies_to → 段落集合。"""
+
+from __future__ import annotations
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Paragraph
+from fastapi_modules.fastapi_leaudit.govdoc_engine.dsl.schema import AppliesTo
+
+
+def select_paragraphs(doc: Document, applies_to: AppliesTo) -> list[Paragraph]:
+    if applies_to.paragraph_index is not None:
+        idx = applies_to.paragraph_index
+        if 0 <= idx < len(doc.paragraphs):
+            return [doc.paragraphs[idx]]
+        return []
+
+    if applies_to.role == "any":
+        return list(doc.paragraphs)
+
+    targets: set[str] = set()
+    if applies_to.role:
+        targets.add(applies_to.role)
+    if applies_to.roles:
+        targets.update(applies_to.roles)
+
+    if not targets:
+        return list(doc.paragraphs)
+
+    return [p for p in doc.paragraphs if p.role in targets]
@@ -0,0 +1,93 @@
+"""从 Document 派生出 structure（按 role 分类统计）+ outline（heading 层级树）。"""
+
+from __future__ import annotations
+from collections import Counter
+from fastapi_modules.fastapi_leaudit.govdoc_engine.models import Document, Paragraph
+from fastapi_modules.fastapi_leaudit.govdoc_engine.engine.result import OutlineNode, StructureItem
+
+
+# One entry per paragraph role shown in the structure view; build_structure
+# walks this list in order, so the order here is the display order.
+_ROLE_LABELS: list[tuple[str, str, bool]] = [
+    # (role, Chinese display label, whether a regular official document must contain it)
+    ("title", "标题", True),
+    ("doc_number", "发文字号", True),
+    ("recipient", "主送机关", True),
+    ("heading_1", "一级标题", False),
+    ("heading_2", "二级标题", False),
+    ("heading_3", "三级标题", False),
+    ("heading_4", "四级标题", False),
+    ("body", "正文", True),
+    ("attachment_marker", "附件标记", False),
+    ("attachment_title", "附件标题", False),
+    ("signature", "署名", True),
+    ("date", "成文日期", True),
+    ("no_text_marker", "(此页无正文)", False),
+    ("unknown", "未识别", False),
+]
+
+# Heading role -> outline depth; build_outline treats a paragraph as a
+# heading only when its role is a key of this mapping.
+_HEADING_LEVELS = {
+    "heading_1": 1,
+    "heading_2": 2,
+    "heading_3": 3,
+    "heading_4": 4,
+}
+
+
+def _dominant_style(paragraphs: list[Paragraph]) -> tuple[str | None, float | None, bool]:
+    """返回 (字体众数, 字号众数, 是否所有段落样式一致)。"""
+    if not paragraphs:
+        return None, None, True
+    fonts = Counter(p.style.font_eastasia for p in paragraphs if p.style.font_eastasia)
+    sizes = Counter(p.style.font_size_pt for p in paragraphs if p.style.font_size_pt is not None)
+    dom_font = fonts.most_common(1)[0][0] if fonts else None
+    dom_size = sizes.most_common(1)[0][0] if sizes else None
+    uniform = len(fonts) <= 1 and len(sizes) <= 1
+    return dom_font, dom_size, uniform
+
+
+def build_structure(doc: Document) -> list[StructureItem]:
+    items: list[StructureItem] = []
+    for role, label, expected in _ROLE_LABELS:
+        paragraphs = [p for p in doc.paragraphs if p.role == role]
+        if not paragraphs and not expected:
+            # 非必备 role 没出现就不展示，保持面板紧凑
+            continue
+        samples = [p.text[:60] for p in paragraphs[:3]]
+        font, size, uniform = _dominant_style(paragraphs)
+        items.append(StructureItem(
+            role=role,
+            label=label,
+            count=len(paragraphs),
+            expected=expected,
+            paragraph_indices=[p.index for p in paragraphs],
+            samples=samples,
+            char_total=sum(len(p.text) for p in paragraphs),
+            dominant_font=font,
+            dominant_size_pt=size,
+            style_uniform=uniform,
+        ))
+    return items
+
+
+def build_outline(doc: Document) -> list[OutlineNode]:
+    """按段落顺序 + heading 层级生成树。"""
+    headings = [
+        (p.index, _HEADING_LEVELS[p.role], p.text)
+        for p in doc.paragraphs
+        if p.role in _HEADING_LEVELS
+    ]
+    if not headings:
+        return []
+
+    roots: list[OutlineNode] = []
+    stack: list[OutlineNode] = []
+    for idx, level, text in headings:
+        node = OutlineNode(paragraph_index=idx, level=level, text=text)
+        # 弹出比当前 level 更深的祖先
+        while stack and stack[-1].level >= level:
+            stack.pop()
+        if stack:
+            stack[-1].children.append(node)
+        else:
+            roots.append(node)
+        stack.append(node)
+    return roots